scout_apm 1.6.8 → 2.0.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +8 -1
  3. data/CHANGELOG.markdown +7 -57
  4. data/ext/allocations/allocations.c +84 -0
  5. data/ext/allocations/extconf.rb +3 -0
  6. data/lib/scout_apm/agent/reporting.rb +9 -32
  7. data/lib/scout_apm/agent.rb +45 -31
  8. data/lib/scout_apm/app_server_load.rb +1 -2
  9. data/lib/scout_apm/attribute_arranger.rb +0 -4
  10. data/lib/scout_apm/background_worker.rb +6 -9
  11. data/lib/scout_apm/bucket_name_splitter.rb +3 -3
  12. data/lib/scout_apm/call_set.rb +1 -0
  13. data/lib/scout_apm/config.rb +110 -66
  14. data/lib/scout_apm/environment.rb +16 -10
  15. data/lib/scout_apm/framework_integrations/rails_2.rb +12 -14
  16. data/lib/scout_apm/framework_integrations/rails_3_or_4.rb +5 -17
  17. data/lib/scout_apm/framework_integrations/ruby.rb +0 -4
  18. data/lib/scout_apm/framework_integrations/sinatra.rb +0 -4
  19. data/lib/scout_apm/histogram.rb +0 -20
  20. data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +1 -4
  21. data/lib/scout_apm/instruments/active_record.rb +149 -8
  22. data/lib/scout_apm/instruments/mongoid.rb +5 -78
  23. data/lib/scout_apm/instruments/process/process_cpu.rb +0 -12
  24. data/lib/scout_apm/instruments/process/process_memory.rb +14 -43
  25. data/lib/scout_apm/layaway.rb +34 -134
  26. data/lib/scout_apm/layaway_file.rb +50 -27
  27. data/lib/scout_apm/layer.rb +45 -1
  28. data/lib/scout_apm/layer_converters/allocation_metric_converter.rb +17 -0
  29. data/lib/scout_apm/layer_converters/converter_base.rb +4 -6
  30. data/lib/scout_apm/layer_converters/job_converter.rb +1 -0
  31. data/lib/scout_apm/layer_converters/metric_converter.rb +2 -1
  32. data/lib/scout_apm/layer_converters/slow_job_converter.rb +42 -21
  33. data/lib/scout_apm/layer_converters/slow_request_converter.rb +58 -37
  34. data/lib/scout_apm/metric_meta.rb +1 -5
  35. data/lib/scout_apm/metric_set.rb +6 -15
  36. data/lib/scout_apm/reporter.rb +4 -6
  37. data/lib/scout_apm/serializers/metrics_to_json_serializer.rb +5 -1
  38. data/lib/scout_apm/serializers/payload_serializer_to_json.rb +1 -3
  39. data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +5 -3
  40. data/lib/scout_apm/slow_job_policy.rb +19 -89
  41. data/lib/scout_apm/slow_job_record.rb +12 -20
  42. data/lib/scout_apm/slow_request_policy.rb +12 -80
  43. data/lib/scout_apm/slow_transaction.rb +16 -20
  44. data/lib/scout_apm/stackprof_tree_collapser.rb +103 -0
  45. data/lib/scout_apm/store.rb +16 -78
  46. data/lib/scout_apm/tracked_request.rb +53 -36
  47. data/lib/scout_apm/utils/active_record_metric_name.rb +2 -0
  48. data/lib/scout_apm/utils/fake_stack_prof.rb +40 -0
  49. data/lib/scout_apm/utils/klass_helper.rb +26 -0
  50. data/lib/scout_apm/utils/sql_sanitizer.rb +1 -1
  51. data/lib/scout_apm/utils/sql_sanitizer_regex.rb +2 -2
  52. data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +2 -2
  53. data/lib/scout_apm/version.rb +1 -1
  54. data/lib/scout_apm.rb +13 -7
  55. data/scout_apm.gemspec +3 -1
  56. data/test/test_helper.rb +3 -4
  57. data/test/unit/layaway_test.rb +8 -5
  58. data/test/unit/serializers/payload_serializer_test.rb +2 -2
  59. data/test/unit/slow_item_set_test.rb +1 -2
  60. data/test/unit/sql_sanitizer_test.rb +0 -6
  61. metadata +28 -20
  62. data/LICENSE.md +0 -27
  63. data/lib/scout_apm/instruments/grape.rb +0 -69
  64. data/lib/scout_apm/instruments/percentile_sampler.rb +0 -37
  65. data/lib/scout_apm/request_histograms.rb +0 -46
  66. data/lib/scout_apm/scored_item_set.rb +0 -79
  67. data/test/unit/metric_set_test.rb +0 -101
  68. data/test/unit/scored_item_set_test.rb +0 -65
  69. data/test/unit/slow_request_policy_test.rb +0 -42
@@ -4,29 +4,23 @@ module ScoutApm
4
4
  def initialize(*)
5
5
  @backtraces = []
6
6
  super
7
-
8
- # After call to super, so @request is populated
9
- @points = if request.job?
10
- ScoutApm::Agent.instance.slow_job_policy.score(request)
11
- else
12
- -1
13
- end
14
7
  end
15
8
 
16
- def name
17
- request.unique_name
18
- end
9
+ def call
10
+ return unless request.job?
19
11
 
20
- def score
21
- @points
22
- end
12
+ job_name = [queue_layer.name, job_layer.name]
23
13
 
24
- def call
25
- return nil unless request.job?
26
- return nil unless queue_layer
27
- return nil unless job_layer
14
+ slow_enough = ScoutApm::Agent.instance.slow_job_policy.slow?(job_name, root_layer.total_call_time)
15
+ return unless slow_enough
16
+
17
+ # record the change in memory usage
18
+ mem_delta = ScoutApm::Instruments::Process::ProcessMemory.rss_to_mb(request.capture_mem_delta!)
28
19
 
29
- ScoutApm::Agent.instance.slow_job_policy.stored!(request)
20
+ timing_metrics, allocation_metrics = create_metrics
21
+ unless ScoutApm::Instruments::Allocations::ENABLED
22
+ allocation_metrics = {}
23
+ end
30
24
 
31
25
  SlowJobRecord.new(
32
26
  queue_layer.name,
@@ -35,8 +29,11 @@ module ScoutApm
35
29
  job_layer.total_call_time,
36
30
  job_layer.total_exclusive_time,
37
31
  request.context,
38
- create_metrics,
39
- score)
32
+ timing_metrics,
33
+ allocation_metrics,
34
+ mem_delta,
35
+ job_layer.total_allocations
36
+ )
40
37
  end
41
38
 
42
39
  def queue_layer
@@ -47,8 +44,15 @@ module ScoutApm
47
44
  @job_layer ||= find_first_layer_of_type("Job")
48
45
  end
49
46
 
47
+ def find_first_layer_of_type(layer_type)
48
+ walker.walk do |layer|
49
+ return layer if layer.type == layer_type
50
+ end
51
+ end
52
+
50
53
  def create_metrics
51
54
  metric_hash = Hash.new
55
+ allocation_metric_hash = Hash.new
52
56
 
53
57
  # Keep a list of subscopes, but only ever use the front one. The rest
54
58
  # get pushed/popped in cases when we have many levels of subscopable
@@ -68,6 +72,14 @@ module ScoutApm
68
72
  end
69
73
 
70
74
  walker.walk do |layer|
75
+ # Sometimes we start capturing a layer without knowing if we really
76
+ # want to make an entry for it. See ActiveRecord instrumentation for
77
+ # an example. We start capturing before we know if a query is cached
78
+ # or not, and want to skip any cached queries.
79
+ next if layer.annotations[:ignorable]
80
+
81
+ # The queue_layer is useful to capture for other reasons, but doesn't
82
+ # create a MetricMeta/Stat of its own
71
83
  next if layer == queue_layer
72
84
 
73
85
  meta_options = if subscope_layers.first && layer != subscope_layers.first # Don't scope under ourself.
@@ -82,6 +94,8 @@ module ScoutApm
82
94
  # Specific Metric
83
95
  meta_options.merge!(:desc => layer.desc.to_s) if layer.desc
84
96
  meta = MetricMeta.new(layer.legacy_metric_name, meta_options)
97
+ meta.extra.merge!(layer.annotations)
98
+
85
99
  if layer.backtrace
86
100
  bt = ScoutApm::Utils::BacktraceParser.new(layer.backtrace).call
87
101
  if bt.any? # we could walk thru the call stack and not find in-app code
@@ -95,19 +109,26 @@ module ScoutApm
95
109
  end
96
110
 
97
111
  metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
112
+ allocation_metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
98
113
  stat = metric_hash[meta]
99
114
  stat.update!(layer.total_call_time, layer.total_exclusive_time)
115
+ stat = allocation_metric_hash[meta]
116
+ stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
100
117
 
101
118
  # Merged Metric (no specifics, just sum up by type)
102
119
  meta = MetricMeta.new("#{layer.type}/all")
103
120
  metric_hash[meta] ||= MetricStats.new(false)
121
+ allocation_metric_hash[meta] ||= MetricStats.new(false)
104
122
  stat = metric_hash[meta]
105
123
  stat.update!(layer.total_call_time, layer.total_exclusive_time)
124
+ stat = allocation_metric_hash[meta]
125
+ stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
106
126
  end
107
127
 
108
128
  metric_hash = attach_backtraces(metric_hash)
129
+ allocation_metric_hash = attach_backtraces(allocation_metric_hash)
109
130
 
110
- metric_hash
131
+ [metric_hash,allocation_metric_hash]
111
132
  end
112
133
 
113
134
  def attach_backtraces(metric_hash)
@@ -4,50 +4,49 @@ module ScoutApm
4
4
  def initialize(*)
5
5
  @backtraces = [] # An Array of MetricMetas that have a backtrace
6
6
  super
7
-
8
- # After call to super, so @request is populated
9
- @points = if request.web?
10
- ScoutApm::Agent.instance.slow_request_policy.score(request)
11
- else
12
- -1
13
- end
14
- end
15
-
16
- def name
17
- request.unique_name
18
7
  end
19
8
 
20
- def score
21
- @points
22
- end
23
-
24
- # Unconditionally attempts to convert this into a SlowTransaction object.
25
- # Can return nil if the request didn't have any scope_layer.
26
9
  def call
27
10
  scope = scope_layer
28
- return nil unless scope
11
+ return [nil, {}] unless scope
12
+
13
+ policy = ScoutApm::Agent.instance.slow_request_policy.capture_type(root_layer.total_call_time)
14
+ if policy == ScoutApm::SlowRequestPolicy::CAPTURE_NONE
15
+ return [nil, {}]
16
+ end
29
17
 
30
- ScoutApm::Agent.instance.slow_request_policy.stored!(request)
18
+ # record the change in memory usage
19
+ mem_delta = ScoutApm::Instruments::Process::ProcessMemory.rss_to_mb(@request.capture_mem_delta!)
20
+
21
+ # increment the slow transaction count if this is a slow transaction.
22
+ meta = MetricMeta.new("SlowTransaction/#{scope.legacy_metric_name}")
23
+ stat = MetricStats.new
24
+ stat.update!(1)
31
25
 
32
26
  uri = request.annotations[:uri] || ""
33
27
 
34
- (ScoutApm::Agent.instance.config.value("ignore_traces") || []).each do |pattern|
35
- if /#{pattern}/ =~ uri
36
- ScoutApm::Agent.instance.logger.debug("Skipped recording a trace for #{uri} due to `ignore_traces` pattern: #{pattern}")
37
- return nil
38
- end
28
+ timing_metrics, allocation_metrics = create_metrics
29
+ unless ScoutApm::Instruments::Allocations::ENABLED
30
+ allocation_metrics = {}
39
31
  end
40
32
 
41
- metrics = create_metrics
42
-
43
- SlowTransaction.new(uri,
44
- scope.legacy_metric_name,
45
- root_layer.total_call_time,
46
- metrics,
47
- request.context,
48
- root_layer.stop_time,
49
- [], # stackprof
50
- @points)
33
+ # Disable stackprof output for now
34
+ stackprof = [] # request.stackprof
35
+
36
+ [
37
+ SlowTransaction.new(uri,
38
+ scope.legacy_metric_name,
39
+ root_layer.total_call_time,
40
+ timing_metrics,
41
+ allocation_metrics,
42
+ request.context,
43
+ root_layer.stop_time,
44
+ stackprof,
45
+ mem_delta,
46
+ root_layer.total_allocations
47
+ ),
48
+ { meta => stat }
49
+ ]
51
50
  end
52
51
 
53
52
  # Iterates over the TrackedRequest's MetricMetas that have backtraces and attaches each to correct MetricMeta in the Metric Hash.
@@ -58,12 +57,14 @@ module ScoutApm
58
57
  metric_hash
59
58
  end
60
59
 
61
- # Full metrics from this request. These get aggregated in Store for the
62
- # overview metrics, or stored permanently in a SlowTransaction
60
+ # Full metrics from this request. These get stored permanently in a SlowTransaction.
63
61
  # Some merging of metrics will happen here, so if a request calls the same
64
62
  # ActiveRecord or View repeatedly, it'll get merged.
63
+ #
64
+ # This returns a 2-element Array of Metric Hashes (the first element is timing metrics, the second element is allocation metrics)
65
65
  def create_metrics
66
66
  metric_hash = Hash.new
67
+ allocation_metric_hash = Hash.new
67
68
 
68
69
  # Keep a list of subscopes, but only ever use the front one. The rest
69
70
  # get pushed/popped in cases when we have many levels of subscopable
@@ -83,6 +84,14 @@ module ScoutApm
83
84
  end
84
85
 
85
86
  walker.walk do |layer|
87
+ # Sometimes we start capturing a layer without knowing if we really
88
+ # want to make an entry for it. See ActiveRecord instrumentation for
89
+ # an example. We start capturing before we know if a query is cached
90
+ # or not, and want to skip any cached queries.
91
+ if layer.annotations[:ignorable]
92
+ next
93
+ end
94
+
86
95
  meta_options = if subscope_layers.first && layer != subscope_layers.first # Don't scope under ourself.
87
96
  subscope_name = subscope_layers.first.legacy_metric_name
88
97
  {:scope => subscope_name}
@@ -95,6 +104,7 @@ module ScoutApm
95
104
  # Specific Metric
96
105
  meta_options.merge!(:desc => layer.desc.to_s) if layer.desc
97
106
  meta = MetricMeta.new(layer.legacy_metric_name, meta_options)
107
+ meta.extra.merge!(layer.annotations)
98
108
  if layer.backtrace
99
109
  bt = ScoutApm::Utils::BacktraceParser.new(layer.backtrace).call
100
110
  if bt.any? # we could walk thru the call stack and not find in-app code
@@ -109,19 +119,30 @@ module ScoutApm
109
119
  end
110
120
  end
111
121
  metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
122
+ allocation_metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
123
+ # timing
112
124
  stat = metric_hash[meta]
113
125
  stat.update!(layer.total_call_time, layer.total_exclusive_time)
126
+ # allocations
127
+ stat = allocation_metric_hash[meta]
128
+ stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
114
129
 
115
130
  # Merged Metric (no specifics, just sum up by type)
116
131
  meta = MetricMeta.new("#{layer.type}/all")
117
132
  metric_hash[meta] ||= MetricStats.new(false)
133
+ allocation_metric_hash[meta] ||= MetricStats.new(false)
134
+ # timing
118
135
  stat = metric_hash[meta]
119
136
  stat.update!(layer.total_call_time, layer.total_exclusive_time)
137
+ # allocations
138
+ stat = allocation_metric_hash[meta]
139
+ stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
120
140
  end
121
141
 
122
142
  metric_hash = attach_backtraces(metric_hash)
143
+ allocation_metric_hash = attach_backtraces(allocation_metric_hash)
123
144
 
124
- metric_hash
145
+ [metric_hash,allocation_metric_hash]
125
146
  end
126
147
  end
127
148
  end
@@ -17,11 +17,7 @@ class MetricMeta
17
17
 
18
18
  # Unsure if type or bucket is a better name.
19
19
  def type
20
- bucket_type
21
- end
22
-
23
- def name
24
- bucket_name
20
+ bucket
25
21
  end
26
22
 
27
23
  # A key metric is the "core" of a request - either the Rails controller reached, or the background Job executed
@@ -2,7 +2,7 @@ module ScoutApm
2
2
  class MetricSet
3
3
  # We can't aggregate CPU, Memory, Capacity, or Controller, so pass through these metrics directly
4
4
  # TODO: Figure out a way to not have this duplicate what's in Samplers, and also on server's ingest
5
- PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction", "Percentile", "Job"]
5
+ PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction"]
6
6
 
7
7
  attr_reader :metrics
8
8
 
@@ -23,15 +23,11 @@ module ScoutApm
23
23
  @metrics[meta].combine!(stat)
24
24
 
25
25
  elsif meta.type == "Errors" # Sadly special cased, we want both raw and aggregate values
26
- # When combining MetricSets between different
27
- @metrics[meta] ||= MetricStats.new
28
- @metrics[meta].combine!(stat)
29
-
30
- if !@combine_in_progress
31
- agg_meta = MetricMeta.new("Errors/Request", :scope => meta.scope)
32
- @metrics[agg_meta] ||= MetricStats.new
33
- @metrics[agg_meta].combine!(stat)
34
- end
26
+ @metrics[meta] ||= MetricStats.new
27
+ @metrics[meta].combine!(stat)
28
+ agg_meta = MetricMeta.new("Errors/Request", :scope => meta.scope)
29
+ @metrics[agg_meta] ||= MetricStats.new
30
+ @metrics[agg_meta].combine!(stat)
35
31
 
36
32
  else # Combine down to a single /all key
37
33
  agg_meta = MetricMeta.new("#{meta.type}/all", :scope => meta.scope)
@@ -40,13 +36,8 @@ module ScoutApm
40
36
  end
41
37
  end
42
38
 
43
- # Sets a combine_in_progress flag to prevent double-counting Error metrics.
44
- # Without it, the Errors/Request number would be increasingly off as
45
- # metric_sets get merged in.
46
39
  def combine!(other)
47
- @combine_in_progress = true
48
40
  absorb_all(other.metrics)
49
- @combine_in_progress = false
50
41
  self
51
42
  end
52
43
  end
@@ -17,10 +17,8 @@ module ScoutApm
17
17
 
18
18
  # TODO: Parse & return a real response object, not the HTTP Response object
19
19
  def report(payload, headers = {})
20
- # Some posts (typically ones under development) bypass the ingestion pipeline and go directly to the webserver. They use direct_host instead of host
21
- hosts = [:deploy_hook, :instant_trace].include?(type) ? config.value('direct_host') : config.value('host')
20
+ Array(config.value('host')).each do |host|
22
21
 
23
- Array(hosts).each do |host|
24
22
  full_uri = uri(host)
25
23
  response = post(full_uri, payload, headers)
26
24
  unless response && response.is_a?(Net::HTTPSuccess)
@@ -36,7 +34,7 @@ module ScoutApm
36
34
  when :app_server_load
37
35
  URI.parse("#{host}/apps/app_server_load.scout?key=#{config.value('key')}&name=#{CGI.escape(Environment.instance.application_name)}")
38
36
  when :deploy_hook
39
- URI.parse("#{host}/apps/deploy.scout?key=#{config.value('key')}&name=#{CGI.escape(config.value('name'))}")
37
+ URI.parse("https://apm.scoutapp.com/apps/deploy.scout?key=#{config.value('key')}&name=#{CGI.escape(config.value('name'))}")
40
38
  end.tap{|u| logger.debug("Posting to #{u.to_s}")}
41
39
  end
42
40
 
@@ -58,7 +56,7 @@ module ScoutApm
58
56
  private
59
57
 
60
58
  def post(uri, body, headers = Hash.new)
61
- response = nil
59
+ response = :connection_failed
62
60
  request(uri) do |connection|
63
61
  post = Net::HTTP::Post.new( uri.path +
64
62
  (uri.query ? ('?' + uri.query) : ''),
@@ -84,7 +82,7 @@ module ScoutApm
84
82
  logger.debug "/#{type} FAILED: #{response.inspect}"
85
83
  end
86
84
  rescue Exception
87
- logger.debug "Exception sending request to server: \n#{$!.message}\n\t#{$!.backtrace.join("\n\t")}"
85
+ logger.info "Exception sending request to server: \n#{$!.message}\n\t#{$!.backtrace.join("\n\t")}"
88
86
  ensure
89
87
  response
90
88
  end
@@ -9,7 +9,11 @@ module ScoutApm
9
9
  end
10
10
 
11
11
  def as_json
12
- metrics.map{|meta, stat| metric_as_json(meta, stat) }
12
+ if metrics
13
+ metrics.map{|meta, stat| metric_as_json(meta, stat) }
14
+ else
15
+ nil
16
+ end
13
17
  end
14
18
 
15
19
  # Children metrics is a hash of meta=>stat pairs. Leave empty for no children.
@@ -22,7 +22,7 @@ module ScoutApm
22
22
 
23
23
  def rearrange_the_slow_transactions(slow_transactions)
24
24
  slow_transactions.to_a.map do |slow_t|
25
- slow_t.as_json.merge(:metrics => rearrange_the_metrics(slow_t.metrics))
25
+ slow_t.as_json.merge(:metrics => rearrange_the_metrics(slow_t.metrics), :allocation_metrics => rearrange_the_metrics(slow_t.allocation_metrics))
26
26
  end
27
27
  end
28
28
 
@@ -59,8 +59,6 @@ module ScoutApm
59
59
  "[#{all_the_elements.join(",")}]"
60
60
  when Numeric
61
61
  formatee
62
- when Time
63
- %Q["#{formatee.iso8601}"]
64
62
  when nil
65
63
  "null"
66
64
  else # strings and everything
@@ -17,11 +17,13 @@ module ScoutApm
17
17
  "time" => job.time,
18
18
  "total_time" => job.total_time,
19
19
  "exclusive_time" => job.exclusive_time,
20
-
20
+ "mem_delta" => job.mem_delta,
21
+ "allocations" => job.allocations,
22
+ "seconds_since_startup" => job.seconds_since_startup,
23
+ "hostname" => job.hostname,
21
24
  "metrics" => MetricsToJsonSerializer.new(job.metrics).as_json, # New style of metrics
25
+ "allocation_metrics" => MetricsToJsonSerializer.new(job.allocation_metrics).as_json, # New style of metrics
22
26
  "context" => job.context.to_hash,
23
-
24
- "score" => job.score,
25
27
  }
26
28
  end
27
29
  end
@@ -1,99 +1,29 @@
1
- # Long running class that determines if, and in how much detail a potentially
2
- # slow job should be recorded in
3
-
1
+ # Create one of these at startup time, and ask it if a certain worker's
2
+ # processing time is slow enough for us to collect a slow trace.
3
+ #
4
+ # Keeps track of a histogram of times for each worker class (separately), and
5
+ # uses a percentile of normal to mark individual runs as "slow".
6
+ #
7
+ # This assumes that every worker run is passed to `slow?` once, so that
8
+ # its run time can be stored
4
9
  module ScoutApm
5
10
  class SlowJobPolicy
6
- CAPTURE_TYPES = [
7
- CAPTURE_DETAIL = "capture_detail",
8
- CAPTURE_NONE = "capture_none",
9
- ]
10
-
11
- # Adjust speed points. See the function
12
- POINT_MULTIPLIER_SPEED = 0.25
13
-
14
- # For each minute we haven't seen an endpoint
15
- POINT_MULTIPLIER_AGE = 0.25
16
-
17
- # Outliers are worth up to "1000ms" of weight
18
- POINT_MULTIPLIER_PERCENTILE = 1.0
19
-
20
- # A hash of Job Names to the last time we stored a slow trace for it.
21
- #
22
- # Defaults to a start time that is pretty close to application boot time.
23
- # So the "age" of an endpoint we've never seen is the time the application
24
- # has been running.
25
- attr_reader :last_seen
11
+ DEFAULT_HISTOGRAM_SIZE = 50
26
12
 
13
+ QUANTILE = 95
27
14
 
28
- def initialize
29
- zero_time = Time.now
30
- @last_seen = Hash.new { |h, k| h[k] = zero_time }
15
+ def initialize(histogram_size = DEFAULT_HISTOGRAM_SIZE)
16
+ @histograms = Hash.new { |h, k| h[k] = NumericHistogram.new(histogram_size) }
31
17
  end
32
18
 
33
- def stored!(request)
34
- last_seen[unique_name_for(request)] = Time.now
35
- end
36
-
37
- # Determine if this job trace should be fully analyzed by scoring it
38
- # across several metrics, and then determining if that's good enough to
39
- # make it into this minute's payload.
40
- #
41
- # Due to the combining nature of the agent & layaway file, there's no
42
- # guarantee that a high scoring local champion will still be a winner when
43
- # they go up to "regionals" and are compared against the other processes
44
- # running on a node.
45
- def score(request)
46
- unique_name = request.unique_name
47
- if unique_name == :unknown
48
- return -1 # A negative score, should never be good enough to store.
49
- end
50
-
51
- total_time = request.root_layer.total_call_time
52
-
53
- # How long has it been since we've seen this?
54
- age = Time.now - last_seen[unique_name]
55
-
56
- # What approximate percentile was this request?
57
- percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
58
-
59
- return speed_points(total_time) + percentile_points(percentile) + age_points(age)
60
- end
61
-
62
- private
63
-
64
- def unique_name_for(request)
65
- scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
66
- if scope_layer
67
- scope_layer.legacy_metric_name
68
- else
69
- :unknown
70
- end
71
- end
72
-
73
- # Time in seconds
74
- # Logarithm keeps huge times from swamping the other metrics.
75
- # 1+ is necessary to keep the log function in positive territory.
76
- def speed_points(time)
77
- Math.log(1 + time) * POINT_MULTIPLIER_SPEED
78
- end
79
-
80
- def percentile_points(percentile)
81
- if percentile < 40
82
- 0.4 # Don't put much emphasis on capturing low percentiles.
83
- elsif percentile < 60
84
- 1.4 # Highest here to get mean traces
85
- elsif percentile < 90
86
- 0.7 # Between 60 & 90% is fine.
87
- elsif percentile >= 90
88
- 1.4 # Highest here to get 90+%ile traces
89
- else
90
- # impossible.
91
- percentile
92
- end
93
- end
19
+ # worker: just the worker class name. "PasswordResetJob" or similar
20
+ # total_time: runtime of the job in seconds
21
+ # returns true if this request should be stored in higher trace detail, false otherwise
22
+ def slow?(worker, total_time)
23
+ @histograms[worker].add(total_time)
24
+ return false if @histograms[worker].total == 1 # First call is never slow
94
25
 
95
- def age_points(age)
96
- age / 60.0 * POINT_MULTIPLIER_AGE
26
+ total_time >= @histograms[worker].quantile(QUANTILE)
97
27
  end
98
28
  end
99
29
  end
@@ -14,10 +14,13 @@ module ScoutApm
14
14
  alias_method :total_call_time, :total_time
15
15
 
16
16
  attr_reader :metrics
17
+ attr_reader :allocation_metrics
18
+ attr_reader :mem_delta
19
+ attr_reader :allocations
20
+ attr_reader :hostname
21
+ attr_reader :seconds_since_startup
17
22
 
18
- attr_reader :score
19
-
20
- def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, score)
23
+ def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations)
21
24
  @queue_name = queue_name
22
25
  @job_name = job_name
23
26
  @time = time
@@ -25,28 +28,17 @@ module ScoutApm
25
28
  @exclusive_time = exclusive_time
26
29
  @context = context
27
30
  @metrics = metrics
28
- @score = score
31
+ @allocation_metrics = allocation_metrics
32
+ @mem_delta = mem_delta
33
+ @allocations = allocations
34
+ @seconds_since_startup = (Time.now - ScoutApm::Agent.instance.process_start_time)
35
+ @hostname = ScoutApm::Environment.instance.hostname
36
+ ScoutApm::Agent.instance.logger.debug { "Slow Job [#{metric_name}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
29
37
  end
30
38
 
31
39
  def metric_name
32
40
  "Job/#{queue_name}/#{job_name}"
33
41
  end
34
42
 
35
- ########################
36
- # Scorable interface
37
- #
38
- # Needed so we can merge ScoredItemSet instances
39
- def call
40
- self
41
- end
42
-
43
- def name
44
- metric_name
45
- end
46
-
47
- def score
48
- @score
49
- end
50
-
51
43
  end
52
44
  end