scout_apm 1.6.8 → 2.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +4 -4
  2. data/.gitignore +8 -1
  3. data/CHANGELOG.markdown +7 -57
  4. data/ext/allocations/allocations.c +84 -0
  5. data/ext/allocations/extconf.rb +3 -0
  6. data/lib/scout_apm/agent/reporting.rb +9 -32
  7. data/lib/scout_apm/agent.rb +45 -31
  8. data/lib/scout_apm/app_server_load.rb +1 -2
  9. data/lib/scout_apm/attribute_arranger.rb +0 -4
  10. data/lib/scout_apm/background_worker.rb +6 -9
  11. data/lib/scout_apm/bucket_name_splitter.rb +3 -3
  12. data/lib/scout_apm/call_set.rb +1 -0
  13. data/lib/scout_apm/config.rb +110 -66
  14. data/lib/scout_apm/environment.rb +16 -10
  15. data/lib/scout_apm/framework_integrations/rails_2.rb +12 -14
  16. data/lib/scout_apm/framework_integrations/rails_3_or_4.rb +5 -17
  17. data/lib/scout_apm/framework_integrations/ruby.rb +0 -4
  18. data/lib/scout_apm/framework_integrations/sinatra.rb +0 -4
  19. data/lib/scout_apm/histogram.rb +0 -20
  20. data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +1 -4
  21. data/lib/scout_apm/instruments/active_record.rb +149 -8
  22. data/lib/scout_apm/instruments/mongoid.rb +5 -78
  23. data/lib/scout_apm/instruments/process/process_cpu.rb +0 -12
  24. data/lib/scout_apm/instruments/process/process_memory.rb +14 -43
  25. data/lib/scout_apm/layaway.rb +34 -134
  26. data/lib/scout_apm/layaway_file.rb +50 -27
  27. data/lib/scout_apm/layer.rb +45 -1
  28. data/lib/scout_apm/layer_converters/allocation_metric_converter.rb +17 -0
  29. data/lib/scout_apm/layer_converters/converter_base.rb +4 -6
  30. data/lib/scout_apm/layer_converters/job_converter.rb +1 -0
  31. data/lib/scout_apm/layer_converters/metric_converter.rb +2 -1
  32. data/lib/scout_apm/layer_converters/slow_job_converter.rb +42 -21
  33. data/lib/scout_apm/layer_converters/slow_request_converter.rb +58 -37
  34. data/lib/scout_apm/metric_meta.rb +1 -5
  35. data/lib/scout_apm/metric_set.rb +6 -15
  36. data/lib/scout_apm/reporter.rb +4 -6
  37. data/lib/scout_apm/serializers/metrics_to_json_serializer.rb +5 -1
  38. data/lib/scout_apm/serializers/payload_serializer_to_json.rb +1 -3
  39. data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +5 -3
  40. data/lib/scout_apm/slow_job_policy.rb +19 -89
  41. data/lib/scout_apm/slow_job_record.rb +12 -20
  42. data/lib/scout_apm/slow_request_policy.rb +12 -80
  43. data/lib/scout_apm/slow_transaction.rb +16 -20
  44. data/lib/scout_apm/stackprof_tree_collapser.rb +103 -0
  45. data/lib/scout_apm/store.rb +16 -78
  46. data/lib/scout_apm/tracked_request.rb +53 -36
  47. data/lib/scout_apm/utils/active_record_metric_name.rb +2 -0
  48. data/lib/scout_apm/utils/fake_stack_prof.rb +40 -0
  49. data/lib/scout_apm/utils/klass_helper.rb +26 -0
  50. data/lib/scout_apm/utils/sql_sanitizer.rb +1 -1
  51. data/lib/scout_apm/utils/sql_sanitizer_regex.rb +2 -2
  52. data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +2 -2
  53. data/lib/scout_apm/version.rb +1 -1
  54. data/lib/scout_apm.rb +13 -7
  55. data/scout_apm.gemspec +3 -1
  56. data/test/test_helper.rb +3 -4
  57. data/test/unit/layaway_test.rb +8 -5
  58. data/test/unit/serializers/payload_serializer_test.rb +2 -2
  59. data/test/unit/slow_item_set_test.rb +1 -2
  60. data/test/unit/sql_sanitizer_test.rb +0 -6
  61. metadata +28 -20
  62. data/LICENSE.md +0 -27
  63. data/lib/scout_apm/instruments/grape.rb +0 -69
  64. data/lib/scout_apm/instruments/percentile_sampler.rb +0 -37
  65. data/lib/scout_apm/request_histograms.rb +0 -46
  66. data/lib/scout_apm/scored_item_set.rb +0 -79
  67. data/test/unit/metric_set_test.rb +0 -101
  68. data/test/unit/scored_item_set_test.rb +0 -65
  69. data/test/unit/slow_request_policy_test.rb +0 -42
@@ -4,29 +4,23 @@ module ScoutApm
4
4
  def initialize(*)
5
5
  @backtraces = []
6
6
  super
7
-
8
- # After call to super, so @request is populated
9
- @points = if request.job?
10
- ScoutApm::Agent.instance.slow_job_policy.score(request)
11
- else
12
- -1
13
- end
14
7
  end
15
8
 
16
- def name
17
- request.unique_name
18
- end
9
+ def call
10
+ return unless request.job?
19
11
 
20
- def score
21
- @points
22
- end
12
+ job_name = [queue_layer.name, job_layer.name]
23
13
 
24
- def call
25
- return nil unless request.job?
26
- return nil unless queue_layer
27
- return nil unless job_layer
14
+ slow_enough = ScoutApm::Agent.instance.slow_job_policy.slow?(job_name, root_layer.total_call_time)
15
+ return unless slow_enough
16
+
17
+ # record the change in memory usage
18
+ mem_delta = ScoutApm::Instruments::Process::ProcessMemory.rss_to_mb(request.capture_mem_delta!)
28
19
 
29
- ScoutApm::Agent.instance.slow_job_policy.stored!(request)
20
+ timing_metrics, allocation_metrics = create_metrics
21
+ unless ScoutApm::Instruments::Allocations::ENABLED
22
+ allocation_metrics = {}
23
+ end
30
24
 
31
25
  SlowJobRecord.new(
32
26
  queue_layer.name,
@@ -35,8 +29,11 @@ module ScoutApm
35
29
  job_layer.total_call_time,
36
30
  job_layer.total_exclusive_time,
37
31
  request.context,
38
- create_metrics,
39
- score)
32
+ timing_metrics,
33
+ allocation_metrics,
34
+ mem_delta,
35
+ job_layer.total_allocations
36
+ )
40
37
  end
41
38
 
42
39
  def queue_layer
@@ -47,8 +44,15 @@ module ScoutApm
47
44
  @job_layer ||= find_first_layer_of_type("Job")
48
45
  end
49
46
 
47
+ def find_first_layer_of_type(layer_type)
48
+ walker.walk do |layer|
49
+ return layer if layer.type == layer_type
50
+ end
51
+ end
52
+
50
53
  def create_metrics
51
54
  metric_hash = Hash.new
55
+ allocation_metric_hash = Hash.new
52
56
 
53
57
  # Keep a list of subscopes, but only ever use the front one. The rest
54
58
  # get pushed/popped in cases when we have many levels of subscopable
@@ -68,6 +72,14 @@ module ScoutApm
68
72
  end
69
73
 
70
74
  walker.walk do |layer|
75
+ # Sometimes we start capturing a layer without knowing if we really
76
+ # want to make an entry for it. See ActiveRecord instrumentation for
77
+ # an example. We start capturing before we know if a query is cached
78
+ # or not, and want to skip any cached queries.
79
+ next if layer.annotations[:ignorable]
80
+
81
+ # The queue_layer is useful to capture for other reasons, but doesn't
82
+ # create a MetricMeta/Stat of its own
71
83
  next if layer == queue_layer
72
84
 
73
85
  meta_options = if subscope_layers.first && layer != subscope_layers.first # Don't scope under ourself.
@@ -82,6 +94,8 @@ module ScoutApm
82
94
  # Specific Metric
83
95
  meta_options.merge!(:desc => layer.desc.to_s) if layer.desc
84
96
  meta = MetricMeta.new(layer.legacy_metric_name, meta_options)
97
+ meta.extra.merge!(layer.annotations)
98
+
85
99
  if layer.backtrace
86
100
  bt = ScoutApm::Utils::BacktraceParser.new(layer.backtrace).call
87
101
  if bt.any? # we could walk thru the call stack and not find in-app code
@@ -95,19 +109,26 @@ module ScoutApm
95
109
  end
96
110
 
97
111
  metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
112
+ allocation_metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
98
113
  stat = metric_hash[meta]
99
114
  stat.update!(layer.total_call_time, layer.total_exclusive_time)
115
+ stat = allocation_metric_hash[meta]
116
+ stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
100
117
 
101
118
  # Merged Metric (no specifics, just sum up by type)
102
119
  meta = MetricMeta.new("#{layer.type}/all")
103
120
  metric_hash[meta] ||= MetricStats.new(false)
121
+ allocation_metric_hash[meta] ||= MetricStats.new(false)
104
122
  stat = metric_hash[meta]
105
123
  stat.update!(layer.total_call_time, layer.total_exclusive_time)
124
+ stat = allocation_metric_hash[meta]
125
+ stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
106
126
  end
107
127
 
108
128
  metric_hash = attach_backtraces(metric_hash)
129
+ allocation_metric_hash = attach_backtraces(allocation_metric_hash)
109
130
 
110
- metric_hash
131
+ [metric_hash,allocation_metric_hash]
111
132
  end
112
133
 
113
134
  def attach_backtraces(metric_hash)
@@ -4,50 +4,49 @@ module ScoutApm
4
4
  def initialize(*)
5
5
  @backtraces = [] # An Array of MetricMetas that have a backtrace
6
6
  super
7
-
8
- # After call to super, so @request is populated
9
- @points = if request.web?
10
- ScoutApm::Agent.instance.slow_request_policy.score(request)
11
- else
12
- -1
13
- end
14
- end
15
-
16
- def name
17
- request.unique_name
18
7
  end
19
8
 
20
- def score
21
- @points
22
- end
23
-
24
- # Unconditionally attempts to convert this into a SlowTransaction object.
25
- # Can return nil if the request didn't have any scope_layer.
26
9
  def call
27
10
  scope = scope_layer
28
- return nil unless scope
11
+ return [nil, {}] unless scope
12
+
13
+ policy = ScoutApm::Agent.instance.slow_request_policy.capture_type(root_layer.total_call_time)
14
+ if policy == ScoutApm::SlowRequestPolicy::CAPTURE_NONE
15
+ return [nil, {}]
16
+ end
29
17
 
30
- ScoutApm::Agent.instance.slow_request_policy.stored!(request)
18
+ # record the change in memory usage
19
+ mem_delta = ScoutApm::Instruments::Process::ProcessMemory.rss_to_mb(@request.capture_mem_delta!)
20
+
21
+ # increment the slow transaction count if this is a slow transaction.
22
+ meta = MetricMeta.new("SlowTransaction/#{scope.legacy_metric_name}")
23
+ stat = MetricStats.new
24
+ stat.update!(1)
31
25
 
32
26
  uri = request.annotations[:uri] || ""
33
27
 
34
- (ScoutApm::Agent.instance.config.value("ignore_traces") || []).each do |pattern|
35
- if /#{pattern}/ =~ uri
36
- ScoutApm::Agent.instance.logger.debug("Skipped recording a trace for #{uri} due to `ignore_traces` pattern: #{pattern}")
37
- return nil
38
- end
28
+ timing_metrics, allocation_metrics = create_metrics
29
+ unless ScoutApm::Instruments::Allocations::ENABLED
30
+ allocation_metrics = {}
39
31
  end
40
32
 
41
- metrics = create_metrics
42
-
43
- SlowTransaction.new(uri,
44
- scope.legacy_metric_name,
45
- root_layer.total_call_time,
46
- metrics,
47
- request.context,
48
- root_layer.stop_time,
49
- [], # stackprof
50
- @points)
33
+ # Disable stackprof output for now
34
+ stackprof = [] # request.stackprof
35
+
36
+ [
37
+ SlowTransaction.new(uri,
38
+ scope.legacy_metric_name,
39
+ root_layer.total_call_time,
40
+ timing_metrics,
41
+ allocation_metrics,
42
+ request.context,
43
+ root_layer.stop_time,
44
+ stackprof,
45
+ mem_delta,
46
+ root_layer.total_allocations
47
+ ),
48
+ { meta => stat }
49
+ ]
51
50
  end
52
51
 
53
52
  # Iterates over the TrackedRequest's MetricMetas that have backtraces and attaches each to correct MetricMeta in the Metric Hash.
@@ -58,12 +57,14 @@ module ScoutApm
58
57
  metric_hash
59
58
  end
60
59
 
61
- # Full metrics from this request. These get aggregated in Store for the
62
- # overview metrics, or stored permanently in a SlowTransaction
60
+ # Full metrics from this request. These get stored permanently in a SlowTransaction.
63
61
  # Some merging of metrics will happen here, so if a request calls the same
64
62
  # ActiveRecord or View repeatedly, it'll get merged.
63
+ #
64
+ # This returns a 2-element array of Metric Hashes (the first element is timing metrics, the second element is allocation metrics)
65
65
  def create_metrics
66
66
  metric_hash = Hash.new
67
+ allocation_metric_hash = Hash.new
67
68
 
68
69
  # Keep a list of subscopes, but only ever use the front one. The rest
69
70
  # get pushed/popped in cases when we have many levels of subscopable
@@ -83,6 +84,14 @@ module ScoutApm
83
84
  end
84
85
 
85
86
  walker.walk do |layer|
87
+ # Sometimes we start capturing a layer without knowing if we really
88
+ # want to make an entry for it. See ActiveRecord instrumentation for
89
+ # an example. We start capturing before we know if a query is cached
90
+ # or not, and want to skip any cached queries.
91
+ if layer.annotations[:ignorable]
92
+ next
93
+ end
94
+
86
95
  meta_options = if subscope_layers.first && layer != subscope_layers.first # Don't scope under ourself.
87
96
  subscope_name = subscope_layers.first.legacy_metric_name
88
97
  {:scope => subscope_name}
@@ -95,6 +104,7 @@ module ScoutApm
95
104
  # Specific Metric
96
105
  meta_options.merge!(:desc => layer.desc.to_s) if layer.desc
97
106
  meta = MetricMeta.new(layer.legacy_metric_name, meta_options)
107
+ meta.extra.merge!(layer.annotations)
98
108
  if layer.backtrace
99
109
  bt = ScoutApm::Utils::BacktraceParser.new(layer.backtrace).call
100
110
  if bt.any? # we could walk thru the call stack and not find in-app code
@@ -109,19 +119,30 @@ module ScoutApm
109
119
  end
110
120
  end
111
121
  metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
122
+ allocation_metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
123
+ # timing
112
124
  stat = metric_hash[meta]
113
125
  stat.update!(layer.total_call_time, layer.total_exclusive_time)
126
+ # allocations
127
+ stat = allocation_metric_hash[meta]
128
+ stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
114
129
 
115
130
  # Merged Metric (no specifics, just sum up by type)
116
131
  meta = MetricMeta.new("#{layer.type}/all")
117
132
  metric_hash[meta] ||= MetricStats.new(false)
133
+ allocation_metric_hash[meta] ||= MetricStats.new(false)
134
+ # timing
118
135
  stat = metric_hash[meta]
119
136
  stat.update!(layer.total_call_time, layer.total_exclusive_time)
137
+ # allocations
138
+ stat = allocation_metric_hash[meta]
139
+ stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
120
140
  end
121
141
 
122
142
  metric_hash = attach_backtraces(metric_hash)
143
+ allocation_metric_hash = attach_backtraces(allocation_metric_hash)
123
144
 
124
- metric_hash
145
+ [metric_hash,allocation_metric_hash]
125
146
  end
126
147
  end
127
148
  end
@@ -17,11 +17,7 @@ class MetricMeta
17
17
 
18
18
  # Unsure if type or bucket is a better name.
19
19
  def type
20
- bucket_type
21
- end
22
-
23
- def name
24
- bucket_name
20
+ bucket
25
21
  end
26
22
 
27
23
  # A key metric is the "core" of a request - either the Rails controller reached, or the background Job executed
@@ -2,7 +2,7 @@ module ScoutApm
2
2
  class MetricSet
3
3
  # We can't aggregate CPU, Memory, Capacity, or Controller, so pass through these metrics directly
4
4
  # TODO: Figure out a way to not have this duplicate what's in Samplers, and also on server's ingest
5
- PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction", "Percentile", "Job"]
5
+ PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction"]
6
6
 
7
7
  attr_reader :metrics
8
8
 
@@ -23,15 +23,11 @@ module ScoutApm
23
23
  @metrics[meta].combine!(stat)
24
24
 
25
25
  elsif meta.type == "Errors" # Sadly special cased, we want both raw and aggregate values
26
- # When combining MetricSets between different
27
- @metrics[meta] ||= MetricStats.new
28
- @metrics[meta].combine!(stat)
29
-
30
- if !@combine_in_progress
31
- agg_meta = MetricMeta.new("Errors/Request", :scope => meta.scope)
32
- @metrics[agg_meta] ||= MetricStats.new
33
- @metrics[agg_meta].combine!(stat)
34
- end
26
+ @metrics[meta] ||= MetricStats.new
27
+ @metrics[meta].combine!(stat)
28
+ agg_meta = MetricMeta.new("Errors/Request", :scope => meta.scope)
29
+ @metrics[agg_meta] ||= MetricStats.new
30
+ @metrics[agg_meta].combine!(stat)
35
31
 
36
32
  else # Combine down to a single /all key
37
33
  agg_meta = MetricMeta.new("#{meta.type}/all", :scope => meta.scope)
@@ -40,13 +36,8 @@ module ScoutApm
40
36
  end
41
37
  end
42
38
 
43
- # Sets a combine_in_progress flag to prevent double-counting Error metrics.
44
- # Without it, the Errors/Request number would be increasingly off as
45
- # metric_sets get merged in.
46
39
  def combine!(other)
47
- @combine_in_progress = true
48
40
  absorb_all(other.metrics)
49
- @combine_in_progress = false
50
41
  self
51
42
  end
52
43
  end
@@ -17,10 +17,8 @@ module ScoutApm
17
17
 
18
18
  # TODO: Parse & return a real response object, not the HTTP Response object
19
19
  def report(payload, headers = {})
20
- # Some posts (typically ones under development) bypass the ingestion pipeline and go directly to the webserver. They use direct_host instead of host
21
- hosts = [:deploy_hook, :instant_trace].include?(type) ? config.value('direct_host') : config.value('host')
20
+ Array(config.value('host')).each do |host|
22
21
 
23
- Array(hosts).each do |host|
24
22
  full_uri = uri(host)
25
23
  response = post(full_uri, payload, headers)
26
24
  unless response && response.is_a?(Net::HTTPSuccess)
@@ -36,7 +34,7 @@ module ScoutApm
36
34
  when :app_server_load
37
35
  URI.parse("#{host}/apps/app_server_load.scout?key=#{config.value('key')}&name=#{CGI.escape(Environment.instance.application_name)}")
38
36
  when :deploy_hook
39
- URI.parse("#{host}/apps/deploy.scout?key=#{config.value('key')}&name=#{CGI.escape(config.value('name'))}")
37
+ URI.parse("https://apm.scoutapp.com/apps/deploy.scout?key=#{config.value('key')}&name=#{CGI.escape(config.value('name'))}")
40
38
  end.tap{|u| logger.debug("Posting to #{u.to_s}")}
41
39
  end
42
40
 
@@ -58,7 +56,7 @@ module ScoutApm
58
56
  private
59
57
 
60
58
  def post(uri, body, headers = Hash.new)
61
- response = nil
59
+ response = :connection_failed
62
60
  request(uri) do |connection|
63
61
  post = Net::HTTP::Post.new( uri.path +
64
62
  (uri.query ? ('?' + uri.query) : ''),
@@ -84,7 +82,7 @@ module ScoutApm
84
82
  logger.debug "/#{type} FAILED: #{response.inspect}"
85
83
  end
86
84
  rescue Exception
87
- logger.debug "Exception sending request to server: \n#{$!.message}\n\t#{$!.backtrace.join("\n\t")}"
85
+ logger.info "Exception sending request to server: \n#{$!.message}\n\t#{$!.backtrace.join("\n\t")}"
88
86
  ensure
89
87
  response
90
88
  end
@@ -9,7 +9,11 @@ module ScoutApm
9
9
  end
10
10
 
11
11
  def as_json
12
- metrics.map{|meta, stat| metric_as_json(meta, stat) }
12
+ if metrics
13
+ metrics.map{|meta, stat| metric_as_json(meta, stat) }
14
+ else
15
+ nil
16
+ end
13
17
  end
14
18
 
15
19
  # Children metrics is a hash of meta=>stat pairs. Leave empty for no children.
@@ -22,7 +22,7 @@ module ScoutApm
22
22
 
23
23
  def rearrange_the_slow_transactions(slow_transactions)
24
24
  slow_transactions.to_a.map do |slow_t|
25
- slow_t.as_json.merge(:metrics => rearrange_the_metrics(slow_t.metrics))
25
+ slow_t.as_json.merge(:metrics => rearrange_the_metrics(slow_t.metrics), :allocation_metrics => rearrange_the_metrics(slow_t.allocation_metrics))
26
26
  end
27
27
  end
28
28
 
@@ -59,8 +59,6 @@ module ScoutApm
59
59
  "[#{all_the_elements.join(",")}]"
60
60
  when Numeric
61
61
  formatee
62
- when Time
63
- %Q["#{formatee.iso8601}"]
64
62
  when nil
65
63
  "null"
66
64
  else # strings and everything
@@ -17,11 +17,13 @@ module ScoutApm
17
17
  "time" => job.time,
18
18
  "total_time" => job.total_time,
19
19
  "exclusive_time" => job.exclusive_time,
20
-
20
+ "mem_delta" => job.mem_delta,
21
+ "allocations" => job.allocations,
22
+ "seconds_since_startup" => job.seconds_since_startup,
23
+ "hostname" => job.hostname,
21
24
  "metrics" => MetricsToJsonSerializer.new(job.metrics).as_json, # New style of metrics
25
+ "allocation_metrics" => MetricsToJsonSerializer.new(job.allocation_metrics).as_json, # New style of metrics
22
26
  "context" => job.context.to_hash,
23
-
24
- "score" => job.score,
25
27
  }
26
28
  end
27
29
  end
@@ -1,99 +1,29 @@
1
- # Long running class that determines if, and in how much detail a potentially
2
- # slow job should be recorded in
3
-
1
+ # Create one of these at startup time, and ask it if a certain worker's
2
+ # processing time is slow enough for us to collect a slow trace.
3
+ #
4
+ # Keeps track of a histogram of times for each worker class (separately), and
5
+ # uses a percentile of normal to mark individual runs as "slow".
6
+ #
7
+ # This assumes that all worker calls will be requested once to `slow?`, so that
8
+ # the data can be stored
4
9
  module ScoutApm
5
10
  class SlowJobPolicy
6
- CAPTURE_TYPES = [
7
- CAPTURE_DETAIL = "capture_detail",
8
- CAPTURE_NONE = "capture_none",
9
- ]
10
-
11
- # Adjust speed points. See the function
12
- POINT_MULTIPLIER_SPEED = 0.25
13
-
14
- # For each minute we haven't seen an endpoint
15
- POINT_MULTIPLIER_AGE = 0.25
16
-
17
- # Outliers are worth up to "1000ms" of weight
18
- POINT_MULTIPLIER_PERCENTILE = 1.0
19
-
20
- # A hash of Job Names to the last time we stored a slow trace for it.
21
- #
22
- # Defaults to a start time that is pretty close to application boot time.
23
- # So the "age" of an endpoint we've never seen is the time the application
24
- # has been running.
25
- attr_reader :last_seen
11
+ DEFAULT_HISTOGRAM_SIZE = 50
26
12
 
13
+ QUANTILE = 95
27
14
 
28
- def initialize
29
- zero_time = Time.now
30
- @last_seen = Hash.new { |h, k| h[k] = zero_time }
15
+ def initialize(histogram_size = DEFAULT_HISTOGRAM_SIZE)
16
+ @histograms = Hash.new { |h, k| h[k] = NumericHistogram.new(histogram_size) }
31
17
  end
32
18
 
33
- def stored!(request)
34
- last_seen[unique_name_for(request)] = Time.now
35
- end
36
-
37
- # Determine if this job trace should be fully analyzed by scoring it
38
- # across several metrics, and then determining if that's good enough to
39
- # make it into this minute's payload.
40
- #
41
- # Due to the combining nature of the agent & layaway file, there's no
42
- # guarantee that a high scoring local champion will still be a winner when
43
- # they go up to "regionals" and are compared against the other processes
44
- # running on a node.
45
- def score(request)
46
- unique_name = request.unique_name
47
- if unique_name == :unknown
48
- return -1 # A negative score, should never be good enough to store.
49
- end
50
-
51
- total_time = request.root_layer.total_call_time
52
-
53
- # How long has it been since we've seen this?
54
- age = Time.now - last_seen[unique_name]
55
-
56
- # What approximate percentile was this request?
57
- percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
58
-
59
- return speed_points(total_time) + percentile_points(percentile) + age_points(age)
60
- end
61
-
62
- private
63
-
64
- def unique_name_for(request)
65
- scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
66
- if scope_layer
67
- scope_layer.legacy_metric_name
68
- else
69
- :unknown
70
- end
71
- end
72
-
73
- # Time in seconds
74
- # Logarithm keeps huge times from swamping the other metrics.
75
- # 1+ is necessary to keep the log function in positive territory.
76
- def speed_points(time)
77
- Math.log(1 + time) * POINT_MULTIPLIER_SPEED
78
- end
79
-
80
- def percentile_points(percentile)
81
- if percentile < 40
82
- 0.4 # Don't put much emphasis on capturing low percentiles.
83
- elsif percentile < 60
84
- 1.4 # Highest here to get mean traces
85
- elsif percentile < 90
86
- 0.7 # Between 60 & 90% is fine.
87
- elsif percentile >= 90
88
- 1.4 # Highest here to get 90+%ile traces
89
- else
90
- # impossible.
91
- percentile
92
- end
93
- end
19
+ # worker: just the worker class name. "PasswordResetJob" or similar
20
+ # total_time: runtime of the job in seconds
21
+ # returns true if this request should be stored in higher trace detail, false otherwise
22
+ def slow?(worker, total_time)
23
+ @histograms[worker].add(total_time)
24
+ return false if @histograms[worker].total == 1 # First call is never slow
94
25
 
95
- def age_points(age)
96
- age / 60.0 * POINT_MULTIPLIER_AGE
26
+ total_time >= @histograms[worker].quantile(QUANTILE)
97
27
  end
98
28
  end
99
29
  end
@@ -14,10 +14,13 @@ module ScoutApm
14
14
  alias_method :total_call_time, :total_time
15
15
 
16
16
  attr_reader :metrics
17
+ attr_reader :allocation_metrics
18
+ attr_reader :mem_delta
19
+ attr_reader :allocations
20
+ attr_reader :hostname
21
+ attr_reader :seconds_since_startup
17
22
 
18
- attr_reader :score
19
-
20
- def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, score)
23
+ def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations)
21
24
  @queue_name = queue_name
22
25
  @job_name = job_name
23
26
  @time = time
@@ -25,28 +28,17 @@ module ScoutApm
25
28
  @exclusive_time = exclusive_time
26
29
  @context = context
27
30
  @metrics = metrics
28
- @score = score
31
+ @allocation_metrics = allocation_metrics
32
+ @mem_delta = mem_delta
33
+ @allocations = allocations
34
+ @seconds_since_startup = (Time.now - ScoutApm::Agent.instance.process_start_time)
35
+ @hostname = ScoutApm::Environment.instance.hostname
36
+ ScoutApm::Agent.instance.logger.debug { "Slow Job [#{metric_name}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
29
37
  end
30
38
 
31
39
  def metric_name
32
40
  "Job/#{queue_name}/#{job_name}"
33
41
  end
34
42
 
35
- ########################
36
- # Scorable interface
37
- #
38
- # Needed so we can merge ScoredItemSet instances
39
- def call
40
- self
41
- end
42
-
43
- def name
44
- metric_name
45
- end
46
-
47
- def score
48
- @score
49
- end
50
-
51
43
  end
52
44
  end