scout_apm 1.6.8 → 2.0.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +8 -1
- data/CHANGELOG.markdown +7 -57
- data/ext/allocations/allocations.c +84 -0
- data/ext/allocations/extconf.rb +3 -0
- data/lib/scout_apm/agent/reporting.rb +9 -32
- data/lib/scout_apm/agent.rb +45 -31
- data/lib/scout_apm/app_server_load.rb +1 -2
- data/lib/scout_apm/attribute_arranger.rb +0 -4
- data/lib/scout_apm/background_worker.rb +6 -9
- data/lib/scout_apm/bucket_name_splitter.rb +3 -3
- data/lib/scout_apm/call_set.rb +1 -0
- data/lib/scout_apm/config.rb +110 -66
- data/lib/scout_apm/environment.rb +16 -10
- data/lib/scout_apm/framework_integrations/rails_2.rb +12 -14
- data/lib/scout_apm/framework_integrations/rails_3_or_4.rb +5 -17
- data/lib/scout_apm/framework_integrations/ruby.rb +0 -4
- data/lib/scout_apm/framework_integrations/sinatra.rb +0 -4
- data/lib/scout_apm/histogram.rb +0 -20
- data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +1 -4
- data/lib/scout_apm/instruments/active_record.rb +149 -8
- data/lib/scout_apm/instruments/mongoid.rb +5 -78
- data/lib/scout_apm/instruments/process/process_cpu.rb +0 -12
- data/lib/scout_apm/instruments/process/process_memory.rb +14 -43
- data/lib/scout_apm/layaway.rb +34 -134
- data/lib/scout_apm/layaway_file.rb +50 -27
- data/lib/scout_apm/layer.rb +45 -1
- data/lib/scout_apm/layer_converters/allocation_metric_converter.rb +17 -0
- data/lib/scout_apm/layer_converters/converter_base.rb +4 -6
- data/lib/scout_apm/layer_converters/job_converter.rb +1 -0
- data/lib/scout_apm/layer_converters/metric_converter.rb +2 -1
- data/lib/scout_apm/layer_converters/slow_job_converter.rb +42 -21
- data/lib/scout_apm/layer_converters/slow_request_converter.rb +58 -37
- data/lib/scout_apm/metric_meta.rb +1 -5
- data/lib/scout_apm/metric_set.rb +6 -15
- data/lib/scout_apm/reporter.rb +4 -6
- data/lib/scout_apm/serializers/metrics_to_json_serializer.rb +5 -1
- data/lib/scout_apm/serializers/payload_serializer_to_json.rb +1 -3
- data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +5 -3
- data/lib/scout_apm/slow_job_policy.rb +19 -89
- data/lib/scout_apm/slow_job_record.rb +12 -20
- data/lib/scout_apm/slow_request_policy.rb +12 -80
- data/lib/scout_apm/slow_transaction.rb +16 -20
- data/lib/scout_apm/stackprof_tree_collapser.rb +103 -0
- data/lib/scout_apm/store.rb +16 -78
- data/lib/scout_apm/tracked_request.rb +53 -36
- data/lib/scout_apm/utils/active_record_metric_name.rb +2 -0
- data/lib/scout_apm/utils/fake_stack_prof.rb +40 -0
- data/lib/scout_apm/utils/klass_helper.rb +26 -0
- data/lib/scout_apm/utils/sql_sanitizer.rb +1 -1
- data/lib/scout_apm/utils/sql_sanitizer_regex.rb +2 -2
- data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +2 -2
- data/lib/scout_apm/version.rb +1 -1
- data/lib/scout_apm.rb +13 -7
- data/scout_apm.gemspec +3 -1
- data/test/test_helper.rb +3 -4
- data/test/unit/layaway_test.rb +8 -5
- data/test/unit/serializers/payload_serializer_test.rb +2 -2
- data/test/unit/slow_item_set_test.rb +1 -2
- data/test/unit/sql_sanitizer_test.rb +0 -6
- metadata +28 -20
- data/LICENSE.md +0 -27
- data/lib/scout_apm/instruments/grape.rb +0 -69
- data/lib/scout_apm/instruments/percentile_sampler.rb +0 -37
- data/lib/scout_apm/request_histograms.rb +0 -46
- data/lib/scout_apm/scored_item_set.rb +0 -79
- data/test/unit/metric_set_test.rb +0 -101
- data/test/unit/scored_item_set_test.rb +0 -65
- data/test/unit/slow_request_policy_test.rb +0 -42
@@ -4,29 +4,23 @@ module ScoutApm
|
|
4
4
|
def initialize(*)
|
5
5
|
@backtraces = []
|
6
6
|
super
|
7
|
-
|
8
|
-
# After call to super, so @request is populated
|
9
|
-
@points = if request.job?
|
10
|
-
ScoutApm::Agent.instance.slow_job_policy.score(request)
|
11
|
-
else
|
12
|
-
-1
|
13
|
-
end
|
14
7
|
end
|
15
8
|
|
16
|
-
def
|
17
|
-
request.
|
18
|
-
end
|
9
|
+
def call
|
10
|
+
return unless request.job?
|
19
11
|
|
20
|
-
|
21
|
-
@points
|
22
|
-
end
|
12
|
+
job_name = [queue_layer.name, job_layer.name]
|
23
13
|
|
24
|
-
|
25
|
-
return
|
26
|
-
|
27
|
-
|
14
|
+
slow_enough = ScoutApm::Agent.instance.slow_job_policy.slow?(job_name, root_layer.total_call_time)
|
15
|
+
return unless slow_enough
|
16
|
+
|
17
|
+
# record the change in memory usage
|
18
|
+
mem_delta = ScoutApm::Instruments::Process::ProcessMemory.rss_to_mb(request.capture_mem_delta!)
|
28
19
|
|
29
|
-
|
20
|
+
timing_metrics, allocation_metrics = create_metrics
|
21
|
+
unless ScoutApm::Instruments::Allocations::ENABLED
|
22
|
+
allocation_metrics = {}
|
23
|
+
end
|
30
24
|
|
31
25
|
SlowJobRecord.new(
|
32
26
|
queue_layer.name,
|
@@ -35,8 +29,11 @@ module ScoutApm
|
|
35
29
|
job_layer.total_call_time,
|
36
30
|
job_layer.total_exclusive_time,
|
37
31
|
request.context,
|
38
|
-
|
39
|
-
|
32
|
+
timing_metrics,
|
33
|
+
allocation_metrics,
|
34
|
+
mem_delta,
|
35
|
+
job_layer.total_allocations
|
36
|
+
)
|
40
37
|
end
|
41
38
|
|
42
39
|
def queue_layer
|
@@ -47,8 +44,15 @@ module ScoutApm
|
|
47
44
|
@job_layer ||= find_first_layer_of_type("Job")
|
48
45
|
end
|
49
46
|
|
47
|
+
def find_first_layer_of_type(layer_type)
|
48
|
+
walker.walk do |layer|
|
49
|
+
return layer if layer.type == layer_type
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
50
53
|
def create_metrics
|
51
54
|
metric_hash = Hash.new
|
55
|
+
allocation_metric_hash = Hash.new
|
52
56
|
|
53
57
|
# Keep a list of subscopes, but only ever use the front one. The rest
|
54
58
|
# get pushed/popped in cases when we have many levels of subscopable
|
@@ -68,6 +72,14 @@ module ScoutApm
|
|
68
72
|
end
|
69
73
|
|
70
74
|
walker.walk do |layer|
|
75
|
+
# Sometimes we start capturing a layer without knowing if we really
|
76
|
+
# want to make an entry for it. See ActiveRecord instrumentation for
|
77
|
+
# an example. We start capturing before we know if a query is cached
|
78
|
+
# or not, and want to skip any cached queries.
|
79
|
+
next if layer.annotations[:ignorable]
|
80
|
+
|
81
|
+
# The queue_layer is useful to capture for other reasons, but doesn't
|
82
|
+
# create a MetricMeta/Stat of its own
|
71
83
|
next if layer == queue_layer
|
72
84
|
|
73
85
|
meta_options = if subscope_layers.first && layer != subscope_layers.first # Don't scope under ourself.
|
@@ -82,6 +94,8 @@ module ScoutApm
|
|
82
94
|
# Specific Metric
|
83
95
|
meta_options.merge!(:desc => layer.desc.to_s) if layer.desc
|
84
96
|
meta = MetricMeta.new(layer.legacy_metric_name, meta_options)
|
97
|
+
meta.extra.merge!(layer.annotations)
|
98
|
+
|
85
99
|
if layer.backtrace
|
86
100
|
bt = ScoutApm::Utils::BacktraceParser.new(layer.backtrace).call
|
87
101
|
if bt.any? # we could walk thru the call stack and not find in-app code
|
@@ -95,19 +109,26 @@ module ScoutApm
|
|
95
109
|
end
|
96
110
|
|
97
111
|
metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
|
112
|
+
allocation_metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
|
98
113
|
stat = metric_hash[meta]
|
99
114
|
stat.update!(layer.total_call_time, layer.total_exclusive_time)
|
115
|
+
stat = allocation_metric_hash[meta]
|
116
|
+
stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
|
100
117
|
|
101
118
|
# Merged Metric (no specifics, just sum up by type)
|
102
119
|
meta = MetricMeta.new("#{layer.type}/all")
|
103
120
|
metric_hash[meta] ||= MetricStats.new(false)
|
121
|
+
allocation_metric_hash[meta] ||= MetricStats.new(false)
|
104
122
|
stat = metric_hash[meta]
|
105
123
|
stat.update!(layer.total_call_time, layer.total_exclusive_time)
|
124
|
+
stat = allocation_metric_hash[meta]
|
125
|
+
stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
|
106
126
|
end
|
107
127
|
|
108
128
|
metric_hash = attach_backtraces(metric_hash)
|
129
|
+
allocation_metric_hash = attach_backtraces(allocation_metric_hash)
|
109
130
|
|
110
|
-
metric_hash
|
131
|
+
[metric_hash,allocation_metric_hash]
|
111
132
|
end
|
112
133
|
|
113
134
|
def attach_backtraces(metric_hash)
|
@@ -4,50 +4,49 @@ module ScoutApm
|
|
4
4
|
def initialize(*)
|
5
5
|
@backtraces = [] # An Array of MetricMetas that have a backtrace
|
6
6
|
super
|
7
|
-
|
8
|
-
# After call to super, so @request is populated
|
9
|
-
@points = if request.web?
|
10
|
-
ScoutApm::Agent.instance.slow_request_policy.score(request)
|
11
|
-
else
|
12
|
-
-1
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def name
|
17
|
-
request.unique_name
|
18
7
|
end
|
19
8
|
|
20
|
-
def score
|
21
|
-
@points
|
22
|
-
end
|
23
|
-
|
24
|
-
# Unconditionally attempts to convert this into a SlowTransaction object.
|
25
|
-
# Can return nil if the request didn't have any scope_layer.
|
26
9
|
def call
|
27
10
|
scope = scope_layer
|
28
|
-
return nil unless scope
|
11
|
+
return [nil, {}] unless scope
|
12
|
+
|
13
|
+
policy = ScoutApm::Agent.instance.slow_request_policy.capture_type(root_layer.total_call_time)
|
14
|
+
if policy == ScoutApm::SlowRequestPolicy::CAPTURE_NONE
|
15
|
+
return [nil, {}]
|
16
|
+
end
|
29
17
|
|
30
|
-
|
18
|
+
# record the change in memory usage
|
19
|
+
mem_delta = ScoutApm::Instruments::Process::ProcessMemory.rss_to_mb(@request.capture_mem_delta!)
|
20
|
+
|
21
|
+
# increment the slow transaction count if this is a slow transaction.
|
22
|
+
meta = MetricMeta.new("SlowTransaction/#{scope.legacy_metric_name}")
|
23
|
+
stat = MetricStats.new
|
24
|
+
stat.update!(1)
|
31
25
|
|
32
26
|
uri = request.annotations[:uri] || ""
|
33
27
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
return nil
|
38
|
-
end
|
28
|
+
timing_metrics, allocation_metrics = create_metrics
|
29
|
+
unless ScoutApm::Instruments::Allocations::ENABLED
|
30
|
+
allocation_metrics = {}
|
39
31
|
end
|
40
32
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
33
|
+
# Disable stackprof output for now
|
34
|
+
stackprof = [] # request.stackprof
|
35
|
+
|
36
|
+
[
|
37
|
+
SlowTransaction.new(uri,
|
38
|
+
scope.legacy_metric_name,
|
39
|
+
root_layer.total_call_time,
|
40
|
+
timing_metrics,
|
41
|
+
allocation_metrics,
|
42
|
+
request.context,
|
43
|
+
root_layer.stop_time,
|
44
|
+
stackprof,
|
45
|
+
mem_delta,
|
46
|
+
root_layer.total_allocations
|
47
|
+
),
|
48
|
+
{ meta => stat }
|
49
|
+
]
|
51
50
|
end
|
52
51
|
|
53
52
|
# Iterates over the TrackedRequest's MetricMetas that have backtraces and attaches each to correct MetricMeta in the Metric Hash.
|
@@ -58,12 +57,14 @@ module ScoutApm
|
|
58
57
|
metric_hash
|
59
58
|
end
|
60
59
|
|
61
|
-
# Full metrics from this request. These get
|
62
|
-
# overview metrics, or stored permanently in a SlowTransaction
|
60
|
+
# Full metrics from this request. These get stored permanently in a SlowTransaction.
|
63
61
|
# Some merging of metrics will happen here, so if a request calls the same
|
64
62
|
# ActiveRecord or View repeatedly, it'll get merged.
|
63
|
+
#
|
64
|
+
# This returns a 2-element Array of Metric Hashes (the first element is timing metrics, the second element is allocation metrics)
|
65
65
|
def create_metrics
|
66
66
|
metric_hash = Hash.new
|
67
|
+
allocation_metric_hash = Hash.new
|
67
68
|
|
68
69
|
# Keep a list of subscopes, but only ever use the front one. The rest
|
69
70
|
# get pushed/popped in cases when we have many levels of subscopable
|
@@ -83,6 +84,14 @@ module ScoutApm
|
|
83
84
|
end
|
84
85
|
|
85
86
|
walker.walk do |layer|
|
87
|
+
# Sometimes we start capturing a layer without knowing if we really
|
88
|
+
# want to make an entry for it. See ActiveRecord instrumentation for
|
89
|
+
# an example. We start capturing before we know if a query is cached
|
90
|
+
# or not, and want to skip any cached queries.
|
91
|
+
if layer.annotations[:ignorable]
|
92
|
+
next
|
93
|
+
end
|
94
|
+
|
86
95
|
meta_options = if subscope_layers.first && layer != subscope_layers.first # Don't scope under ourself.
|
87
96
|
subscope_name = subscope_layers.first.legacy_metric_name
|
88
97
|
{:scope => subscope_name}
|
@@ -95,6 +104,7 @@ module ScoutApm
|
|
95
104
|
# Specific Metric
|
96
105
|
meta_options.merge!(:desc => layer.desc.to_s) if layer.desc
|
97
106
|
meta = MetricMeta.new(layer.legacy_metric_name, meta_options)
|
107
|
+
meta.extra.merge!(layer.annotations)
|
98
108
|
if layer.backtrace
|
99
109
|
bt = ScoutApm::Utils::BacktraceParser.new(layer.backtrace).call
|
100
110
|
if bt.any? # we could walk thru the call stack and not find in-app code
|
@@ -109,19 +119,30 @@ module ScoutApm
|
|
109
119
|
end
|
110
120
|
end
|
111
121
|
metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
|
122
|
+
allocation_metric_hash[meta] ||= MetricStats.new( meta_options.has_key?(:scope) )
|
123
|
+
# timing
|
112
124
|
stat = metric_hash[meta]
|
113
125
|
stat.update!(layer.total_call_time, layer.total_exclusive_time)
|
126
|
+
# allocations
|
127
|
+
stat = allocation_metric_hash[meta]
|
128
|
+
stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
|
114
129
|
|
115
130
|
# Merged Metric (no specifics, just sum up by type)
|
116
131
|
meta = MetricMeta.new("#{layer.type}/all")
|
117
132
|
metric_hash[meta] ||= MetricStats.new(false)
|
133
|
+
allocation_metric_hash[meta] ||= MetricStats.new(false)
|
134
|
+
# timing
|
118
135
|
stat = metric_hash[meta]
|
119
136
|
stat.update!(layer.total_call_time, layer.total_exclusive_time)
|
137
|
+
# allocations
|
138
|
+
stat = allocation_metric_hash[meta]
|
139
|
+
stat.update!(layer.total_allocations, layer.total_exclusive_allocations)
|
120
140
|
end
|
121
141
|
|
122
142
|
metric_hash = attach_backtraces(metric_hash)
|
143
|
+
allocation_metric_hash = attach_backtraces(allocation_metric_hash)
|
123
144
|
|
124
|
-
metric_hash
|
145
|
+
[metric_hash,allocation_metric_hash]
|
125
146
|
end
|
126
147
|
end
|
127
148
|
end
|
@@ -17,11 +17,7 @@ class MetricMeta
|
|
17
17
|
|
18
18
|
# Unsure if type or bucket is a better name.
|
19
19
|
def type
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
def name
|
24
|
-
bucket_name
|
20
|
+
bucket
|
25
21
|
end
|
26
22
|
|
27
23
|
# A key metric is the "core" of a request - either the Rails controller reached, or the background Job executed
|
data/lib/scout_apm/metric_set.rb
CHANGED
@@ -2,7 +2,7 @@ module ScoutApm
|
|
2
2
|
class MetricSet
|
3
3
|
# We can't aggregate CPU, Memory, Capacity, or Controller, so pass through these metrics directly
|
4
4
|
# TODO: Figure out a way to not have this duplicate what's in Samplers, and also on server's ingest
|
5
|
-
PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction"
|
5
|
+
PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction"]
|
6
6
|
|
7
7
|
attr_reader :metrics
|
8
8
|
|
@@ -23,15 +23,11 @@ module ScoutApm
|
|
23
23
|
@metrics[meta].combine!(stat)
|
24
24
|
|
25
25
|
elsif meta.type == "Errors" # Sadly special cased, we want both raw and aggregate values
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
agg_meta = MetricMeta.new("Errors/Request", :scope => meta.scope)
|
32
|
-
@metrics[agg_meta] ||= MetricStats.new
|
33
|
-
@metrics[agg_meta].combine!(stat)
|
34
|
-
end
|
26
|
+
@metrics[meta] ||= MetricStats.new
|
27
|
+
@metrics[meta].combine!(stat)
|
28
|
+
agg_meta = MetricMeta.new("Errors/Request", :scope => meta.scope)
|
29
|
+
@metrics[agg_meta] ||= MetricStats.new
|
30
|
+
@metrics[agg_meta].combine!(stat)
|
35
31
|
|
36
32
|
else # Combine down to a single /all key
|
37
33
|
agg_meta = MetricMeta.new("#{meta.type}/all", :scope => meta.scope)
|
@@ -40,13 +36,8 @@ module ScoutApm
|
|
40
36
|
end
|
41
37
|
end
|
42
38
|
|
43
|
-
# Sets a combine_in_progress flag to prevent double-counting Error metrics.
|
44
|
-
# Without it, the Errors/Request number would be increasingly off as
|
45
|
-
# metric_sets get merged in.
|
46
39
|
def combine!(other)
|
47
|
-
@combine_in_progress = true
|
48
40
|
absorb_all(other.metrics)
|
49
|
-
@combine_in_progress = false
|
50
41
|
self
|
51
42
|
end
|
52
43
|
end
|
data/lib/scout_apm/reporter.rb
CHANGED
@@ -17,10 +17,8 @@ module ScoutApm
|
|
17
17
|
|
18
18
|
# TODO: Parse & return a real response object, not the HTTP Response object
|
19
19
|
def report(payload, headers = {})
|
20
|
-
|
21
|
-
hosts = [:deploy_hook, :instant_trace].include?(type) ? config.value('direct_host') : config.value('host')
|
20
|
+
Array(config.value('host')).each do |host|
|
22
21
|
|
23
|
-
Array(hosts).each do |host|
|
24
22
|
full_uri = uri(host)
|
25
23
|
response = post(full_uri, payload, headers)
|
26
24
|
unless response && response.is_a?(Net::HTTPSuccess)
|
@@ -36,7 +34,7 @@ module ScoutApm
|
|
36
34
|
when :app_server_load
|
37
35
|
URI.parse("#{host}/apps/app_server_load.scout?key=#{config.value('key')}&name=#{CGI.escape(Environment.instance.application_name)}")
|
38
36
|
when :deploy_hook
|
39
|
-
URI.parse("
|
37
|
+
URI.parse("https://apm.scoutapp.com/apps/deploy.scout?key=#{config.value('key')}&name=#{CGI.escape(config.value('name'))}")
|
40
38
|
end.tap{|u| logger.debug("Posting to #{u.to_s}")}
|
41
39
|
end
|
42
40
|
|
@@ -58,7 +56,7 @@ module ScoutApm
|
|
58
56
|
private
|
59
57
|
|
60
58
|
def post(uri, body, headers = Hash.new)
|
61
|
-
response =
|
59
|
+
response = :connection_failed
|
62
60
|
request(uri) do |connection|
|
63
61
|
post = Net::HTTP::Post.new( uri.path +
|
64
62
|
(uri.query ? ('?' + uri.query) : ''),
|
@@ -84,7 +82,7 @@ module ScoutApm
|
|
84
82
|
logger.debug "/#{type} FAILED: #{response.inspect}"
|
85
83
|
end
|
86
84
|
rescue Exception
|
87
|
-
logger.
|
85
|
+
logger.info "Exception sending request to server: \n#{$!.message}\n\t#{$!.backtrace.join("\n\t")}"
|
88
86
|
ensure
|
89
87
|
response
|
90
88
|
end
|
@@ -9,7 +9,11 @@ module ScoutApm
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def as_json
|
12
|
-
metrics
|
12
|
+
if metrics
|
13
|
+
metrics.map{|meta, stat| metric_as_json(meta, stat) }
|
14
|
+
else
|
15
|
+
nil
|
16
|
+
end
|
13
17
|
end
|
14
18
|
|
15
19
|
# Children metrics is a hash of meta=>stat pairs. Leave empty for no children.
|
@@ -22,7 +22,7 @@ module ScoutApm
|
|
22
22
|
|
23
23
|
def rearrange_the_slow_transactions(slow_transactions)
|
24
24
|
slow_transactions.to_a.map do |slow_t|
|
25
|
-
slow_t.as_json.merge(:metrics => rearrange_the_metrics(slow_t.metrics))
|
25
|
+
slow_t.as_json.merge(:metrics => rearrange_the_metrics(slow_t.metrics), :allocation_metrics => rearrange_the_metrics(slow_t.allocation_metrics))
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
@@ -59,8 +59,6 @@ module ScoutApm
|
|
59
59
|
"[#{all_the_elements.join(",")}]"
|
60
60
|
when Numeric
|
61
61
|
formatee
|
62
|
-
when Time
|
63
|
-
%Q["#{formatee.iso8601}"]
|
64
62
|
when nil
|
65
63
|
"null"
|
66
64
|
else # strings and everything
|
@@ -17,11 +17,13 @@ module ScoutApm
|
|
17
17
|
"time" => job.time,
|
18
18
|
"total_time" => job.total_time,
|
19
19
|
"exclusive_time" => job.exclusive_time,
|
20
|
-
|
20
|
+
"mem_delta" => job.mem_delta,
|
21
|
+
"allocations" => job.allocations,
|
22
|
+
"seconds_since_startup" => job.seconds_since_startup,
|
23
|
+
"hostname" => job.hostname,
|
21
24
|
"metrics" => MetricsToJsonSerializer.new(job.metrics).as_json, # New style of metrics
|
25
|
+
"allocation_metrics" => MetricsToJsonSerializer.new(job.allocation_metrics).as_json, # New style of metrics
|
22
26
|
"context" => job.context.to_hash,
|
23
|
-
|
24
|
-
"score" => job.score,
|
25
27
|
}
|
26
28
|
end
|
27
29
|
end
|
@@ -1,99 +1,29 @@
|
|
1
|
-
#
|
2
|
-
# slow
|
3
|
-
|
1
|
+
# Create one of these at startup time, and ask it if a certain worker's
|
2
|
+
# processing time is slow enough for us to collect a slow trace.
|
3
|
+
#
|
4
|
+
# Keeps track of a histogram of times for each worker class (separately), and
|
5
|
+
# uses a percentile of normal to mark individual runs as "slow".
|
6
|
+
#
|
7
|
+
# This assumes that all worker calls will be requested once to `slow?`, so that
|
8
|
+
# the data can be stored
|
4
9
|
module ScoutApm
|
5
10
|
class SlowJobPolicy
|
6
|
-
|
7
|
-
CAPTURE_DETAIL = "capture_detail",
|
8
|
-
CAPTURE_NONE = "capture_none",
|
9
|
-
]
|
10
|
-
|
11
|
-
# Adjust speed points. See the function
|
12
|
-
POINT_MULTIPLIER_SPEED = 0.25
|
13
|
-
|
14
|
-
# For each minute we haven't seen an endpoint
|
15
|
-
POINT_MULTIPLIER_AGE = 0.25
|
16
|
-
|
17
|
-
# Outliers are worth up to "1000ms" of weight
|
18
|
-
POINT_MULTIPLIER_PERCENTILE = 1.0
|
19
|
-
|
20
|
-
# A hash of Job Names to the last time we stored a slow trace for it.
|
21
|
-
#
|
22
|
-
# Defaults to a start time that is pretty close to application boot time.
|
23
|
-
# So the "age" of an endpoint we've never seen is the time the application
|
24
|
-
# has been running.
|
25
|
-
attr_reader :last_seen
|
11
|
+
DEFAULT_HISTOGRAM_SIZE = 50
|
26
12
|
|
13
|
+
QUANTILE = 95
|
27
14
|
|
28
|
-
def initialize
|
29
|
-
|
30
|
-
@last_seen = Hash.new { |h, k| h[k] = zero_time }
|
15
|
+
def initialize(histogram_size = DEFAULT_HISTOGRAM_SIZE)
|
16
|
+
@histograms = Hash.new { |h, k| h[k] = NumericHistogram.new(histogram_size) }
|
31
17
|
end
|
32
18
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
# make it into this minute's payload.
|
40
|
-
#
|
41
|
-
# Due to the combining nature of the agent & layaway file, there's no
|
42
|
-
# guarantee that a high scoring local champion will still be a winner when
|
43
|
-
# they go up to "regionals" and are compared against the other processes
|
44
|
-
# running on a node.
|
45
|
-
def score(request)
|
46
|
-
unique_name = request.unique_name
|
47
|
-
if unique_name == :unknown
|
48
|
-
return -1 # A negative score, should never be good enough to store.
|
49
|
-
end
|
50
|
-
|
51
|
-
total_time = request.root_layer.total_call_time
|
52
|
-
|
53
|
-
# How long has it been since we've seen this?
|
54
|
-
age = Time.now - last_seen[unique_name]
|
55
|
-
|
56
|
-
# What approximate percentile was this request?
|
57
|
-
percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
|
58
|
-
|
59
|
-
return speed_points(total_time) + percentile_points(percentile) + age_points(age)
|
60
|
-
end
|
61
|
-
|
62
|
-
private
|
63
|
-
|
64
|
-
def unique_name_for(request)
|
65
|
-
scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
|
66
|
-
if scope_layer
|
67
|
-
scope_layer.legacy_metric_name
|
68
|
-
else
|
69
|
-
:unknown
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
# Time in seconds
|
74
|
-
# Logarithm keeps huge times from swamping the other metrics.
|
75
|
-
# 1+ is necessary to keep the log function in positive territory.
|
76
|
-
def speed_points(time)
|
77
|
-
Math.log(1 + time) * POINT_MULTIPLIER_SPEED
|
78
|
-
end
|
79
|
-
|
80
|
-
def percentile_points(percentile)
|
81
|
-
if percentile < 40
|
82
|
-
0.4 # Don't put much emphasis on capturing low percentiles.
|
83
|
-
elsif percentile < 60
|
84
|
-
1.4 # Highest here to get mean traces
|
85
|
-
elsif percentile < 90
|
86
|
-
0.7 # Between 60 & 90% is fine.
|
87
|
-
elsif percentile >= 90
|
88
|
-
1.4 # Highest here to get 90+%ile traces
|
89
|
-
else
|
90
|
-
# impossible.
|
91
|
-
percentile
|
92
|
-
end
|
93
|
-
end
|
19
|
+
# worker: just the worker class name. "PasswordResetJob" or similar
|
20
|
+
# total_time: runtime of the job in seconds
|
21
|
+
# returns true if this request should be stored in higher trace detail, false otherwise
|
22
|
+
def slow?(worker, total_time)
|
23
|
+
@histograms[worker].add(total_time)
|
24
|
+
return false if @histograms[worker].total == 1 # First call is never slow
|
94
25
|
|
95
|
-
|
96
|
-
age / 60.0 * POINT_MULTIPLIER_AGE
|
26
|
+
total_time >= @histograms[worker].quantile(QUANTILE)
|
97
27
|
end
|
98
28
|
end
|
99
29
|
end
|
@@ -14,10 +14,13 @@ module ScoutApm
|
|
14
14
|
alias_method :total_call_time, :total_time
|
15
15
|
|
16
16
|
attr_reader :metrics
|
17
|
+
attr_reader :allocation_metrics
|
18
|
+
attr_reader :mem_delta
|
19
|
+
attr_reader :allocations
|
20
|
+
attr_reader :hostname
|
21
|
+
attr_reader :seconds_since_startup
|
17
22
|
|
18
|
-
|
19
|
-
|
20
|
-
def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, score)
|
23
|
+
def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations)
|
21
24
|
@queue_name = queue_name
|
22
25
|
@job_name = job_name
|
23
26
|
@time = time
|
@@ -25,28 +28,17 @@ module ScoutApm
|
|
25
28
|
@exclusive_time = exclusive_time
|
26
29
|
@context = context
|
27
30
|
@metrics = metrics
|
28
|
-
@
|
31
|
+
@allocation_metrics = allocation_metrics
|
32
|
+
@mem_delta = mem_delta
|
33
|
+
@allocations = allocations
|
34
|
+
@seconds_since_startup = (Time.now - ScoutApm::Agent.instance.process_start_time)
|
35
|
+
@hostname = ScoutApm::Environment.instance.hostname
|
36
|
+
ScoutApm::Agent.instance.logger.debug { "Slow Job [#{metric_name}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
|
29
37
|
end
|
30
38
|
|
31
39
|
def metric_name
|
32
40
|
"Job/#{queue_name}/#{job_name}"
|
33
41
|
end
|
34
42
|
|
35
|
-
########################
|
36
|
-
# Scorable interface
|
37
|
-
#
|
38
|
-
# Needed so we can merge ScoredItemSet instances
|
39
|
-
def call
|
40
|
-
self
|
41
|
-
end
|
42
|
-
|
43
|
-
def name
|
44
|
-
metric_name
|
45
|
-
end
|
46
|
-
|
47
|
-
def score
|
48
|
-
@score
|
49
|
-
end
|
50
|
-
|
51
43
|
end
|
52
44
|
end
|