scout_apm 1.6.8 → 2.0.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +8 -1
- data/CHANGELOG.markdown +7 -57
- data/ext/allocations/allocations.c +84 -0
- data/ext/allocations/extconf.rb +3 -0
- data/lib/scout_apm/agent/reporting.rb +9 -32
- data/lib/scout_apm/agent.rb +45 -31
- data/lib/scout_apm/app_server_load.rb +1 -2
- data/lib/scout_apm/attribute_arranger.rb +0 -4
- data/lib/scout_apm/background_worker.rb +6 -9
- data/lib/scout_apm/bucket_name_splitter.rb +3 -3
- data/lib/scout_apm/call_set.rb +1 -0
- data/lib/scout_apm/config.rb +110 -66
- data/lib/scout_apm/environment.rb +16 -10
- data/lib/scout_apm/framework_integrations/rails_2.rb +12 -14
- data/lib/scout_apm/framework_integrations/rails_3_or_4.rb +5 -17
- data/lib/scout_apm/framework_integrations/ruby.rb +0 -4
- data/lib/scout_apm/framework_integrations/sinatra.rb +0 -4
- data/lib/scout_apm/histogram.rb +0 -20
- data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +1 -4
- data/lib/scout_apm/instruments/active_record.rb +149 -8
- data/lib/scout_apm/instruments/mongoid.rb +5 -78
- data/lib/scout_apm/instruments/process/process_cpu.rb +0 -12
- data/lib/scout_apm/instruments/process/process_memory.rb +14 -43
- data/lib/scout_apm/layaway.rb +34 -134
- data/lib/scout_apm/layaway_file.rb +50 -27
- data/lib/scout_apm/layer.rb +45 -1
- data/lib/scout_apm/layer_converters/allocation_metric_converter.rb +17 -0
- data/lib/scout_apm/layer_converters/converter_base.rb +4 -6
- data/lib/scout_apm/layer_converters/job_converter.rb +1 -0
- data/lib/scout_apm/layer_converters/metric_converter.rb +2 -1
- data/lib/scout_apm/layer_converters/slow_job_converter.rb +42 -21
- data/lib/scout_apm/layer_converters/slow_request_converter.rb +58 -37
- data/lib/scout_apm/metric_meta.rb +1 -5
- data/lib/scout_apm/metric_set.rb +6 -15
- data/lib/scout_apm/reporter.rb +4 -6
- data/lib/scout_apm/serializers/metrics_to_json_serializer.rb +5 -1
- data/lib/scout_apm/serializers/payload_serializer_to_json.rb +1 -3
- data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +5 -3
- data/lib/scout_apm/slow_job_policy.rb +19 -89
- data/lib/scout_apm/slow_job_record.rb +12 -20
- data/lib/scout_apm/slow_request_policy.rb +12 -80
- data/lib/scout_apm/slow_transaction.rb +16 -20
- data/lib/scout_apm/stackprof_tree_collapser.rb +103 -0
- data/lib/scout_apm/store.rb +16 -78
- data/lib/scout_apm/tracked_request.rb +53 -36
- data/lib/scout_apm/utils/active_record_metric_name.rb +2 -0
- data/lib/scout_apm/utils/fake_stack_prof.rb +40 -0
- data/lib/scout_apm/utils/klass_helper.rb +26 -0
- data/lib/scout_apm/utils/sql_sanitizer.rb +1 -1
- data/lib/scout_apm/utils/sql_sanitizer_regex.rb +2 -2
- data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +2 -2
- data/lib/scout_apm/version.rb +1 -1
- data/lib/scout_apm.rb +13 -7
- data/scout_apm.gemspec +3 -1
- data/test/test_helper.rb +3 -4
- data/test/unit/layaway_test.rb +8 -5
- data/test/unit/serializers/payload_serializer_test.rb +2 -2
- data/test/unit/slow_item_set_test.rb +1 -2
- data/test/unit/sql_sanitizer_test.rb +0 -6
- metadata +28 -20
- data/LICENSE.md +0 -27
- data/lib/scout_apm/instruments/grape.rb +0 -69
- data/lib/scout_apm/instruments/percentile_sampler.rb +0 -37
- data/lib/scout_apm/request_histograms.rb +0 -46
- data/lib/scout_apm/scored_item_set.rb +0 -79
- data/test/unit/metric_set_test.rb +0 -101
- data/test/unit/scored_item_set_test.rb +0 -65
- data/test/unit/slow_request_policy_test.rb +0 -42
@@ -1,5 +1,8 @@
|
|
1
1
|
# Long running class that determines if, and in how much detail a potentially
|
2
2
|
# slow transaction should be recorded in
|
3
|
+
#
|
4
|
+
# Rules:
|
5
|
+
# - Runtime must be slower than a threshold
|
3
6
|
|
4
7
|
module ScoutApm
|
5
8
|
class SlowRequestPolicy
|
@@ -8,92 +11,21 @@ module ScoutApm
|
|
8
11
|
CAPTURE_NONE = "capture_none",
|
9
12
|
]
|
10
13
|
|
11
|
-
#
|
12
|
-
|
14
|
+
# It's not slow unless it's at least this slow
|
15
|
+
SLOW_REQUEST_TIME_THRESHOLD = 2.0 # seconds
|
13
16
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
# Outliers are worth up to "1000ms" of weight
|
18
|
-
POINT_MULTIPLIER_PERCENTILE = 1.0
|
19
|
-
|
20
|
-
# A hash of Endpoint Name to the last time we stored a slow transaction for it.
|
21
|
-
#
|
22
|
-
# Defaults to a start time that is pretty close to application boot time.
|
23
|
-
# So the "age" of an endpoint we've never seen is the time the application
|
24
|
-
# has been running.
|
25
|
-
attr_reader :last_seen
|
26
|
-
|
27
|
-
|
28
|
-
def initialize
|
29
|
-
zero_time = Time.now
|
30
|
-
@last_seen = Hash.new { |h, k| h[k] = zero_time }
|
31
|
-
end
|
32
|
-
|
33
|
-
def stored!(request)
|
34
|
-
last_seen[unique_name_for(request)] = Time.now
|
35
|
-
end
|
36
|
-
|
37
|
-
# Determine if this request trace should be fully analyzed by scoring it
|
38
|
-
# across several metrics, and then determining if that's good enough to
|
39
|
-
# make it into this minute's payload.
|
40
|
-
#
|
41
|
-
# Due to the combining nature of the agent & layaway file, there's no
|
42
|
-
# guarantee that a high scoring local champion will still be a winner when
|
43
|
-
# they go up to "regionals" and are compared against the other processes
|
44
|
-
# running on a node.
|
45
|
-
def score(request)
|
46
|
-
unique_name = request.unique_name
|
47
|
-
if unique_name == :unknown
|
48
|
-
return -1 # A negative score, should never be good enough to store.
|
49
|
-
end
|
50
|
-
|
51
|
-
total_time = request.root_layer.total_call_time
|
52
|
-
|
53
|
-
# How long has it been since we've seen this?
|
54
|
-
age = Time.now - last_seen[unique_name]
|
55
|
-
|
56
|
-
# What approximate percentile was this request?
|
57
|
-
percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
|
58
|
-
|
59
|
-
return speed_points(total_time) + percentile_points(percentile) + age_points(age)
|
60
|
-
end
|
61
|
-
|
62
|
-
private
|
63
|
-
|
64
|
-
def unique_name_for(request)
|
65
|
-
scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
|
66
|
-
if scope_layer
|
67
|
-
scope_layer.legacy_metric_name
|
17
|
+
def capture_type(time)
|
18
|
+
if !slow_enough?(time)
|
19
|
+
CAPTURE_NONE
|
68
20
|
else
|
69
|
-
|
21
|
+
CAPTURE_DETAIL
|
70
22
|
end
|
71
23
|
end
|
72
24
|
|
73
|
-
|
74
|
-
# Logarithm keeps huge times from swamping the other metrics.
|
75
|
-
# 1+ is necessary to keep the log function in positive territory.
|
76
|
-
def speed_points(time)
|
77
|
-
Math.log(1 + time) * POINT_MULTIPLIER_SPEED
|
78
|
-
end
|
79
|
-
|
80
|
-
def percentile_points(percentile)
|
81
|
-
if percentile < 40
|
82
|
-
0.4 # Don't put much emphasis on capturing low percentiles.
|
83
|
-
elsif percentile < 60
|
84
|
-
1.4 # Highest here to get mean traces
|
85
|
-
elsif percentile < 90
|
86
|
-
0.7 # Between 60 & 90% is fine.
|
87
|
-
elsif percentile >= 90
|
88
|
-
1.4 # Highest here to get 90+%ile traces
|
89
|
-
else
|
90
|
-
# impossible.
|
91
|
-
percentile
|
92
|
-
end
|
93
|
-
end
|
25
|
+
private
|
94
26
|
|
95
|
-
def
|
96
|
-
|
27
|
+
def slow_enough?(time)
|
28
|
+
time > SLOW_REQUEST_TIME_THRESHOLD
|
97
29
|
end
|
98
30
|
end
|
99
31
|
end
|
@@ -5,21 +5,33 @@ module ScoutApm
|
|
5
5
|
attr_reader :metric_name
|
6
6
|
attr_reader :total_call_time
|
7
7
|
attr_reader :metrics
|
8
|
+
attr_reader :allocation_metrics
|
8
9
|
attr_reader :meta
|
9
10
|
attr_reader :uri
|
10
11
|
attr_reader :context
|
11
12
|
attr_reader :time
|
12
13
|
attr_reader :prof
|
14
|
+
attr_reader :raw_prof
|
15
|
+
attr_reader :mem_delta
|
16
|
+
attr_reader :allocations
|
17
|
+
attr_accessor :hostname # hack - we need to reset these server side.
|
18
|
+
attr_accessor :seconds_since_startup # hack - we need to reset these server side.
|
13
19
|
|
14
|
-
def initialize(uri, metric_name, total_call_time, metrics, context, time, raw_stackprof,
|
20
|
+
def initialize(uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations)
|
15
21
|
@uri = uri
|
16
22
|
@metric_name = metric_name
|
17
23
|
@total_call_time = total_call_time
|
18
24
|
@metrics = metrics
|
25
|
+
@allocation_metrics = allocation_metrics
|
19
26
|
@context = context
|
20
27
|
@time = time
|
21
|
-
@prof =
|
22
|
-
@
|
28
|
+
@prof = ScoutApm::StackprofTreeCollapser.new(raw_stackprof).call
|
29
|
+
@raw_prof = raw_stackprof # Send whole data up to server
|
30
|
+
@mem_delta = mem_delta
|
31
|
+
@allocations = allocations
|
32
|
+
@seconds_since_startup = (Time.now - ScoutApm::Agent.instance.process_start_time)
|
33
|
+
@hostname = ScoutApm::Environment.instance.hostname
|
34
|
+
ScoutApm::Agent.instance.logger.debug { "Slow Request [#{uri}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
|
23
35
|
end
|
24
36
|
|
25
37
|
# Used to remove metrics when the payload will be too large.
|
@@ -33,28 +45,12 @@ module ScoutApm
|
|
33
45
|
end
|
34
46
|
|
35
47
|
def as_json
|
36
|
-
json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :prof, :
|
48
|
+
json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :prof, :mem_delta, :allocations, :seconds_since_startup, :hostname]
|
37
49
|
ScoutApm::AttributeArranger.call(self, json_attributes)
|
38
50
|
end
|
39
51
|
|
40
52
|
def context_hash
|
41
53
|
context.to_hash
|
42
54
|
end
|
43
|
-
|
44
|
-
########################
|
45
|
-
# Scorable interface
|
46
|
-
#
|
47
|
-
# Needed so we can merge ScoredItemSet instances
|
48
|
-
def call
|
49
|
-
self
|
50
|
-
end
|
51
|
-
|
52
|
-
def name
|
53
|
-
metric_name
|
54
|
-
end
|
55
|
-
|
56
|
-
def score
|
57
|
-
@score
|
58
|
-
end
|
59
55
|
end
|
60
56
|
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module ScoutApm
|
2
|
+
class StackprofTreeCollapser
|
3
|
+
attr_reader :raw_stackprof
|
4
|
+
attr_reader :nodes
|
5
|
+
|
6
|
+
def initialize(raw_stackprof)
|
7
|
+
@raw_stackprof = raw_stackprof
|
8
|
+
|
9
|
+
# Log the raw stackprof info
|
10
|
+
#unless StackProf.respond_to?(:fake?) && StackProf.fake?
|
11
|
+
# begin
|
12
|
+
# ScoutApm::Agent.instance.logger.debug("StackProf - Samples: #{raw_stackprof[:samples]}, GC: #{raw_stackprof[:gc_samples]}, missed: #{raw_stackprof[:missed_samples]}, Interval: #{raw_stackprof[:interval]}")
|
13
|
+
# rescue
|
14
|
+
# ScoutApm::Agent.instance.logger.debug("StackProf Raw - #{raw_stackprof.inspect}")
|
15
|
+
# end
|
16
|
+
#end
|
17
|
+
end
|
18
|
+
|
19
|
+
def call
|
20
|
+
build_tree
|
21
|
+
connect_children
|
22
|
+
total_samples_of_app_nodes
|
23
|
+
rescue
|
24
|
+
[]
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
def build_tree
|
30
|
+
@nodes = raw_stackprof[:frames].map do |(frame_id, frame_data)|
|
31
|
+
TreeNode.new(frame_id, # frame_id
|
32
|
+
frame_data[:name], # name
|
33
|
+
frame_data[:file], # file
|
34
|
+
frame_data[:line], # line
|
35
|
+
frame_data[:samples], # samples
|
36
|
+
frame_data[:total_samples], # total_samples
|
37
|
+
(frame_data[:edges] || {}), # children_edges [ { id => weight } ]
|
38
|
+
[], # children [ treenode, ... ]
|
39
|
+
[] # parents [ [treenode, int (weight) ], [...] ]
|
40
|
+
)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def connect_children
|
45
|
+
nodes.each do |node|
|
46
|
+
children = nodes.find_all { |n| node.children_edges.keys.include? n.frame_id }
|
47
|
+
|
48
|
+
node.children_edges.each do |(frame_id, weight)|
|
49
|
+
child = children.detect{ |c| c.frame_id == frame_id }
|
50
|
+
child.parents << [node, weight]
|
51
|
+
end
|
52
|
+
|
53
|
+
node.children = children
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def in_app_nodes
|
58
|
+
nodes.select {|n| n.app? }
|
59
|
+
end
|
60
|
+
|
61
|
+
def total_samples_of_app_nodes
|
62
|
+
in_app_nodes.reject{|n| n.calls_only_app_nodes? && !n.has_samples? }.
|
63
|
+
map{|n| { :samples => n.total_samples,
|
64
|
+
:name => n.name,
|
65
|
+
:file => n.file,
|
66
|
+
:line => n.line
|
67
|
+
}
|
68
|
+
}
|
69
|
+
end
|
70
|
+
|
71
|
+
###########################################
|
72
|
+
# TreeNode class represents a single node.
|
73
|
+
###########################################
|
74
|
+
TreeNode = Struct.new(:frame_id, :name, :file, :line, :samples, :total_samples,
|
75
|
+
:children_edges, :children, :parents) do
|
76
|
+
def app?
|
77
|
+
@is_app ||= file =~ /^#{ScoutApm::Environment.instance.root}/
|
78
|
+
end
|
79
|
+
|
80
|
+
# Force object_id to be the equality mechanism, rather than struct's
|
81
|
+
# default which delegates to == on each value. That is wrong because
|
82
|
+
# we want to be able to dup a node in the tree construction process and
|
83
|
+
# not have those compare equal to each other.
|
84
|
+
def ==(other)
|
85
|
+
object_id == other.object_id
|
86
|
+
end
|
87
|
+
|
88
|
+
def inspect
|
89
|
+
"#{frame_id}: #{name} - ##{samples}\n" +
|
90
|
+
" Parents: #{parents.map{ |(p, w)| "#{p.name}: #{w}"}.join("\n ") }\n" +
|
91
|
+
" Children: #{children_edges.inspect} \n"
|
92
|
+
end
|
93
|
+
|
94
|
+
def calls_only_app_nodes?
|
95
|
+
children.all?(&:app?)
|
96
|
+
end
|
97
|
+
|
98
|
+
def has_samples?
|
99
|
+
samples > 0
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
data/lib/scout_apm/store.rb
CHANGED
@@ -6,13 +6,9 @@ module ScoutApm
|
|
6
6
|
# A hash of reporting periods. { StoreReportingPeriodTimestamp => StoreReportingPeriod }
|
7
7
|
attr_reader :reporting_periods
|
8
8
|
|
9
|
-
# Used to pull metrics into each reporting period, as that reporting period is finished.
|
10
|
-
attr_reader :samplers
|
11
|
-
|
12
9
|
def initialize
|
13
10
|
@mutex = Mutex.new
|
14
11
|
@reporting_periods = Hash.new { |h,k| h[k] = StoreReportingPeriod.new(k) }
|
15
|
-
@samplers = []
|
16
12
|
end
|
17
13
|
|
18
14
|
def current_timestamp
|
@@ -26,7 +22,7 @@ module ScoutApm
|
|
26
22
|
# Save newly collected metrics
|
27
23
|
def track!(metrics, options={})
|
28
24
|
@mutex.synchronize {
|
29
|
-
current_period.
|
25
|
+
current_period.merge_metrics!(metrics)
|
30
26
|
}
|
31
27
|
end
|
32
28
|
|
@@ -70,30 +66,11 @@ module ScoutApm
|
|
70
66
|
@mutex.synchronize {
|
71
67
|
reporting_periods.select { |time, rp| force || time.timestamp < current_timestamp.timestamp}.
|
72
68
|
each { |time, rp|
|
73
|
-
|
74
|
-
layaway.write_reporting_period(rp)
|
69
|
+
layaway.add_reporting_period(time, rp)
|
75
70
|
reporting_periods.delete(time)
|
76
71
|
}
|
77
72
|
}
|
78
|
-
|
79
|
-
|
80
|
-
######################################
|
81
|
-
# Sampler support
|
82
|
-
def add_sampler(sampler)
|
83
|
-
@samplers << sampler
|
84
|
-
end
|
85
|
-
|
86
|
-
def collect_samplers(rp)
|
87
|
-
@samplers.each do |sampler|
|
88
|
-
begin
|
89
|
-
metrics = sampler.metrics(rp.timestamp)
|
90
|
-
rp.absorb_metrics!(metrics)
|
91
|
-
rescue => e
|
92
|
-
ScoutApm::Agent.instance.logger.info "Error reading #{sampler.human_name} for period: #{rp}"
|
93
|
-
ScoutApm::Agent.instance.logger.debug e.message
|
94
|
-
ScoutApm::Agent.instance.logger.debug e.backtrace.join("\n")
|
95
|
-
end
|
96
|
-
end
|
73
|
+
ScoutApm::Agent.instance.logger.debug("Finished writing to layaway")
|
97
74
|
end
|
98
75
|
end
|
99
76
|
|
@@ -107,25 +84,8 @@ module ScoutApm
|
|
107
84
|
@timestamp = @raw_time.to_i - @raw_time.sec # The normalized time (integer) to compare by
|
108
85
|
end
|
109
86
|
|
110
|
-
def self.minutes_ago(min, base_time=Time.now)
|
111
|
-
adjusted = base_time - (min * 60)
|
112
|
-
new(adjusted)
|
113
|
-
end
|
114
|
-
|
115
87
|
def to_s
|
116
|
-
|
117
|
-
end
|
118
|
-
|
119
|
-
def strftime(pattern=nil)
|
120
|
-
if pattern.nil?
|
121
|
-
to_time.iso8601
|
122
|
-
else
|
123
|
-
to_time.strftime(pattern)
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
def to_time
|
128
|
-
Time.at(@timestamp)
|
88
|
+
Time.at(@timestamp).iso8601
|
129
89
|
end
|
130
90
|
|
131
91
|
def eql?(o)
|
@@ -147,12 +107,11 @@ module ScoutApm
|
|
147
107
|
|
148
108
|
# One period of Storage. Typically 1 minute
|
149
109
|
class StoreReportingPeriod
|
110
|
+
# A SlowItemSet to store slow transactions in
|
111
|
+
attr_reader :slow_transactions
|
150
112
|
|
151
|
-
# A
|
152
|
-
attr_reader :
|
153
|
-
|
154
|
-
# A ScoredItemSet holding the "best" traces for the period
|
155
|
-
attr_reader :job_traces
|
113
|
+
# A SlowItemSet to store slow jobs in
|
114
|
+
attr_reader :slow_jobs
|
156
115
|
|
157
116
|
# A StoreReportingPeriodTimestamp representing the time that this
|
158
117
|
# collection of metrics is for
|
@@ -163,50 +122,31 @@ module ScoutApm
|
|
163
122
|
def initialize(timestamp)
|
164
123
|
@timestamp = timestamp
|
165
124
|
|
166
|
-
@
|
167
|
-
@
|
125
|
+
@slow_transactions = SlowItemSet.new
|
126
|
+
@slow_jobs = SlowItemSet.new
|
168
127
|
|
169
128
|
@metric_set = MetricSet.new
|
170
129
|
@jobs = Hash.new
|
171
130
|
end
|
172
131
|
|
173
|
-
# Merges another StoreReportingPeriod into this one
|
174
|
-
def merge(other)
|
175
|
-
self.
|
176
|
-
merge_metrics!(other.metric_set).
|
177
|
-
merge_slow_transactions!(other.slow_transactions_payload).
|
178
|
-
merge_jobs!(other.jobs).
|
179
|
-
merge_slow_jobs!(other.slow_jobs_payload)
|
180
|
-
self
|
181
|
-
end
|
182
|
-
|
183
132
|
#################################
|
184
133
|
# Add metrics as they are recorded
|
185
134
|
#################################
|
186
|
-
|
187
|
-
# For absorbing an array of metric {Meta => Stat} records
|
188
|
-
def absorb_metrics!(metrics)
|
135
|
+
def merge_metrics!(metrics)
|
189
136
|
metric_set.absorb_all(metrics)
|
190
137
|
self
|
191
138
|
end
|
192
139
|
|
193
|
-
# For merging when you have another metric_set object
|
194
|
-
# Makes sure that you don't duplicate error count records
|
195
|
-
def merge_metrics!(other_metric_set)
|
196
|
-
metric_set.combine!(other_metric_set)
|
197
|
-
self
|
198
|
-
end
|
199
|
-
|
200
140
|
def merge_slow_transactions!(new_transactions)
|
201
141
|
Array(new_transactions).each do |one_transaction|
|
202
|
-
|
142
|
+
slow_transactions << one_transaction
|
203
143
|
end
|
204
144
|
|
205
145
|
self
|
206
146
|
end
|
207
147
|
|
208
148
|
def merge_jobs!(jobs)
|
209
|
-
|
149
|
+
jobs.each do |job|
|
210
150
|
if @jobs.has_key?(job)
|
211
151
|
@jobs[job].combine!(job)
|
212
152
|
else
|
@@ -219,10 +159,8 @@ module ScoutApm
|
|
219
159
|
|
220
160
|
def merge_slow_jobs!(new_jobs)
|
221
161
|
Array(new_jobs).each do |job|
|
222
|
-
|
162
|
+
slow_jobs << job
|
223
163
|
end
|
224
|
-
|
225
|
-
self
|
226
164
|
end
|
227
165
|
|
228
166
|
#################################
|
@@ -233,7 +171,7 @@ module ScoutApm
|
|
233
171
|
end
|
234
172
|
|
235
173
|
def slow_transactions_payload
|
236
|
-
|
174
|
+
slow_transactions.to_a
|
237
175
|
end
|
238
176
|
|
239
177
|
def jobs
|
@@ -241,7 +179,7 @@ module ScoutApm
|
|
241
179
|
end
|
242
180
|
|
243
181
|
def slow_jobs_payload
|
244
|
-
|
182
|
+
slow_jobs.to_a
|
245
183
|
end
|
246
184
|
|
247
185
|
#################################
|
@@ -22,6 +22,10 @@ module ScoutApm
|
|
22
22
|
# :queue_latency - how long a background Job spent in the queue before starting processing
|
23
23
|
attr_reader :annotations
|
24
24
|
|
25
|
+
# Nil until the request is finalized, at which point it will hold the
|
26
|
+
# entire raw stackprof output for this request
|
27
|
+
attr_reader :stackprof
|
28
|
+
|
25
29
|
# Headers as recorded by rails
|
26
30
|
# Can be nil if we never reach a Rails Controller
|
27
31
|
attr_reader :headers
|
@@ -39,12 +43,14 @@ module ScoutApm
|
|
39
43
|
|
40
44
|
def initialize
|
41
45
|
@layers = []
|
42
|
-
@
|
46
|
+
@call_set = Hash.new { |h, k| h[k] = CallSet.new }
|
43
47
|
@annotations = {}
|
44
48
|
@ignoring_children = false
|
45
49
|
@context = Context.new
|
46
50
|
@root_layer = nil
|
51
|
+
@stackprof = nil
|
47
52
|
@error = false
|
53
|
+
@mem_start = mem_usage
|
48
54
|
end
|
49
55
|
|
50
56
|
def start_layer(layer)
|
@@ -53,7 +59,6 @@ module ScoutApm
|
|
53
59
|
end
|
54
60
|
|
55
61
|
start_request(layer) unless @root_layer
|
56
|
-
update_call_counts!(layer)
|
57
62
|
@layers[-1].add_child(layer) if @layers.any?
|
58
63
|
@layers.push(layer)
|
59
64
|
end
|
@@ -63,7 +68,11 @@ module ScoutApm
|
|
63
68
|
|
64
69
|
layer = @layers.pop
|
65
70
|
layer.record_stop_time!
|
71
|
+
layer.record_allocations!
|
66
72
|
|
73
|
+
# This must be called before checking if a backtrace should be collected as the call count influences our capture logic.
|
74
|
+
# We call `#update_call_counts in stop layer to ensure the layer has a final desc. Layer#desc is updated during the AR instrumentation flow.
|
75
|
+
update_call_counts!(layer)
|
67
76
|
if capture_backtrace?(layer)
|
68
77
|
layer.capture_backtrace!
|
69
78
|
end
|
@@ -73,6 +82,18 @@ module ScoutApm
|
|
73
82
|
end
|
74
83
|
end
|
75
84
|
|
85
|
+
# Grab the currently running layer. Useful for adding additional data as we
|
86
|
+
# learn it. This is useful in ActiveRecord instruments, where we start the
|
87
|
+
# instrumentation early, and gradually learn more about the request that
|
88
|
+
# actually happened as we go (for instance, the # of records found, or the
|
89
|
+
# actual SQL generated).
|
90
|
+
#
|
91
|
+
# Returns nil in the case there is no current layer. That would be normal
|
92
|
+
# for a completed TrackedRequest
|
93
|
+
def current_layer
|
94
|
+
@layers.last
|
95
|
+
end
|
96
|
+
|
76
97
|
BACKTRACE_BLACKLIST = ["Controller", "Job"]
|
77
98
|
def capture_backtrace?(layer)
|
78
99
|
# Never capture backtraces for this kind of layer. The backtrace will
|
@@ -88,7 +109,7 @@ module ScoutApm
|
|
88
109
|
return true if layer.total_exclusive_time > BACKTRACE_THRESHOLD
|
89
110
|
|
90
111
|
# Capture any layer that we've seen many times. Captures n+1 problems
|
91
|
-
return true if @
|
112
|
+
return true if @call_set[layer.name].capture_backtrace?
|
92
113
|
|
93
114
|
# Don't capture otherwise
|
94
115
|
false
|
@@ -96,7 +117,16 @@ module ScoutApm
|
|
96
117
|
|
97
118
|
# Maintains a lookup Hash of call counts by layer name. Used to determine if we should capture a backtrace.
|
98
119
|
def update_call_counts!(layer)
|
99
|
-
@
|
120
|
+
@call_set[layer.name].update!(layer.desc)
|
121
|
+
end
|
122
|
+
|
123
|
+
# This may be in bytes or KB based on the OSX. We store this as-is here and only do conversion to MB in Layer Converters.
|
124
|
+
def mem_usage
|
125
|
+
ScoutApm::Instruments::Process::ProcessMemory.rss
|
126
|
+
end
|
127
|
+
|
128
|
+
def capture_mem_delta!
|
129
|
+
@mem_delta = mem_usage - @mem_start
|
100
130
|
end
|
101
131
|
|
102
132
|
###################################
|
@@ -112,14 +142,21 @@ module ScoutApm
|
|
112
142
|
# Run at the beginning of the whole request
|
113
143
|
#
|
114
144
|
# * Capture the first layer as the root_layer
|
145
|
+
# * Start Stackprof (disabling to avoid conflicts if stackprof is included as middleware since we aren't sending this up to server now)
|
115
146
|
def start_request(layer)
|
116
147
|
@root_layer = layer unless @root_layer # capture root layer
|
148
|
+
#StackProf.start(:mode => :wall, :interval => ScoutApm::Agent.instance.config.value("stackprof_interval"))
|
117
149
|
end
|
118
150
|
|
119
151
|
# Run at the end of the whole request
|
120
152
|
#
|
153
|
+
# * Collect stackprof info
|
121
154
|
# * Send the request off to be stored
|
122
155
|
def stop_request
|
156
|
+
# ScoutApm::Agent.instance.logger.debug("stop_request: #{annotations[:uri]}" )
|
157
|
+
#StackProf.stop # disabling to avoid conflicts if stackprof is included as middleware since we aren't sending this up to server now
|
158
|
+
#@stackprof = StackProf.results
|
159
|
+
|
123
160
|
record!
|
124
161
|
end
|
125
162
|
|
@@ -174,47 +211,27 @@ module ScoutApm
|
|
174
211
|
def record!
|
175
212
|
@recorded = true
|
176
213
|
|
177
|
-
# Update immediate and long-term histograms for both job and web requests
|
178
|
-
if unique_name != :unknown
|
179
|
-
ScoutApm::Agent.instance.request_histograms.add(unique_name, root_layer.total_call_time)
|
180
|
-
ScoutApm::Agent.instance.request_histograms_by_time[ScoutApm::Agent.instance.store.current_timestamp].
|
181
|
-
add(unique_name, root_layer.total_call_time)
|
182
|
-
end
|
183
|
-
|
184
214
|
metrics = LayerConverters::MetricConverter.new(self).call
|
185
215
|
ScoutApm::Agent.instance.store.track!(metrics)
|
186
216
|
|
217
|
+
slow, slow_metrics = LayerConverters::SlowRequestConverter.new(self).call
|
218
|
+
ScoutApm::Agent.instance.store.track_slow_transaction!(slow)
|
219
|
+
ScoutApm::Agent.instance.store.track!(slow_metrics)
|
220
|
+
|
187
221
|
error_metrics = LayerConverters::ErrorConverter.new(self).call
|
188
222
|
ScoutApm::Agent.instance.store.track!(error_metrics)
|
189
223
|
|
190
|
-
|
191
|
-
|
192
|
-
slow_converter = LayerConverters::SlowRequestConverter.new(self)
|
193
|
-
ScoutApm::Agent.instance.store.track_slow_transaction!(slow_converter)
|
224
|
+
queue_time_metrics = LayerConverters::RequestQueueTimeConverter.new(self).call
|
225
|
+
ScoutApm::Agent.instance.store.track!(queue_time_metrics)
|
194
226
|
|
195
|
-
|
196
|
-
|
197
|
-
end
|
198
|
-
|
199
|
-
if job?
|
200
|
-
job_metrics = LayerConverters::JobConverter.new(self).call
|
201
|
-
ScoutApm::Agent.instance.store.track_job!(job_metrics)
|
227
|
+
job = LayerConverters::JobConverter.new(self).call
|
228
|
+
ScoutApm::Agent.instance.store.track_job!(job)
|
202
229
|
|
203
|
-
|
204
|
-
|
205
|
-
end
|
206
|
-
end
|
230
|
+
slow_job = LayerConverters::SlowJobConverter.new(self).call
|
231
|
+
ScoutApm::Agent.instance.store.track_slow_job!(slow_job)
|
207
232
|
|
208
|
-
|
209
|
-
|
210
|
-
@unique_name ||= begin
|
211
|
-
scope_layer = LayerConverters::ConverterBase.new(self).scope_layer
|
212
|
-
if scope_layer
|
213
|
-
scope_layer.legacy_metric_name
|
214
|
-
else
|
215
|
-
:unknown
|
216
|
-
end
|
217
|
-
end
|
233
|
+
allocation_metrics = LayerConverters::AllocationMetricConverter.new(self).call
|
234
|
+
ScoutApm::Agent.instance.store.track!(allocation_metrics)
|
218
235
|
end
|
219
236
|
|
220
237
|
# Have we already persisted this request?
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# A fake implementation of stackprof, for systems that don't support it.
|
2
|
+
module StackProf
|
3
|
+
def self.start(*args)
|
4
|
+
@running = true
|
5
|
+
end
|
6
|
+
|
7
|
+
def self.stop(*args)
|
8
|
+
@running = false
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.running?
|
12
|
+
!!@running
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.run(*args)
|
16
|
+
start
|
17
|
+
yield
|
18
|
+
stop
|
19
|
+
results
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.sample(*args)
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.results(*args)
|
26
|
+
{
|
27
|
+
:version => 0.0,
|
28
|
+
:mode => :wall,
|
29
|
+
:interval => 1000,
|
30
|
+
:samples => 0,
|
31
|
+
:gc_samples => 0,
|
32
|
+
:missed_samples => 0,
|
33
|
+
:frames => {},
|
34
|
+
}
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.fake?
|
38
|
+
true
|
39
|
+
end
|
40
|
+
end
|