scout_apm 1.6.8 → 2.0.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +8 -1
- data/CHANGELOG.markdown +7 -57
- data/ext/allocations/allocations.c +84 -0
- data/ext/allocations/extconf.rb +3 -0
- data/lib/scout_apm/agent/reporting.rb +9 -32
- data/lib/scout_apm/agent.rb +45 -31
- data/lib/scout_apm/app_server_load.rb +1 -2
- data/lib/scout_apm/attribute_arranger.rb +0 -4
- data/lib/scout_apm/background_worker.rb +6 -9
- data/lib/scout_apm/bucket_name_splitter.rb +3 -3
- data/lib/scout_apm/call_set.rb +1 -0
- data/lib/scout_apm/config.rb +110 -66
- data/lib/scout_apm/environment.rb +16 -10
- data/lib/scout_apm/framework_integrations/rails_2.rb +12 -14
- data/lib/scout_apm/framework_integrations/rails_3_or_4.rb +5 -17
- data/lib/scout_apm/framework_integrations/ruby.rb +0 -4
- data/lib/scout_apm/framework_integrations/sinatra.rb +0 -4
- data/lib/scout_apm/histogram.rb +0 -20
- data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +1 -4
- data/lib/scout_apm/instruments/active_record.rb +149 -8
- data/lib/scout_apm/instruments/mongoid.rb +5 -78
- data/lib/scout_apm/instruments/process/process_cpu.rb +0 -12
- data/lib/scout_apm/instruments/process/process_memory.rb +14 -43
- data/lib/scout_apm/layaway.rb +34 -134
- data/lib/scout_apm/layaway_file.rb +50 -27
- data/lib/scout_apm/layer.rb +45 -1
- data/lib/scout_apm/layer_converters/allocation_metric_converter.rb +17 -0
- data/lib/scout_apm/layer_converters/converter_base.rb +4 -6
- data/lib/scout_apm/layer_converters/job_converter.rb +1 -0
- data/lib/scout_apm/layer_converters/metric_converter.rb +2 -1
- data/lib/scout_apm/layer_converters/slow_job_converter.rb +42 -21
- data/lib/scout_apm/layer_converters/slow_request_converter.rb +58 -37
- data/lib/scout_apm/metric_meta.rb +1 -5
- data/lib/scout_apm/metric_set.rb +6 -15
- data/lib/scout_apm/reporter.rb +4 -6
- data/lib/scout_apm/serializers/metrics_to_json_serializer.rb +5 -1
- data/lib/scout_apm/serializers/payload_serializer_to_json.rb +1 -3
- data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +5 -3
- data/lib/scout_apm/slow_job_policy.rb +19 -89
- data/lib/scout_apm/slow_job_record.rb +12 -20
- data/lib/scout_apm/slow_request_policy.rb +12 -80
- data/lib/scout_apm/slow_transaction.rb +16 -20
- data/lib/scout_apm/stackprof_tree_collapser.rb +103 -0
- data/lib/scout_apm/store.rb +16 -78
- data/lib/scout_apm/tracked_request.rb +53 -36
- data/lib/scout_apm/utils/active_record_metric_name.rb +2 -0
- data/lib/scout_apm/utils/fake_stack_prof.rb +40 -0
- data/lib/scout_apm/utils/klass_helper.rb +26 -0
- data/lib/scout_apm/utils/sql_sanitizer.rb +1 -1
- data/lib/scout_apm/utils/sql_sanitizer_regex.rb +2 -2
- data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +2 -2
- data/lib/scout_apm/version.rb +1 -1
- data/lib/scout_apm.rb +13 -7
- data/scout_apm.gemspec +3 -1
- data/test/test_helper.rb +3 -4
- data/test/unit/layaway_test.rb +8 -5
- data/test/unit/serializers/payload_serializer_test.rb +2 -2
- data/test/unit/slow_item_set_test.rb +1 -2
- data/test/unit/sql_sanitizer_test.rb +0 -6
- metadata +28 -20
- data/LICENSE.md +0 -27
- data/lib/scout_apm/instruments/grape.rb +0 -69
- data/lib/scout_apm/instruments/percentile_sampler.rb +0 -37
- data/lib/scout_apm/request_histograms.rb +0 -46
- data/lib/scout_apm/scored_item_set.rb +0 -79
- data/test/unit/metric_set_test.rb +0 -101
- data/test/unit/scored_item_set_test.rb +0 -65
- data/test/unit/slow_request_policy_test.rb +0 -42
@@ -1,5 +1,8 @@
|
|
1
1
|
# Long running class that determines if, and in how much detail a potentially
|
2
2
|
# slow transaction should be recorded in
|
3
|
+
#
|
4
|
+
# Rules:
|
5
|
+
# - Runtime must be slower than a threshold
|
3
6
|
|
4
7
|
module ScoutApm
|
5
8
|
class SlowRequestPolicy
|
@@ -8,92 +11,21 @@ module ScoutApm
|
|
8
11
|
CAPTURE_NONE = "capture_none",
|
9
12
|
]
|
10
13
|
|
11
|
-
#
|
12
|
-
|
14
|
+
# It's not slow unless it's at least this slow
|
15
|
+
SLOW_REQUEST_TIME_THRESHOLD = 2.0 # seconds
|
13
16
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
# Outliers are worth up to "1000ms" of weight
|
18
|
-
POINT_MULTIPLIER_PERCENTILE = 1.0
|
19
|
-
|
20
|
-
# A hash of Endpoint Name to the last time we stored a slow transaction for it.
|
21
|
-
#
|
22
|
-
# Defaults to a start time that is pretty close to application boot time.
|
23
|
-
# So the "age" of an endpoint we've never seen is the time the application
|
24
|
-
# has been running.
|
25
|
-
attr_reader :last_seen
|
26
|
-
|
27
|
-
|
28
|
-
def initialize
|
29
|
-
zero_time = Time.now
|
30
|
-
@last_seen = Hash.new { |h, k| h[k] = zero_time }
|
31
|
-
end
|
32
|
-
|
33
|
-
def stored!(request)
|
34
|
-
last_seen[unique_name_for(request)] = Time.now
|
35
|
-
end
|
36
|
-
|
37
|
-
# Determine if this request trace should be fully analyzed by scoring it
|
38
|
-
# across several metrics, and then determining if that's good enough to
|
39
|
-
# make it into this minute's payload.
|
40
|
-
#
|
41
|
-
# Due to the combining nature of the agent & layaway file, there's no
|
42
|
-
# guarantee that a high scoring local champion will still be a winner when
|
43
|
-
# they go up to "regionals" and are compared against the other processes
|
44
|
-
# running on a node.
|
45
|
-
def score(request)
|
46
|
-
unique_name = request.unique_name
|
47
|
-
if unique_name == :unknown
|
48
|
-
return -1 # A negative score, should never be good enough to store.
|
49
|
-
end
|
50
|
-
|
51
|
-
total_time = request.root_layer.total_call_time
|
52
|
-
|
53
|
-
# How long has it been since we've seen this?
|
54
|
-
age = Time.now - last_seen[unique_name]
|
55
|
-
|
56
|
-
# What approximate percentile was this request?
|
57
|
-
percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
|
58
|
-
|
59
|
-
return speed_points(total_time) + percentile_points(percentile) + age_points(age)
|
60
|
-
end
|
61
|
-
|
62
|
-
private
|
63
|
-
|
64
|
-
def unique_name_for(request)
|
65
|
-
scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
|
66
|
-
if scope_layer
|
67
|
-
scope_layer.legacy_metric_name
|
17
|
+
def capture_type(time)
|
18
|
+
if !slow_enough?(time)
|
19
|
+
CAPTURE_NONE
|
68
20
|
else
|
69
|
-
|
21
|
+
CAPTURE_DETAIL
|
70
22
|
end
|
71
23
|
end
|
72
24
|
|
73
|
-
|
74
|
-
# Logarithm keeps huge times from swamping the other metrics.
|
75
|
-
# 1+ is necessary to keep the log function in positive territory.
|
76
|
-
def speed_points(time)
|
77
|
-
Math.log(1 + time) * POINT_MULTIPLIER_SPEED
|
78
|
-
end
|
79
|
-
|
80
|
-
def percentile_points(percentile)
|
81
|
-
if percentile < 40
|
82
|
-
0.4 # Don't put much emphasis on capturing low percentiles.
|
83
|
-
elsif percentile < 60
|
84
|
-
1.4 # Highest here to get mean traces
|
85
|
-
elsif percentile < 90
|
86
|
-
0.7 # Between 60 & 90% is fine.
|
87
|
-
elsif percentile >= 90
|
88
|
-
1.4 # Highest here to get 90+%ile traces
|
89
|
-
else
|
90
|
-
# impossible.
|
91
|
-
percentile
|
92
|
-
end
|
93
|
-
end
|
25
|
+
private
|
94
26
|
|
95
|
-
def
|
96
|
-
|
27
|
+
def slow_enough?(time)
|
28
|
+
time > SLOW_REQUEST_TIME_THRESHOLD
|
97
29
|
end
|
98
30
|
end
|
99
31
|
end
|
@@ -5,21 +5,33 @@ module ScoutApm
|
|
5
5
|
attr_reader :metric_name
|
6
6
|
attr_reader :total_call_time
|
7
7
|
attr_reader :metrics
|
8
|
+
attr_reader :allocation_metrics
|
8
9
|
attr_reader :meta
|
9
10
|
attr_reader :uri
|
10
11
|
attr_reader :context
|
11
12
|
attr_reader :time
|
12
13
|
attr_reader :prof
|
14
|
+
attr_reader :raw_prof
|
15
|
+
attr_reader :mem_delta
|
16
|
+
attr_reader :allocations
|
17
|
+
attr_accessor :hostname # hack - we need to reset these server side.
|
18
|
+
attr_accessor :seconds_since_startup # hack - we need to reset these server side.
|
13
19
|
|
14
|
-
def initialize(uri, metric_name, total_call_time, metrics, context, time, raw_stackprof,
|
20
|
+
def initialize(uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations)
|
15
21
|
@uri = uri
|
16
22
|
@metric_name = metric_name
|
17
23
|
@total_call_time = total_call_time
|
18
24
|
@metrics = metrics
|
25
|
+
@allocation_metrics = allocation_metrics
|
19
26
|
@context = context
|
20
27
|
@time = time
|
21
|
-
@prof =
|
22
|
-
@
|
28
|
+
@prof = ScoutApm::StackprofTreeCollapser.new(raw_stackprof).call
|
29
|
+
@raw_prof = raw_stackprof # Send whole data up to server
|
30
|
+
@mem_delta = mem_delta
|
31
|
+
@allocations = allocations
|
32
|
+
@seconds_since_startup = (Time.now - ScoutApm::Agent.instance.process_start_time)
|
33
|
+
@hostname = ScoutApm::Environment.instance.hostname
|
34
|
+
ScoutApm::Agent.instance.logger.debug { "Slow Request [#{uri}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
|
23
35
|
end
|
24
36
|
|
25
37
|
# Used to remove metrics when the payload will be too large.
|
@@ -33,28 +45,12 @@ module ScoutApm
|
|
33
45
|
end
|
34
46
|
|
35
47
|
def as_json
|
36
|
-
json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :prof, :
|
48
|
+
json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :prof, :mem_delta, :allocations, :seconds_since_startup, :hostname]
|
37
49
|
ScoutApm::AttributeArranger.call(self, json_attributes)
|
38
50
|
end
|
39
51
|
|
40
52
|
def context_hash
|
41
53
|
context.to_hash
|
42
54
|
end
|
43
|
-
|
44
|
-
########################
|
45
|
-
# Scorable interface
|
46
|
-
#
|
47
|
-
# Needed so we can merge ScoredItemSet instances
|
48
|
-
def call
|
49
|
-
self
|
50
|
-
end
|
51
|
-
|
52
|
-
def name
|
53
|
-
metric_name
|
54
|
-
end
|
55
|
-
|
56
|
-
def score
|
57
|
-
@score
|
58
|
-
end
|
59
55
|
end
|
60
56
|
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module ScoutApm
  # Collapses raw StackProf output into a flat list of application frames.
  #
  # The raw data is expected to be a Hash with a :frames entry that maps a
  # frame id to a Hash of frame data (:name, :file, :line, :samples,
  # :total_samples and, optionally, :edges => { child_frame_id => weight }).
  #
  # Usage:
  #   ScoutApm::StackprofTreeCollapser.new(raw_stackprof).call
  #   # => [ { :samples => Integer, :name => String, :file => String, :line => Integer }, ... ]
  class StackprofTreeCollapser
    attr_reader :raw_stackprof
    attr_reader :nodes

    # raw_stackprof - the Hash of profiling results to collapse.
    def initialize(raw_stackprof)
      @raw_stackprof = raw_stackprof

      # Log the raw stackprof info
      #unless StackProf.respond_to?(:fake?) && StackProf.fake?
      #  begin
      #    ScoutApm::Agent.instance.logger.debug("StackProf - Samples: #{raw_stackprof[:samples]}, GC: #{raw_stackprof[:gc_samples]}, missed: #{raw_stackprof[:missed_samples]}, Interval: #{raw_stackprof[:interval]}")
      #  rescue
      #    ScoutApm::Agent.instance.logger.debug("StackProf Raw - #{raw_stackprof.inspect}")
      #  end
      #end
    end

    # Builds the node list, links parents and children, and returns one Hash
    # per interesting application frame.
    #
    # Deliberately best-effort: any StandardError (for example a nil or
    # malformed raw_stackprof) results in an empty Array instead of raising.
    def call
      build_tree
      connect_children
      total_samples_of_app_nodes
    rescue
      []
    end

    private

    # Turns every entry of raw_stackprof[:frames] into a TreeNode. Children
    # and parents start out empty and are filled in by #connect_children.
    def build_tree
      @nodes = raw_stackprof[:frames].map do |(frame_id, frame_data)|
        TreeNode.new(frame_id,                      # frame_id
                     frame_data[:name],             # name
                     frame_data[:file],             # file
                     frame_data[:line],             # line
                     frame_data[:samples],          # samples
                     frame_data[:total_samples],    # total_samples
                     (frame_data[:edges] || {}),    # children_edges [ { id => weight } ]
                     [],                            # children [ treenode, ... ]
                     []                             # parents [ [treenode, int (weight) ], [...] ]
                    )
      end
    end

    # Resolves each node's children_edges into actual TreeNode references and
    # records the reverse [parent, weight] link on every child.
    #
    # Nodes are indexed by frame_id once, so linking is a single pass rather
    # than a scan of all nodes per edge. An edge that points at a frame id
    # which is absent from :frames is skipped, so one dangling edge no longer
    # aborts the whole collapse.
    def connect_children
      nodes_by_id = {}
      nodes.each { |node| nodes_by_id[node.frame_id] = node }

      nodes.each do |node|
        linked = []

        node.children_edges.each do |(frame_id, weight)|
          child = nodes_by_id[frame_id]
          next unless child

          child.parents << [node, weight]
          linked << child
        end

        node.children = linked
      end
    end

    # The nodes whose file lives under the application root.
    def in_app_nodes
      nodes.select { |n| n.app? }
    end

    # Application frames worth reporting. Frames that have no samples of
    # their own and only call into other application frames are dropped.
    def total_samples_of_app_nodes
      reportable = in_app_nodes.reject { |n| n.calls_only_app_nodes? && !n.has_samples? }

      reportable.map do |n|
        {
          :samples => n.total_samples,
          :name => n.name,
          :file => n.file,
          :line => n.line
        }
      end
    end

    ###########################################
    # TreeNode class represents a single node.
    ###########################################
    TreeNode = Struct.new(:frame_id, :name, :file, :line, :samples, :total_samples,
                          :children_edges, :children, :parents) do
      # True when this frame's file path starts with the application root.
      #
      # Uses a literal prefix comparison instead of interpolating the root
      # into a regex, so roots containing regex metacharacters ("+", ".",
      # "(", ...) are matched as written. The answer is cached for both the
      # true and the false case.
      def app?
        return @is_app if defined?(@is_app)
        @is_app = file.to_s.start_with?(ScoutApm::Environment.instance.root.to_s)
      end

      # Force object_id to be the equality mechanism, rather than struct's
      # default which delegates to == on each value. That is wrong because
      # we want to be able to dup a node in the tree construction process and
      # not have those compare equal to each other.
      def ==(other)
        object_id == other.object_id
      end

      # Human-readable summary of the node, its parents and its child edges.
      def inspect
        "#{frame_id}: #{name} - ##{samples}\n" +
        " Parents: #{parents.map{ |(p, w)| "#{p.name}: #{w}"}.join("\n ") }\n" +
        " Children: #{children_edges.inspect} \n"
      end

      # True when every child is an application frame (also true when there
      # are no children at all).
      def calls_only_app_nodes?
        children.all?(&:app?)
      end

      # True when the frame has samples of its own; a missing (nil) sample
      # count is treated as zero.
      def has_samples?
        samples.to_i > 0
      end
    end
  end
end
|
data/lib/scout_apm/store.rb
CHANGED
@@ -6,13 +6,9 @@ module ScoutApm
|
|
6
6
|
# A hash of reporting periods. { StoreReportingPeriodTimestamp => StoreReportingPeriod }
|
7
7
|
attr_reader :reporting_periods
|
8
8
|
|
9
|
-
# Used to pull metrics into each reporting period, as that reporting period is finished.
|
10
|
-
attr_reader :samplers
|
11
|
-
|
12
9
|
def initialize
|
13
10
|
@mutex = Mutex.new
|
14
11
|
@reporting_periods = Hash.new { |h,k| h[k] = StoreReportingPeriod.new(k) }
|
15
|
-
@samplers = []
|
16
12
|
end
|
17
13
|
|
18
14
|
def current_timestamp
|
@@ -26,7 +22,7 @@ module ScoutApm
|
|
26
22
|
# Save newly collected metrics
|
27
23
|
def track!(metrics, options={})
|
28
24
|
@mutex.synchronize {
|
29
|
-
current_period.
|
25
|
+
current_period.merge_metrics!(metrics)
|
30
26
|
}
|
31
27
|
end
|
32
28
|
|
@@ -70,30 +66,11 @@ module ScoutApm
|
|
70
66
|
@mutex.synchronize {
|
71
67
|
reporting_periods.select { |time, rp| force || time.timestamp < current_timestamp.timestamp}.
|
72
68
|
each { |time, rp|
|
73
|
-
|
74
|
-
layaway.write_reporting_period(rp)
|
69
|
+
layaway.add_reporting_period(time, rp)
|
75
70
|
reporting_periods.delete(time)
|
76
71
|
}
|
77
72
|
}
|
78
|
-
|
79
|
-
|
80
|
-
######################################
|
81
|
-
# Sampler support
|
82
|
-
def add_sampler(sampler)
|
83
|
-
@samplers << sampler
|
84
|
-
end
|
85
|
-
|
86
|
-
def collect_samplers(rp)
|
87
|
-
@samplers.each do |sampler|
|
88
|
-
begin
|
89
|
-
metrics = sampler.metrics(rp.timestamp)
|
90
|
-
rp.absorb_metrics!(metrics)
|
91
|
-
rescue => e
|
92
|
-
ScoutApm::Agent.instance.logger.info "Error reading #{sampler.human_name} for period: #{rp}"
|
93
|
-
ScoutApm::Agent.instance.logger.debug e.message
|
94
|
-
ScoutApm::Agent.instance.logger.debug e.backtrace.join("\n")
|
95
|
-
end
|
96
|
-
end
|
73
|
+
ScoutApm::Agent.instance.logger.debug("Finished writing to layaway")
|
97
74
|
end
|
98
75
|
end
|
99
76
|
|
@@ -107,25 +84,8 @@ module ScoutApm
|
|
107
84
|
@timestamp = @raw_time.to_i - @raw_time.sec # The normalized time (integer) to compare by
|
108
85
|
end
|
109
86
|
|
110
|
-
def self.minutes_ago(min, base_time=Time.now)
|
111
|
-
adjusted = base_time - (min * 60)
|
112
|
-
new(adjusted)
|
113
|
-
end
|
114
|
-
|
115
87
|
def to_s
|
116
|
-
|
117
|
-
end
|
118
|
-
|
119
|
-
def strftime(pattern=nil)
|
120
|
-
if pattern.nil?
|
121
|
-
to_time.iso8601
|
122
|
-
else
|
123
|
-
to_time.strftime(pattern)
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
def to_time
|
128
|
-
Time.at(@timestamp)
|
88
|
+
Time.at(@timestamp).iso8601
|
129
89
|
end
|
130
90
|
|
131
91
|
def eql?(o)
|
@@ -147,12 +107,11 @@ module ScoutApm
|
|
147
107
|
|
148
108
|
# One period of Storage. Typically 1 minute
|
149
109
|
class StoreReportingPeriod
|
110
|
+
# A SlowItemSet to store slow transactions in
|
111
|
+
attr_reader :slow_transactions
|
150
112
|
|
151
|
-
# A
|
152
|
-
attr_reader :
|
153
|
-
|
154
|
-
# A ScoredItemSet holding the "best" traces for the period
|
155
|
-
attr_reader :job_traces
|
113
|
+
# A SlowItemSet to store slow jobs in
|
114
|
+
attr_reader :slow_jobs
|
156
115
|
|
157
116
|
# A StoreReportingPeriodTimestamp representing the time that this
|
158
117
|
# collection of metrics is for
|
@@ -163,50 +122,31 @@ module ScoutApm
|
|
163
122
|
def initialize(timestamp)
|
164
123
|
@timestamp = timestamp
|
165
124
|
|
166
|
-
@
|
167
|
-
@
|
125
|
+
@slow_transactions = SlowItemSet.new
|
126
|
+
@slow_jobs = SlowItemSet.new
|
168
127
|
|
169
128
|
@metric_set = MetricSet.new
|
170
129
|
@jobs = Hash.new
|
171
130
|
end
|
172
131
|
|
173
|
-
# Merges another StoreReportingPeriod into this one
|
174
|
-
def merge(other)
|
175
|
-
self.
|
176
|
-
merge_metrics!(other.metric_set).
|
177
|
-
merge_slow_transactions!(other.slow_transactions_payload).
|
178
|
-
merge_jobs!(other.jobs).
|
179
|
-
merge_slow_jobs!(other.slow_jobs_payload)
|
180
|
-
self
|
181
|
-
end
|
182
|
-
|
183
132
|
#################################
|
184
133
|
# Add metrics as they are recorded
|
185
134
|
#################################
|
186
|
-
|
187
|
-
# For absorbing an array of metric {Meta => Stat} records
|
188
|
-
def absorb_metrics!(metrics)
|
135
|
+
def merge_metrics!(metrics)
|
189
136
|
metric_set.absorb_all(metrics)
|
190
137
|
self
|
191
138
|
end
|
192
139
|
|
193
|
-
# For merging when you have another metric_set object
|
194
|
-
# Makes sure that you don't duplicate error count records
|
195
|
-
def merge_metrics!(other_metric_set)
|
196
|
-
metric_set.combine!(other_metric_set)
|
197
|
-
self
|
198
|
-
end
|
199
|
-
|
200
140
|
def merge_slow_transactions!(new_transactions)
|
201
141
|
Array(new_transactions).each do |one_transaction|
|
202
|
-
|
142
|
+
slow_transactions << one_transaction
|
203
143
|
end
|
204
144
|
|
205
145
|
self
|
206
146
|
end
|
207
147
|
|
208
148
|
def merge_jobs!(jobs)
|
209
|
-
|
149
|
+
jobs.each do |job|
|
210
150
|
if @jobs.has_key?(job)
|
211
151
|
@jobs[job].combine!(job)
|
212
152
|
else
|
@@ -219,10 +159,8 @@ module ScoutApm
|
|
219
159
|
|
220
160
|
def merge_slow_jobs!(new_jobs)
|
221
161
|
Array(new_jobs).each do |job|
|
222
|
-
|
162
|
+
slow_jobs << job
|
223
163
|
end
|
224
|
-
|
225
|
-
self
|
226
164
|
end
|
227
165
|
|
228
166
|
#################################
|
@@ -233,7 +171,7 @@ module ScoutApm
|
|
233
171
|
end
|
234
172
|
|
235
173
|
def slow_transactions_payload
|
236
|
-
|
174
|
+
slow_transactions.to_a
|
237
175
|
end
|
238
176
|
|
239
177
|
def jobs
|
@@ -241,7 +179,7 @@ module ScoutApm
|
|
241
179
|
end
|
242
180
|
|
243
181
|
def slow_jobs_payload
|
244
|
-
|
182
|
+
slow_jobs.to_a
|
245
183
|
end
|
246
184
|
|
247
185
|
#################################
|
@@ -22,6 +22,10 @@ module ScoutApm
|
|
22
22
|
# :queue_latency - how long a background Job spent in the queue before starting processing
|
23
23
|
attr_reader :annotations
|
24
24
|
|
25
|
+
# Nil until the request is finalized, at which point it will hold the
|
26
|
+
# entire raw stackprof output for this request
|
27
|
+
attr_reader :stackprof
|
28
|
+
|
25
29
|
# Headers as recorded by rails
|
26
30
|
# Can be nil if we never reach a Rails Controller
|
27
31
|
attr_reader :headers
|
@@ -39,12 +43,14 @@ module ScoutApm
|
|
39
43
|
|
40
44
|
def initialize
|
41
45
|
@layers = []
|
42
|
-
@
|
46
|
+
@call_set = Hash.new { |h, k| h[k] = CallSet.new }
|
43
47
|
@annotations = {}
|
44
48
|
@ignoring_children = false
|
45
49
|
@context = Context.new
|
46
50
|
@root_layer = nil
|
51
|
+
@stackprof = nil
|
47
52
|
@error = false
|
53
|
+
@mem_start = mem_usage
|
48
54
|
end
|
49
55
|
|
50
56
|
def start_layer(layer)
|
@@ -53,7 +59,6 @@ module ScoutApm
|
|
53
59
|
end
|
54
60
|
|
55
61
|
start_request(layer) unless @root_layer
|
56
|
-
update_call_counts!(layer)
|
57
62
|
@layers[-1].add_child(layer) if @layers.any?
|
58
63
|
@layers.push(layer)
|
59
64
|
end
|
@@ -63,7 +68,11 @@ module ScoutApm
|
|
63
68
|
|
64
69
|
layer = @layers.pop
|
65
70
|
layer.record_stop_time!
|
71
|
+
layer.record_allocations!
|
66
72
|
|
73
|
+
# This must be called before checking if a backtrace should be collected as the call count influences our capture logic.
|
74
|
+
# We call `#update_call_counts!` in stop layer to ensure the layer has a final desc. Layer#desc is updated during the AR instrumentation flow.
|
75
|
+
update_call_counts!(layer)
|
67
76
|
if capture_backtrace?(layer)
|
68
77
|
layer.capture_backtrace!
|
69
78
|
end
|
@@ -73,6 +82,18 @@ module ScoutApm
|
|
73
82
|
end
|
74
83
|
end
|
75
84
|
|
85
|
+
# Grab the currently running layer. Useful for adding additional data as we
|
86
|
+
# learn it. This is useful in ActiveRecord instruments, where we start the
|
87
|
+
# instrumentation early, and gradually learn more about the request that
|
88
|
+
# actually happened as we go (for instance, the # of records found, or the
|
89
|
+
# actual SQL generated).
|
90
|
+
#
|
91
|
+
# Returns nil in the case there is no current layer. That would be normal
|
92
|
+
# for a completed TrackedRequest
|
93
|
+
def current_layer
|
94
|
+
@layers.last
|
95
|
+
end
|
96
|
+
|
76
97
|
BACKTRACE_BLACKLIST = ["Controller", "Job"]
|
77
98
|
def capture_backtrace?(layer)
|
78
99
|
# Never capture backtraces for this kind of layer. The backtrace will
|
@@ -88,7 +109,7 @@ module ScoutApm
|
|
88
109
|
return true if layer.total_exclusive_time > BACKTRACE_THRESHOLD
|
89
110
|
|
90
111
|
# Capture any layer that we've seen many times. Captures n+1 problems
|
91
|
-
return true if @
|
112
|
+
return true if @call_set[layer.name].capture_backtrace?
|
92
113
|
|
93
114
|
# Don't capture otherwise
|
94
115
|
false
|
@@ -96,7 +117,16 @@ module ScoutApm
|
|
96
117
|
|
97
118
|
# Maintains a lookup Hash of call counts by layer name. Used to determine if we should capture a backtrace.
|
98
119
|
def update_call_counts!(layer)
|
99
|
-
@
|
120
|
+
@call_set[layer.name].update!(layer.desc)
|
121
|
+
end
|
122
|
+
|
123
|
+
# This may be in bytes or KB depending on the OS. We store this as-is here and only do conversion to MB in Layer Converters.
|
124
|
+
def mem_usage
|
125
|
+
ScoutApm::Instruments::Process::ProcessMemory.rss
|
126
|
+
end
|
127
|
+
|
128
|
+
def capture_mem_delta!
|
129
|
+
@mem_delta = mem_usage - @mem_start
|
100
130
|
end
|
101
131
|
|
102
132
|
###################################
|
@@ -112,14 +142,21 @@ module ScoutApm
|
|
112
142
|
# Run at the beginning of the whole request
|
113
143
|
#
|
114
144
|
# * Capture the first layer as the root_layer
|
145
|
+
# * Start Stackprof (disabling to avoid conflicts if stackprof is included as middleware since we aren't sending this up to server now)
|
115
146
|
def start_request(layer)
|
116
147
|
@root_layer = layer unless @root_layer # capture root layer
|
148
|
+
#StackProf.start(:mode => :wall, :interval => ScoutApm::Agent.instance.config.value("stackprof_interval"))
|
117
149
|
end
|
118
150
|
|
119
151
|
# Run at the end of the whole request
|
120
152
|
#
|
153
|
+
# * Collect stackprof info
|
121
154
|
# * Send the request off to be stored
|
122
155
|
def stop_request
|
156
|
+
# ScoutApm::Agent.instance.logger.debug("stop_request: #{annotations[:uri]}" )
|
157
|
+
#StackProf.stop # disabling to avoid conflicts if stackprof is included as middleware since we aren't sending this up to server now
|
158
|
+
#@stackprof = StackProf.results
|
159
|
+
|
123
160
|
record!
|
124
161
|
end
|
125
162
|
|
@@ -174,47 +211,27 @@ module ScoutApm
|
|
174
211
|
def record!
|
175
212
|
@recorded = true
|
176
213
|
|
177
|
-
# Update immediate and long-term histograms for both job and web requests
|
178
|
-
if unique_name != :unknown
|
179
|
-
ScoutApm::Agent.instance.request_histograms.add(unique_name, root_layer.total_call_time)
|
180
|
-
ScoutApm::Agent.instance.request_histograms_by_time[ScoutApm::Agent.instance.store.current_timestamp].
|
181
|
-
add(unique_name, root_layer.total_call_time)
|
182
|
-
end
|
183
|
-
|
184
214
|
metrics = LayerConverters::MetricConverter.new(self).call
|
185
215
|
ScoutApm::Agent.instance.store.track!(metrics)
|
186
216
|
|
217
|
+
slow, slow_metrics = LayerConverters::SlowRequestConverter.new(self).call
|
218
|
+
ScoutApm::Agent.instance.store.track_slow_transaction!(slow)
|
219
|
+
ScoutApm::Agent.instance.store.track!(slow_metrics)
|
220
|
+
|
187
221
|
error_metrics = LayerConverters::ErrorConverter.new(self).call
|
188
222
|
ScoutApm::Agent.instance.store.track!(error_metrics)
|
189
223
|
|
190
|
-
|
191
|
-
|
192
|
-
slow_converter = LayerConverters::SlowRequestConverter.new(self)
|
193
|
-
ScoutApm::Agent.instance.store.track_slow_transaction!(slow_converter)
|
224
|
+
queue_time_metrics = LayerConverters::RequestQueueTimeConverter.new(self).call
|
225
|
+
ScoutApm::Agent.instance.store.track!(queue_time_metrics)
|
194
226
|
|
195
|
-
|
196
|
-
|
197
|
-
end
|
198
|
-
|
199
|
-
if job?
|
200
|
-
job_metrics = LayerConverters::JobConverter.new(self).call
|
201
|
-
ScoutApm::Agent.instance.store.track_job!(job_metrics)
|
227
|
+
job = LayerConverters::JobConverter.new(self).call
|
228
|
+
ScoutApm::Agent.instance.store.track_job!(job)
|
202
229
|
|
203
|
-
|
204
|
-
|
205
|
-
end
|
206
|
-
end
|
230
|
+
slow_job = LayerConverters::SlowJobConverter.new(self).call
|
231
|
+
ScoutApm::Agent.instance.store.track_slow_job!(slow_job)
|
207
232
|
|
208
|
-
|
209
|
-
|
210
|
-
@unique_name ||= begin
|
211
|
-
scope_layer = LayerConverters::ConverterBase.new(self).scope_layer
|
212
|
-
if scope_layer
|
213
|
-
scope_layer.legacy_metric_name
|
214
|
-
else
|
215
|
-
:unknown
|
216
|
-
end
|
217
|
-
end
|
233
|
+
allocation_metrics = LayerConverters::AllocationMetricConverter.new(self).call
|
234
|
+
ScoutApm::Agent.instance.store.track!(allocation_metrics)
|
218
235
|
end
|
219
236
|
|
220
237
|
# Have we already persisted this request?
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# A fake implementation of stackprof, for systems that don't support it.
|
2
|
+
# Stand-in for the StackProf API: it only tracks a running flag and always
# reports an empty result set, so callers can use the same calls whether or
# not the real profiler is available.
module StackProf
  # Marks the fake profiler as running. Arguments are accepted and ignored.
  def self.start(*args)
    @running = true
  end

  # Marks the fake profiler as stopped. Arguments are accepted and ignored.
  def self.stop(*args)
    @running = false
  end

  # True between a call to .start and the following call to .stop.
  def self.running?
    !!@running
  end

  # Runs the given block "under profiling" and returns .results.
  #
  # The profiler is stopped in an ensure clause, so an exception raised by
  # the block (which still propagates to the caller) cannot leave the fake
  # profiler stuck in the running state.
  def self.run(*args)
    start
    begin
      yield
    ensure
      stop
    end
    results
  end

  # No-op: the fake never records samples.
  def self.sample(*args)
  end

  # An empty result set with the keys used elsewhere in this agent
  # (:samples, :gc_samples, :missed_samples, :interval, :frames).
  def self.results(*args)
    {
      :version => 0.0,
      :mode => :wall,
      :interval => 1000,
      :samples => 0,
      :gc_samples => 0,
      :missed_samples => 0,
      :frames => {},
    }
  end

  # Lets callers detect that this is the stand-in implementation.
  def self.fake?
    true
  end
end
|