scout_apm 2.0.0.pre → 2.0.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGELOG.markdown +22 -5
  4. data/Rakefile +5 -0
  5. data/lib/scout_apm.rb +4 -0
  6. data/lib/scout_apm/agent.rb +22 -8
  7. data/lib/scout_apm/agent/reporting.rb +8 -3
  8. data/lib/scout_apm/attribute_arranger.rb +4 -0
  9. data/lib/scout_apm/bucket_name_splitter.rb +3 -3
  10. data/lib/scout_apm/config.rb +5 -2
  11. data/lib/scout_apm/histogram.rb +20 -0
  12. data/lib/scout_apm/instant_reporting.rb +40 -0
  13. data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +11 -1
  14. data/lib/scout_apm/instruments/percentile_sampler.rb +38 -0
  15. data/lib/scout_apm/layaway.rb +1 -4
  16. data/lib/scout_apm/layaway_file.rb +26 -2
  17. data/lib/scout_apm/layer.rb +1 -1
  18. data/lib/scout_apm/layer_converters/converter_base.rb +6 -4
  19. data/lib/scout_apm/layer_converters/slow_job_converter.rb +21 -13
  20. data/lib/scout_apm/layer_converters/slow_request_converter.rb +37 -24
  21. data/lib/scout_apm/metric_meta.rb +5 -1
  22. data/lib/scout_apm/metric_set.rb +15 -6
  23. data/lib/scout_apm/reporter.rb +9 -3
  24. data/lib/scout_apm/request_histograms.rb +46 -0
  25. data/lib/scout_apm/scored_item_set.rb +79 -0
  26. data/lib/scout_apm/serializers/payload_serializer_to_json.rb +2 -0
  27. data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +2 -0
  28. data/lib/scout_apm/slow_job_policy.rb +89 -19
  29. data/lib/scout_apm/slow_job_record.rb +18 -1
  30. data/lib/scout_apm/slow_request_policy.rb +80 -12
  31. data/lib/scout_apm/slow_transaction.rb +22 -3
  32. data/lib/scout_apm/store.rb +35 -13
  33. data/lib/scout_apm/tracked_request.rb +63 -11
  34. data/lib/scout_apm/utils/backtrace_parser.rb +4 -4
  35. data/lib/scout_apm/utils/sql_sanitizer.rb +1 -1
  36. data/lib/scout_apm/utils/sql_sanitizer_regex.rb +2 -2
  37. data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +2 -2
  38. data/lib/scout_apm/version.rb +1 -1
  39. data/scout_apm.gemspec +1 -0
  40. data/test/test_helper.rb +4 -3
  41. data/test/unit/layaway_test.rb +5 -8
  42. data/test/unit/metric_set_test.rb +101 -0
  43. data/test/unit/scored_item_set_test.rb +65 -0
  44. data/test/unit/serializers/payload_serializer_test.rb +2 -1
  45. data/test/unit/slow_item_set_test.rb +2 -1
  46. data/test/unit/slow_request_policy_test.rb +42 -0
  47. data/test/unit/sql_sanitizer_test.rb +6 -0
  48. metadata +28 -3
@@ -19,8 +19,9 @@ module ScoutApm
19
19
  attr_reader :allocations
20
20
  attr_reader :hostname
21
21
  attr_reader :seconds_since_startup
22
+ attr_reader :score
22
23
 
23
- def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations)
24
+ def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations, score)
24
25
  @queue_name = queue_name
25
26
  @job_name = job_name
26
27
  @time = time
@@ -33,6 +34,7 @@ module ScoutApm
33
34
  @allocations = allocations
34
35
  @seconds_since_startup = (Time.now - ScoutApm::Agent.instance.process_start_time)
35
36
  @hostname = ScoutApm::Environment.instance.hostname
37
+ @score = score
36
38
  ScoutApm::Agent.instance.logger.debug { "Slow Job [#{metric_name}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
37
39
  end
38
40
 
@@ -40,5 +42,20 @@ module ScoutApm
40
42
  "Job/#{queue_name}/#{job_name}"
41
43
  end
42
44
 
45
+ ########################
46
+ # Scorable interface
47
+ #
48
+ # Needed so we can merge ScoredItemSet instances
49
+ def call
50
+ self
51
+ end
52
+
53
+ def name
54
+ metric_name
55
+ end
56
+
57
+ def score
58
+ @score
59
+ end
43
60
  end
44
61
  end
@@ -1,8 +1,5 @@
1
1
  # Long running class that determines if, and in how much detail a potentially
2
2
  # slow transaction should be recorded in
3
- #
4
- # Rules:
5
- # - Runtime must be slower than a threshold
6
3
 
7
4
  module ScoutApm
8
5
  class SlowRequestPolicy
@@ -11,21 +8,92 @@ module ScoutApm
11
8
  CAPTURE_NONE = "capture_none",
12
9
  ]
13
10
 
14
- # It's not slow unless it's at least this slow
15
- SLOW_REQUEST_TIME_THRESHOLD = 2.0 # seconds
11
+ # Adjust speed points. See the speed_points function
12
+ POINT_MULTIPLIER_SPEED = 0.25
16
13
 
17
- def capture_type(time)
18
- if !slow_enough?(time)
19
- CAPTURE_NONE
20
- else
21
- CAPTURE_DETAIL
14
+ # For each minute we haven't seen an endpoint
15
+ POINT_MULTIPLIER_AGE = 0.25
16
+
17
+ # Outliers are worth up to "1000ms" of weight
18
+ POINT_MULTIPLIER_PERCENTILE = 1.0
19
+
20
+ # A hash of Endpoint Name to the last time we stored a slow transaction for it.
21
+ #
22
+ # Defaults to a start time that is pretty close to application boot time.
23
+ # So the "age" of an endpoint we've never seen is the time the application
24
+ # has been running.
25
+ attr_reader :last_seen
26
+
27
+
28
+ def initialize
29
+ zero_time = Time.now
30
+ @last_seen = Hash.new { |h, k| h[k] = zero_time }
31
+ end
32
+
33
+ def stored!(request)
34
+ last_seen[unique_name_for(request)] = Time.now
35
+ end
36
+
37
+ # Determine if this request trace should be fully analyzed by scoring it
38
+ # across several metrics, and then determining if that's good enough to
39
+ # make it into this minute's payload.
40
+ #
41
+ # Due to the combining nature of the agent & layaway file, there's no
42
+ # guarantee that a high scoring local champion will still be a winner when
43
+ # they go up to "regionals" and are compared against the other processes
44
+ # running on a node.
45
+ def score(request)
46
+ unique_name = request.unique_name
47
+ if unique_name == :unknown
48
+ return -1 # A negative score, should never be good enough to store.
22
49
  end
50
+
51
+ total_time = request.root_layer.total_call_time
52
+
53
+ # How long has it been since we've seen this?
54
+ age = Time.now - last_seen[unique_name]
55
+
56
+ # What approximate percentile was this request?
57
+ percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
58
+
59
+ return speed_points(total_time) + percentile_points(percentile) + age_points(age)
23
60
  end
24
61
 
25
62
  private
26
63
 
27
- def slow_enough?(time)
28
- time > SLOW_REQUEST_TIME_THRESHOLD
64
+ def unique_name_for(request)
65
+ scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
66
+ if scope_layer
67
+ scope_layer.legacy_metric_name
68
+ else
69
+ :unknown
70
+ end
71
+ end
72
+
73
+ # Time in seconds
74
+ # Logarithm keeps huge times from swamping the other metrics.
75
+ # 1+ is necessary to keep the log function in positive territory.
76
+ def speed_points(time)
77
+ Math.log(1 + time) * POINT_MULTIPLIER_SPEED
78
+ end
79
+
80
+ def percentile_points(percentile)
81
+ if percentile < 40
82
+ 0.4 # Don't put much emphasis on capturing low percentiles.
83
+ elsif percentile < 60
84
+ 1.4 # Highest here to get mean traces
85
+ elsif percentile < 90
86
+ 0.7 # Between 60 & 90% is fine.
87
+ elsif percentile >= 90
88
+ 1.4 # Highest here to get 90+%ile traces
89
+ else
90
+ # impossible.
91
+ percentile
92
+ end
93
+ end
94
+
95
+ def age_points(age)
96
+ age / 60.0 * POINT_MULTIPLIER_AGE
29
97
  end
30
98
  end
31
99
  end
@@ -17,7 +17,7 @@ module ScoutApm
17
17
  attr_accessor :hostname # hack - we need to reset these server side.
18
18
  attr_accessor :seconds_since_startup # hack - we need to reset these server side.
19
19
 
20
- def initialize(uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations)
20
+ def initialize(uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations, score)
21
21
  @uri = uri
22
22
  @metric_name = metric_name
23
23
  @total_call_time = total_call_time
@@ -27,11 +27,14 @@ module ScoutApm
27
27
  @time = time
28
28
  @prof = ScoutApm::StackprofTreeCollapser.new(raw_stackprof).call
29
29
  @raw_prof = raw_stackprof # Send whole data up to server
30
+
30
31
  @mem_delta = mem_delta
31
32
  @allocations = allocations
32
33
  @seconds_since_startup = (Time.now - ScoutApm::Agent.instance.process_start_time)
33
34
  @hostname = ScoutApm::Environment.instance.hostname
34
- ScoutApm::Agent.instance.logger.debug { "Slow Request [#{uri}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
35
+
36
+ @score = score
37
+ ScoutApm::Agent.instance.logger.debug { "Slow Request [#{uri}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta} Score: #{score}"}
35
38
  end
36
39
 
37
40
  # Used to remove metrics when the payload will be too large.
@@ -45,12 +48,28 @@ module ScoutApm
45
48
  end
46
49
 
47
50
  def as_json
48
- json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :prof, :mem_delta, :allocations, :seconds_since_startup, :hostname]
51
+ json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :score, :prof, :mem_delta, :allocations, :seconds_since_startup, :hostname]
49
52
  ScoutApm::AttributeArranger.call(self, json_attributes)
50
53
  end
51
54
 
52
55
  def context_hash
53
56
  context.to_hash
54
57
  end
58
+
59
+ ########################
60
+ # Scorable interface
61
+ #
62
+ # Needed so we can merge ScoredItemSet instances
63
+ def call
64
+ self
65
+ end
66
+
67
+ def name
68
+ metric_name
69
+ end
70
+
71
+ def score
72
+ @score
73
+ end
55
74
  end
56
75
  end
@@ -22,7 +22,7 @@ module ScoutApm
22
22
  # Save newly collected metrics
23
23
  def track!(metrics, options={})
24
24
  @mutex.synchronize {
25
- current_period.merge_metrics!(metrics)
25
+ current_period.absorb_metrics!(metrics)
26
26
  }
27
27
  end
28
28
 
@@ -107,11 +107,12 @@ module ScoutApm
107
107
 
108
108
  # One period of Storage. Typically 1 minute
109
109
  class StoreReportingPeriod
110
- # A SlowItemSet to store slow transactions in
111
- attr_reader :slow_transactions
112
110
 
113
- # A SlowItemSet to store slow jobs in
114
- attr_reader :slow_jobs
111
+ # A ScoredItemSet holding the "best" request traces for the period
112
+ attr_reader :request_traces
113
+
114
+ # A ScoredItemSet holding the "best" job traces for the period
115
+ attr_reader :job_traces
115
116
 
116
117
  # A StoreReportingPeriodTimestamp representing the time that this
117
118
  # collection of metrics is for
@@ -122,31 +123,50 @@ module ScoutApm
122
123
  def initialize(timestamp)
123
124
  @timestamp = timestamp
124
125
 
125
- @slow_transactions = SlowItemSet.new
126
- @slow_jobs = SlowItemSet.new
126
+ @request_traces = ScoredItemSet.new
127
+ @job_traces = ScoredItemSet.new
127
128
 
128
129
  @metric_set = MetricSet.new
129
130
  @jobs = Hash.new
130
131
  end
131
132
 
133
+ # Merges another StoreReportingPeriod into this one
134
+ def merge(other)
135
+ self.
136
+ merge_metrics!(other.metric_set).
137
+ merge_slow_transactions!(other.slow_transactions_payload).
138
+ merge_jobs!(other.jobs).
139
+ merge_slow_jobs!(other.slow_jobs_payload)
140
+ self
141
+ end
142
+
132
143
  #################################
133
144
  # Add metrics as they are recorded
134
145
  #################################
135
- def merge_metrics!(metrics)
146
+
147
+ # For absorbing an array of metric {Meta => Stat} records
148
+ def absorb_metrics!(metrics)
136
149
  metric_set.absorb_all(metrics)
137
150
  self
138
151
  end
139
152
 
153
+ # For merging when you have another metric_set object
154
+ # Makes sure that you don't duplicate error count records
155
+ def merge_metrics!(other_metric_set)
156
+ metric_set.combine!(other_metric_set)
157
+ self
158
+ end
159
+
140
160
  def merge_slow_transactions!(new_transactions)
141
161
  Array(new_transactions).each do |one_transaction|
142
- slow_transactions << one_transaction
162
+ request_traces << one_transaction
143
163
  end
144
164
 
145
165
  self
146
166
  end
147
167
 
148
168
  def merge_jobs!(jobs)
149
- jobs.each do |job|
169
+ Array(jobs).each do |job|
150
170
  if @jobs.has_key?(job)
151
171
  @jobs[job].combine!(job)
152
172
  else
@@ -159,8 +179,10 @@ module ScoutApm
159
179
 
160
180
  def merge_slow_jobs!(new_jobs)
161
181
  Array(new_jobs).each do |job|
162
- slow_jobs << job
182
+ job_traces << job
163
183
  end
184
+
185
+ self
164
186
  end
165
187
 
166
188
  #################################
@@ -171,7 +193,7 @@ module ScoutApm
171
193
  end
172
194
 
173
195
  def slow_transactions_payload
174
- slow_transactions.to_a
196
+ request_traces.to_a
175
197
  end
176
198
 
177
199
  def jobs
@@ -179,7 +201,7 @@ module ScoutApm
179
201
  end
180
202
 
181
203
  def slow_jobs_payload
182
- slow_jobs.to_a
204
+ job_traces.to_a
183
205
  end
184
206
 
185
207
  #################################
@@ -39,6 +39,10 @@ module ScoutApm
39
39
  # with same names across multiple types.
40
40
  attr_accessor :call_counts
41
41
 
42
+ # if there's an instant_key, pass the transaction trace on for immediate reporting (in addition to the usual background aggregation)
43
+ # this is set in the controller instrumentation (see ActionControllerRails3Rails4)
44
+ attr_accessor :instant_key
45
+
42
46
  BACKTRACE_THRESHOLD = 0.5 # the minimum threshold in seconds to record the backtrace for a metric.
43
47
 
44
48
  def initialize
@@ -50,6 +54,7 @@ module ScoutApm
50
54
  @root_layer = nil
51
55
  @stackprof = nil
52
56
  @error = false
57
+ @instant_key = nil
53
58
  @mem_start = mem_usage
54
59
  end
55
60
 
@@ -67,6 +72,17 @@ module ScoutApm
67
72
  return if ignoring_children?
68
73
 
69
74
  layer = @layers.pop
75
+
76
+ # Safeguard against a mismatch in the layer tracking in an instrument.
77
+ # This class works under the assumption that start & stop layers are
78
+ # lined up correctly. If stop_layer gets called twice, when it should
79
+ # only have been called once, you'll end up with this error.
80
+ if layer.nil?
81
+ ScoutApm::Agent.instance.logger.warn("Error stopping layer, was nil. Root Layer: #{@root_layer.inspect}")
82
+ stop_request
83
+ return
84
+ end
85
+
70
86
  layer.record_stop_time!
71
87
  layer.record_allocations!
72
88
 
@@ -87,7 +103,7 @@ module ScoutApm
87
103
  # instrumentation early, and gradually learn more about the request that
88
104
  # actually happened as we go (for instance, the # of records found, or the
89
105
  # actual SQL generated).
90
- #
106
+ #
91
107
  # Returns nil in the case there is no current layer. That would be normal
92
108
  # for a completed TrackedRequest
93
109
  def current_layer
@@ -202,6 +218,10 @@ module ScoutApm
202
218
  request_type == "web"
203
219
  end
204
220
 
221
+ def instant?
222
+ instant_key
223
+ end
224
+
205
225
  ###################################
206
226
  # Persist the Request
207
227
  ###################################
@@ -211,27 +231,59 @@ module ScoutApm
211
231
  def record!
212
232
  @recorded = true
213
233
 
234
+ # Update immediate and long-term histograms for both job and web requests
235
+ if unique_name != :unknown
236
+ ScoutApm::Agent.instance.request_histograms.add(unique_name, root_layer.total_call_time)
237
+ ScoutApm::Agent.instance.request_histograms_resettable.add(unique_name, root_layer.total_call_time)
238
+ end
239
+
214
240
  metrics = LayerConverters::MetricConverter.new(self).call
215
241
  ScoutApm::Agent.instance.store.track!(metrics)
216
242
 
217
- slow, slow_metrics = LayerConverters::SlowRequestConverter.new(self).call
218
- ScoutApm::Agent.instance.store.track_slow_transaction!(slow)
219
- ScoutApm::Agent.instance.store.track!(slow_metrics)
220
-
221
243
  error_metrics = LayerConverters::ErrorConverter.new(self).call
222
244
  ScoutApm::Agent.instance.store.track!(error_metrics)
223
245
 
224
- queue_time_metrics = LayerConverters::RequestQueueTimeConverter.new(self).call
225
- ScoutApm::Agent.instance.store.track!(queue_time_metrics)
246
+ allocation_metrics = LayerConverters::AllocationMetricConverter.new(self).call
247
+ ScoutApm::Agent.instance.store.track!(allocation_metrics)
248
+
249
+ if web?
250
+ # Don't #call this - that's the job of the ScoredItemSet later.
251
+ slow_converter = LayerConverters::SlowRequestConverter.new(self)
252
+ ScoutApm::Agent.instance.store.track_slow_transaction!(slow_converter)
253
+
254
+ queue_time_metrics = LayerConverters::RequestQueueTimeConverter.new(self).call
255
+ ScoutApm::Agent.instance.store.track!(queue_time_metrics)
226
256
 
227
- job = LayerConverters::JobConverter.new(self).call
228
- ScoutApm::Agent.instance.store.track_job!(job)
257
+ # If there's an instant_key, it means we need to report this right away
258
+ if instant?
259
+ trace = slow_converter.call
260
+ ScoutApm::InstantReporting.new(trace, instant_key).call()
261
+ end
262
+ end
229
263
 
230
- slow_job = LayerConverters::SlowJobConverter.new(self).call
231
- ScoutApm::Agent.instance.store.track_slow_job!(slow_job)
264
+ if job?
265
+ job_metrics = LayerConverters::JobConverter.new(self).call
266
+ ScoutApm::Agent.instance.store.track_job!(job_metrics)
267
+
268
+ job_converter = LayerConverters::SlowJobConverter.new(self)
269
+ ScoutApm::Agent.instance.store.track_slow_job!(job_converter)
270
+ end
232
271
 
233
272
  allocation_metrics = LayerConverters::AllocationMetricConverter.new(self).call
234
273
  ScoutApm::Agent.instance.store.track!(allocation_metrics)
274
+
275
+ end
276
+
277
+ # Only call this after the request is complete
278
+ def unique_name
279
+ @unique_name ||= begin
280
+ scope_layer = LayerConverters::ConverterBase.new(self).scope_layer
281
+ if scope_layer
282
+ scope_layer.legacy_metric_name
283
+ else
284
+ :unknown
285
+ end
286
+ end
235
287
  end
236
288
 
237
289
  # Have we already persisted this request?
@@ -6,21 +6,21 @@ module ScoutApm
6
6
  module Utils
7
7
  class BacktraceParser
8
8
 
9
+ APP_FRAMES = 3 # will return up to 3 frames from the app stack.
10
+
9
11
  def initialize(call_stack)
10
12
  @call_stack = call_stack
11
13
  # We can't use a constant as it'd be too early to fetch environment info
12
14
  @@app_dir_regex ||= /\A(#{ScoutApm::Environment.instance.root.to_s.gsub('/','\/')}\/)(app\/(.+))/.freeze
13
15
  end
14
16
 
15
- # Given a call stack Array, grabs the first call within the application root directory.
17
+ # Given a call stack Array, grabs the first +APP_FRAMES+ callers within the application root directory.
16
18
  def call
17
- # We used to return an array of up to 5 elements...this will return a single element-array for backwards compatibility.
18
- # Only the first element is used in Github code display.
19
19
  stack = []
20
20
  @call_stack.each_with_index do |c,i|
21
21
  if m = c.match(@@app_dir_regex)
22
22
  stack << m[2]
23
- break
23
+ break if stack.size == APP_FRAMES
24
24
  end
25
25
  end
26
26
  stack