scout_apm 1.5.5 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.markdown +8 -0
  3. data/lib/scout_apm.rb +3 -0
  4. data/lib/scout_apm/agent.rb +23 -25
  5. data/lib/scout_apm/agent/reporting.rb +8 -3
  6. data/lib/scout_apm/attribute_arranger.rb +4 -0
  7. data/lib/scout_apm/bucket_name_splitter.rb +3 -3
  8. data/lib/scout_apm/config.rb +4 -2
  9. data/lib/scout_apm/histogram.rb +20 -0
  10. data/lib/scout_apm/instruments/percentile_sampler.rb +37 -0
  11. data/lib/scout_apm/instruments/process/process_cpu.rb +12 -0
  12. data/lib/scout_apm/instruments/process/process_memory.rb +12 -0
  13. data/lib/scout_apm/layer_converters/converter_base.rb +6 -4
  14. data/lib/scout_apm/layer_converters/slow_job_converter.rb +21 -13
  15. data/lib/scout_apm/layer_converters/slow_request_converter.rb +28 -22
  16. data/lib/scout_apm/metric_meta.rb +5 -1
  17. data/lib/scout_apm/metric_set.rb +1 -1
  18. data/lib/scout_apm/reporter.rb +3 -1
  19. data/lib/scout_apm/request_histograms.rb +46 -0
  20. data/lib/scout_apm/scored_item_set.rb +79 -0
  21. data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +2 -0
  22. data/lib/scout_apm/slow_job_policy.rb +89 -19
  23. data/lib/scout_apm/slow_job_record.rb +20 -1
  24. data/lib/scout_apm/slow_request_policy.rb +80 -12
  25. data/lib/scout_apm/slow_transaction.rb +19 -2
  26. data/lib/scout_apm/store.rb +45 -15
  27. data/lib/scout_apm/tracked_request.rb +33 -10
  28. data/lib/scout_apm/version.rb +1 -1
  29. data/test/test_helper.rb +4 -3
  30. data/test/unit/layaway_test.rb +5 -8
  31. data/test/unit/scored_item_set_test.rb +65 -0
  32. data/test/unit/serializers/payload_serializer_test.rb +2 -1
  33. data/test/unit/slow_item_set_test.rb +2 -1
  34. data/test/unit/slow_request_policy_test.rb +42 -0
  35. metadata +9 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ca18811f4b9adad075737c7321340025345f1c14
4
- data.tar.gz: ca2873e587e8d70fc1a3bffe564b59cde10b0f85
3
+ metadata.gz: da7b4f6ec7fe62d8ee8243439e1c98404d2257a5
4
+ data.tar.gz: b3a4f52a7d88731fa0c603aef7b0a236535edb82
5
5
  SHA512:
6
- metadata.gz: 7a4e009a252a824a6e22bde107ad671e6c017767ad6d96960ecdf056e0ccef3761e6d8d9abe984ec533246c8f34ccba71cc53ce4f7e3012dc2fb2b7173104578
7
- data.tar.gz: d45a047c05341ef7da71c1b38a370d78f3e854a54b27ecac7a1bd33a37fd88bb4b30bd1c7d7fd2f897999a1b85e06d2e62437204d115152bd84470f7a36e9497
6
+ metadata.gz: c74c1effbb0846908bd6f61f6bfc1976cac80fa40ec789974c45c2e4c2e04cc2043967480b05e1541989668f096644c7e7bfa51feb4eea2825696b542142cf99
7
+ data.tar.gz: 3bb6ab6fbe7847c0c4cc952c23109189d03bad9f76a34afdde6e473c014de967a8d4e0c707ec766435d876f5346eda8bab7721a8167d6341932a794873cf86e3
data/CHANGELOG.markdown CHANGED
@@ -1,3 +1,11 @@
1
+ # 1.6.0
2
+
3
+ * Dynamic algorithm for selecting when to collect traces. Now, we will collect a
4
+ more complete cross-section of your application's performance, dynamically
5
+ tuned as your application runs.
6
+ * Record and report 95th percentiles for each action
7
+ * A variety of bug fixes
8
+
1
9
  # 1.5.5
2
10
 
3
11
  * Handle backslash escaped quotes inside mysql strings.
data/lib/scout_apm.rb CHANGED
@@ -85,6 +85,7 @@ require 'scout_apm/instruments/rails_router'
85
85
  require 'scout_apm/instruments/sinatra'
86
86
  require 'scout_apm/instruments/process/process_cpu'
87
87
  require 'scout_apm/instruments/process/process_memory'
88
+ require 'scout_apm/instruments/percentile_sampler'
88
89
 
89
90
  require 'scout_apm/app_server_load'
90
91
 
@@ -118,9 +119,11 @@ require 'scout_apm/metric_stats'
118
119
  require 'scout_apm/slow_transaction'
119
120
  require 'scout_apm/slow_job_record'
120
121
  require 'scout_apm/slow_item_set'
122
+ require 'scout_apm/scored_item_set'
121
123
  require 'scout_apm/slow_request_policy'
122
124
  require 'scout_apm/slow_job_policy'
123
125
  require 'scout_apm/job_record'
126
+ require 'scout_apm/request_histograms'
124
127
 
125
128
  require 'scout_apm/capacity'
126
129
  require 'scout_apm/attribute_arranger'
@@ -20,6 +20,13 @@ module ScoutApm
20
20
  attr_reader :slow_request_policy
21
21
  attr_reader :slow_job_policy
22
22
 
23
+ # Histogram of the cumulative requests since the start of the process
24
+ attr_reader :request_histograms
25
+
26
+ # Histogram of the requests, distinct by reporting period (minute)
27
+ # { StoreReportingPeriodTimestamp => RequestHistograms }
28
+ attr_reader :request_histograms_by_time
29
+
23
30
  # All access to the agent is through this class method to ensure multiple Agent instances are not initialized per Ruby process.
24
31
  def self.instance(options = {})
25
32
  @@instance ||= self.new(options)
@@ -32,14 +39,16 @@ module ScoutApm
32
39
  @started = false
33
40
  @options ||= options
34
41
  @config = ScoutApm::Config.new(options[:config_path])
42
+
43
+ @slow_request_policy = ScoutApm::SlowRequestPolicy.new
35
44
  @slow_job_policy = ScoutApm::SlowJobPolicy.new
45
+ @request_histograms = ScoutApm::RequestHistograms.new
46
+ @request_histograms_by_time = Hash.new { |h, k| h[k] = ScoutApm::RequestHistograms.new }
36
47
 
37
48
  @store = ScoutApm::Store.new
38
49
  @layaway = ScoutApm::Layaway.new
39
50
  @metric_lookup = Hash.new
40
51
 
41
- @slow_request_policy = ScoutApm::SlowRequestPolicy.new
42
-
43
52
  @capacity = ScoutApm::Capacity.new
44
53
  @installed_instruments = []
45
54
  end
@@ -103,10 +112,10 @@ module ScoutApm
103
112
 
104
113
  load_instruments if should_load_instruments?(options)
105
114
 
106
- @samplers = [
107
- ScoutApm::Instruments::Process::ProcessCpu.new(environment.processors, logger),
108
- ScoutApm::Instruments::Process::ProcessMemory.new(logger)
109
- ]
115
+ [ ScoutApm::Instruments::Process::ProcessCpu.new(environment.processors, logger),
116
+ ScoutApm::Instruments::Process::ProcessMemory.new(logger),
117
+ ScoutApm::Instruments::PercentileSampler.new(logger, 95),
118
+ ].each { |s| store.add_sampler(s) }
110
119
 
111
120
  app_server_load_hook
112
121
 
@@ -218,16 +227,19 @@ module ScoutApm
218
227
  @background_worker = ScoutApm::BackgroundWorker.new
219
228
  @background_worker_thread = Thread.new do
220
229
  @background_worker.start {
221
- # First, run periodic samplers. These should run once a minute,
222
- # rather than per-request. "CPU Load" and similar.
223
- run_samplers
224
- capacity.process
225
-
226
230
  ScoutApm::Agent.instance.process_metrics
231
+ clean_old_percentiles
227
232
  }
228
233
  end
229
234
  end
230
235
 
236
+ def clean_old_percentiles
237
+ request_histograms_by_time.
238
+ keys.
239
+ select {|timestamp| timestamp.age_in_seconds > 60 * 10 }.
240
+ each {|old_timestamp| request_histograms_by_time.delete(old_timestamp) }
241
+ end
242
+
231
243
  # If we want to skip the app_server_check, then we must load it.
232
244
  def should_load_instruments?(options={})
233
245
  return true if options[:skip_app_server_check]
@@ -290,20 +302,6 @@ module ScoutApm
290
302
  environment.deploy_integration
291
303
  end
292
304
 
293
- # TODO: Extract a proper class / registry for these. They don't really belong here
294
- def run_samplers
295
- @samplers.each do |sampler|
296
- begin
297
- result = sampler.run
298
- store.track_one!(sampler.metric_type, sampler.metric_name, result) if result
299
- rescue => e
300
- logger.info "Error reading #{sampler.human_name}"
301
- logger.debug e.message
302
- logger.debug e.backtrace.join("\n")
303
- end
304
- end
305
- end
306
-
307
305
  def app_server_missing?(options = {})
308
306
  !environment.app_server_integration(true).found? && !options[:skip_app_server_check]
309
307
  end
@@ -50,7 +50,7 @@ module ScoutApm
50
50
  :platform => "ruby",
51
51
  }
52
52
 
53
- log_deliver(metrics, slow_transactions, metadata)
53
+ log_deliver(metrics, slow_transactions, metadata, slow_jobs)
54
54
 
55
55
  payload = ScoutApm::Serializers::PayloadSerializer.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs)
56
56
  logger.debug("Payload: #{payload}")
@@ -62,7 +62,7 @@ module ScoutApm
62
62
  logger.debug e.backtrace
63
63
  end
64
64
 
65
- def log_deliver(metrics, slow_transactions, metadata)
65
+ def log_deliver(metrics, slow_transactions, metadata, jobs_traces)
66
66
  total_request_count = metrics.
67
67
  select { |meta,stats| meta.metric_name =~ /\AController/ }.
68
68
  inject(0) {|sum, (_, stat)| sum + stat.call_count }
@@ -75,7 +75,12 @@ module ScoutApm
75
75
  "Recorded across (unknown) processes"
76
76
  end
77
77
 
78
- logger.info "[#{Time.parse(metadata[:agent_time]).strftime("%H:%M")}] Delivering #{metrics.length} Metrics for #{total_request_count} requests and #{slow_transactions.length} Slow Transaction Traces, #{process_log_str}."
78
+ time_clause = "[#{Time.parse(metadata[:agent_time]).strftime("%H:%M")}]"
79
+ metrics_clause = "#{metrics.length} Metrics for #{total_request_count} requests"
80
+ slow_trans_clause = "#{slow_transactions.length} Slow Transaction Traces"
81
+ job_clause = "#{jobs_traces.length} Job Traces"
82
+
83
+ logger.info "#{time_clause} Delivering #{metrics_clause} and #{slow_trans_clause} and #{job_clause}, #{process_log_str}."
79
84
  logger.debug("Metrics: #{metrics.pretty_inspect}\nSlowTrans: #{slow_transactions.pretty_inspect}\nMetadata: #{metadata.inspect.pretty_inspect}")
80
85
  end
81
86
 
@@ -7,6 +7,10 @@ module ScoutApm
7
7
  case attribute
8
8
  when Array
9
9
  attribute_hash[attribute[0]] = subject.send(attribute[1])
10
+ when :bucket
11
+ attribute_hash[attribute] = subject.bucket_type
12
+ when :name
13
+ attribute_hash[attribute] = subject.bucket_name
10
14
  when Symbol
11
15
  attribute_hash[attribute] = subject.send(attribute)
12
16
  end
@@ -1,15 +1,15 @@
1
1
  module ScoutApm
2
2
  module BucketNameSplitter
3
- def bucket
3
+ def bucket_type
4
4
  split_metric_name(metric_name).first
5
5
  end
6
6
 
7
- def name
7
+ def bucket_name
8
8
  split_metric_name(metric_name).last
9
9
  end
10
10
 
11
11
  def key
12
- {:bucket => bucket, :name => name}
12
+ {:bucket => bucket_type, :name => bucket_name}
13
13
  end
14
14
 
15
15
  private
@@ -7,14 +7,15 @@ require 'scout_apm/environment'
7
7
  #
8
8
  # application_root - override the detected directory of the application
9
9
  # data_file - override the default temporary storage location. Must be a location in a writable directory
10
- # hostname - override the default hostname detection. Default varies by environment - either system hostname, or PAAS hostname
10
+ # host - override the default hostname detection. Default varies by environment - either system hostname, or PAAS hostname
11
+ # direct_host - override the default "direct" host. The direct_host bypasses the ingestion pipeline and goes directly to the webserver, and is primarily used for features under development.
11
12
  # key - the account key with Scout APM. Found in Settings in the Web UI
12
13
  # log_file_path - either a directory or "STDOUT".
13
14
  # log_level - DEBUG / INFO / WARN as usual
14
15
  # monitor - true or false. False prevents any instrumentation from starting
15
16
  # name - override the name reported to APM. This is the name that shows in the Web UI
16
17
  # uri_reporting - 'path' or 'full_path' default is 'full_path', which reports URL params as well as the path.
17
- # report_format - 'json' or 'marshal'. Marshal is currently the default; json processing is in beta
18
+ # report_format - 'json' or 'marshal'. Marshal is legacy and will be removed.
18
19
  #
19
20
  # Any of these config settings can be set with an environment variable prefixed
20
21
  # by SCOUT_ and uppercasing the key: SCOUT_LOG_LEVEL for instance.
@@ -23,6 +24,7 @@ module ScoutApm
23
24
  class Config
24
25
  DEFAULTS = {
25
26
  'host' => 'https://checkin.scoutapp.com',
27
+ 'direct_host' => 'https://apm.scoutapp.com',
26
28
  'log_level' => 'info',
27
29
  'stackprof_interval' => 20000, # microseconds, 1000 = 1 millisecond, so 20k == 20 milliseconds
28
30
  'uri_reporting' => 'full_path',
@@ -56,6 +56,26 @@ module ScoutApm
56
56
  end
57
57
  end
58
58
 
59
+ # Given a value, where in this histogram does it fall?
60
+ # Returns a float between 0 and 1 -- except for an empty histogram (total == 0), where it returns 100
61
+ def approximate_quantile_of_value(v)
62
+ mutex.synchronize do
63
+ return 100 if total == 0
64
+
65
+ count_examined = 0
66
+
67
+ bins.each_with_index do |bin, index|
68
+ if v <= bin.value
69
+ break
70
+ end
71
+
72
+ count_examined += bin.count
73
+ end
74
+
75
+ count_examined / total.to_f
76
+ end
77
+ end
78
+
59
79
  def mean
60
80
  mutex.synchronize do
61
81
  if total == 0
@@ -0,0 +1,37 @@
1
+ module ScoutApm
2
+ module Instruments
3
+ class PercentileSampler
4
+ attr_reader :logger
5
+
6
+ attr_reader :percentiles
7
+
8
+ def initialize(logger, percentiles)
9
+ @logger = logger
10
+ @percentiles = Array(percentiles)
11
+ end
12
+
13
+ def human_name
14
+ "Percentiles"
15
+ end
16
+
17
+ # Gets each configured percentile (e.g. the 95th) for every name in the histograms of the requested time
18
+ def metrics(time)
19
+ ms = {}
20
+ histos = ScoutApm::Agent.instance.request_histograms_by_time[time]
21
+ histos.each_name do |name|
22
+ percentiles.each do |percentile|
23
+ meta = MetricMeta.new("Percentile/#{percentile}/#{name}")
24
+ stat = MetricStats.new
25
+ stat.update!(histos.quantile(name, percentile))
26
+ ms[meta] = stat
27
+ end
28
+ end
29
+
30
+ # Wipe the histograms we just collected data on
31
+ ScoutApm::Agent.instance.request_histograms_by_time.delete(time)
32
+
33
+ ms
34
+ end
35
+ end
36
+ end
37
+ end
@@ -29,6 +29,18 @@ module ScoutApm
29
29
  "Process CPU"
30
30
  end
31
31
 
32
+ def metrics(_time)
33
+ result = run
34
+ if result
35
+ meta = MetricMeta.new("#{metric_type}/#{metric_name}")
36
+ stat = MetricStats.new(false)
37
+ stat.update!(result)
38
+ { meta => stat }
39
+ else
40
+ {}
41
+ end
42
+ end
43
+
32
44
  # TODO: Figure out a good default instead of nil
33
45
  def run
34
46
  res = nil
@@ -20,6 +20,18 @@ module ScoutApm
20
20
  "Process Memory"
21
21
  end
22
22
 
23
+ def metrics(_time)
24
+ result = run
25
+ if result
26
+ meta = MetricMeta.new("#{metric_type}/#{metric_name}")
27
+ stat = MetricStats.new(false)
28
+ stat.update!(result)
29
+ { meta => stat }
30
+ else
31
+ {}
32
+ end
33
+ end
34
+
23
35
  def run
24
36
  case RUBY_PLATFORM.downcase
25
37
  when /linux/
@@ -19,10 +19,12 @@ module ScoutApm
19
19
  # render :update
20
20
  # end
21
21
  def scope_layer
22
- @scope_layer ||= walker.walk do |layer|
23
- if layer.type == "Controller"
24
- break layer
25
- end
22
+ @scope_layer ||= find_first_layer_of_type("Controller") || find_first_layer_of_type("Job")
23
+ end
24
+
25
+ def find_first_layer_of_type(layer_type)
26
+ walker.walk do |layer|
27
+ return layer if layer.type == layer_type
26
28
  end
27
29
  end
28
30
  end
@@ -4,15 +4,29 @@ module ScoutApm
4
4
  def initialize(*)
5
5
  @backtraces = []
6
6
  super
7
+
8
+ # After call to super, so @request is populated
9
+ @points = if request.job?
10
+ ScoutApm::Agent.instance.slow_job_policy.score(request)
11
+ else
12
+ -1
13
+ end
7
14
  end
8
15
 
9
- def call
10
- return unless request.job?
16
+ def name
17
+ request.unique_name
18
+ end
19
+
20
+ def score
21
+ @points
22
+ end
11
23
 
12
- job_name = [queue_layer.name, job_layer.name]
24
+ def call
25
+ return nil unless request.job?
26
+ return nil unless queue_layer
27
+ return nil unless job_layer
13
28
 
14
- slow_enough = ScoutApm::Agent.instance.slow_job_policy.slow?(job_name, root_layer.total_call_time)
15
- return unless slow_enough
29
+ ScoutApm::Agent.instance.slow_job_policy.stored!(request)
16
30
 
17
31
  SlowJobRecord.new(
18
32
  queue_layer.name,
@@ -21,7 +35,8 @@ module ScoutApm
21
35
  job_layer.total_call_time,
22
36
  job_layer.total_exclusive_time,
23
37
  request.context,
24
- create_metrics)
38
+ create_metrics,
39
+ score)
25
40
  end
26
41
 
27
42
  def queue_layer
@@ -32,12 +47,6 @@ module ScoutApm
32
47
  @job_layer ||= find_first_layer_of_type("Job")
33
48
  end
34
49
 
35
- def find_first_layer_of_type(layer_type)
36
- walker.walk do |layer|
37
- return layer if layer.type == layer_type
38
- end
39
- end
40
-
41
50
  def create_metrics
42
51
  metric_hash = Hash.new
43
52
 
@@ -102,7 +111,6 @@ module ScoutApm
102
111
  end
103
112
 
104
113
  def attach_backtraces(metric_hash)
105
- ScoutApm::Agent.instance.logger.info("Attaching backtraces to job #{@backtraces}")
106
114
  @backtraces.each do |meta_with_backtrace|
107
115
  metric_hash.keys.find { |k| k == meta_with_backtrace }.backtrace = meta_with_backtrace.backtrace
108
116
  end
@@ -4,46 +4,52 @@ module ScoutApm
4
4
  def initialize(*)
5
5
  @backtraces = [] # An Array of MetricMetas that have a backtrace
6
6
  super
7
+
8
+ # After call to super, so @request is populated
9
+ @points = if request.web?
10
+ ScoutApm::Agent.instance.slow_request_policy.score(request)
11
+ else
12
+ -1
13
+ end
14
+ end
15
+
16
+ def name
17
+ request.unique_name
18
+ end
19
+
20
+ def score
21
+ @points
7
22
  end
8
23
 
24
+ # Unconditionally attempts to convert this into a SlowTransaction object.
25
+ # Can return nil if the request didn't have any scope_layer, or if its URI matches an `ignore_traces` pattern.
9
26
  def call
10
27
  scope = scope_layer
11
- return [nil, {}] unless scope
12
-
13
- policy = ScoutApm::Agent.instance.slow_request_policy.capture_type(root_layer.total_call_time)
14
- if policy == ScoutApm::SlowRequestPolicy::CAPTURE_NONE
15
- return [nil, {}]
16
- end
28
+ return nil unless scope
17
29
 
18
- # increment the slow transaction count if this is a slow transaction.
19
- meta = MetricMeta.new("SlowTransaction/#{scope.legacy_metric_name}")
20
- stat = MetricStats.new
21
- stat.update!(1)
30
+ ScoutApm::Agent.instance.slow_request_policy.stored!(request)
22
31
 
23
32
  uri = request.annotations[:uri] || ""
24
33
 
25
34
  ScoutApm::Agent.instance.config.value("ignore_traces").each do |pattern|
26
35
  if /#{pattern}/ =~ uri
27
36
  ScoutApm::Agent.instance.logger.debug("Skipped recording a trace for #{uri} due to `ignore_traces` pattern: #{pattern}")
28
- return [nil, { meta => stat }]
37
+ return nil
29
38
  end
30
39
  end
31
40
 
32
-
33
41
  metrics = create_metrics
34
42
  # Disable stackprof output for now
35
43
  stackprof = [] # request.stackprof
36
44
 
37
- [
38
- SlowTransaction.new(uri,
39
- scope.legacy_metric_name,
40
- root_layer.total_call_time,
41
- metrics,
42
- request.context,
43
- root_layer.stop_time,
44
- stackprof),
45
- { meta => stat }
46
- ]
45
+ SlowTransaction.new(uri,
46
+ scope.legacy_metric_name,
47
+ root_layer.total_call_time,
48
+ metrics,
49
+ request.context,
50
+ root_layer.stop_time,
51
+ stackprof,
52
+ @points)
47
53
  end
48
54
 
49
55
  # Iterates over the TrackedRequest's MetricMetas that have backtraces and attaches each to correct MetricMeta in the Metric Hash.