scout_apm 2.5.1 → 5.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/test.yml +68 -0
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +5 -5
  5. data/CHANGELOG.markdown +176 -3
  6. data/Gemfile +1 -7
  7. data/LICENSE.md +21 -28
  8. data/gems/README.md +28 -0
  9. data/gems/instruments.gemfile +6 -0
  10. data/gems/octoshark.gemfile +4 -0
  11. data/gems/rails3.gemfile +5 -0
  12. data/gems/rails4.gemfile +4 -0
  13. data/gems/rails5.gemfile +4 -0
  14. data/gems/rails6.gemfile +4 -0
  15. data/gems/sidekiq.gemfile +4 -0
  16. data/gems/typhoeus.gemfile +3 -0
  17. data/lib/scout_apm/agent/preconditions.rb +3 -3
  18. data/lib/scout_apm/agent.rb +22 -0
  19. data/lib/scout_apm/agent_context.rb +21 -2
  20. data/lib/scout_apm/app_server_load.rb +7 -2
  21. data/lib/scout_apm/auto_instrument/instruction_sequence.rb +31 -0
  22. data/lib/scout_apm/auto_instrument/layer.rb +23 -0
  23. data/lib/scout_apm/auto_instrument/parser.rb +27 -0
  24. data/lib/scout_apm/auto_instrument/rails.rb +174 -0
  25. data/lib/scout_apm/auto_instrument.rb +5 -0
  26. data/lib/scout_apm/background_job_integrations/delayed_job.rb +1 -1
  27. data/lib/scout_apm/background_job_integrations/faktory.rb +103 -0
  28. data/lib/scout_apm/background_job_integrations/legacy_sneakers.rb +55 -0
  29. data/lib/scout_apm/background_job_integrations/que.rb +134 -0
  30. data/lib/scout_apm/background_job_integrations/shoryuken.rb +2 -0
  31. data/lib/scout_apm/background_job_integrations/sidekiq.rb +15 -10
  32. data/lib/scout_apm/background_job_integrations/sneakers.rb +11 -11
  33. data/lib/scout_apm/config.rb +54 -6
  34. data/lib/scout_apm/detailed_trace.rb +3 -2
  35. data/lib/scout_apm/environment.rb +18 -1
  36. data/lib/scout_apm/error.rb +27 -0
  37. data/lib/scout_apm/error_service/error_buffer.rb +39 -0
  38. data/lib/scout_apm/error_service/error_record.rb +211 -0
  39. data/lib/scout_apm/error_service/ignored_exceptions.rb +66 -0
  40. data/lib/scout_apm/error_service/middleware.rb +32 -0
  41. data/lib/scout_apm/error_service/notifier.rb +33 -0
  42. data/lib/scout_apm/error_service/payload.rb +47 -0
  43. data/lib/scout_apm/error_service/periodic_work.rb +17 -0
  44. data/lib/scout_apm/error_service/railtie.rb +11 -0
  45. data/lib/scout_apm/error_service/sidekiq.rb +80 -0
  46. data/lib/scout_apm/error_service.rb +34 -0
  47. data/lib/scout_apm/exceptions.rb +12 -0
  48. data/lib/scout_apm/extensions/transaction_callback_payload.rb +1 -1
  49. data/lib/scout_apm/external_service_metric_set.rb +97 -0
  50. data/lib/scout_apm/external_service_metric_stats.rb +85 -0
  51. data/lib/scout_apm/fake_store.rb +3 -0
  52. data/lib/scout_apm/framework_integrations/rails_3_or_4.rb +7 -2
  53. data/lib/scout_apm/git_revision.rb +9 -0
  54. data/lib/scout_apm/ignored_uris.rb +3 -1
  55. data/lib/scout_apm/instant/middleware.rb +4 -1
  56. data/lib/scout_apm/instrument_manager.rb +22 -1
  57. data/lib/scout_apm/instruments/action_controller_rails_2.rb +1 -1
  58. data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +53 -29
  59. data/lib/scout_apm/instruments/action_view.rb +30 -9
  60. data/lib/scout_apm/instruments/active_record.rb +69 -19
  61. data/lib/scout_apm/instruments/elasticsearch.rb +93 -42
  62. data/lib/scout_apm/instruments/grape.rb +1 -1
  63. data/lib/scout_apm/instruments/http.rb +68 -0
  64. data/lib/scout_apm/instruments/http_client.rb +33 -14
  65. data/lib/scout_apm/instruments/influxdb.rb +2 -2
  66. data/lib/scout_apm/instruments/memcached.rb +58 -0
  67. data/lib/scout_apm/instruments/middleware_detailed.rb +1 -1
  68. data/lib/scout_apm/instruments/middleware_summary.rb +1 -1
  69. data/lib/scout_apm/instruments/mongoid.rb +10 -5
  70. data/lib/scout_apm/instruments/moped.rb +44 -19
  71. data/lib/scout_apm/instruments/net_http.rb +51 -16
  72. data/lib/scout_apm/instruments/rails_router.rb +1 -1
  73. data/lib/scout_apm/instruments/redis.rb +27 -12
  74. data/lib/scout_apm/instruments/redis5.rb +59 -0
  75. data/lib/scout_apm/instruments/sinatra.rb +3 -1
  76. data/lib/scout_apm/instruments/typhoeus.rb +90 -0
  77. data/lib/scout_apm/job_record.rb +4 -2
  78. data/lib/scout_apm/layaway_file.rb +4 -0
  79. data/lib/scout_apm/layer.rb +5 -2
  80. data/lib/scout_apm/layer_children_set.rb +9 -8
  81. data/lib/scout_apm/layer_converters/external_service_converter.rb +65 -0
  82. data/lib/scout_apm/layer_converters/find_layer_by_type.rb +4 -0
  83. data/lib/scout_apm/layer_converters/request_queue_time_converter.rb +2 -0
  84. data/lib/scout_apm/layer_converters/trace_converter.rb +7 -4
  85. data/lib/scout_apm/logger.rb +5 -1
  86. data/lib/scout_apm/middleware.rb +1 -1
  87. data/lib/scout_apm/periodic_work.rb +19 -0
  88. data/lib/scout_apm/remote/message.rb +4 -0
  89. data/lib/scout_apm/remote/server.rb +13 -1
  90. data/lib/scout_apm/reporter.rb +8 -3
  91. data/lib/scout_apm/reporting.rb +2 -1
  92. data/lib/scout_apm/request_histograms.rb +8 -0
  93. data/lib/scout_apm/serializers/app_server_load_serializer.rb +4 -0
  94. data/lib/scout_apm/serializers/directive_serializer.rb +4 -0
  95. data/lib/scout_apm/serializers/external_service_serializer_to_json.rb +15 -0
  96. data/lib/scout_apm/serializers/payload_serializer.rb +4 -3
  97. data/lib/scout_apm/serializers/payload_serializer_to_json.rb +10 -3
  98. data/lib/scout_apm/slow_policy/age_policy.rb +33 -0
  99. data/lib/scout_apm/slow_policy/percent_policy.rb +22 -0
  100. data/lib/scout_apm/slow_policy/percentile_policy.rb +24 -0
  101. data/lib/scout_apm/slow_policy/policy.rb +21 -0
  102. data/lib/scout_apm/slow_policy/speed_policy.rb +16 -0
  103. data/lib/scout_apm/slow_request_policy.rb +18 -77
  104. data/lib/scout_apm/store.rb +31 -1
  105. data/lib/scout_apm/tracer.rb +2 -2
  106. data/lib/scout_apm/tracked_request.rb +35 -4
  107. data/lib/scout_apm/utils/backtrace_parser.rb +3 -0
  108. data/lib/scout_apm/utils/marshal_logging.rb +90 -0
  109. data/lib/scout_apm/utils/sql_sanitizer.rb +47 -7
  110. data/lib/scout_apm/version.rb +1 -1
  111. data/lib/scout_apm.rb +46 -1
  112. data/scout_apm.gemspec +14 -9
  113. data/test/test_helper.rb +2 -2
  114. data/test/tmp/README.md +17 -0
  115. data/test/unit/agent_context_test.rb +29 -0
  116. data/test/unit/auto_instrument/anonymous_block_value.rb +7 -0
  117. data/test/unit/auto_instrument/assignments-instrumented.rb +31 -0
  118. data/test/unit/auto_instrument/assignments.rb +31 -0
  119. data/test/unit/auto_instrument/controller-ast.txt +57 -0
  120. data/test/unit/auto_instrument/controller-instrumented.rb +49 -0
  121. data/test/unit/auto_instrument/controller.rb +49 -0
  122. data/test/unit/auto_instrument/hanging_method.rb +6 -0
  123. data/test/unit/auto_instrument/rescue_from-instrumented.rb +13 -0
  124. data/test/unit/auto_instrument/rescue_from.rb +13 -0
  125. data/test/unit/auto_instrument_test.rb +62 -0
  126. data/test/unit/background_job_integrations/sidekiq_test.rb +17 -0
  127. data/test/unit/environment_test.rb +2 -2
  128. data/test/unit/error_service/error_buffer_test.rb +25 -0
  129. data/test/unit/error_service/ignored_exceptions_test.rb +49 -0
  130. data/test/unit/external_service_metric_set_test.rb +67 -0
  131. data/test/unit/external_service_metric_stats_test.rb +106 -0
  132. data/test/unit/ignored_uris_test.rb +6 -0
  133. data/test/unit/instruments/active_record_test.rb +40 -0
  134. data/test/unit/instruments/http_client_test.rb +24 -0
  135. data/test/unit/instruments/http_test.rb +24 -0
  136. data/test/unit/instruments/moped_test.rb +24 -0
  137. data/test/unit/instruments/net_http_test.rb +11 -1
  138. data/test/unit/instruments/redis_test.rb +24 -0
  139. data/test/unit/instruments/typhoeus_test.rb +42 -0
  140. data/test/unit/layer_children_set_test.rb +9 -0
  141. data/test/unit/remote/{test_message.rb → message_test.rb} +0 -0
  142. data/test/unit/remote/{test_router.rb → route_test.rb} +0 -0
  143. data/test/unit/remote/{test_server.rb → server_test.rb} +4 -1
  144. data/test/unit/request_histograms_test.rb +17 -0
  145. data/test/unit/serializers/payload_serializer_test.rb +39 -3
  146. data/test/unit/slow_request_policy_test.rb +41 -13
  147. data/test/unit/sql_sanitizer_test.rb +106 -0
  148. data/test/unit/tracer_test.rb +25 -0
  149. metadata +118 -60
  150. data/.travis.yml +0 -25
  151. data/lib/scout_apm/instruments/.DS_Store +0 -0
  152. data/lib/scout_apm/slow_job_policy.rb +0 -111
  153. data/lib/scout_apm/utils/sql_sanitizer_regex.rb +0 -25
  154. data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +0 -26
  155. data/test/unit/instruments/active_record_instruments_test.rb +0 -5
  156. data/test/unit/slow_job_policy_test.rb +0 -6
@@ -16,8 +16,20 @@ module ScoutApm
16
16
  @server = nil
17
17
  end
18
18
 
19
- def start
19
+ def require_webrick
20
20
  require 'webrick'
21
+ true
22
+ rescue LoadError
23
+ @logger.warn(
24
+ %q|Could not require Webrick. Ruby 3.0 stopped bundling it
25
+ automatically, but it is required to instrument Resque. Please add
26
+ Webrick to your Gemfile.|
27
+ )
28
+ false
29
+ end
30
+
31
+ def start
32
+ return false unless require_webrick
21
33
 
22
34
  @server = WEBrick::HTTPServer.new(
23
35
  :BindAddress => bind,
@@ -2,7 +2,6 @@ require 'openssl'
2
2
 
3
3
  module ScoutApm
4
4
  class Reporter
5
- CA_FILE = File.join( File.dirname(__FILE__), *%w[.. .. data cacert.pem] )
6
5
  VERIFY_MODE = OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT
7
6
 
8
7
  attr_reader :type
@@ -23,6 +22,7 @@ module ScoutApm
23
22
  context.logger
24
23
  end
25
24
 
25
+ # The fully serialized string payload to be sent
26
26
  def report(payload, headers = {})
27
27
  hosts = determine_hosts
28
28
 
@@ -36,6 +36,7 @@ module ScoutApm
36
36
  logger.debug("Original Size: #{original_payload_size} Compressed Size: #{compress_payload_size}")
37
37
  end
38
38
 
39
+ logger.info("Posting payload to #{hosts.inspect}")
39
40
  post_payload(hosts, payload, headers)
40
41
  end
41
42
 
@@ -52,6 +53,8 @@ module ScoutApm
52
53
  URI.parse("#{host}/apps/deploy.scout?key=#{key}&name=#{encoded_app_name}")
53
54
  when :instant_trace
54
55
  URI.parse("#{host}/apps/instant_trace.scout?key=#{key}&name=#{encoded_app_name}&instant_key=#{instant_key}")
56
+ when :errors
57
+ URI.parse("#{host}/apps/error.scout?key=#{key}&name=#{encoded_app_name}")
55
58
  end.tap { |u| logger.debug("Posting to #{u}") }
56
59
  end
57
60
 
@@ -90,7 +93,7 @@ module ScoutApm
90
93
  logger.debug "got response: #{response.inspect}"
91
94
  case response
92
95
  when Net::HTTPSuccess, Net::HTTPNotModified
93
- logger.debug "/#{type} OK"
96
+ logger.debug "#{type} OK"
94
97
  when Net::HTTPBadRequest
95
98
  logger.warn "/#{type} FAILED: The Account Key [#{config.value('key')}] is invalid."
96
99
  when Net::HTTPUnprocessableEntity
@@ -123,7 +126,7 @@ module ScoutApm
123
126
  proxy_uri.password).new(url.host, url.port)
124
127
  if url.is_a?(URI::HTTPS)
125
128
  http.use_ssl = true
126
- http.ca_file = CA_FILE
129
+ http.ca_file = config.value("ssl_cert_file")
127
130
  http.verify_mode = VERIFY_MODE
128
131
  end
129
132
  http
@@ -142,6 +145,8 @@ module ScoutApm
142
145
  def determine_hosts
143
146
  if [:deploy_hook, :instant_trace].include?(type)
144
147
  config.value('direct_host')
148
+ elsif [:errors].include?(type)
149
+ config.value('errors_host')
145
150
  else
146
151
  config.value('host')
147
152
  end
@@ -83,11 +83,12 @@ module ScoutApm
83
83
  slow_jobs = reporting_period.slow_jobs_payload
84
84
  histograms = reporting_period.histograms
85
85
  db_query_metrics = reporting_period.db_query_metrics_payload
86
+ external_service_metrics = reporting_period.external_service_metrics_payload
86
87
  traces = (slow_transactions.map(&:span_trace) + slow_jobs.map(&:span_trace)).compact
87
88
 
88
89
  log_deliver(metrics, slow_transactions, metadata, slow_jobs, histograms)
89
90
 
90
- payload = ScoutApm::Serializers::PayloadSerializer.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, traces)
91
+ payload = ScoutApm::Serializers::PayloadSerializer.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, external_service_metrics, traces)
91
92
  logger.debug("Sending payload w/ Headers: #{headers.inspect}")
92
93
 
93
94
  reporter.report(payload, headers)
@@ -22,6 +22,14 @@ module ScoutApm
22
22
  @histograms.keys.each { |n| yield n }
23
23
  end
24
24
 
25
+ def as_json
26
+ Hash[
27
+ @histograms.map{ |key, histogram|
28
+ [key, histogram.as_json]
29
+ }
30
+ ]
31
+ end
32
+
25
33
  def add(item, value)
26
34
  @histograms[item].add(value)
27
35
  end
@@ -5,6 +5,10 @@ module ScoutApm
5
5
  class AppServerLoadSerializer
6
6
  def self.serialize(data)
7
7
  Marshal.dump(data)
8
+ rescue
9
+ ScoutApm::Agent.instance.logger.info("Failed Marshalling AppServerLoad")
10
+ ScoutApm::Agent.instance.logger.info(ScoutApm::Utils::MarshalLogging.new(data).dive) rescue nil
11
+ raise
8
12
  end
9
13
 
10
14
  def self.deserialize(data)
@@ -5,6 +5,10 @@ module ScoutApm
5
5
  class DirectiveSerializer
6
6
  def self.serialize(data)
7
7
  Marshal.dump(data)
8
+ rescue
9
+ ScoutApm::Agent.instance.logger.info("Failed Marshalling Directive")
10
+ ScoutApm::Agent.instance.logger.info(ScoutApm::Utils::MarshalLogging.new(data).dive) rescue nil
11
+ raise
8
12
  end
9
13
 
10
14
  def self.deserialize(data)
@@ -0,0 +1,15 @@
1
+ module ScoutApm
2
+ module Serializers
3
+ class ExternalServiceSerializerToJson
4
+ attr_reader :external_service_metrics
5
+
6
+ def initialize(external_service_metrics)
7
+ @external_service_metrics = external_service_metrics
8
+ end
9
+
10
+ def as_json
11
+ external_service_metrics.map{|metric| metric.as_json }
12
+ end
13
+ end
14
+ end
15
+ end
@@ -2,9 +2,9 @@
2
2
  module ScoutApm
3
3
  module Serializers
4
4
  class PayloadSerializer
5
- def self.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, traces)
5
+ def self.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, external_service_metrics, traces)
6
6
  if ScoutApm::Agent.instance.context.config.value("report_format") == 'json'
7
- ScoutApm::Serializers::PayloadSerializerToJson.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, traces)
7
+ ScoutApm::Serializers::PayloadSerializerToJson.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, external_service_metrics, traces)
8
8
  else
9
9
  metadata = metadata.dup
10
10
  metadata.default = nil
@@ -22,7 +22,8 @@ module ScoutApm
22
22
  # payloads. At this point, the marshal code branch is
23
23
  # very rarely used anyway.
24
24
  :histograms => HistogramsSerializerToJson.new(histograms).as_json,
25
- :db_query_metrics => db_query_metrics)
25
+ :db_query_metrics => db_query_metrics,
26
+ :external_service_metrics => external_service_metrics)
26
27
  end
27
28
  end
28
29
 
@@ -2,7 +2,7 @@ module ScoutApm
2
2
  module Serializers
3
3
  module PayloadSerializerToJson
4
4
  class << self
5
- def serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, traces)
5
+ def serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, external_service_metrics, traces)
6
6
  metadata.merge!({:payload_version => 2})
7
7
 
8
8
  jsonify_hash({:metadata => metadata,
@@ -14,6 +14,9 @@ module ScoutApm
14
14
  :db_metrics => {
15
15
  :query => DbQuerySerializerToJson.new(db_query_metrics).as_json,
16
16
  },
17
+ :es_metrics => {
18
+ :http => ExternalServiceSerializerToJson.new(external_service_metrics).as_json,
19
+ },
17
20
  :span_traces => traces.map{ |t| t.as_json },
18
21
  })
19
22
  end
@@ -46,17 +49,21 @@ module ScoutApm
46
49
  end
47
50
 
48
51
  ESCAPE_MAPPINGS = {
52
+ # Stackoverflow answer on gsub matches and backslashes
53
+ # https://stackoverflow.com/a/4149087/2705125
54
+ '\\' => '\\\\\\\\',
49
55
  "\b" => '\\b',
50
56
  "\t" => '\\t',
51
57
  "\n" => '\\n',
52
58
  "\f" => '\\f',
53
59
  "\r" => '\\r',
54
60
  '"' => '\\"',
55
- '\\' => '\\\\',
56
61
  }
57
62
 
58
63
  def escape(string)
59
- ESCAPE_MAPPINGS.inject(string.to_s) {|s, (bad, good)| s.gsub(bad, good) }
64
+ ESCAPE_MAPPINGS.inject(string.to_s) {|s, (bad, good)|
65
+ s.gsub(bad, good)
66
+ }
60
67
  end
61
68
 
62
69
  def format_by_type(formatee)
@@ -0,0 +1,33 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
+ module ScoutApm::SlowPolicy
4
+ class AgePolicy < Policy
5
+ # For each minute we haven't seen an endpoint
6
+ POINT_MULTIPLIER_AGE = 0.25
7
+
8
+ # A hash of Endpoint Name to the last time we stored a slow transaction for it.
9
+ #
10
+ # Defaults to a start time that is pretty close to application boot time.
11
+ # So the "age" of an endpoint we've never seen is the time the application
12
+ # has been running.
13
+ attr_reader :last_seen
14
+
15
+ def initialize(context)
16
+ super
17
+
18
+ zero_time = Time.now
19
+ @last_seen = Hash.new { |h, k| h[k] = zero_time }
20
+ end
21
+
22
+ def call(request)
23
+ # How long has it been since we've seen this?
24
+ age = Time.now - last_seen[request.unique_name]
25
+
26
+ age / 60.0 * POINT_MULTIPLIER_AGE
27
+ end
28
+
29
+ def stored!(request)
30
+ last_seen[request.unique_name] = Time.now
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,22 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
+ module ScoutApm::SlowPolicy
4
+ class PercentPolicy < Policy
5
+ # Points for an endpoint whose throughput * response time is a large % of
6
+ # overall time spent processing requests
7
+ POINT_MULTIPLIER_PERCENT_TIME = 2.5
8
+
9
+ # Of the total time spent handling endpoints in this app, if this endpoint
10
+ # is a higher percent, it should get more points.
11
+ #
12
+ # A: 20 calls @ 100ms each => 2 seconds of total time
13
+ # B: 10 calls @ 100ms each => 1 second of total time
14
+ #
15
+ # Then A is 66% of the total call time
16
+ def call(request) # Scale 0.0 - 1.0
17
+ percent = context.transaction_time_consumed.percent_of_total(request.unique_name)
18
+
19
+ percent * POINT_MULTIPLIER_PERCENT_TIME
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,24 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
+ module ScoutApm::SlowPolicy
4
+ class PercentilePolicy < Policy
5
+ def call(request)
6
+ # What approximate percentile was this request?
7
+ total_time = request.root_layer.total_call_time
8
+ percentile = context.request_histograms.approximate_quantile_of_value(request.unique_name, total_time)
9
+
10
+ if percentile < 40
11
+ 0.4 # Don't put much emphasis on capturing low percentiles.
12
+ elsif percentile < 60
13
+ 1.4 # Highest here to get mean traces
14
+ elsif percentile < 90
15
+ 0.7 # Between 60 & 90% is fine.
16
+ elsif percentile >= 90
17
+ 1.4 # Highest here to get 90+%ile traces
18
+ else
19
+ # impossible.
20
+ percentile
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,21 @@
1
+ # Note that this is semi-internal API. You should not need this, and if you do
2
+ # we're here to help at support@scoutapm.com. TrackedRequest doesn't change
3
+ # often, but we can't promise a perfectly stable API for it either.
4
+ module ScoutApm::SlowPolicy
5
+ class Policy
6
+ attr_reader :context
7
+
8
+ def initialize(context)
9
+ @context = context
10
+ end
11
+
12
+ def call(request)
13
+ raise NotImplementedError
14
+ end
15
+
16
+ # Override in subclasses to execute some behavior if the request gets a
17
+ # slot in the ScoredItemSet. Defaults to no-op
18
+ def stored!(request)
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,16 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
+ module ScoutApm::SlowPolicy
4
+ class SpeedPolicy < Policy
5
+ # Adjust speed points. See the function
6
+ POINT_MULTIPLIER_SPEED = 0.25
7
+
8
+ # Time in seconds
9
+ # Logarithm keeps huge times from swamping the other metrics.
10
+ # 1+ is necessary to keep the log function in positive territory.
11
+ def call(request)
12
+ total_time = request.root_layer.total_call_time
13
+ Math.log(1 + total_time) * POINT_MULTIPLIER_SPEED
14
+ end
15
+ end
16
+ end
@@ -3,43 +3,29 @@
3
3
 
4
4
  module ScoutApm
5
5
  class SlowRequestPolicy
6
- CAPTURE_TYPES = [
7
- CAPTURE_DETAIL = "capture_detail",
8
- CAPTURE_NONE = "capture_none",
9
- ]
10
-
11
- # Adjust speed points. See the function
12
- POINT_MULTIPLIER_SPEED = 0.25
13
-
14
- # For each minute we haven't seen an endpoint
15
- POINT_MULTIPLIER_AGE = 0.25
16
-
17
- # Outliers are worth up to "1000ms" of weight
18
- POINT_MULTIPLIER_PERCENTILE = 1.0
19
-
20
- # Points for an endpoint's who's throughput * response time is a large % of
21
- # overall time spent processing requests
22
- POINT_MULTIPLIER_PERCENT_TIME = 2.5
23
-
24
- # A hash of Endpoint Name to the last time we stored a slow transaction for it.
25
- #
26
- # Defaults to a start time that is pretty close to application boot time.
27
- # So the "age" of an endpoint we've never seen is the time the application
28
- # has been running.
29
- attr_reader :last_seen
30
-
31
6
  # The AgentContext we're running in
32
7
  attr_reader :context
8
+ attr_reader :policies
33
9
 
34
10
  def initialize(context)
35
11
  @context = context
12
+ @policies = []
13
+ end
36
14
 
37
- zero_time = Time.now
38
- @last_seen = Hash.new { |h, k| h[k] = zero_time }
15
+ def add_default_policies
16
+ add(SlowPolicy::SpeedPolicy.new(context))
17
+ add(SlowPolicy::PercentilePolicy.new(context))
18
+ add(SlowPolicy::AgePolicy.new(context))
19
+ add(SlowPolicy::PercentilePolicy.new(context))
39
20
  end
40
21
 
41
- def stored!(request)
42
- last_seen[request.unique_name] = Time.now
22
+ # policy is an object that behaves like a policy (responds to .call(req) for the score, and .stored!(req))
23
+ def add(policy)
24
+ unless policy.respond_to?(:call) && policy.respond_to?(:stored!)
25
+ raise "SlowRequestPolicy must implement policy api call(req) and stored!(req)"
26
+ end
27
+
28
+ @policies << policy
43
29
  end
44
30
 
45
31
  # Determine if this request trace should be fully analyzed by scoring it
@@ -56,56 +42,11 @@ module ScoutApm
56
42
  return -1 # A negative score, should never be good enough to store.
57
43
  end
58
44
 
59
- total_time = request.root_layer.total_call_time
60
-
61
- # How long has it been since we've seen this?
62
- age = Time.now - last_seen[unique_name]
63
-
64
- # What approximate percentile was this request?
65
- percentile = context.request_histograms.approximate_quantile_of_value(unique_name, total_time)
66
-
67
- percent_of_total_time = context.transaction_time_consumed.percent_of_total(unique_name)
68
-
69
- return speed_points(total_time) + percentile_points(percentile) + age_points(age) + percent_time_points(percent_of_total_time)
70
- end
71
-
72
- private
73
-
74
- # Time in seconds
75
- # Logarithm keeps huge times from swamping the other metrics.
76
- # 1+ is necessary to keep the log function in positive territory.
77
- def speed_points(time)
78
- Math.log(1 + time) * POINT_MULTIPLIER_SPEED
79
- end
80
-
81
- def percentile_points(percentile)
82
- if percentile < 40
83
- 0.4 # Don't put much emphasis on capturing low percentiles.
84
- elsif percentile < 60
85
- 1.4 # Highest here to get mean traces
86
- elsif percentile < 90
87
- 0.7 # Between 60 & 90% is fine.
88
- elsif percentile >= 90
89
- 1.4 # Highest here to get 90+%ile traces
90
- else
91
- # impossible.
92
- percentile
93
- end
94
- end
95
-
96
- def age_points(age)
97
- age / 60.0 * POINT_MULTIPLIER_AGE
45
+ policies.map{ |p| p.call(request) }.sum
98
46
  end
99
47
 
100
- # Of the total time spent handling endpoints in this app, if this endpoint
101
- # is a higher percent, it should get more points.
102
- #
103
- # A: 20 calls @ 100ms each => 2 seconds of total time
104
- # B: 10 calls @ 100ms each => 1 second of total time
105
- #
106
- # Then A is 66% of the total call time
107
- def percent_time_points(percent) # Scale 0.0 - 1.0
108
- percent * POINT_MULTIPLIER_PERCENT_TIME
48
+ def stored!(request)
49
+ policies.each{ |p| p.stored!(request) }
109
50
  end
110
51
  end
111
52
  end
@@ -52,6 +52,13 @@ module ScoutApm
52
52
  }
53
53
  end
54
54
 
55
+ def track_external_service_metrics!(external_service_metric_set, options={})
56
+ @mutex.synchronize {
57
+ period = find_period(options[:timestamp])
58
+ period.merge_external_service_metrics!(external_service_metric_set)
59
+ }
60
+ end
61
+
55
62
  def track_one!(type, name, value, options={})
56
63
  meta = MetricMeta.new("#{type}/#{name}")
57
64
  stat = MetricStats.new(false)
@@ -206,8 +213,11 @@ module ScoutApm
206
213
 
207
214
  attr_reader :db_query_metric_set
208
215
 
216
+ attr_reader :external_service_metric_set
217
+
209
218
  def initialize(timestamp, context)
210
219
  @timestamp = timestamp
220
+ @context = context
211
221
 
212
222
  @request_traces = ScoredItemSet.new(context.config.value('max_traces'))
213
223
  @job_traces = ScoredItemSet.new(context.config.value('max_traces'))
@@ -216,10 +226,16 @@ module ScoutApm
216
226
 
217
227
  @metric_set = MetricSet.new
218
228
  @db_query_metric_set = DbQueryMetricSet.new(context)
229
+ @external_service_metric_set = ExternalServiceMetricSet.new(context)
219
230
 
220
231
  @jobs = Hash.new
221
232
  end
222
233
 
234
+ def logger
235
+ @context.logger
236
+ end
237
+ private :logger
238
+
223
239
  # Merges another StoreReportingPeriod into this one
224
240
  def merge(other)
225
241
  self.
@@ -228,7 +244,8 @@ module ScoutApm
228
244
  merge_jobs!(other.jobs).
229
245
  merge_slow_jobs!(other.slow_jobs_payload).
230
246
  merge_histograms!(other.histograms).
231
- merge_db_query_metrics!(other.db_query_metric_set)
247
+ merge_db_query_metrics!(other.db_query_metric_set).
248
+ merge_external_service_metrics!(other.external_service_metric_set)
232
249
  self
233
250
  end
234
251
 
@@ -254,6 +271,15 @@ module ScoutApm
254
271
  self
255
272
  end
256
273
 
274
+ def merge_external_service_metrics!(other_metric_set)
275
+ if other_metric_set.nil?
276
+ logger.debug("Missing other_metric_set for merge_external_service_metrics - skipping.")
277
+ else
278
+ external_service_metric_set.combine!(other_metric_set)
279
+ end
280
+ self
281
+ end
282
+
257
283
  def merge_slow_transactions!(new_transactions)
258
284
  Array(new_transactions).each do |one_transaction|
259
285
  request_traces << one_transaction
@@ -316,6 +342,10 @@ module ScoutApm
316
342
  db_query_metric_set.metrics_to_report
317
343
  end
318
344
 
345
+ def external_service_metrics_payload
346
+ external_service_metric_set.metrics_to_report
347
+ end
348
+
319
349
  #################################
320
350
  # Debug Helpers
321
351
  #################################
@@ -91,7 +91,7 @@ module ScoutApm
91
91
 
92
92
  def _instrumented_method_string(instrumented_name, uninstrumented_name, type, name, options={})
93
93
  method_str = <<-EOF
94
- def #{instrumented_name}(*args, &block)
94
+ def #{instrumented_name}(*args#{", **kwargs" if ScoutApm::Agent.instance.context.environment.supports_kwarg_delegation?}, &block)
95
95
  name = begin
96
96
  "#{name}"
97
97
  rescue => e
@@ -103,7 +103,7 @@ module ScoutApm
103
103
  name,
104
104
  {:scope => #{options[:scope] || false}}
105
105
  ) do
106
- #{uninstrumented_name}(*args, &block)
106
+ #{uninstrumented_name}(*args#{", **kwargs" if ScoutApm::Agent.instance.context.environment.supports_kwarg_delegation?}, &block)
107
107
  end
108
108
  end
109
109
  EOF
@@ -52,6 +52,10 @@ module ScoutApm
52
52
  # see that on Sidekiq.
53
53
  REQUEST_TYPES = ["Controller", "Job"]
54
54
 
55
+ # Layers of type 'AutoInstrument' are not recorded if their total_call_time doesn't exceed this threshold.
56
+ # AutoInstrument layers are frequently of short duration. This throws out this deadweight that is unlikely to be optimized.
57
+ AUTO_INSTRUMENT_TIMING_THRESHOLD = 5/1_000.0 # units = seconds
58
+
55
59
  def initialize(agent_context, store)
56
60
  @agent_context = agent_context
57
61
  @store = store #this is passed in so we can use a real store (normal operation) or fake store (instant mode only)
@@ -110,7 +114,15 @@ module ScoutApm
110
114
  layer.record_stop_time!
111
115
  layer.record_allocations!
112
116
 
113
- @layers[-1].add_child(layer) if @layers.any?
117
+ # Must follow layer.record_stop_time! as the total_call_time is used to determine if the layer is significant.
118
+ return if layer_insignificant?(layer)
119
+
120
+ # Check that the parent exists before calling a method on it, since some threading can get us into a weird state.
121
+ # this doesn't fix that state, but prevents exceptions from leaking out.
122
+ parent = @layers[-1]
123
+ if parent
124
+ parent.add_child(layer)
125
+ end
114
126
 
115
127
  # This must be called before checking if a backtrace should be collected as the call count influences our capture logic.
116
128
  # We call `#update_call_counts!` in stop layer to ensure the layer has a final desc. Layer#desc is updated during the AR instrumentation flow.
@@ -150,6 +162,10 @@ module ScoutApm
150
162
  def capture_backtrace?(layer)
151
163
  return if ignoring_request?
152
164
 
165
+ # A backtrace has already been recorded. This happens with autoinstruments as
166
+ # the partial backtrace is set when creating the layer.
167
+ return false if layer.backtrace
168
+
153
169
  # Never capture backtraces for this kind of layer. The backtrace will
154
170
  # always be 100% framework code.
155
171
  return false if BACKTRACE_BLACKLIST.include?(layer.type)
@@ -169,6 +185,20 @@ module ScoutApm
169
185
  false
170
186
  end
171
187
 
188
+ # Returns +true+ if the layer is an AutoInstrument layer whose total call time is below +AUTO_INSTRUMENT_TIMING_THRESHOLD+ and
189
+ # records a Histogram of insignificant / significant layers by file name.
190
+ def layer_insignificant?(layer)
191
+ result = false # default is significant
192
+ if layer.type == 'AutoInstrument'
193
+ if layer.total_call_time < AUTO_INSTRUMENT_TIMING_THRESHOLD
194
+ result = true # not significant
195
+ end
196
+ # 0 = not significant, 1 = significant
197
+ @agent_context.auto_instruments_layer_histograms.add(layer.file_name, (result ? 0 : 1))
198
+ end
199
+ result
200
+ end
201
+
172
202
  # Maintains a lookup Hash of call counts by layer name. Used to determine if we should capture a backtrace.
173
203
  def update_call_counts!(layer)
174
204
  @call_set[layer.name].update!(layer.desc)
@@ -212,8 +242,8 @@ module ScoutApm
212
242
  def stop_request
213
243
  @stopping = true
214
244
 
215
- if recorder
216
- recorder.record!(self)
245
+ if @recorder
246
+ @recorder.record!(self)
217
247
  end
218
248
  end
219
249
 
@@ -290,6 +320,7 @@ module ScoutApm
290
320
  :queue_time => LayerConverters::RequestQueueTimeConverter,
291
321
  :job => LayerConverters::JobConverter,
292
322
  :db => LayerConverters::DatabaseConverter,
323
+ :external_service => LayerConverters::ExternalServiceConverter,
293
324
 
294
325
  :slow_job => LayerConverters::SlowJobConverter,
295
326
  :slow_req => LayerConverters::SlowRequestConverter,
@@ -308,7 +339,7 @@ module ScoutApm
308
339
  memo
309
340
  end
310
341
  walker.walk
311
- converter_results = converter_instances.inject({}) do |memo, (slug,i)|
342
+ converter_results = converter_instances.inject({}) do |memo, (slug,i)|
312
343
  memo[slug] = i.record!
313
344
  memo
314
345
  end
@@ -11,6 +11,9 @@ module ScoutApm
11
11
 
12
12
  attr_reader :call_stack
13
13
 
14
+ # call_stack - an +Array+ of calls, typically generated via the +caller+ method.
15
+ # Example single line:
16
+ # "/Users/dlite/.rvm/rubies/ruby-2.4.5/lib/ruby/2.4.0/irb/workspace.rb:87:in `eval'"
14
17
  def initialize(call_stack, root=ScoutApm::Agent.instance.context.environment.root)
15
18
  @call_stack = call_stack
16
19
  # We can't use a constant as it'd be too early to fetch environment info