scout_apm 2.6.6 → 4.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +49 -0
  3. data/.rubocop.yml +2 -5
  4. data/.travis.yml +3 -7
  5. data/CHANGELOG.markdown +51 -0
  6. data/Gemfile +1 -8
  7. data/gems/rails6.gemfile +1 -1
  8. data/lib/scout_apm.rb +23 -1
  9. data/lib/scout_apm/agent.rb +22 -0
  10. data/lib/scout_apm/agent_context.rb +14 -2
  11. data/lib/scout_apm/background_job_integrations/delayed_job.rb +1 -1
  12. data/lib/scout_apm/background_job_integrations/faktory.rb +103 -0
  13. data/lib/scout_apm/background_job_integrations/sidekiq.rb +2 -2
  14. data/lib/scout_apm/config.rb +17 -2
  15. data/lib/scout_apm/detailed_trace.rb +2 -1
  16. data/lib/scout_apm/environment.rb +17 -1
  17. data/lib/scout_apm/error.rb +27 -0
  18. data/lib/scout_apm/error_service.rb +32 -0
  19. data/lib/scout_apm/error_service/error_buffer.rb +39 -0
  20. data/lib/scout_apm/error_service/error_record.rb +211 -0
  21. data/lib/scout_apm/error_service/ignored_exceptions.rb +66 -0
  22. data/lib/scout_apm/error_service/middleware.rb +32 -0
  23. data/lib/scout_apm/error_service/notifier.rb +33 -0
  24. data/lib/scout_apm/error_service/payload.rb +47 -0
  25. data/lib/scout_apm/error_service/periodic_work.rb +17 -0
  26. data/lib/scout_apm/error_service/railtie.rb +11 -0
  27. data/lib/scout_apm/error_service/sidekiq.rb +80 -0
  28. data/lib/scout_apm/extensions/transaction_callback_payload.rb +1 -1
  29. data/lib/scout_apm/instrument_manager.rb +1 -0
  30. data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +47 -26
  31. data/lib/scout_apm/instruments/action_view.rb +21 -8
  32. data/lib/scout_apm/instruments/active_record.rb +17 -28
  33. data/lib/scout_apm/instruments/typhoeus.rb +88 -0
  34. data/lib/scout_apm/layer.rb +1 -1
  35. data/lib/scout_apm/middleware.rb +1 -1
  36. data/lib/scout_apm/remote/server.rb +13 -1
  37. data/lib/scout_apm/reporter.rb +8 -3
  38. data/lib/scout_apm/serializers/payload_serializer_to_json.rb +6 -2
  39. data/lib/scout_apm/slow_policy/age_policy.rb +33 -0
  40. data/lib/scout_apm/slow_policy/percent_policy.rb +22 -0
  41. data/lib/scout_apm/slow_policy/percentile_policy.rb +24 -0
  42. data/lib/scout_apm/slow_policy/policy.rb +21 -0
  43. data/lib/scout_apm/slow_policy/speed_policy.rb +16 -0
  44. data/lib/scout_apm/slow_request_policy.rb +18 -77
  45. data/lib/scout_apm/tracer.rb +2 -2
  46. data/lib/scout_apm/utils/sql_sanitizer.rb +1 -0
  47. data/lib/scout_apm/utils/sql_sanitizer_regex.rb +3 -3
  48. data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +1 -0
  49. data/lib/scout_apm/version.rb +1 -1
  50. data/scout_apm.gemspec +6 -6
  51. data/test/unit/agent_context_test.rb +29 -0
  52. data/test/unit/environment_test.rb +2 -2
  53. data/test/unit/error_service/error_buffer_test.rb +25 -0
  54. data/test/unit/error_service/ignored_exceptions_test.rb +49 -0
  55. data/test/unit/serializers/payload_serializer_test.rb +36 -0
  56. data/test/unit/slow_request_policy_test.rb +41 -13
  57. data/test/unit/sql_sanitizer_test.rb +38 -0
  58. data/test/unit/tracer_test.rb +25 -0
  59. metadata +27 -61
  60. data/lib/scout_apm/slow_job_policy.rb +0 -111
  61. data/test/unit/slow_job_policy_test.rb +0 -6
@@ -0,0 +1,88 @@
1
module ScoutApm
  module Instruments
    # Instruments the Typhoeus HTTP client by prepending tracing modules onto
    # ::Typhoeus::Request and ::Typhoeus::Hydra, so each `run` call is wrapped
    # in an "HTTP" layer on the current tracked request.
    class Typhoeus
      attr_reader :context

      # context - the agent context; only its logger is used by this class.
      def initialize(context)
        @context = context
        @installed = false
      end

      def logger
        context.logger
      end

      # True once #install has found Typhoeus and hooked into it.
      def installed?
        @installed
      end

      # Hooks into Typhoeus if (and only if) the library is loaded.
      # Does nothing and returns nil otherwise.
      def install
        return unless defined?(::Typhoeus)

        @installed = true

        logger.info "Instrumenting Typhoeus"

        ::Typhoeus::Request.send(:prepend, TyphoeusInstrumentation)
        ::Typhoeus::Hydra.send(:prepend, TyphoeusHydraInstrumentation)
      end

      # Wraps Hydra#run in an "HTTP/Hydra" layer.
      module TyphoeusHydraInstrumentation
        def run(*args, &block)
          req = ScoutApm::RequestManager.lookup
          req.start_layer(ScoutApm::Layer.new("HTTP", "Hydra"))

          begin
            # Inside the begin so the layer is always stopped, even if
            # annotating it fails.
            current_layer = req.current_layer
            current_layer.desc = scout_desc if current_layer

            super(*args, &block)
          ensure
            req.stop_layer
          end
        end

        # Layer description: the number of queued requests, or "" if that
        # cannot be determined.
        def scout_desc
          "#{self.queued_requests.count} requests"
        rescue
          ""
        end
      end

      # Wraps Request#run in an "HTTP/<verb>" layer described by the request URL.
      module TyphoeusInstrumentation
        def run(*args, &block)
          verb = scout_request_verb
          req = ScoutApm::RequestManager.lookup
          req.start_layer(ScoutApm::Layer.new("HTTP", verb))

          begin
            # Inside the begin so the layer is always stopped, even if
            # annotating it fails.
            current_layer = req.current_layer
            current_layer.desc = scout_desc(verb, scout_request_url) if current_layer

            super(*args, &block)
          ensure
            req.stop_layer
          end
        end

        # Layer description: the URL without its query string, truncated to
        # the configured 'instrument_http_url_length'. Returns "" on any error.
        # `verb` is accepted for interface compatibility but is not used.
        def scout_desc(verb, uri)
          max_length = ScoutApm::Agent.instance.context.config.value('instrument_http_url_length')
          (String(uri).split('?').first)[0..(max_length - 1)]
        rescue
          ""
        end

        def scout_request_url
          self.url
        rescue
          ""
        end

        # The HTTP verb as a string. Typhoeus issues a GET when no :method
        # option is given, so fall back to "get" rather than an empty name.
        def scout_request_verb
          (self.options[:method] || :get).to_s
        rescue
          ""
        end

      end

    end
  end
end
@@ -116,7 +116,7 @@ module ScoutApm
116
116
  # In Ruby 2.0+, we can pass the range directly to the caller to reduce the memory footprint.
117
117
  def caller_array
118
118
  # omits the first several callers which are in the ScoutAPM stack.
119
- if ScoutApm::Agent.instance.context.environment.ruby_2?
119
+ if ScoutApm::Agent.instance.context.environment.ruby_2? || ScoutApm::Agent.instance.context.environment.ruby_3?
120
120
  caller(3...BACKTRACE_CALLER_LIMIT)
121
121
  else
122
122
  caller[3...BACKTRACE_CALLER_LIMIT]
@@ -26,7 +26,7 @@ module ScoutApm
26
26
  ScoutApm::Agent.instance.start
27
27
  @started = ScoutApm::Agent.instance.context.started? && ScoutApm::Agent.instance.background_worker_running?
28
28
  rescue => e
29
- ScoutApm::Agent.instance.context.logger("Failed to start via Middleware: #{e.message}\n\t#{e.backtrace.join("\n\t")}")
29
+ ScoutApm::Agent.instance.context.logger.info("Failed to start via Middleware: #{e.message}\n\t#{e.backtrace.join("\n\t")}")
30
30
  end
31
31
  end
32
32
  end
@@ -16,8 +16,20 @@ module ScoutApm
16
16
  @server = nil
17
17
  end
18
18
 
19
- def start
19
+ def require_webrick
20
20
  require 'webrick'
21
+ true
22
+ rescue LoadError
23
+ @logger.warn(
24
+ %q|Could not require Webrick. Ruby 3.0 stopped bundling it
25
+ automatically, but it is required to instrument Resque. Please add
26
+ Webrick to your Gemfile.|
27
+ )
28
+ false
29
+ end
30
+
31
+ def start
32
+ return false unless require_webrick
21
33
 
22
34
  @server = WEBrick::HTTPServer.new(
23
35
  :BindAddress => bind,
@@ -2,7 +2,6 @@ require 'openssl'
2
2
 
3
3
  module ScoutApm
4
4
  class Reporter
5
- CA_FILE = File.join( File.dirname(__FILE__), *%w[.. .. data cacert.pem] )
6
5
  VERIFY_MODE = OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT
7
6
 
8
7
  attr_reader :type
@@ -23,6 +22,7 @@ module ScoutApm
23
22
  context.logger
24
23
  end
25
24
 
25
+ # The fully serialized string payload to be sent
26
26
  def report(payload, headers = {})
27
27
  hosts = determine_hosts
28
28
 
@@ -36,6 +36,7 @@ module ScoutApm
36
36
  logger.debug("Original Size: #{original_payload_size} Compressed Size: #{compress_payload_size}")
37
37
  end
38
38
 
39
+ logger.info("Posting payload to #{hosts.inspect}")
39
40
  post_payload(hosts, payload, headers)
40
41
  end
41
42
 
@@ -52,6 +53,8 @@ module ScoutApm
52
53
  URI.parse("#{host}/apps/deploy.scout?key=#{key}&name=#{encoded_app_name}")
53
54
  when :instant_trace
54
55
  URI.parse("#{host}/apps/instant_trace.scout?key=#{key}&name=#{encoded_app_name}&instant_key=#{instant_key}")
56
+ when :errors
57
+ URI.parse("#{host}/apps/error.scout?key=#{key}&name=#{encoded_app_name}")
55
58
  end.tap { |u| logger.debug("Posting to #{u}") }
56
59
  end
57
60
 
@@ -90,7 +93,7 @@ module ScoutApm
90
93
  logger.debug "got response: #{response.inspect}"
91
94
  case response
92
95
  when Net::HTTPSuccess, Net::HTTPNotModified
93
- logger.debug "/#{type} OK"
96
+ logger.debug "#{type} OK"
94
97
  when Net::HTTPBadRequest
95
98
  logger.warn "/#{type} FAILED: The Account Key [#{config.value('key')}] is invalid."
96
99
  when Net::HTTPUnprocessableEntity
@@ -123,7 +126,7 @@ module ScoutApm
123
126
  proxy_uri.password).new(url.host, url.port)
124
127
  if url.is_a?(URI::HTTPS)
125
128
  http.use_ssl = true
126
- http.ca_file = CA_FILE
129
+ http.ca_file = config.value("ssl_cert_file")
127
130
  http.verify_mode = VERIFY_MODE
128
131
  end
129
132
  http
@@ -142,6 +145,8 @@ module ScoutApm
142
145
  def determine_hosts
143
146
  if [:deploy_hook, :instant_trace].include?(type)
144
147
  config.value('direct_host')
148
+ elsif [:errors].include?(type)
149
+ config.value('errors_host')
145
150
  else
146
151
  config.value('host')
147
152
  end
@@ -46,17 +46,21 @@ module ScoutApm
46
46
  end
47
47
 
48
48
  ESCAPE_MAPPINGS = {
49
+ # Stackoverflow answer on gsub matches and backslashes
50
+ # https://stackoverflow.com/a/4149087/2705125
51
+ '\\' => '\\\\\\\\',
49
52
  "\b" => '\\b',
50
53
  "\t" => '\\t',
51
54
  "\n" => '\\n',
52
55
  "\f" => '\\f',
53
56
  "\r" => '\\r',
54
57
  '"' => '\\"',
55
- '\\' => '\\\\',
56
58
  }
57
59
 
58
60
  def escape(string)
59
- ESCAPE_MAPPINGS.inject(string.to_s) {|s, (bad, good)| s.gsub(bad, good) }
61
+ ESCAPE_MAPPINGS.inject(string.to_s) {|s, (bad, good)|
62
+ s.gsub(bad, good)
63
+ }
60
64
  end
61
65
 
62
66
  def format_by_type(formatee)
@@ -0,0 +1,33 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
module ScoutApm::SlowPolicy
  # Scores a request by how long it has been since a trace for the same
  # endpoint was last stored: the longer the gap, the higher the score.
  class AgePolicy < Policy
    # Points awarded per minute since the endpoint was last stored.
    POINT_MULTIPLIER_AGE = 0.25

    # Endpoint name => the last time a slow transaction was stored for it.
    #
    # Unknown endpoints default to the moment this policy was created, so
    # the "age" of a never-stored endpoint is measured from then.
    attr_reader :last_seen

    def initialize(context)
      super

      created_at = Time.now
      @last_seen = Hash.new { |seen, endpoint| seen[endpoint] = created_at }
    end

    # Returns the score contribution: minutes since last stored, scaled by
    # POINT_MULTIPLIER_AGE.
    def call(request)
      seconds_unseen = Time.now - last_seen[request.unique_name]
      minutes_unseen = seconds_unseen / 60.0

      minutes_unseen * POINT_MULTIPLIER_AGE
    end

    # Records that a trace for this request's endpoint was just stored.
    def stored!(request)
      last_seen[request.unique_name] = Time.now
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
module ScoutApm::SlowPolicy
  # Scores a request by the share of total processing time its endpoint
  # accounts for.
  class PercentPolicy < Policy
    # Points for an endpoint whose throughput * response time is a large
    # share of the overall time spent processing requests.
    POINT_MULTIPLIER_PERCENT_TIME = 2.5

    # The larger this endpoint's share of the total time spent handling
    # endpoints in the app, the more points it gets.
    #
    #   A: 20 calls @ 100ms each => 2 seconds of total time
    #   B: 10 calls @ 100ms each => 1 second of total time
    #
    # Then A is 66% of the total call time.
    #
    # The share is expected on a 0.0 - 1.0 scale.
    def call(request)
      endpoint = request.unique_name
      share_of_total = context.transaction_time_consumed.percent_of_total(endpoint)

      share_of_total * POINT_MULTIPLIER_PERCENT_TIME
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
module ScoutApm::SlowPolicy
  # Scores a request by the approximate percentile of its total call time
  # among requests seen for the same endpoint.
  class PercentilePolicy < Policy
    def call(request)
      elapsed = request.root_layer.total_call_time
      # What approximate percentile was this request?
      percentile = context.request_histograms.approximate_quantile_of_value(request.unique_name, elapsed)

      return 0.4 if percentile < 40  # Don't put much emphasis on capturing low percentiles.
      return 1.4 if percentile < 60  # Highest here to get mean traces
      return 0.7 if percentile < 90  # Between 60 & 90% is fine.
      return 1.4 if percentile >= 90 # Highest here to get 90+%ile traces

      # Only reached if none of the comparisons above held.
      percentile
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # Note that this is semi-internal API. You should not need this, and if you do
2
+ # we're here to help at support@scoutapm.com. TrackedRequest doesn't change
3
+ # often, but we can't promise a perfectly stable API for it either.
4
module ScoutApm::SlowPolicy
  # Base class for scoring policies. A policy answers #call(request) with a
  # numeric score and may react to #stored!(request).
  class Policy
    def initialize(context)
      @context = context
    end

    # The context this policy was constructed with.
    def context
      @context
    end

    # Subclasses must override this to return the score for the request.
    def call(request)
      raise NotImplementedError
    end

    # Hook invoked when the request gets a slot in the ScoredItemSet.
    # Subclasses may override; the default does nothing.
    def stored!(request)
      nil
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
module ScoutApm::SlowPolicy
  # Scores a request by its total call time.
  class SpeedPolicy < Policy
    # Weight applied to the log-scaled call time. See #call.
    POINT_MULTIPLIER_SPEED = 0.25

    # The time is in seconds. Taking the logarithm keeps huge times from
    # swamping the other metrics; adding 1 keeps the log in positive
    # territory.
    def call(request)
      seconds = request.root_layer.total_call_time
      log_scaled = Math.log(1 + seconds)

      log_scaled * POINT_MULTIPLIER_SPEED
    end
  end
end
@@ -3,43 +3,29 @@
3
3
 
4
4
  module ScoutApm
5
5
  class SlowRequestPolicy
6
- CAPTURE_TYPES = [
7
- CAPTURE_DETAIL = "capture_detail",
8
- CAPTURE_NONE = "capture_none",
9
- ]
10
-
11
- # Adjust speed points. See the function
12
- POINT_MULTIPLIER_SPEED = 0.25
13
-
14
- # For each minute we haven't seen an endpoint
15
- POINT_MULTIPLIER_AGE = 0.25
16
-
17
- # Outliers are worth up to "1000ms" of weight
18
- POINT_MULTIPLIER_PERCENTILE = 1.0
19
-
20
- # Points for an endpoint's who's throughput * response time is a large % of
21
- # overall time spent processing requests
22
- POINT_MULTIPLIER_PERCENT_TIME = 2.5
23
-
24
- # A hash of Endpoint Name to the last time we stored a slow transaction for it.
25
- #
26
- # Defaults to a start time that is pretty close to application boot time.
27
- # So the "age" of an endpoint we've never seen is the time the application
28
- # has been running.
29
- attr_reader :last_seen
30
-
31
6
  # The AgentContext we're running in
32
7
  attr_reader :context
8
+ attr_reader :policies
33
9
 
34
10
  def initialize(context)
35
11
  @context = context
12
+ @policies = []
13
+ end
36
14
 
37
- zero_time = Time.now
38
- @last_seen = Hash.new { |h, k| h[k] = zero_time }
15
# Registers the standard scoring policies: speed, percentile, age, and
# percent of total time consumed. These are the same four components the
# score was previously built from. Each policy is constructed with this
# object's context.
def add_default_policies
  add(SlowPolicy::SpeedPolicy.new(context))
  add(SlowPolicy::PercentilePolicy.new(context))
  add(SlowPolicy::AgePolicy.new(context))
  # PercentPolicy, not a second PercentilePolicy: registering the percentile
  # policy twice double-counts it and drops the percent-of-total-time score.
  add(SlowPolicy::PercentPolicy.new(context))
end
40
21
 
41
- def stored!(request)
42
- last_seen[request.unique_name] = Time.now
22
# Registers a policy. The policy can be any object that responds to
# call(req), which returns the score, and to stored!(req). Anything else
# is rejected with an error.
def add(policy)
  conforms = policy.respond_to?(:call) && policy.respond_to?(:stored!)
  raise "SlowRequestPolicy must implement policy api call(req) and stored!(req)" unless conforms

  @policies << policy
end
44
30
 
45
31
  # Determine if this request trace should be fully analyzed by scoring it
@@ -56,56 +42,11 @@ module ScoutApm
56
42
  return -1 # A negative score, should never be good enough to store.
57
43
  end
58
44
 
59
- total_time = request.root_layer.total_call_time
60
-
61
- # How long has it been since we've seen this?
62
- age = Time.now - last_seen[unique_name]
63
-
64
- # What approximate percentile was this request?
65
- percentile = context.request_histograms.approximate_quantile_of_value(unique_name, total_time)
66
-
67
- percent_of_total_time = context.transaction_time_consumed.percent_of_total(unique_name)
68
-
69
- return speed_points(total_time) + percentile_points(percentile) + age_points(age) + percent_time_points(percent_of_total_time)
70
- end
71
-
72
- private
73
-
74
- # Time in seconds
75
- # Logarithm keeps huge times from swamping the other metrics.
76
- # 1+ is necessary to keep the log function in positive territory.
77
- def speed_points(time)
78
- Math.log(1 + time) * POINT_MULTIPLIER_SPEED
79
- end
80
-
81
- def percentile_points(percentile)
82
- if percentile < 40
83
- 0.4 # Don't put much emphasis on capturing low percentiles.
84
- elsif percentile < 60
85
- 1.4 # Highest here to get mean traces
86
- elsif percentile < 90
87
- 0.7 # Between 60 & 90% is fine.
88
- elsif percentile >= 90
89
- 1.4 # Highest here to get 90+%ile traces
90
- else
91
- # impossible.
92
- percentile
93
- end
94
- end
95
-
96
- def age_points(age)
97
- age / 60.0 * POINT_MULTIPLIER_AGE
45
+ policies.map{ |p| p.call(request) }.sum
98
46
  end
99
47
 
100
- # Of the total time spent handling endpoints in this app, if this endpoint
101
- # is a higher percent, it should get more points.
102
- #
103
- # A: 20 calls @ 100ms each => 2 seconds of total time
104
- # B: 10 calls @ 100ms each => 1 second of total time
105
- #
106
- # Then A is 66% of the total call time
107
- def percent_time_points(percent) # Scale 0.0 - 1.0
108
- percent * POINT_MULTIPLIER_PERCENT_TIME
48
# Tells every registered policy, in registration order, that this request
# was stored.
def stored!(request)
  policies.each do |policy|
    policy.stored!(request)
  end
end
110
51
  end
111
52
  end