scout_apm 3.0.0.pre23 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (134) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +3 -4
  4. data/.travis.yml +17 -14
  5. data/CHANGELOG.markdown +150 -4
  6. data/Gemfile +2 -8
  7. data/README.markdown +30 -4
  8. data/Rakefile +1 -1
  9. data/ext/allocations/allocations.c +2 -0
  10. data/gems/README.md +28 -0
  11. data/gems/octoshark.gemfile +4 -0
  12. data/gems/rails3.gemfile +5 -0
  13. data/gems/rails4.gemfile +4 -0
  14. data/gems/rails5.gemfile +4 -0
  15. data/gems/rails6.gemfile +4 -0
  16. data/lib/scout_apm.rb +39 -9
  17. data/lib/scout_apm/agent.rb +29 -10
  18. data/lib/scout_apm/agent/exit_handler.rb +0 -1
  19. data/lib/scout_apm/agent_context.rb +26 -3
  20. data/lib/scout_apm/app_server_load.rb +7 -2
  21. data/lib/scout_apm/attribute_arranger.rb +0 -2
  22. data/lib/scout_apm/auto_instrument.rb +5 -0
  23. data/lib/scout_apm/auto_instrument/instruction_sequence.rb +31 -0
  24. data/lib/scout_apm/auto_instrument/layer.rb +23 -0
  25. data/lib/scout_apm/auto_instrument/parser.rb +27 -0
  26. data/lib/scout_apm/auto_instrument/rails.rb +175 -0
  27. data/lib/scout_apm/background_job_integrations/delayed_job.rb +1 -1
  28. data/lib/scout_apm/background_job_integrations/legacy_sneakers.rb +55 -0
  29. data/lib/scout_apm/background_job_integrations/que.rb +134 -0
  30. data/lib/scout_apm/background_job_integrations/resque.rb +6 -2
  31. data/lib/scout_apm/background_job_integrations/shoryuken.rb +124 -0
  32. data/lib/scout_apm/background_job_integrations/sidekiq.rb +5 -19
  33. data/lib/scout_apm/background_job_integrations/sneakers.rb +87 -0
  34. data/lib/scout_apm/config.rb +48 -7
  35. data/lib/scout_apm/detailed_trace.rb +217 -0
  36. data/lib/scout_apm/environment.rb +3 -0
  37. data/lib/scout_apm/error.rb +27 -0
  38. data/lib/scout_apm/error_service.rb +32 -0
  39. data/lib/scout_apm/error_service/error_buffer.rb +39 -0
  40. data/lib/scout_apm/error_service/error_record.rb +211 -0
  41. data/lib/scout_apm/error_service/ignored_exceptions.rb +66 -0
  42. data/lib/scout_apm/error_service/middleware.rb +32 -0
  43. data/lib/scout_apm/error_service/notifier.rb +33 -0
  44. data/lib/scout_apm/error_service/payload.rb +47 -0
  45. data/lib/scout_apm/error_service/periodic_work.rb +17 -0
  46. data/lib/scout_apm/error_service/railtie.rb +11 -0
  47. data/lib/scout_apm/error_service/sidekiq.rb +80 -0
  48. data/lib/scout_apm/extensions/transaction_callback_payload.rb +1 -1
  49. data/lib/scout_apm/fake_store.rb +3 -0
  50. data/lib/scout_apm/framework_integrations/rails_2.rb +2 -1
  51. data/lib/scout_apm/framework_integrations/rails_3_or_4.rb +17 -6
  52. data/lib/scout_apm/git_revision.rb +6 -3
  53. data/lib/scout_apm/instant/middleware.rb +2 -1
  54. data/lib/scout_apm/instrument_manager.rb +8 -7
  55. data/lib/scout_apm/instruments/action_controller_rails_2.rb +3 -1
  56. data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +56 -55
  57. data/lib/scout_apm/instruments/action_view.rb +114 -26
  58. data/lib/scout_apm/instruments/active_record.rb +66 -19
  59. data/lib/scout_apm/instruments/http.rb +48 -0
  60. data/lib/scout_apm/instruments/memcached.rb +43 -0
  61. data/lib/scout_apm/instruments/mongoid.rb +9 -4
  62. data/lib/scout_apm/instruments/net_http.rb +8 -1
  63. data/lib/scout_apm/job_record.rb +4 -2
  64. data/lib/scout_apm/layaway_file.rb +4 -0
  65. data/lib/scout_apm/layer.rb +5 -56
  66. data/lib/scout_apm/layer_children_set.rb +15 -6
  67. data/lib/scout_apm/layer_converters/converter_base.rb +15 -30
  68. data/lib/scout_apm/layer_converters/database_converter.rb +2 -15
  69. data/lib/scout_apm/layer_converters/slow_job_converter.rb +12 -2
  70. data/lib/scout_apm/layer_converters/slow_request_converter.rb +14 -4
  71. data/lib/scout_apm/layer_converters/trace_converter.rb +184 -0
  72. data/lib/scout_apm/limited_layer.rb +0 -7
  73. data/lib/scout_apm/metric_stats.rb +0 -8
  74. data/lib/scout_apm/middleware.rb +1 -1
  75. data/lib/scout_apm/periodic_work.rb +19 -0
  76. data/lib/scout_apm/remote/message.rb +4 -0
  77. data/lib/scout_apm/reporter.rb +8 -3
  78. data/lib/scout_apm/reporting.rb +2 -1
  79. data/lib/scout_apm/request_histograms.rb +8 -0
  80. data/lib/scout_apm/serializers/app_server_load_serializer.rb +4 -0
  81. data/lib/scout_apm/serializers/directive_serializer.rb +4 -0
  82. data/lib/scout_apm/serializers/payload_serializer.rb +2 -2
  83. data/lib/scout_apm/serializers/payload_serializer_to_json.rb +30 -15
  84. data/lib/scout_apm/slow_job_record.rb +5 -1
  85. data/lib/scout_apm/slow_policy/age_policy.rb +33 -0
  86. data/lib/scout_apm/slow_policy/percent_policy.rb +22 -0
  87. data/lib/scout_apm/slow_policy/percentile_policy.rb +24 -0
  88. data/lib/scout_apm/slow_policy/policy.rb +21 -0
  89. data/lib/scout_apm/slow_policy/speed_policy.rb +16 -0
  90. data/lib/scout_apm/slow_request_policy.rb +18 -60
  91. data/lib/scout_apm/slow_transaction.rb +3 -1
  92. data/lib/scout_apm/store.rb +14 -10
  93. data/lib/scout_apm/tracked_request.rb +41 -30
  94. data/lib/scout_apm/transaction_time_consumed.rb +51 -0
  95. data/lib/scout_apm/utils/active_record_metric_name.rb +16 -3
  96. data/lib/scout_apm/utils/backtrace_parser.rb +3 -0
  97. data/lib/scout_apm/utils/marshal_logging.rb +90 -0
  98. data/lib/scout_apm/utils/sql_sanitizer.rb +10 -1
  99. data/lib/scout_apm/utils/sql_sanitizer_regex.rb +7 -0
  100. data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +6 -0
  101. data/lib/scout_apm/utils/unique_id.rb +27 -0
  102. data/lib/scout_apm/version.rb +1 -1
  103. data/scout_apm.gemspec +13 -7
  104. data/test/test_helper.rb +2 -2
  105. data/test/unit/agent_context_test.rb +29 -0
  106. data/test/unit/auto_instrument/assignments-instrumented.rb +31 -0
  107. data/test/unit/auto_instrument/assignments.rb +31 -0
  108. data/test/unit/auto_instrument/controller-ast.txt +57 -0
  109. data/test/unit/auto_instrument/controller-instrumented.rb +49 -0
  110. data/test/unit/auto_instrument/controller.rb +49 -0
  111. data/test/unit/auto_instrument/rescue_from-instrumented.rb +13 -0
  112. data/test/unit/auto_instrument/rescue_from.rb +13 -0
  113. data/test/unit/auto_instrument_test.rb +54 -0
  114. data/test/unit/error_service/error_buffer_test.rb +25 -0
  115. data/test/unit/error_service/ignored_exceptions_test.rb +49 -0
  116. data/test/unit/extensions/periodic_callbacks_test.rb +2 -2
  117. data/test/unit/instruments/active_record_test.rb +40 -0
  118. data/test/unit/layer_children_set_test.rb +9 -0
  119. data/test/unit/request_histograms_test.rb +17 -0
  120. data/test/unit/serializers/payload_serializer_test.rb +39 -5
  121. data/test/unit/slow_request_policy_test.rb +42 -9
  122. data/test/unit/sql_sanitizer_test.rb +47 -0
  123. data/test/unit/transaction_time_consumed_test.rb +46 -0
  124. data/test/unit/utils/active_record_metric_name_test.rb +10 -2
  125. metadata +101 -19
  126. data/ext/stacks/extconf.rb +0 -37
  127. data/ext/stacks/scout_atomics.h +0 -86
  128. data/ext/stacks/stacks.c +0 -814
  129. data/lib/scout_apm/slow_job_policy.rb +0 -94
  130. data/lib/scout_apm/trace_compactor.rb +0 -312
  131. data/lib/scout_apm/utils/fake_stacks.rb +0 -88
  132. data/test/unit/instruments/active_record_instruments_test.rb +0 -5
  133. data/test/unit/slow_job_policy_test.rb +0 -6
  134. data/tester.rb +0 -53
@@ -2,9 +2,9 @@
2
2
  module ScoutApm
3
3
  module Serializers
4
4
  class PayloadSerializer
5
- def self.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics)
5
+ def self.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, traces)
6
6
  if ScoutApm::Agent.instance.context.config.value("report_format") == 'json'
7
- ScoutApm::Serializers::PayloadSerializerToJson.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics)
7
+ ScoutApm::Serializers::PayloadSerializerToJson.serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, traces)
8
8
  else
9
9
  metadata = metadata.dup
10
10
  metadata.default = nil
@@ -2,7 +2,7 @@ module ScoutApm
2
2
  module Serializers
3
3
  module PayloadSerializerToJson
4
4
  class << self
5
- def serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics)
5
+ def serialize(metadata, metrics, slow_transactions, jobs, slow_jobs, histograms, db_query_metrics, traces)
6
6
  metadata.merge!({:payload_version => 2})
7
7
 
8
8
  jsonify_hash({:metadata => metadata,
@@ -14,6 +14,7 @@ module ScoutApm
14
14
  :db_metrics => {
15
15
  :query => DbQuerySerializerToJson.new(db_query_metrics).as_json,
16
16
  },
17
+ :span_traces => traces.map{ |t| t.as_json },
17
18
  })
18
19
  end
19
20
 
@@ -44,18 +45,36 @@ module ScoutApm
44
45
  "{#{str_parts.join(",")}}"
45
46
  end
46
47
 
47
- ESCAPE_MAPPINGS = {
48
- "\b" => '\\b',
49
- "\t" => '\\t',
50
- "\n" => '\\n',
51
- "\f" => '\\f',
52
- "\r" => '\\r',
53
- '"' => '\\"',
54
- '\\' => '\\\\',
55
- }
48
+ # Ruby 1.8.7 seems to be fundamentally different in how gsub or regexes
49
+ # work. This is a hack and will be removed as soon as we can drop
50
+ # support
51
+ if RUBY_VERSION == "1.8.7"
52
+ ESCAPE_MAPPINGS = {
53
+ "\b" => '\\b',
54
+ "\t" => '\\t',
55
+ "\n" => '\\n',
56
+ "\f" => '\\f',
57
+ "\r" => '\\r',
58
+ '"' => '\\"',
59
+ '\\' => '\\\\',
60
+ }
61
+ else
62
+ ESCAPE_MAPPINGS = {
63
+ # Stackoverflow answer on gsub matches and backslashes - https://stackoverflow.com/a/4149087/2705125
64
+ '\\' => '\\\\\\\\',
65
+ "\b" => '\\b',
66
+ "\t" => '\\t',
67
+ "\n" => '\\n',
68
+ "\f" => '\\f',
69
+ "\r" => '\\r',
70
+ '"' => '\\"',
71
+ }
72
+ end
56
73
 
57
74
  def escape(string)
58
- ESCAPE_MAPPINGS.inject(string.to_s) {|s, (bad, good)| s.gsub(bad, good) }
75
+ ESCAPE_MAPPINGS.inject(string.to_s) {|s, (bad, good)|
76
+ s.gsub(bad, good)
77
+ }
59
78
  end
60
79
 
61
80
  def format_by_type(formatee)
@@ -71,10 +90,6 @@ module ScoutApm
71
90
  %Q["#{formatee.iso8601}"]
72
91
  when nil
73
92
  "null"
74
- when TrueClass
75
- "true"
76
- when FalseClass
77
- "false"
78
93
  else # strings and everything
79
94
  %Q["#{escape(formatee)}"]
80
95
  end
@@ -23,7 +23,9 @@ module ScoutApm
23
23
  attr_reader :git_sha
24
24
  attr_reader :truncated_metrics
25
25
 
26
- def initialize(agent_context, queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations, score, truncated_metrics)
26
+ attr_reader :span_trace
27
+
28
+ def initialize(agent_context, queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations, score, truncated_metrics, span_trace)
27
29
  @queue_name = queue_name
28
30
  @job_name = job_name
29
31
  @time = time
@@ -40,6 +42,8 @@ module ScoutApm
40
42
  @score = score
41
43
  @truncated_metrics = truncated_metrics
42
44
 
45
+ @span_trace = span_trace
46
+
43
47
  agent_context.logger.debug { "Slow Job [#{metric_name}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
44
48
  end
45
49
 
@@ -0,0 +1,33 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
+ module ScoutApm::SlowPolicy
4
+ class AgePolicy < Policy
5
+ # For each minute we haven't seen an endpoint
6
+ POINT_MULTIPLIER_AGE = 0.25
7
+
8
+ # A hash of Endpoint Name to the last time we stored a slow transaction for it.
9
+ #
10
+ # Defaults to a start time that is pretty close to application boot time.
11
+ # So the "age" of an endpoint we've never seen is the time the application
12
+ # has been running.
13
+ attr_reader :last_seen
14
+
15
+ def initialize(context)
16
+ super
17
+
18
+ zero_time = Time.now
19
+ @last_seen = Hash.new { |h, k| h[k] = zero_time }
20
+ end
21
+
22
+ def call(request)
23
+ # How long has it been since we've seen this?
24
+ age = Time.now - last_seen[request.unique_name]
25
+
26
+ age / 60.0 * POINT_MULTIPLIER_AGE
27
+ end
28
+
29
+ def stored!(request)
30
+ last_seen[request.unique_name] = Time.now
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,22 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
+ module ScoutApm::SlowPolicy
4
+ class PercentPolicy < Policy
5
+ # Points for an endpoint whose throughput * response time is a large % of
6
+ # overall time spent processing requests
7
+ POINT_MULTIPLIER_PERCENT_TIME = 2.5
8
+
9
+ # Of the total time spent handling endpoints in this app, if this endpoint
10
+ # is a higher percent, it should get more points.
11
+ #
12
+ # A: 20 calls @ 100ms each => 2 seconds of total time
13
+ # B: 10 calls @ 100ms each => 1 second of total time
14
+ #
15
+ # Then A is 66% of the total call time
16
+ def call(request) # Scale 0.0 - 1.0
17
+ percent = context.transaction_time_consumed.percent_of_total(request.unique_name)
18
+
19
+ percent * POINT_MULTIPLIER_PERCENT_TIME
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,24 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
+ module ScoutApm::SlowPolicy
4
+ class PercentilePolicy < Policy
5
+ def call(request)
6
+ # What approximate percentile was this request?
7
+ total_time = request.root_layer.total_call_time
8
+ percentile = context.request_histograms.approximate_quantile_of_value(request.unique_name, total_time)
9
+
10
+ if percentile < 40
11
+ 0.4 # Don't put much emphasis on capturing low percentiles.
12
+ elsif percentile < 60
13
+ 1.4 # Highest here to get mean traces
14
+ elsif percentile < 90
15
+ 0.7 # Between 60 & 90% is fine.
16
+ elsif percentile >= 90
17
+ 1.4 # Highest here to get 90+%ile traces
18
+ else
19
+ # impossible.
20
+ percentile
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,21 @@
1
+ # Note that this is semi-internal API. You should not need this, and if you do
2
+ # we're here to help at support@scoutapm.com. TrackedRequest doesn't change
3
+ # often, but we can't promise a perfectly stable API for it either.
4
+ module ScoutApm::SlowPolicy
5
+ class Policy
6
+ attr_reader :context
7
+
8
+ def initialize(context)
9
+ @context = context
10
+ end
11
+
12
+ def call(request)
13
+ raise NotImplementedError
14
+ end
15
+
16
+ # Override in subclasses to execute some behavior if the request gets a
17
+ # slot in the ScoredItemSet. Defaults to no-op
18
+ def stored!(request)
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,16 @@
1
+ require 'scout_apm/slow_policy/policy'
2
+
3
+ module ScoutApm::SlowPolicy
4
+ class SpeedPolicy < Policy
5
+ # Adjust speed points. See the function
6
+ POINT_MULTIPLIER_SPEED = 0.25
7
+
8
+ # Time in seconds
9
+ # Logarithm keeps huge times from swamping the other metrics.
10
+ # 1+ is necessary to keep the log function in positive territory.
11
+ def call(request)
12
+ total_time = request.root_layer.total_call_time
13
+ Math.log(1 + total_time) * POINT_MULTIPLIER_SPEED
14
+ end
15
+ end
16
+ end
@@ -3,39 +3,29 @@
3
3
 
4
4
  module ScoutApm
5
5
  class SlowRequestPolicy
6
- CAPTURE_TYPES = [
7
- CAPTURE_DETAIL = "capture_detail",
8
- CAPTURE_NONE = "capture_none",
9
- ]
10
-
11
- # Adjust speed points. See the function
12
- POINT_MULTIPLIER_SPEED = 0.25
13
-
14
- # For each minute we haven't seen an endpoint
15
- POINT_MULTIPLIER_AGE = 0.25
16
-
17
- # Outliers are worth up to "1000ms" of weight
18
- POINT_MULTIPLIER_PERCENTILE = 1.0
19
-
20
- # A hash of Endpoint Name to the last time we stored a slow transaction for it.
21
- #
22
- # Defaults to a start time that is pretty close to application boot time.
23
- # So the "age" of an endpoint we've never seen is the time the application
24
- # has been running.
25
- attr_reader :last_seen
26
-
27
6
  # The AgentContext we're running in
28
7
  attr_reader :context
8
+ attr_reader :policies
29
9
 
30
10
  def initialize(context)
31
11
  @context = context
12
+ @policies = []
13
+ end
32
14
 
33
- zero_time = Time.now
34
- @last_seen = Hash.new { |h, k| h[k] = zero_time }
15
+ def add_default_policies
16
+ add(SlowPolicy::SpeedPolicy.new(context))
17
+ add(SlowPolicy::PercentilePolicy.new(context))
18
+ add(SlowPolicy::AgePolicy.new(context))
19
+ add(SlowPolicy::PercentilePolicy.new(context))
35
20
  end
36
21
 
37
- def stored!(request)
38
- last_seen[request.unique_name] = Time.now
22
+ # policy is an object that behaves like a policy (responds to .call(req) for the score, and .stored!(req))
23
+ def add(policy)
24
+ unless policy.respond_to?(:call) && policy.respond_to?(:stored!)
25
+ raise "SlowRequestPolicy must implement policy api call(req) and stored!(req)"
26
+ end
27
+
28
+ @policies << policy
39
29
  end
40
30
 
41
31
  # Determine if this request trace should be fully analyzed by scoring it
@@ -52,43 +42,11 @@ module ScoutApm
52
42
  return -1 # A negative score, should never be good enough to store.
53
43
  end
54
44
 
55
- total_time = request.root_layer.total_call_time
56
-
57
- # How long has it been since we've seen this?
58
- age = Time.now - last_seen[unique_name]
59
-
60
- # What approximate percentile was this request?
61
- percentile = context.request_histograms.approximate_quantile_of_value(unique_name, total_time)
62
-
63
- return speed_points(total_time) + percentile_points(percentile) + age_points(age)
64
- end
65
-
66
- private
67
-
68
- # Time in seconds
69
- # Logarithm keeps huge times from swamping the other metrics.
70
- # 1+ is necessary to keep the log function in positive territory.
71
- def speed_points(time)
72
- Math.log(1 + time) * POINT_MULTIPLIER_SPEED
45
+ policies.map{ |p| p.call(request) }.sum
73
46
  end
74
47
 
75
- def percentile_points(percentile)
76
- if percentile < 40
77
- 0.4 # Don't put much emphasis on capturing low percentiles.
78
- elsif percentile < 60
79
- 1.4 # Highest here to get mean traces
80
- elsif percentile < 90
81
- 0.7 # Between 60 & 90% is fine.
82
- elsif percentile >= 90
83
- 1.4 # Highest here to get 90+%ile traces
84
- else
85
- # impossible.
86
- percentile
87
- end
88
- end
89
-
90
- def age_points(age)
91
- age / 60.0 * POINT_MULTIPLIER_AGE
48
+ def stored!(request)
49
+ policies.each{ |p| p.stored!(request) }
92
50
  end
93
51
  end
94
52
  end
@@ -13,13 +13,14 @@ module ScoutApm
13
13
  attr_reader :prof
14
14
  attr_reader :mem_delta
15
15
  attr_reader :allocations
16
+ attr_reader :span_trace
16
17
  attr_accessor :hostname # hack - we need to reset these server side.
17
18
  attr_accessor :seconds_since_startup # hack - we need to reset these server side.
18
19
  attr_accessor :git_sha # hack - we need to reset these server side.
19
20
 
20
21
  attr_reader :truncated_metrics # True/False that says if we had to truncate the metrics of this trace
21
22
 
22
- def initialize(agent_context, uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations, score, truncated_metrics)
23
+ def initialize(agent_context, uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations, score, truncated_metrics, span_trace)
23
24
  @uri = uri
24
25
  @metric_name = metric_name
25
26
  @total_call_time = total_call_time
@@ -35,6 +36,7 @@ module ScoutApm
35
36
  @score = score
36
37
  @git_sha = agent_context.environment.git_revision.sha
37
38
  @truncated_metrics = truncated_metrics
39
+ @span_trace = span_trace
38
40
 
39
41
  agent_context.logger.debug { "Slow Request [#{uri}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta} Score: #{score}"}
40
42
  end
@@ -5,8 +5,10 @@ module ScoutApm
5
5
  class Store
6
6
  def initialize(context)
7
7
  @context = context
8
- @mutex = Mutex.new
9
- @reporting_periods = Hash.new { |h,k| h[k] = StoreReportingPeriod.new(k, @context) }
8
+ @mutex = Monitor.new
9
+ @reporting_periods = Hash.new { |h,k|
10
+ @mutex.synchronize { h[k] = StoreReportingPeriod.new(k, @context) }
11
+ }
10
12
  @samplers = []
11
13
  end
12
14
 
@@ -87,8 +89,13 @@ module ScoutApm
87
89
  def write_to_layaway(layaway, force=false)
88
90
  logger.debug("Writing to layaway#{" (Forced)" if force}")
89
91
 
90
- @reporting_periods.select { |time, rp| force || (time.timestamp < current_timestamp.timestamp) }.
91
- each { |time, rp| write_reporting_period(layaway, time, rp) }
92
+ to_report = @mutex.synchronize {
93
+ @reporting_periods.select { |time, rp|
94
+ force || (time.timestamp < current_timestamp.timestamp)
95
+ }
96
+ }
97
+
98
+ to_report.each { |time, rp| write_reporting_period(layaway, time, rp) }
92
99
  end
93
100
 
94
101
  # For each tick (minute), be sure we have a reporting period, and that samplers are run for it.
@@ -98,14 +105,12 @@ module ScoutApm
98
105
  end
99
106
 
100
107
  def write_reporting_period(layaway, time, rp)
101
- @mutex.synchronize {
102
108
  layaway.write_reporting_period(rp)
103
- }
104
109
  rescue => e
105
110
  logger.warn("Failed writing data to layaway file: #{e.message} / #{e.backtrace}")
106
111
  ensure
107
112
  logger.debug("Before delete, reporting periods length: #{@reporting_periods.size}")
108
- deleted_items = @reporting_periods.delete(time)
113
+ deleted_items = @mutex.synchronize { @reporting_periods.delete(time) }
109
114
  logger.debug("After delete, reporting periods length: #{@reporting_periods.size}. Did delete #{deleted_items}")
110
115
  end
111
116
  private :write_reporting_period
@@ -184,7 +189,6 @@ module ScoutApm
184
189
 
185
190
  # One period of Storage. Typically 1 minute
186
191
  class StoreReportingPeriod
187
-
188
192
  # A ScoredItemSet holding the "best" traces for the period
189
193
  attr_reader :request_traces
190
194
 
@@ -205,8 +209,8 @@ module ScoutApm
205
209
  def initialize(timestamp, context)
206
210
  @timestamp = timestamp
207
211
 
208
- @request_traces = ScoredItemSet.new
209
- @job_traces = ScoredItemSet.new
212
+ @request_traces = ScoredItemSet.new(context.config.value('max_traces'))
213
+ @job_traces = ScoredItemSet.new(context.config.value('max_traces'))
210
214
 
211
215
  @histograms = []
212
216
 
@@ -42,6 +42,9 @@ module ScoutApm
42
42
  # the name is determined from the name of the Controller or Job layer.
43
43
  attr_accessor :name_override
44
44
 
45
+ # A unique, but otherwise meaningless String to identify this request. UUID
46
+ attr_reader :transaction_id
47
+
45
48
  # When we see these layers, it means a real request is going through the
46
49
  # system. We toggle a flag to turn on some slightly more expensive
47
50
  # instrumentation (backtrace collection and the like) that would be too
@@ -49,6 +52,10 @@ module ScoutApm
49
52
  # see that on Sidekiq.
50
53
  REQUEST_TYPES = ["Controller", "Job"]
51
54
 
55
+ # Layers of type 'AutoInstrument' are not recorded if their total_call_time doesn't exceed this threshold.
56
+ # AutoInstrument layers are frequently of short duration. This throws out this deadweight that is unlikely to be optimized.
57
+ AUTO_INSTRUMENT_TIMING_THRESHOLD = 5/1_000.0 # units = seconds
58
+
52
59
  def initialize(agent_context, store)
53
60
  @agent_context = agent_context
54
61
  @store = store #this is passed in so we can use a real store (normal operation) or fake store (instant mode only)
@@ -64,6 +71,7 @@ module ScoutApm
64
71
  @mem_start = mem_usage
65
72
  @recorder = agent_context.recorder
66
73
  @real_request = false
74
+ @transaction_id = ScoutApm::Utils::TransactionId.new.to_s
67
75
  ignore_request! if @recorder.nil?
68
76
  end
69
77
 
@@ -75,8 +83,6 @@ module ScoutApm
75
83
 
76
84
  return ignoring_start_layer if ignoring_request?
77
85
 
78
- layer.start_sampling
79
-
80
86
  start_request(layer) unless @root_layer
81
87
 
82
88
  if REQUEST_TYPES.include?(layer.type)
@@ -105,10 +111,12 @@ module ScoutApm
105
111
  return
106
112
  end
107
113
 
108
- layer.record_traces!
109
114
  layer.record_stop_time!
110
115
  layer.record_allocations!
111
116
 
117
+ # Must follow layer.record_stop_time! as the total_call_time is used to determine if the layer is significant.
118
+ return if layer_insignificant?(layer)
119
+
112
120
  @layers[-1].add_child(layer) if @layers.any?
113
121
 
114
122
  # This must be called before checking if a backtrace should be collected as the call count influences our capture logic.
@@ -121,8 +129,6 @@ module ScoutApm
121
129
 
122
130
  if finalized?
123
131
  stop_request
124
- else
125
- continue_sampling_for_layers if @agent_context.config.value('profile')
126
132
  end
127
133
  end
128
134
 
@@ -151,6 +157,10 @@ module ScoutApm
151
157
  def capture_backtrace?(layer)
152
158
  return if ignoring_request?
153
159
 
160
+ # A backtrace has already been recorded. This happens with autoinstruments as
161
+ # the partial backtrace is set when creating the layer.
162
+ return false if layer.backtrace
163
+
154
164
  # Never capture backtraces for this kind of layer. The backtrace will
155
165
  # always be 100% framework code.
156
166
  return false if BACKTRACE_BLACKLIST.include?(layer.type)
@@ -170,6 +180,20 @@ module ScoutApm
170
180
  false
171
181
  end
172
182
 
183
+ # Returns +true+ if the layer is an AutoInstrument layer whose total call time is below +AUTO_INSTRUMENT_TIMING_THRESHOLD+ and
184
+ # records a Histogram of insignificant / significant layers by file name.
185
+ def layer_insignificant?(layer)
186
+ result = false # default is significant
187
+ if layer.type == 'AutoInstrument'
188
+ if layer.total_call_time < AUTO_INSTRUMENT_TIMING_THRESHOLD
189
+ result = true # not significant
190
+ end
191
+ # 0 = not significant, 1 = significant
192
+ @agent_context.auto_instruments_layer_histograms.add(layer.file_name, (result ? 0 : 1))
193
+ end
194
+ result
195
+ end
196
+
173
197
  # Maintains a lookup Hash of call counts by layer name. Used to determine if we should capture a backtrace.
174
198
  def update_call_counts!(layer)
175
199
  @call_set[layer.name].update!(layer.desc)
@@ -200,13 +224,6 @@ module ScoutApm
200
224
  @layers.none?
201
225
  end
202
226
 
203
- def continue_sampling_for_layers
204
- if last_traced_layer = @layers.select{|layer| layer.traced?}.last
205
- ScoutApm::Instruments::Stacks.update_indexes(@layers.last.frame_index, @layers.last.trace_index)
206
- ScoutApm::Instruments::Stacks.start_sampling
207
- end
208
- end
209
-
210
227
  # Run at the beginning of the whole request
211
228
  #
212
229
  # * Capture the first layer as the root_layer
@@ -220,13 +237,8 @@ module ScoutApm
220
237
  def stop_request
221
238
  @stopping = true
222
239
 
223
- if @agent_context.config.value('profile')
224
- ScoutApm::Instruments::Stacks.stop_sampling(true)
225
- ScoutApm::Instruments::Stacks.update_indexes(0, 0)
226
- end
227
-
228
- if recorder
229
- recorder.record!(self)
240
+ if @recorder
241
+ @recorder.record!(self)
230
242
  end
231
243
  end
232
244
 
@@ -234,16 +246,6 @@ module ScoutApm
234
246
  @stopping
235
247
  end
236
248
 
237
- # Enable ScoutProf for this thread
238
- def enable_profiled_thread!
239
- ScoutApm::Instruments::Stacks.add_profiled_thread
240
- end
241
-
242
- # Disable ScoutProf for this thread
243
- def disable_profiled_thread!
244
- ScoutApm::Instruments::Stacks.remove_profiled_thread
245
- end
246
-
247
249
  ###################################
248
250
  # Annotations
249
251
  ###################################
@@ -300,6 +302,10 @@ module ScoutApm
300
302
 
301
303
  apply_name_override
302
304
 
305
+ @agent_context.transaction_time_consumed.add(unique_name, root_layer.total_call_time)
306
+
307
+ context.add(:transaction_id => transaction_id)
308
+
303
309
  # Make a constant, then call converters.dup.each so it isn't inline?
304
310
  converters = {
305
311
  :histograms => LayerConverters::Histograms,
@@ -312,6 +318,11 @@ module ScoutApm
312
318
 
313
319
  :slow_job => LayerConverters::SlowJobConverter,
314
320
  :slow_req => LayerConverters::SlowRequestConverter,
321
+
322
+ # This is now integrated into the slow_job and slow_req converters, so that
323
+ # we get the exact same set of traces either way. We can call it
324
+ # directly when we move away from the legacy trace styles.
325
+ # :traces => LayerConverters::TraceConverter,
315
326
  }
316
327
 
317
328
  walker = LayerConverters::DepthFirstWalker.new(self.root_layer)
@@ -322,7 +333,7 @@ module ScoutApm
322
333
  memo
323
334
  end
324
335
  walker.walk
325
- converter_results = converter_instances.inject({}) do |memo, (slug,i)|
336
+ converter_results = converter_instances.inject({}) do |memo, (slug,i)|
326
337
  memo[slug] = i.record!
327
338
  memo
328
339
  end