scout_apm 3.0.0.pre26 → 4.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/test.yml +49 -0
- data/.gitignore +1 -1
- data/.rubocop.yml +5 -5
- data/.travis.yml +19 -14
- data/CHANGELOG.markdown +143 -4
- data/Gemfile +1 -7
- data/README.markdown +13 -4
- data/Rakefile +1 -1
- data/ext/allocations/allocations.c +2 -0
- data/gems/README.md +28 -0
- data/gems/octoshark.gemfile +4 -0
- data/gems/rails3.gemfile +5 -0
- data/gems/rails4.gemfile +4 -0
- data/gems/rails5.gemfile +4 -0
- data/gems/rails6.gemfile +4 -0
- data/lib/scout_apm.rb +39 -9
- data/lib/scout_apm/agent.rb +29 -10
- data/lib/scout_apm/agent/exit_handler.rb +0 -1
- data/lib/scout_apm/agent_context.rb +22 -3
- data/lib/scout_apm/app_server_load.rb +7 -2
- data/lib/scout_apm/attribute_arranger.rb +0 -2
- data/lib/scout_apm/auto_instrument.rb +5 -0
- data/lib/scout_apm/auto_instrument/instruction_sequence.rb +31 -0
- data/lib/scout_apm/auto_instrument/layer.rb +23 -0
- data/lib/scout_apm/auto_instrument/parser.rb +27 -0
- data/lib/scout_apm/auto_instrument/rails.rb +175 -0
- data/lib/scout_apm/background_job_integrations/legacy_sneakers.rb +55 -0
- data/lib/scout_apm/background_job_integrations/que.rb +134 -0
- data/lib/scout_apm/background_job_integrations/resque.rb +6 -2
- data/lib/scout_apm/background_job_integrations/shoryuken.rb +124 -0
- data/lib/scout_apm/background_job_integrations/sidekiq.rb +5 -19
- data/lib/scout_apm/background_job_integrations/sneakers.rb +87 -0
- data/lib/scout_apm/config.rb +45 -8
- data/lib/scout_apm/detailed_trace.rb +217 -0
- data/lib/scout_apm/environment.rb +19 -1
- data/lib/scout_apm/error.rb +27 -0
- data/lib/scout_apm/error_service.rb +32 -0
- data/lib/scout_apm/error_service/error_buffer.rb +39 -0
- data/lib/scout_apm/error_service/error_record.rb +211 -0
- data/lib/scout_apm/error_service/ignored_exceptions.rb +66 -0
- data/lib/scout_apm/error_service/middleware.rb +32 -0
- data/lib/scout_apm/error_service/notifier.rb +33 -0
- data/lib/scout_apm/error_service/payload.rb +47 -0
- data/lib/scout_apm/error_service/periodic_work.rb +17 -0
- data/lib/scout_apm/error_service/railtie.rb +11 -0
- data/lib/scout_apm/error_service/sidekiq.rb +80 -0
- data/lib/scout_apm/extensions/transaction_callback_payload.rb +1 -1
- data/lib/scout_apm/fake_store.rb +3 -0
- data/lib/scout_apm/framework_integrations/rails_2.rb +2 -1
- data/lib/scout_apm/framework_integrations/rails_3_or_4.rb +3 -1
- data/lib/scout_apm/git_revision.rb +6 -3
- data/lib/scout_apm/instant/middleware.rb +2 -1
- data/lib/scout_apm/instrument_manager.rb +9 -7
- data/lib/scout_apm/instruments/action_controller_rails_2.rb +3 -1
- data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +56 -55
- data/lib/scout_apm/instruments/action_view.rb +126 -26
- data/lib/scout_apm/instruments/active_record.rb +66 -18
- data/lib/scout_apm/instruments/http.rb +48 -0
- data/lib/scout_apm/instruments/memcached.rb +43 -0
- data/lib/scout_apm/instruments/mongoid.rb +9 -4
- data/lib/scout_apm/instruments/net_http.rb +8 -1
- data/lib/scout_apm/instruments/typhoeus.rb +87 -0
- data/lib/scout_apm/job_record.rb +4 -2
- data/lib/scout_apm/layaway_file.rb +4 -0
- data/lib/scout_apm/layer.rb +6 -57
- data/lib/scout_apm/layer_children_set.rb +9 -8
- data/lib/scout_apm/layer_converters/converter_base.rb +15 -30
- data/lib/scout_apm/layer_converters/database_converter.rb +2 -15
- data/lib/scout_apm/layer_converters/slow_job_converter.rb +12 -2
- data/lib/scout_apm/layer_converters/slow_request_converter.rb +14 -4
- data/lib/scout_apm/layer_converters/trace_converter.rb +184 -0
- data/lib/scout_apm/limited_layer.rb +0 -7
- data/lib/scout_apm/metric_stats.rb +0 -8
- data/lib/scout_apm/middleware.rb +1 -1
- data/lib/scout_apm/periodic_work.rb +19 -0
- data/lib/scout_apm/remote/message.rb +4 -0
- data/lib/scout_apm/remote/server.rb +13 -1
- data/lib/scout_apm/reporter.rb +8 -3
- data/lib/scout_apm/reporting.rb +2 -1
- data/lib/scout_apm/request_histograms.rb +8 -0
- data/lib/scout_apm/serializers/app_server_load_serializer.rb +4 -0
- data/lib/scout_apm/serializers/directive_serializer.rb +4 -0
- data/lib/scout_apm/serializers/payload_serializer.rb +2 -2
- data/lib/scout_apm/serializers/payload_serializer_to_json.rb +30 -15
- data/lib/scout_apm/slow_job_record.rb +5 -1
- data/lib/scout_apm/slow_policy/age_policy.rb +33 -0
- data/lib/scout_apm/slow_policy/percent_policy.rb +22 -0
- data/lib/scout_apm/slow_policy/percentile_policy.rb +24 -0
- data/lib/scout_apm/slow_policy/policy.rb +21 -0
- data/lib/scout_apm/slow_policy/speed_policy.rb +16 -0
- data/lib/scout_apm/slow_request_policy.rb +18 -77
- data/lib/scout_apm/slow_transaction.rb +3 -1
- data/lib/scout_apm/store.rb +12 -8
- data/lib/scout_apm/tracked_request.rb +39 -30
- data/lib/scout_apm/utils/active_record_metric_name.rb +16 -3
- data/lib/scout_apm/utils/backtrace_parser.rb +3 -0
- data/lib/scout_apm/utils/marshal_logging.rb +90 -0
- data/lib/scout_apm/utils/sql_sanitizer.rb +10 -1
- data/lib/scout_apm/utils/sql_sanitizer_regex.rb +8 -1
- data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +6 -0
- data/lib/scout_apm/utils/unique_id.rb +27 -0
- data/lib/scout_apm/version.rb +1 -1
- data/scout_apm.gemspec +13 -7
- data/test/test_helper.rb +2 -2
- data/test/unit/agent_context_test.rb +29 -0
- data/test/unit/auto_instrument/assignments-instrumented.rb +31 -0
- data/test/unit/auto_instrument/assignments.rb +31 -0
- data/test/unit/auto_instrument/controller-ast.txt +57 -0
- data/test/unit/auto_instrument/controller-instrumented.rb +49 -0
- data/test/unit/auto_instrument/controller.rb +49 -0
- data/test/unit/auto_instrument/rescue_from-instrumented.rb +13 -0
- data/test/unit/auto_instrument/rescue_from.rb +13 -0
- data/test/unit/auto_instrument_test.rb +54 -0
- data/test/unit/environment_test.rb +2 -2
- data/test/unit/error_service/error_buffer_test.rb +25 -0
- data/test/unit/error_service/ignored_exceptions_test.rb +49 -0
- data/test/unit/instruments/active_record_test.rb +40 -0
- data/test/unit/layer_children_set_test.rb +9 -0
- data/test/unit/request_histograms_test.rb +17 -0
- data/test/unit/serializers/payload_serializer_test.rb +39 -5
- data/test/unit/slow_request_policy_test.rb +41 -13
- data/test/unit/sql_sanitizer_test.rb +78 -0
- data/test/unit/utils/active_record_metric_name_test.rb +10 -2
- metadata +100 -18
- data/ext/stacks/extconf.rb +0 -37
- data/ext/stacks/scout_atomics.h +0 -86
- data/ext/stacks/stacks.c +0 -814
- data/lib/scout_apm/slow_job_policy.rb +0 -111
- data/lib/scout_apm/trace_compactor.rb +0 -312
- data/lib/scout_apm/utils/fake_stacks.rb +0 -88
- data/test/unit/instruments/active_record_instruments_test.rb +0 -5
- data/test/unit/slow_job_policy_test.rb +0 -6
- data/tester.rb +0 -53
@@ -23,7 +23,9 @@ module ScoutApm
|
|
23
23
|
attr_reader :git_sha
|
24
24
|
attr_reader :truncated_metrics
|
25
25
|
|
26
|
-
|
26
|
+
attr_reader :span_trace
|
27
|
+
|
28
|
+
def initialize(agent_context, queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations, score, truncated_metrics, span_trace)
|
27
29
|
@queue_name = queue_name
|
28
30
|
@job_name = job_name
|
29
31
|
@time = time
|
@@ -40,6 +42,8 @@ module ScoutApm
|
|
40
42
|
@score = score
|
41
43
|
@truncated_metrics = truncated_metrics
|
42
44
|
|
45
|
+
@span_trace = span_trace
|
46
|
+
|
43
47
|
agent_context.logger.debug { "Slow Job [#{metric_name}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
|
44
48
|
end
|
45
49
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'scout_apm/slow_policy/policy'
|
2
|
+
|
3
|
+
module ScoutApm::SlowPolicy
|
4
|
+
class AgePolicy < Policy
|
5
|
+
# For each minute we haven't seen an endpoint
|
6
|
+
POINT_MULTIPLIER_AGE = 0.25
|
7
|
+
|
8
|
+
# A hash of Endpoint Name to the last time we stored a slow transaction for it.
|
9
|
+
#
|
10
|
+
# Defaults to a start time that is pretty close to application boot time.
|
11
|
+
# So the "age" of an endpoint we've never seen is the time the application
|
12
|
+
# has been running.
|
13
|
+
attr_reader :last_seen
|
14
|
+
|
15
|
+
def initialize(context)
|
16
|
+
super
|
17
|
+
|
18
|
+
zero_time = Time.now
|
19
|
+
@last_seen = Hash.new { |h, k| h[k] = zero_time }
|
20
|
+
end
|
21
|
+
|
22
|
+
def call(request)
|
23
|
+
# How long has it been since we've seen this?
|
24
|
+
age = Time.now - last_seen[request.unique_name]
|
25
|
+
|
26
|
+
age / 60.0 * POINT_MULTIPLIER_AGE
|
27
|
+
end
|
28
|
+
|
29
|
+
def stored!(request)
|
30
|
+
last_seen[request.unique_name] = Time.now
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'scout_apm/slow_policy/policy'
|
2
|
+
|
3
|
+
module ScoutApm::SlowPolicy
|
4
|
+
class PercentPolicy < Policy
|
5
|
+
# Points for an endpoint whose throughput * response time is a large % of
|
6
|
+
# overall time spent processing requests
|
7
|
+
POINT_MULTIPLIER_PERCENT_TIME = 2.5
|
8
|
+
|
9
|
+
# Of the total time spent handling endpoints in this app, if this endpoint
|
10
|
+
# is a higher percent, it should get more points.
|
11
|
+
#
|
12
|
+
# A: 20 calls @ 100ms each => 2 seconds of total time
|
13
|
+
# B: 10 calls @ 100ms each => 1 second of total time
|
14
|
+
#
|
15
|
+
# Then A is 66% of the total call time
|
16
|
+
def call(request) # Scale 0.0 - 1.0
|
17
|
+
percent = context.transaction_time_consumed.percent_of_total(request.unique_name)
|
18
|
+
|
19
|
+
percent * POINT_MULTIPLIER_PERCENT_TIME
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'scout_apm/slow_policy/policy'
|
2
|
+
|
3
|
+
module ScoutApm::SlowPolicy
|
4
|
+
class PercentilePolicy < Policy
|
5
|
+
def call(request)
|
6
|
+
# What approximate percentile was this request?
|
7
|
+
total_time = request.root_layer.total_call_time
|
8
|
+
percentile = context.request_histograms.approximate_quantile_of_value(request.unique_name, total_time)
|
9
|
+
|
10
|
+
if percentile < 40
|
11
|
+
0.4 # Don't put much emphasis on capturing low percentiles.
|
12
|
+
elsif percentile < 60
|
13
|
+
1.4 # Highest here to get mean traces
|
14
|
+
elsif percentile < 90
|
15
|
+
0.7 # Between 60 & 90% is fine.
|
16
|
+
elsif percentile >= 90
|
17
|
+
1.4 # Highest here to get 90+%ile traces
|
18
|
+
else
|
19
|
+
# impossible.
|
20
|
+
percentile
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Note that this is semi-internal API. You should not need this, and if you do
|
2
|
+
# we're here to help at support@scoutapm.com. TrackedRequest doesn't change
|
3
|
+
# often, but we can't promise a perfectly stable API for it either.
|
4
|
+
module ScoutApm::SlowPolicy
|
5
|
+
class Policy
|
6
|
+
attr_reader :context
|
7
|
+
|
8
|
+
def initialize(context)
|
9
|
+
@context = context
|
10
|
+
end
|
11
|
+
|
12
|
+
def call(request)
|
13
|
+
raise NotImplementedError
|
14
|
+
end
|
15
|
+
|
16
|
+
# Override in subclasses to execute some behavior if the request gets a
|
17
|
+
# slot in the ScoredItemSet. Defaults to no-op
|
18
|
+
def stored!(request)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'scout_apm/slow_policy/policy'
|
2
|
+
|
3
|
+
module ScoutApm::SlowPolicy
|
4
|
+
class SpeedPolicy < Policy
|
5
|
+
# Adjust speed points. See the function
|
6
|
+
POINT_MULTIPLIER_SPEED = 0.25
|
7
|
+
|
8
|
+
# Time in seconds
|
9
|
+
# Logarithm keeps huge times from swamping the other metrics.
|
10
|
+
# 1+ is necessary to keep the log function in positive territory.
|
11
|
+
def call(request)
|
12
|
+
total_time = request.root_layer.total_call_time
|
13
|
+
Math.log(1 + total_time) * POINT_MULTIPLIER_SPEED
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -3,43 +3,29 @@
|
|
3
3
|
|
4
4
|
module ScoutApm
|
5
5
|
class SlowRequestPolicy
|
6
|
-
CAPTURE_TYPES = [
|
7
|
-
CAPTURE_DETAIL = "capture_detail",
|
8
|
-
CAPTURE_NONE = "capture_none",
|
9
|
-
]
|
10
|
-
|
11
|
-
# Adjust speed points. See the function
|
12
|
-
POINT_MULTIPLIER_SPEED = 0.25
|
13
|
-
|
14
|
-
# For each minute we haven't seen an endpoint
|
15
|
-
POINT_MULTIPLIER_AGE = 0.25
|
16
|
-
|
17
|
-
# Outliers are worth up to "1000ms" of weight
|
18
|
-
POINT_MULTIPLIER_PERCENTILE = 1.0
|
19
|
-
|
20
|
-
# Points for an endpoint's who's throughput * response time is a large % of
|
21
|
-
# overall time spent processing requests
|
22
|
-
POINT_MULTIPLIER_PERCENT_TIME = 2.5
|
23
|
-
|
24
|
-
# A hash of Endpoint Name to the last time we stored a slow transaction for it.
|
25
|
-
#
|
26
|
-
# Defaults to a start time that is pretty close to application boot time.
|
27
|
-
# So the "age" of an endpoint we've never seen is the time the application
|
28
|
-
# has been running.
|
29
|
-
attr_reader :last_seen
|
30
|
-
|
31
6
|
# The AgentContext we're running in
|
32
7
|
attr_reader :context
|
8
|
+
attr_reader :policies
|
33
9
|
|
34
10
|
def initialize(context)
|
35
11
|
@context = context
|
12
|
+
@policies = []
|
13
|
+
end
|
36
14
|
|
37
|
-
|
38
|
-
|
15
|
+
def add_default_policies
|
16
|
+
add(SlowPolicy::SpeedPolicy.new(context))
|
17
|
+
add(SlowPolicy::PercentilePolicy.new(context))
|
18
|
+
add(SlowPolicy::AgePolicy.new(context))
|
19
|
+
add(SlowPolicy::PercentPolicy.new(context)) # share of total request time; PercentilePolicy is already registered above
|
39
20
|
end
|
40
21
|
|
41
|
-
|
42
|
-
|
22
|
+
# policy is an object that behaves like a policy (responds to .call(req) for the score, and .stored!(req))
|
23
|
+
def add(policy)
|
24
|
+
unless policy.respond_to?(:call) && policy.respond_to?(:stored!)
|
25
|
+
raise "SlowRequestPolicy must implement policy api call(req) and stored!(req)"
|
26
|
+
end
|
27
|
+
|
28
|
+
@policies << policy
|
43
29
|
end
|
44
30
|
|
45
31
|
# Determine if this request trace should be fully analyzed by scoring it
|
@@ -56,56 +42,11 @@ module ScoutApm
|
|
56
42
|
return -1 # A negative score, should never be good enough to store.
|
57
43
|
end
|
58
44
|
|
59
|
-
|
60
|
-
|
61
|
-
# How long has it been since we've seen this?
|
62
|
-
age = Time.now - last_seen[unique_name]
|
63
|
-
|
64
|
-
# What approximate percentile was this request?
|
65
|
-
percentile = context.request_histograms.approximate_quantile_of_value(unique_name, total_time)
|
66
|
-
|
67
|
-
percent_of_total_time = context.transaction_time_consumed.percent_of_total(unique_name)
|
68
|
-
|
69
|
-
return speed_points(total_time) + percentile_points(percentile) + age_points(age) + percent_time_points(percent_of_total_time)
|
70
|
-
end
|
71
|
-
|
72
|
-
private
|
73
|
-
|
74
|
-
# Time in seconds
|
75
|
-
# Logarithm keeps huge times from swamping the other metrics.
|
76
|
-
# 1+ is necessary to keep the log function in positive territory.
|
77
|
-
def speed_points(time)
|
78
|
-
Math.log(1 + time) * POINT_MULTIPLIER_SPEED
|
79
|
-
end
|
80
|
-
|
81
|
-
def percentile_points(percentile)
|
82
|
-
if percentile < 40
|
83
|
-
0.4 # Don't put much emphasis on capturing low percentiles.
|
84
|
-
elsif percentile < 60
|
85
|
-
1.4 # Highest here to get mean traces
|
86
|
-
elsif percentile < 90
|
87
|
-
0.7 # Between 60 & 90% is fine.
|
88
|
-
elsif percentile >= 90
|
89
|
-
1.4 # Highest here to get 90+%ile traces
|
90
|
-
else
|
91
|
-
# impossible.
|
92
|
-
percentile
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
def age_points(age)
|
97
|
-
age / 60.0 * POINT_MULTIPLIER_AGE
|
45
|
+
policies.map{ |p| p.call(request) }.sum
|
98
46
|
end
|
99
47
|
|
100
|
-
|
101
|
-
|
102
|
-
#
|
103
|
-
# A: 20 calls @ 100ms each => 2 seconds of total time
|
104
|
-
# B: 10 calls @ 100ms each => 1 second of total time
|
105
|
-
#
|
106
|
-
# Then A is 66% of the total call time
|
107
|
-
def percent_time_points(percent) # Scale 0.0 - 1.0
|
108
|
-
percent * POINT_MULTIPLIER_PERCENT_TIME
|
48
|
+
def stored!(request)
|
49
|
+
policies.each{ |p| p.stored!(request) }
|
109
50
|
end
|
110
51
|
end
|
111
52
|
end
|
@@ -13,13 +13,14 @@ module ScoutApm
|
|
13
13
|
attr_reader :prof
|
14
14
|
attr_reader :mem_delta
|
15
15
|
attr_reader :allocations
|
16
|
+
attr_reader :span_trace
|
16
17
|
attr_accessor :hostname # hack - we need to reset these server side.
|
17
18
|
attr_accessor :seconds_since_startup # hack - we need to reset these server side.
|
18
19
|
attr_accessor :git_sha # hack - we need to reset these server side.
|
19
20
|
|
20
21
|
attr_reader :truncated_metrics # True/False that says if we had to truncate the metrics of this trace
|
21
22
|
|
22
|
-
def initialize(agent_context, uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations, score, truncated_metrics)
|
23
|
+
def initialize(agent_context, uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations, score, truncated_metrics, span_trace)
|
23
24
|
@uri = uri
|
24
25
|
@metric_name = metric_name
|
25
26
|
@total_call_time = total_call_time
|
@@ -35,6 +36,7 @@ module ScoutApm
|
|
35
36
|
@score = score
|
36
37
|
@git_sha = agent_context.environment.git_revision.sha
|
37
38
|
@truncated_metrics = truncated_metrics
|
39
|
+
@span_trace = span_trace
|
38
40
|
|
39
41
|
agent_context.logger.debug { "Slow Request [#{uri}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta} Score: #{score}"}
|
40
42
|
end
|
data/lib/scout_apm/store.rb
CHANGED
@@ -5,8 +5,10 @@ module ScoutApm
|
|
5
5
|
class Store
|
6
6
|
def initialize(context)
|
7
7
|
@context = context
|
8
|
-
@mutex =
|
9
|
-
@reporting_periods = Hash.new { |h,k|
|
8
|
+
@mutex = Monitor.new
|
9
|
+
@reporting_periods = Hash.new { |h,k|
|
10
|
+
@mutex.synchronize { h[k] = StoreReportingPeriod.new(k, @context) }
|
11
|
+
}
|
10
12
|
@samplers = []
|
11
13
|
end
|
12
14
|
|
@@ -87,8 +89,13 @@ module ScoutApm
|
|
87
89
|
def write_to_layaway(layaway, force=false)
|
88
90
|
logger.debug("Writing to layaway#{" (Forced)" if force}")
|
89
91
|
|
90
|
-
@
|
91
|
-
|
92
|
+
to_report = @mutex.synchronize {
|
93
|
+
@reporting_periods.select { |time, rp|
|
94
|
+
force || (time.timestamp < current_timestamp.timestamp)
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
to_report.each { |time, rp| write_reporting_period(layaway, time, rp) }
|
92
99
|
end
|
93
100
|
|
94
101
|
# For each tick (minute), be sure we have a reporting period, and that samplers are run for it.
|
@@ -98,14 +105,12 @@ module ScoutApm
|
|
98
105
|
end
|
99
106
|
|
100
107
|
def write_reporting_period(layaway, time, rp)
|
101
|
-
@mutex.synchronize {
|
102
108
|
layaway.write_reporting_period(rp)
|
103
|
-
}
|
104
109
|
rescue => e
|
105
110
|
logger.warn("Failed writing data to layaway file: #{e.message} / #{e.backtrace}")
|
106
111
|
ensure
|
107
112
|
logger.debug("Before delete, reporting periods length: #{@reporting_periods.size}")
|
108
|
-
deleted_items = @reporting_periods.delete(time)
|
113
|
+
deleted_items = @mutex.synchronize { @reporting_periods.delete(time) }
|
109
114
|
logger.debug("After delete, reporting periods length: #{@reporting_periods.size}. Did delete #{deleted_items}")
|
110
115
|
end
|
111
116
|
private :write_reporting_period
|
@@ -184,7 +189,6 @@ module ScoutApm
|
|
184
189
|
|
185
190
|
# One period of Storage. Typically 1 minute
|
186
191
|
class StoreReportingPeriod
|
187
|
-
|
188
192
|
# A ScoredItemSet holding the "best" traces for the period
|
189
193
|
attr_reader :request_traces
|
190
194
|
|
@@ -42,6 +42,9 @@ module ScoutApm
|
|
42
42
|
# the name is determined from the name of the Controller or Job layer.
|
43
43
|
attr_accessor :name_override
|
44
44
|
|
45
|
+
# A unique, but otherwise meaningless String to identify this request. UUID
|
46
|
+
attr_reader :transaction_id
|
47
|
+
|
45
48
|
# When we see these layers, it means a real request is going through the
|
46
49
|
# system. We toggle a flag to turn on some slightly more expensive
|
47
50
|
# instrumentation (backtrace collection and the like) that would be too
|
@@ -49,6 +52,10 @@ module ScoutApm
|
|
49
52
|
# see that on Sidekiq.
|
50
53
|
REQUEST_TYPES = ["Controller", "Job"]
|
51
54
|
|
55
|
+
# Layers of type 'AutoInstrument' are not recorded if their total_call_time doesn't exceed this threshold.
|
56
|
+
# AutoInstrument layers are frequently of short duration. This throws out this deadweight that is unlikely to be optimized.
|
57
|
+
AUTO_INSTRUMENT_TIMING_THRESHOLD = 5/1_000.0 # units = seconds
|
58
|
+
|
52
59
|
def initialize(agent_context, store)
|
53
60
|
@agent_context = agent_context
|
54
61
|
@store = store #this is passed in so we can use a real store (normal operation) or fake store (instant mode only)
|
@@ -64,6 +71,7 @@ module ScoutApm
|
|
64
71
|
@mem_start = mem_usage
|
65
72
|
@recorder = agent_context.recorder
|
66
73
|
@real_request = false
|
74
|
+
@transaction_id = ScoutApm::Utils::TransactionId.new.to_s
|
67
75
|
ignore_request! if @recorder.nil?
|
68
76
|
end
|
69
77
|
|
@@ -75,8 +83,6 @@ module ScoutApm
|
|
75
83
|
|
76
84
|
return ignoring_start_layer if ignoring_request?
|
77
85
|
|
78
|
-
layer.start_sampling
|
79
|
-
|
80
86
|
start_request(layer) unless @root_layer
|
81
87
|
|
82
88
|
if REQUEST_TYPES.include?(layer.type)
|
@@ -105,10 +111,12 @@ module ScoutApm
|
|
105
111
|
return
|
106
112
|
end
|
107
113
|
|
108
|
-
layer.record_traces!
|
109
114
|
layer.record_stop_time!
|
110
115
|
layer.record_allocations!
|
111
116
|
|
117
|
+
# Must follow layer.record_stop_time! as the total_call_time is used to determine if the layer is significant.
|
118
|
+
return if layer_insignificant?(layer)
|
119
|
+
|
112
120
|
@layers[-1].add_child(layer) if @layers.any?
|
113
121
|
|
114
122
|
# This must be called before checking if a backtrace should be collected as the call count influences our capture logic.
|
@@ -121,8 +129,6 @@ module ScoutApm
|
|
121
129
|
|
122
130
|
if finalized?
|
123
131
|
stop_request
|
124
|
-
else
|
125
|
-
continue_sampling_for_layers if @agent_context.config.value('profile')
|
126
132
|
end
|
127
133
|
end
|
128
134
|
|
@@ -151,6 +157,10 @@ module ScoutApm
|
|
151
157
|
def capture_backtrace?(layer)
|
152
158
|
return if ignoring_request?
|
153
159
|
|
160
|
+
# A backtrace has already been recorded. This happens with autoinstruments as
|
161
|
+
# the partial backtrace is set when creating the layer.
|
162
|
+
return false if layer.backtrace
|
163
|
+
|
154
164
|
# Never capture backtraces for this kind of layer. The backtrace will
|
155
165
|
# always be 100% framework code.
|
156
166
|
return false if BACKTRACE_BLACKLIST.include?(layer.type)
|
@@ -170,6 +180,20 @@ module ScoutApm
|
|
170
180
|
false
|
171
181
|
end
|
172
182
|
|
183
|
+
# Returns +true+ if the layer is an AutoInstrument layer whose total call time is below +AUTO_INSTRUMENT_TIMING_THRESHOLD+, and
|
184
|
+
# records a Histogram of insignificant / significant layers by file name.
|
185
|
+
def layer_insignificant?(layer)
|
186
|
+
result = false # default is significant
|
187
|
+
if layer.type == 'AutoInstrument'
|
188
|
+
if layer.total_call_time < AUTO_INSTRUMENT_TIMING_THRESHOLD
|
189
|
+
result = true # not significant
|
190
|
+
end
|
191
|
+
# 0 = not significant, 1 = significant
|
192
|
+
@agent_context.auto_instruments_layer_histograms.add(layer.file_name, (result ? 0 : 1))
|
193
|
+
end
|
194
|
+
result
|
195
|
+
end
|
196
|
+
|
173
197
|
# Maintains a lookup Hash of call counts by layer name. Used to determine if we should capture a backtrace.
|
174
198
|
def update_call_counts!(layer)
|
175
199
|
@call_set[layer.name].update!(layer.desc)
|
@@ -200,13 +224,6 @@ module ScoutApm
|
|
200
224
|
@layers.none?
|
201
225
|
end
|
202
226
|
|
203
|
-
def continue_sampling_for_layers
|
204
|
-
if last_traced_layer = @layers.select{|layer| layer.traced?}.last
|
205
|
-
ScoutApm::Instruments::Stacks.update_indexes(@layers.last.frame_index, @layers.last.trace_index)
|
206
|
-
ScoutApm::Instruments::Stacks.start_sampling
|
207
|
-
end
|
208
|
-
end
|
209
|
-
|
210
227
|
# Run at the beginning of the whole request
|
211
228
|
#
|
212
229
|
# * Capture the first layer as the root_layer
|
@@ -220,13 +237,8 @@ module ScoutApm
|
|
220
237
|
def stop_request
|
221
238
|
@stopping = true
|
222
239
|
|
223
|
-
if @
|
224
|
-
|
225
|
-
ScoutApm::Instruments::Stacks.update_indexes(0, 0)
|
226
|
-
end
|
227
|
-
|
228
|
-
if recorder
|
229
|
-
recorder.record!(self)
|
240
|
+
if @recorder
|
241
|
+
@recorder.record!(self)
|
230
242
|
end
|
231
243
|
end
|
232
244
|
|
@@ -234,16 +246,6 @@ module ScoutApm
|
|
234
246
|
@stopping
|
235
247
|
end
|
236
248
|
|
237
|
-
# Enable ScoutProf for this thread
|
238
|
-
def enable_profiled_thread!
|
239
|
-
ScoutApm::Instruments::Stacks.add_profiled_thread
|
240
|
-
end
|
241
|
-
|
242
|
-
# Disable ScoutProf for this thread
|
243
|
-
def disable_profiled_thread!
|
244
|
-
ScoutApm::Instruments::Stacks.remove_profiled_thread
|
245
|
-
end
|
246
|
-
|
247
249
|
###################################
|
248
250
|
# Annotations
|
249
251
|
###################################
|
@@ -302,6 +304,8 @@ module ScoutApm
|
|
302
304
|
|
303
305
|
@agent_context.transaction_time_consumed.add(unique_name, root_layer.total_call_time)
|
304
306
|
|
307
|
+
context.add(:transaction_id => transaction_id)
|
308
|
+
|
305
309
|
# Make a constant, then call converters.dup.each so it isn't inline?
|
306
310
|
converters = {
|
307
311
|
:histograms => LayerConverters::Histograms,
|
@@ -314,6 +318,11 @@ module ScoutApm
|
|
314
318
|
|
315
319
|
:slow_job => LayerConverters::SlowJobConverter,
|
316
320
|
:slow_req => LayerConverters::SlowRequestConverter,
|
321
|
+
|
322
|
+
# This is now integrated into the slow_job and slow_req converters, so that
|
323
|
+
# we get the exact same set of traces either way. We can call it
|
324
|
+
# directly when we move away from the legacy trace styles.
|
325
|
+
# :traces => LayerConverters::TraceConverter,
|
317
326
|
}
|
318
327
|
|
319
328
|
walker = LayerConverters::DepthFirstWalker.new(self.root_layer)
|
@@ -324,7 +333,7 @@ module ScoutApm
|
|
324
333
|
memo
|
325
334
|
end
|
326
335
|
walker.walk
|
327
|
-
converter_results = converter_instances.inject({}) do |memo, (slug,i)|
|
336
|
+
converter_results = converter_instances.inject({}) do |memo, (slug,i)|
|
328
337
|
memo[slug] = i.record!
|
329
338
|
memo
|
330
339
|
end
|