datadog 2.6.0 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -1
- data/ext/datadog_profiling_native_extension/extconf.rb +3 -0
- data/ext/datadog_profiling_native_extension/private_vm_api_access.c +2 -8
- data/ext/datadog_profiling_native_extension/profiling.c +6 -0
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +14 -4
- data/ext/datadog_profiling_native_extension/ruby_helpers.h +4 -0
- data/ext/libdatadog_api/crashtracker.c +6 -4
- data/ext/libdatadog_extconf_helpers.rb +1 -1
- data/lib/datadog/core/configuration/settings.rb +4 -4
- data/lib/datadog/di/code_tracker.rb +30 -3
- data/lib/datadog/di/component.rb +108 -0
- data/lib/datadog/di/configuration/settings.rb +69 -44
- data/lib/datadog/di/contrib/active_record.rb +11 -0
- data/lib/datadog/di/error.rb +17 -0
- data/lib/datadog/di/instrumenter.rb +27 -11
- data/lib/datadog/di/probe.rb +23 -1
- data/lib/datadog/di/probe_manager.rb +246 -0
- data/lib/datadog/di/probe_notification_builder.rb +4 -12
- data/lib/datadog/di/probe_notifier_worker.rb +68 -41
- data/lib/datadog/di/serializer.rb +143 -95
- data/lib/datadog/di/transport.rb +23 -9
- data/lib/datadog/di.rb +49 -1
- data/lib/datadog/tracing/tracer.rb +1 -1
- data/lib/datadog/version.rb +2 -2
- metadata +10 -7
@@ -54,10 +54,11 @@ module Datadog
|
|
54
54
|
#
|
55
55
|
# @api private
|
56
56
|
class Instrumenter
|
57
|
-
def initialize(settings, serializer, logger, code_tracker: nil)
|
57
|
+
def initialize(settings, serializer, logger, code_tracker: nil, telemetry: nil)
|
58
58
|
@settings = settings
|
59
59
|
@serializer = serializer
|
60
60
|
@logger = logger
|
61
|
+
@telemetry = telemetry
|
61
62
|
@code_tracker = code_tracker
|
62
63
|
|
63
64
|
@lock = Mutex.new
|
@@ -66,6 +67,7 @@ module Datadog
|
|
66
67
|
attr_reader :settings
|
67
68
|
attr_reader :serializer
|
68
69
|
attr_reader :logger
|
70
|
+
attr_reader :telemetry
|
69
71
|
attr_reader :code_tracker
|
70
72
|
|
71
73
|
# This is a substitute for Thread::Backtrace::Location
|
@@ -172,12 +174,12 @@ module Datadog
|
|
172
174
|
# we use mock objects and the methods may be mocked with
|
173
175
|
# individual invocations, yielding different return values on
|
174
176
|
# different calls to the same method.
|
175
|
-
permit_untargeted_trace_points = settings.dynamic_instrumentation.untargeted_trace_points
|
177
|
+
permit_untargeted_trace_points = settings.dynamic_instrumentation.internal.untargeted_trace_points
|
176
178
|
|
177
179
|
iseq = nil
|
178
180
|
if code_tracker
|
179
|
-
|
180
|
-
unless
|
181
|
+
ret = code_tracker.iseqs_for_path_suffix(probe.file) # steep:ignore
|
182
|
+
unless ret
|
181
183
|
if permit_untargeted_trace_points
|
182
184
|
# Continue without targeting the trace point.
|
183
185
|
# This is going to cause a serious performance penalty for
|
@@ -204,6 +206,10 @@ module Datadog
|
|
204
206
|
raise Error::DITargetNotDefined, "File not in code tracker registry: #{probe.file}"
|
205
207
|
end
|
206
208
|
|
209
|
+
if ret
|
210
|
+
actual_path, iseq = ret
|
211
|
+
end
|
212
|
+
|
207
213
|
# If trace point is not targeted, we only need one trace point per file.
|
208
214
|
# Creating a trace point for each probe does work but the performance
|
209
215
|
# penalty will be taken for each trace point defined in the file.
|
@@ -217,18 +223,26 @@ module Datadog
|
|
217
223
|
# this optimization just yet and create a trace point for each probe.
|
218
224
|
|
219
225
|
tp = TracePoint.new(:line) do |tp|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
if
|
225
|
-
|
226
|
-
|
226
|
+
begin
|
227
|
+
# If trace point is not targeted, we must verify that the invocation
|
228
|
+
# is the file & line that we want, because untargeted trace points
|
229
|
+
# are invoked for *each* line of Ruby executed.
|
230
|
+
if iseq || tp.lineno == probe.line_no && probe.file_matches?(tp.path)
|
231
|
+
if rate_limiter.nil? || rate_limiter.allow?
|
232
|
+
# & is to stop steep complaints, block is always present here.
|
233
|
+
block&.call(probe: probe, trace_point: tp, caller_locations: caller_locations)
|
234
|
+
end
|
227
235
|
end
|
236
|
+
rescue => exc
|
237
|
+
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
|
238
|
+
logger.warn("Unhandled exception in line trace point: #{exc.class}: #{exc}")
|
239
|
+
telemetry&.report(exc, description: "Unhandled exception in line trace point")
|
240
|
+
# TODO test this path
|
228
241
|
end
|
229
242
|
rescue => exc
|
230
243
|
raise if settings.dynamic_instrumentation.propagate_all_exceptions
|
231
244
|
logger.warn("Unhandled exception in line trace point: #{exc.class}: #{exc}")
|
245
|
+
telemetry&.report(exc, description: "Unhandled exception in line trace point")
|
232
246
|
# TODO test this path
|
233
247
|
end
|
234
248
|
|
@@ -244,6 +258,8 @@ module Datadog
|
|
244
258
|
end
|
245
259
|
|
246
260
|
probe.instrumentation_trace_point = tp
|
261
|
+
# actual_path could be nil if we don't use targeted trace points.
|
262
|
+
probe.instrumented_path = actual_path
|
247
263
|
|
248
264
|
if iseq
|
249
265
|
tp.enable(target: iseq, target_line: line_no)
|
data/lib/datadog/di/probe.rb
CHANGED
@@ -47,6 +47,10 @@ module Datadog
|
|
47
47
|
raise ArgumentError, "Probe contains both line number and method name: #{id}"
|
48
48
|
end
|
49
49
|
|
50
|
+
if line_no && !file
|
51
|
+
raise ArgumentError, "Probe contains line number but not file: #{id}"
|
52
|
+
end
|
53
|
+
|
50
54
|
if type_name && !method_name || method_name && !type_name
|
51
55
|
raise ArgumentError, "Partial method probe definition: #{id}"
|
52
56
|
end
|
@@ -71,6 +75,8 @@ module Datadog
|
|
71
75
|
|
72
76
|
@rate_limit = rate_limit || (@capture_snapshot ? 1 : 5000)
|
73
77
|
@rate_limiter = Datadog::Core::TokenBucket.new(@rate_limit)
|
78
|
+
|
79
|
+
@emitting_notified = false
|
74
80
|
end
|
75
81
|
|
76
82
|
attr_reader :id
|
@@ -101,7 +107,10 @@ module Datadog
|
|
101
107
|
# method or for stack traversal purposes?), therefore we do not check
|
102
108
|
# for file name/path presence here and just consider the line number.
|
103
109
|
def line?
|
104
|
-
|
110
|
+
# Constructor checks that file is given if line number is given,
|
111
|
+
# but for safety, check again here since we somehow got a probe with
|
112
|
+
# a line number but no file in the wild.
|
113
|
+
!!(file && line_no)
|
105
114
|
end
|
106
115
|
|
107
116
|
# Returns whether the probe is a method probe.
|
@@ -157,6 +166,19 @@ module Datadog
|
|
157
166
|
# Line trace point for line probes. Normally this would be a targeted
|
158
167
|
# trace point.
|
159
168
|
attr_accessor :instrumentation_trace_point
|
169
|
+
|
170
|
+
# Actual path to the file instrumented by the probe, for line probes,
|
171
|
+
# when code tracking is available and line trace point is targeted.
|
172
|
+
# For untargeted line trace points instrumented path will be nil.
|
173
|
+
attr_accessor :instrumented_path
|
174
|
+
|
175
|
+
# TODO emitting_notified reads and writes should in theory be locked,
|
176
|
+
# however since DI is only implemented for MRI in practice the missing
|
177
|
+
# locking should not cause issues.
|
178
|
+
attr_writer :emitting_notified
|
179
|
+
def emitting_notified?
|
180
|
+
!!@emitting_notified
|
181
|
+
end
|
160
182
|
end
|
161
183
|
end
|
162
184
|
end
|
@@ -0,0 +1,246 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# rubocop:disable Lint/AssignmentInCondition
|
4
|
+
|
5
|
+
require 'monitor'
|
6
|
+
|
7
|
+
module Datadog
|
8
|
+
module DI
|
9
|
+
# Stores probes received from remote config (that we can parse, in other
|
10
|
+
# words, whose type/attributes we support), requests needed instrumentation
|
11
|
+
# for the probes via Instrumenter, and stores pending probes (those which
|
12
|
+
# haven't yet been instrumented successfully due to their targets not
|
13
|
+
# existing) and failed probes (where we are certain the target will not
|
14
|
+
# ever be loaded, or otherwise become valid).
|
15
|
+
#
|
16
|
+
# @api private
|
17
|
+
class ProbeManager
|
18
|
+
def initialize(settings, instrumenter, probe_notification_builder,
|
19
|
+
probe_notifier_worker, logger, telemetry: nil)
|
20
|
+
@settings = settings
|
21
|
+
@instrumenter = instrumenter
|
22
|
+
@probe_notification_builder = probe_notification_builder
|
23
|
+
@probe_notifier_worker = probe_notifier_worker
|
24
|
+
@logger = logger
|
25
|
+
@telemetry = telemetry
|
26
|
+
@installed_probes = {}
|
27
|
+
@pending_probes = {}
|
28
|
+
@failed_probes = {}
|
29
|
+
@lock = Monitor.new
|
30
|
+
|
31
|
+
@definition_trace_point = TracePoint.trace(:end) do |tp|
|
32
|
+
install_pending_method_probes(tp.self)
|
33
|
+
rescue => exc
|
34
|
+
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
|
35
|
+
logger.warn("Unhandled exception in definition trace point: #{exc.class}: #{exc}")
|
36
|
+
telemetry&.report(exc, description: "Unhandled exception in definition trace point")
|
37
|
+
# TODO test this path
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
attr_reader :logger
|
42
|
+
attr_reader :telemetry
|
43
|
+
|
44
|
+
# TODO test that close is called during component teardown and
|
45
|
+
# the trace point is cleared
|
46
|
+
def close
|
47
|
+
definition_trace_point.disable
|
48
|
+
clear_hooks
|
49
|
+
end
|
50
|
+
|
51
|
+
def clear_hooks
|
52
|
+
@lock.synchronize do
|
53
|
+
@pending_probes.clear
|
54
|
+
@installed_probes.each do |probe_id, probe|
|
55
|
+
instrumenter.unhook(probe)
|
56
|
+
end
|
57
|
+
@installed_probes.clear
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
attr_reader :settings
|
62
|
+
attr_reader :instrumenter
|
63
|
+
attr_reader :probe_notification_builder
|
64
|
+
attr_reader :probe_notifier_worker
|
65
|
+
|
66
|
+
def installed_probes
|
67
|
+
@lock.synchronize do
|
68
|
+
@installed_probes
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def pending_probes
|
73
|
+
@lock.synchronize do
|
74
|
+
@pending_probes
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
# Probes that failed to instrument for reasons other than the target is
|
79
|
+
# not yet loaded are added to this collection, so that we do not try
|
80
|
+
# to instrument them every time remote configuration is processed.
|
81
|
+
def failed_probes
|
82
|
+
@lock.synchronize do
|
83
|
+
@failed_probes
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
# Requests to install the specified probe.
|
88
|
+
#
|
89
|
+
# If the target of the probe does not exist, assume the relevant
|
90
|
+
# code is not loaded yet (rather than that it will never be loaded),
|
91
|
+
# and store the probe in a pending probe list. When classes are
|
92
|
+
# defined, or files loaded, the probe will be checked against the
|
93
|
+
# newly defined classes/loaded files, and will be installed if it
|
94
|
+
# matches.
|
95
|
+
def add_probe(probe)
|
96
|
+
@lock.synchronize do
|
97
|
+
# Probe failed to install previously, do not try to install it again.
|
98
|
+
if msg = @failed_probes[probe.id]
|
99
|
+
# TODO test this path
|
100
|
+
raise Error::ProbePreviouslyFailed, msg
|
101
|
+
end
|
102
|
+
|
103
|
+
begin
|
104
|
+
instrumenter.hook(probe, &method(:probe_executed_callback))
|
105
|
+
|
106
|
+
@installed_probes[probe.id] = probe
|
107
|
+
payload = probe_notification_builder.build_installed(probe)
|
108
|
+
probe_notifier_worker.add_status(payload)
|
109
|
+
# The probe would only be in the pending probes list if it was
|
110
|
+
# previously attempted to be installed and the target was not loaded.
|
111
|
+
# Always remove from pending list here because it makes the
|
112
|
+
# API smaller and shouldn't cause any actual problems.
|
113
|
+
@pending_probes.delete(probe.id)
|
114
|
+
true
|
115
|
+
rescue Error::DITargetNotDefined
|
116
|
+
@pending_probes[probe.id] = probe
|
117
|
+
false
|
118
|
+
end
|
119
|
+
rescue => exc
|
120
|
+
# In "propagate all exceptions" mode we will try to instrument again.
|
121
|
+
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
|
122
|
+
|
123
|
+
logger.warn("Error processing probe configuration: #{exc.class}: #{exc}")
|
124
|
+
telemetry&.report(exc, description: "Error processing probe configuration")
|
125
|
+
# TODO report probe as failed to agent since we won't attempt to
|
126
|
+
# install it again.
|
127
|
+
|
128
|
+
# TODO add top stack frame to message
|
129
|
+
@failed_probes[probe.id] = "#{exc.class}: #{exc}"
|
130
|
+
|
131
|
+
raise
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
# Removes probes with ids other than in the specified list.
|
136
|
+
#
|
137
|
+
# This method is meant to be invoked from remote config processor.
|
138
|
+
# Remote config contains the list of currently defined probes; any
|
139
|
+
# probes not in that list have been removed by user and should be
|
140
|
+
# de-instrumented from the application.
|
141
|
+
def remove_other_probes(probe_ids)
|
142
|
+
@lock.synchronize do
|
143
|
+
@pending_probes.values.each do |probe|
|
144
|
+
unless probe_ids.include?(probe.id)
|
145
|
+
@pending_probes.delete(probe.id)
|
146
|
+
end
|
147
|
+
end
|
148
|
+
@installed_probes.values.each do |probe|
|
149
|
+
unless probe_ids.include?(probe.id)
|
150
|
+
begin
|
151
|
+
instrumenter.unhook(probe)
|
152
|
+
# Only remove the probe from installed list if it was
|
153
|
+
# successfully de-instrumented. Active probes do incur overhead
|
154
|
+
# for the running application, and if the error is ephemeral
|
155
|
+
# we want to try removing the probe again at the next opportunity.
|
156
|
+
#
|
157
|
+
# TODO give up after some time?
|
158
|
+
@installed_probes.delete(probe.id)
|
159
|
+
rescue => exc
|
160
|
+
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
|
161
|
+
# Silence all exceptions?
|
162
|
+
# TODO should we propagate here and rescue upstream?
|
163
|
+
logger.warn("Error removing probe #{probe.id}: #{exc.class}: #{exc}")
|
164
|
+
telemetry&.report(exc, description: "Error removing probe #{probe.id}")
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
# Installs pending method probes, if any, for the specified class.
|
172
|
+
#
|
173
|
+
# This method is meant to be called from the "end" trace point,
|
174
|
+
# which is invoked for each class definition.
|
175
|
+
private def install_pending_method_probes(cls)
|
176
|
+
@lock.synchronize do
|
177
|
+
# TODO search more efficiently than linearly
|
178
|
+
@pending_probes.each do |probe_id, probe|
|
179
|
+
if probe.method?
|
180
|
+
# TODO move this stringification elsewhere
|
181
|
+
if probe.type_name == cls.name
|
182
|
+
begin
|
183
|
+
# TODO is it OK to hook from trace point handler?
|
184
|
+
# TODO the class is now defined, but can hooking still fail?
|
185
|
+
instrumenter.hook(probe, &method(:probe_executed_callback))
|
186
|
+
@pending_probes.delete(probe.id)
|
187
|
+
break
|
188
|
+
rescue Error::DITargetNotDefined
|
189
|
+
# This should not happen... try installing again later?
|
190
|
+
rescue => exc
|
191
|
+
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
|
192
|
+
|
193
|
+
logger.warn("Error installing probe after class is defined: #{exc.class}: #{exc}")
|
194
|
+
telemetry&.report(exc, description: "Error installing probe after class is defined")
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
# Installs pending line probes, if any, for the file of the specified
|
203
|
+
# absolute path.
|
204
|
+
#
|
205
|
+
# This method is meant to be called from the script_compiled trace
|
206
|
+
# point, which is invoked for each required or loaded file
|
207
|
+
# (and also for eval'd code, but those invocations are filtered out).
|
208
|
+
def install_pending_line_probes(path)
|
209
|
+
@lock.synchronize do
|
210
|
+
@pending_probes.values.each do |probe|
|
211
|
+
if probe.line?
|
212
|
+
if probe.file_matches?(path)
|
213
|
+
add_probe(probe)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
# Entry point invoked from the instrumentation when the specified probe
|
221
|
+
# is invoked (that is, either its target method is invoked, or
|
222
|
+
# execution reached its target file/line).
|
223
|
+
#
|
224
|
+
# This method is responsible for queueing probe status to be sent to the
|
225
|
+
# backend (once per the probe's lifetime) and a snapshot corresponding
|
226
|
+
# to the current invocation.
|
227
|
+
def probe_executed_callback(probe:, **opts)
|
228
|
+
unless probe.emitting_notified?
|
229
|
+
payload = probe_notification_builder.build_emitting(probe)
|
230
|
+
probe_notifier_worker.add_status(payload)
|
231
|
+
probe.emitting_notified = true
|
232
|
+
end
|
233
|
+
|
234
|
+
payload = probe_notification_builder.build_executed(probe, **opts)
|
235
|
+
probe_notifier_worker.add_snapshot(payload)
|
236
|
+
end
|
237
|
+
|
238
|
+
# Class/module definition trace point (:end type).
|
239
|
+
# Used to install hooks when the target classes/modules aren't yet
|
240
|
+
# defined when the hook request is received.
|
241
|
+
attr_reader :definition_trace_point
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
# rubocop:enable Lint/AssignmentInCondition
|
@@ -46,11 +46,13 @@ module Datadog
|
|
46
46
|
# this should be all frames for enriched probes and no frames for
|
47
47
|
# non-enriched probes?
|
48
48
|
build_snapshot(probe, rv: rv, snapshot: snapshot,
|
49
|
+
# Actual path of the instrumented file.
|
50
|
+
path: trace_point&.path,
|
49
51
|
duration: duration, caller_locations: caller_locations, args: args, kwargs: kwargs,
|
50
52
|
serialized_entry_args: serialized_entry_args)
|
51
53
|
end
|
52
54
|
|
53
|
-
def build_snapshot(probe, rv: nil, snapshot: nil,
|
55
|
+
def build_snapshot(probe, rv: nil, snapshot: nil, path: nil,
|
54
56
|
duration: nil, caller_locations: nil, args: nil, kwargs: nil,
|
55
57
|
serialized_entry_args: nil)
|
56
58
|
# TODO also verify that non-capturing probe does not pass
|
@@ -85,18 +87,8 @@ module Datadog
|
|
85
87
|
end
|
86
88
|
|
87
89
|
location = if probe.line?
|
88
|
-
actual_file = if probe.file
|
89
|
-
# Normally caller_locations should always be filled for a line probe
|
90
|
-
# but in the test suite we don't always provide all arguments.
|
91
|
-
actual_file_basename = File.basename(probe.file)
|
92
|
-
caller_locations&.detect do |loc|
|
93
|
-
# TODO record actual path that probe was installed into,
|
94
|
-
# perform exact match here against that path.
|
95
|
-
File.basename(loc.path) == actual_file_basename
|
96
|
-
end&.path || probe.file
|
97
|
-
end
|
98
90
|
{
|
99
|
-
file:
|
91
|
+
file: path,
|
100
92
|
lines: [probe.line_no],
|
101
93
|
}
|
102
94
|
elsif probe.method?
|
@@ -23,12 +23,9 @@ module Datadog
|
|
23
23
|
#
|
24
24
|
# @api private
|
25
25
|
class ProbeNotifierWorker
|
26
|
-
|
27
|
-
# TODO make this into an internal setting and increase default to 2 or 3.
|
28
|
-
MIN_SEND_INTERVAL = 1
|
29
|
-
|
30
|
-
def initialize(settings, transport, logger)
|
26
|
+
def initialize(settings, transport, logger, telemetry: nil)
|
31
27
|
@settings = settings
|
28
|
+
@telemetry = telemetry
|
32
29
|
@status_queue = []
|
33
30
|
@snapshot_queue = []
|
34
31
|
@transport = transport
|
@@ -39,10 +36,12 @@ module Datadog
|
|
39
36
|
@sleep_remaining = nil
|
40
37
|
@wake_scheduled = false
|
41
38
|
@thread = nil
|
39
|
+
@flush = 0
|
42
40
|
end
|
43
41
|
|
44
42
|
attr_reader :settings
|
45
43
|
attr_reader :logger
|
44
|
+
attr_reader :telemetry
|
46
45
|
|
47
46
|
def start
|
48
47
|
return if @thread
|
@@ -53,33 +52,38 @@ module Datadog
|
|
53
52
|
# and then quit?
|
54
53
|
break if @stop_requested
|
55
54
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
55
|
+
# If a flush was requested, send immediately and do not
|
56
|
+
# wait for the cooldown period.
|
57
|
+
if @lock.synchronize { @flush } == 0
|
58
|
+
sleep_remaining = @lock.synchronize do
|
59
|
+
if sleep_remaining && sleep_remaining > 0
|
60
|
+
# Recalculate how much sleep time is remaining, then sleep that long.
|
61
|
+
set_sleep_remaining
|
62
|
+
else
|
63
|
+
0
|
64
|
+
end
|
62
65
|
end
|
63
|
-
end
|
64
66
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
67
|
+
if sleep_remaining > 0
|
68
|
+
# Do not need to update @wake_scheduled here because
|
69
|
+
# wake-up is already scheduled for the earliest possible time.
|
70
|
+
wake.wait(sleep_remaining)
|
71
|
+
next
|
72
|
+
end
|
70
73
|
end
|
71
74
|
|
72
75
|
begin
|
73
76
|
more = maybe_send
|
74
77
|
rescue => exc
|
75
|
-
raise if settings.dynamic_instrumentation.propagate_all_exceptions
|
78
|
+
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
|
76
79
|
|
77
80
|
logger.warn("Error in probe notifier worker: #{exc.class}: #{exc} (at #{exc.backtrace.first})")
|
81
|
+
telemetry&.report(exc, description: "Error in probe notifier worker")
|
78
82
|
end
|
79
83
|
@lock.synchronize do
|
80
84
|
@wake_scheduled = more
|
81
85
|
end
|
82
|
-
wake.wait(more ?
|
86
|
+
wake.wait(more ? min_send_interval : nil)
|
83
87
|
end
|
84
88
|
end
|
85
89
|
end
|
@@ -106,26 +110,40 @@ module Datadog
|
|
106
110
|
# therefore, it should only be called when there is no parallel
|
107
111
|
# activity (in another thread) that causes more notifications
|
108
112
|
# to be generated.
|
113
|
+
#
|
114
|
+
# This method is used by the test suite to wait until notifications have
|
115
|
+
# been sent out, and could be used for graceful stopping of the
|
116
|
+
# worker thread.
|
109
117
|
def flush
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
118
|
+
@lock.synchronize do
|
119
|
+
@flush += 1
|
120
|
+
end
|
121
|
+
begin
|
122
|
+
loop do
|
123
|
+
if @thread.nil? || !@thread.alive?
|
124
|
+
return
|
125
|
+
end
|
114
126
|
|
115
|
-
|
116
|
-
|
117
|
-
|
127
|
+
io_in_progress, queues_empty = @lock.synchronize do
|
128
|
+
[io_in_progress?, status_queue.empty? && snapshot_queue.empty?]
|
129
|
+
end
|
118
130
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
131
|
+
if io_in_progress
|
132
|
+
# If we just call Thread.pass we could be in a busy loop -
|
133
|
+
# add a sleep.
|
134
|
+
sleep 0.25
|
135
|
+
next
|
136
|
+
elsif queues_empty
|
137
|
+
break
|
138
|
+
else
|
139
|
+
wake.signal
|
140
|
+
sleep 0.25
|
141
|
+
next
|
142
|
+
end
|
143
|
+
end
|
144
|
+
ensure
|
145
|
+
@lock.synchronize do
|
146
|
+
@flush -= 1
|
129
147
|
end
|
130
148
|
end
|
131
149
|
end
|
@@ -136,6 +154,11 @@ module Datadog
|
|
136
154
|
attr_reader :wake
|
137
155
|
attr_reader :thread
|
138
156
|
|
157
|
+
# Convenience method to keep line length reasonable in the rest of the file.
|
158
|
+
def min_send_interval
|
159
|
+
settings.dynamic_instrumentation.internal.min_send_interval
|
160
|
+
end
|
161
|
+
|
139
162
|
# This method should be called while @lock is held.
|
140
163
|
def io_in_progress?
|
141
164
|
@io_in_progress
|
@@ -181,14 +204,14 @@ module Datadog
|
|
181
204
|
end
|
182
205
|
|
183
206
|
# Determine how much longer the worker thread should sleep
|
184
|
-
# so as not to send in less than
|
207
|
+
# so as not to send in less than min send interval since the last send.
|
185
208
|
# Important: this method must be called when @lock is held.
|
186
209
|
#
|
187
210
|
# Returns the time remaining to sleep.
|
188
211
|
def set_sleep_remaining
|
189
212
|
now = Core::Utils::Time.get_time
|
190
213
|
@sleep_remaining = if last_sent
|
191
|
-
[last_sent +
|
214
|
+
[last_sent + min_send_interval - now, 0].max
|
192
215
|
else
|
193
216
|
0
|
194
217
|
end
|
@@ -218,16 +241,20 @@ module Datadog
|
|
218
241
|
@last_sent = time
|
219
242
|
end
|
220
243
|
rescue => exc
|
221
|
-
raise if settings.dynamic_instrumentation.propagate_all_exceptions
|
244
|
+
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
|
222
245
|
logger.warn("failed to send #{event_name}: #{exc.class}: #{exc} (at #{exc.backtrace.first})")
|
246
|
+
# Should we report this error to telemetry? Most likely failure
|
247
|
+
# to send is due to a network issue, and trying to send a
|
248
|
+
# telemetry message would also fail.
|
223
249
|
end
|
224
250
|
end
|
225
251
|
batch.any? # steep:ignore
|
226
|
-
rescue ThreadError
|
252
|
+
rescue ThreadError => exc
|
227
253
|
# Normally the queue should only be consumed in this method,
|
228
254
|
# however if anyone consumes it elsewhere we don't want to block
|
229
255
|
# while consuming it here. Rescue ThreadError and return.
|
230
|
-
logger.warn("
|
256
|
+
logger.warn("Unexpected #{event_name} queue underflow - consumed elsewhere?")
|
257
|
+
telemetry&.report(exc, description: "Unexpected #{event_name} queue underflow")
|
231
258
|
ensure
|
232
259
|
@lock.synchronize do
|
233
260
|
@io_in_progress = false
|