ddtrace 0.53.0 → 0.54.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -11
- data/ddtrace.gemspec +5 -2
- data/docs/GettingStarted.md +40 -3
- data/docs/ProfilingDevelopment.md +2 -2
- data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +86 -0
- data/ext/ddtrace_profiling_native_extension/clock_id.h +4 -0
- data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +52 -0
- data/ext/ddtrace_profiling_native_extension/clock_id_noop.c +14 -0
- data/ext/ddtrace_profiling_native_extension/extconf.rb +111 -3
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +35 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +3 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +6 -1
- data/lib/datadog/ci/contrib/cucumber/formatter.rb +1 -0
- data/lib/datadog/ci/contrib/rspec/example.rb +1 -0
- data/lib/datadog/ci/ext/environment.rb +26 -21
- data/lib/datadog/ci/ext/test.rb +1 -0
- data/lib/datadog/ci/test.rb +5 -1
- data/lib/ddtrace/buffer.rb +28 -16
- data/lib/ddtrace/configuration/agent_settings_resolver.rb +27 -16
- data/lib/ddtrace/context.rb +10 -2
- data/lib/ddtrace/contrib/delayed_job/plugin.rb +2 -2
- data/lib/ddtrace/contrib/mongodb/instrumentation.rb +1 -1
- data/lib/ddtrace/contrib/mongodb/integration.rb +5 -0
- data/lib/ddtrace/contrib/rails/configuration/settings.rb +7 -0
- data/lib/ddtrace/contrib/rails/framework.rb +3 -2
- data/lib/ddtrace/contrib/redis/instrumentation.rb +90 -0
- data/lib/ddtrace/contrib/redis/patcher.rb +2 -84
- data/lib/ddtrace/contrib/resque/integration.rb +1 -5
- data/lib/ddtrace/ext/priority.rb +6 -4
- data/lib/ddtrace/ext/profiling.rb +1 -1
- data/lib/ddtrace/metrics.rb +2 -2
- data/lib/ddtrace/profiling/collectors/stack.rb +45 -45
- data/lib/ddtrace/profiling/encoding/profile.rb +1 -1
- data/lib/ddtrace/profiling/events/stack.rb +8 -8
- data/lib/ddtrace/profiling/native_extension.rb +23 -1
- data/lib/ddtrace/profiling/pprof/builder.rb +8 -2
- data/lib/ddtrace/profiling/pprof/stack_sample.rb +13 -16
- data/lib/ddtrace/profiling/pprof/template.rb +2 -2
- data/lib/ddtrace/profiling/tasks/setup.rb +21 -12
- data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb +9 -8
- data/lib/ddtrace/profiling/trace_identifiers/helper.rb +2 -2
- data/lib/ddtrace/profiling.rb +0 -2
- data/lib/ddtrace/sampler.rb +18 -8
- data/lib/ddtrace/sampling/rule_sampler.rb +13 -1
- data/lib/ddtrace/utils/time.rb +6 -0
- data/lib/ddtrace/version.rb +1 -1
- metadata +14 -9
- data/lib/ddtrace/profiling/ext/cpu.rb +0 -67
- data/lib/ddtrace/profiling/ext/cthread.rb +0 -156
data/lib/ddtrace/metrics.rb
CHANGED
@@ -31,7 +31,7 @@ module Datadog
         !version.nil? && version >= Gem::Version.new('3.3.0') &&
           # dogstatsd-ruby >= 5.0 & < 5.2.0 has known issues with process forks
           # and do not support the single thread mode we use to avoid this problem.
-          !(version >= Gem::Version.new('5.0') && version < Gem::Version.new('5.2'))
+          !(version >= Gem::Version.new('5.0') && version < Gem::Version.new('5.3'))
       end

       def enabled?
@@ -274,7 +274,7 @@ module Datadog
         IGNORED_STATSD_ONLY_ONCE.run do
           Datadog.logger.warn(
             'Ignoring user-supplied statsd instance as currently-installed version of dogstastd-ruby is incompatible. ' \
-            "To fix this, ensure that you have `gem 'dogstatsd-ruby', '~> 5.2'` on your Gemfile or gems.rb file."
+            "To fix this, ensure that you have `gem 'dogstatsd-ruby', '~> 5.3'` on your Gemfile or gems.rb file."
           )
         end
       end
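The gate above reads in isolation: dogstatsd-ruby must be at least 3.3.0, and the 5.0 to 5.3 range is rejected because those releases are fork-unsafe and lack the single-thread mode ddtrace relies on. A minimal standalone sketch of the same check (the method name is illustrative, not part of ddtrace):

    require 'rubygems'

    # Hypothetical standalone restatement of the version gate in the hunk above:
    # accept dogstatsd-ruby >= 3.3.0, but reject the known-bad [5.0, 5.3) range.
    def supported_dogstatsd_version?(version_string)
      version = Gem::Version.new(version_string)

      version >= Gem::Version.new('3.3.0') &&
        !(version >= Gem::Version.new('5.0') && version < Gem::Version.new('5.3'))
    end

    supported_dogstatsd_version?('4.8.0') # => true
    supported_dogstatsd_version?('5.2.0') # => false (fork-unsafe range)
    supported_dogstatsd_version?('5.3.0') # => true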
data/lib/ddtrace/profiling/collectors/stack.rb
CHANGED
@@ -1,4 +1,6 @@
 # typed: true
+
+require 'ddtrace/profiling/native_extension'
 require 'ddtrace/profiling/backtrace_location'
 require 'ddtrace/profiling/events/stack'
 require 'ddtrace/utils/only_once'
@@ -19,6 +21,7 @@ module Datadog
       MIN_INTERVAL = 0.01
       THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
       THREAD_LAST_WALL_CLOCK_KEY = :datadog_profiler_last_wall_clock
+      SYNTHETIC_STACK_IN_NATIVE_CODE = [BacktraceLocation.new('', 0, 'In native code').freeze].freeze

       # This default was picked based on the current sampling performance and on expected concurrency on an average
       # Ruby MRI application. Lowering this optimizes for latency (less impact each time we sample), and raising
@@ -31,7 +34,8 @@ module Datadog
         :trace_identifiers_helper,
         :ignore_thread,
         :max_time_usage_pct,
-        :thread_api
+        :thread_api,
+        :cpu_time_provider

       def initialize(
         recorder,
@@ -41,6 +45,7 @@ module Datadog
         max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
         max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED,
         thread_api: Thread,
+        cpu_time_provider: Datadog::Profiling::NativeExtension,
         fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
         interval: MIN_INTERVAL,
         enabled: true
@@ -52,6 +57,8 @@ module Datadog
         @max_time_usage_pct = max_time_usage_pct
         @max_threads_sampled = max_threads_sampled
         @thread_api = thread_api
+        # Only set the provider if it's able to work in the current Ruby/OS combo
+        @cpu_time_provider = cpu_time_provider unless cpu_time_provider.cpu_time_ns_for(thread_api.current).nil?

         # Workers::Async::Thread settings
         self.fork_policy = fork_policy
@@ -62,8 +69,6 @@ module Datadog
         # Workers::Polling settings
         self.enabled = enabled

-        @warn_about_missing_cpu_time_instrumentation_only_once = Datadog::Utils::OnlyOnce.new
-
         # Cache this proc, since it's pretty expensive to keep recreating it
         @build_backtrace_location = method(:build_backtrace_location).to_proc
         # Cache this buffer, since it's pretty expensive to keep accessing it
@@ -119,6 +124,26 @@ module Datadog
         locations = thread.backtrace_locations
         return if locations.nil?

+        # Having empty locations means that the thread is alive, but we don't know what it's doing:
+        #
+        # 1. It can be starting up
+        #    ```
+        #    > Thread.new { sleep }.backtrace
+        #    => [] # <-- note the thread hasn't actually started running sleep yet, we got there first
+        #    ```
+        # 2. It can be running native code
+        #    ```
+        #    > t = Process.detach(fork { sleep })
+        #    => #<Process::Waiter:0x00007ffe7285f7a0 run>
+        #    > t.backtrace
+        #    => [] # <-- this can happen even minutes later, e.g. it's not a race as in 1.
+        #    ```
+        #    This effect has been observed in threads created by the Iodine web server and the ffi gem
+        #
+        # To give customers visibility into these threads, we replace the empty stack with one containing a
+        # synthetic placeholder frame, so that these threads are properly represented in the UX.
+        locations = SYNTHETIC_STACK_IN_NATIVE_CODE if locations.empty?
+
         # Get actual stack size then trim the stack
         stack_size = locations.length
         locations = locations[0..(max_frames - 1)]
@@ -126,8 +151,8 @@ module Datadog
         # Convert backtrace locations into structs
         locations = convert_backtrace_locations(locations)

-        thread_id = thread.respond_to?(:pthread_thread_id) && thread.pthread_thread_id || thread.object_id
-        trace_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
+        thread_id = thread.object_id
+        root_span_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
         cpu_time = get_cpu_time_interval!(thread)
         wall_time_interval_ns =
           get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_WALL_CLOCK_KEY, current_wall_time_ns)
@@ -137,7 +162,7 @@ module Datadog
           locations,
           stack_size,
           thread_id,
-          trace_id,
+          root_span_id,
           span_id,
           trace_resource,
           cpu_time,
@@ -146,17 +171,10 @@ module Datadog
       end

       def get_cpu_time_interval!(thread)
-
-        unless thread.respond_to?(:cpu_time_instrumentation_installed?) && thread.cpu_time_instrumentation_installed?
-          warn_about_missing_cpu_time_instrumentation(thread)
-          return
-        end
+        return unless cpu_time_provider

-        current_cpu_time_ns = thread.cpu_time(:nanosecond)
+        current_cpu_time_ns = cpu_time_provider.cpu_time_ns_for(thread)

-        # NOTE: This can still be nil even when all of the checks above passed because of a race: there's a bit of
-        # initialization that needs to be done by the thread itself, and it's possible for us to try to sample
-        # *before* the thread had time to finish the initialization
         return unless current_cpu_time_ns

         get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
@@ -205,33 +223,6 @@ module Datadog

       private

-      def warn_about_missing_cpu_time_instrumentation(thread)
-        @warn_about_missing_cpu_time_instrumentation_only_once.run do
-          # Is the profiler thread instrumented? If it is, then we know instrumentation is available, but seems to be
-          # missing on this thread we just found.
-          #
-          # As far as we know, it can be missing due to one the following:
-          #
-          # a) The thread was started before we installed our instrumentation.
-          #    In this case, the fix is to make sure ddtrace gets loaded before any other parts of the application.
-          #
-          # b) The thread was started using the Ruby native APIs (e.g. from a C extension such as ffi).
-          #    Known cases right now that trigger this are the ethon/typhoeus gems.
-          #    We currently have no solution for this case; these threads will always be missing our CPU instrumentation.
-          #
-          # c) The thread was started with `Thread.start`/`Thread.fork` and hasn't yet enabled the instrumentation.
-          #    When threads are started using these APIs, there's a small time window during which the thread has started
-          #    but our code to apply the instrumentation hasn't run yet; in these cases it's just a matter of allowing
-          #    it to run and our instrumentation to be applied.
-          #
-          if thread_api.current.respond_to?(:cpu_time) && thread_api.current.cpu_time
-            Datadog.logger.debug(
-              "Thread ('#{thread}') is missing profiling instrumentation; other threads should be unaffected"
-            )
-          end
-        end
-      end
-
       # If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
       # clean up any leftover per-thread counters, so that the first sample after starting doesn't end up with:
       #
@@ -253,9 +244,18 @@ module Datadog
       end

       def get_elapsed_since_last_sample_and_set_value(thread, key, current_value)
-        #
-        #
-        #
+        # Process::Waiter crash workaround:
+        #
+        # This is a workaround for a Ruby VM segfault (usually something like
+        # "[BUG] Segmentation fault at 0x0000000000000008") in the affected Ruby versions.
+        # See https://bugs.ruby-lang.org/issues/17807 for details.
+        #
+        # In those Ruby versions, there's a very special subclass of `Thread` called `Process::Waiter` that causes VM
+        # crashes whenever something tries to read its instance or thread variables. This subclass of thread only
+        # shows up when the `Process.detach` API gets used.
+        # In the specs you'll find crash regression tests that include a way of reproducing it.
+        #
+        # As workaround for now we just skip it for the affected Rubies
         return 0 if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)

         last_value = thread.thread_variable_get(key) || current_value
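Both the wall-clock and CPU-time deltas above funnel through get_elapsed_since_last_sample_and_set_value, which stashes the previous reading in a thread variable and returns how much it grew since the last sample. A standalone sketch of that bookkeeping pattern, with illustrative names and without the Process::Waiter guard:

    # Store the previous reading in a thread variable; return the growth since
    # then. The first sample for a thread yields 0 by construction.
    LAST_READING_KEY = :example_last_reading # illustrative key, not ddtrace's

    def elapsed_since_last_sample(thread, current_value)
      last_value = thread.thread_variable_get(LAST_READING_KEY) || current_value
      thread.thread_variable_set(LAST_READING_KEY, current_value)
      current_value - last_value
    end

    thread = Thread.current
    elapsed_since_last_sample(thread, 100) # => 0 (first sample seeds the counter)
    elapsed_since_last_sample(thread, 250) # => 150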
data/lib/ddtrace/profiling/events/stack.rb
CHANGED
@@ -11,7 +11,7 @@ module Datadog
         :frames,
         :total_frame_count,
         :thread_id,
-        :trace_id,
+        :root_span_id,
         :span_id,
         :trace_resource

@@ -20,7 +20,7 @@ module Datadog
         frames,
         total_frame_count,
         thread_id,
-        trace_id,
+        root_span_id,
         span_id,
         trace_resource
       )
@@ -29,16 +29,16 @@ module Datadog
         @frames = frames
         @total_frame_count = total_frame_count
         @thread_id = thread_id
-        @trace_id = trace_id
+        @root_span_id = root_span_id
         @span_id = span_id
         @trace_resource = trace_resource

         @hash = [
           thread_id,
-          trace_id,
+          root_span_id,
           span_id,
-          # trace_resource is deliberately not included -- events that share the same (trace_id, span_id) refer
-          #
+          # trace_resource is deliberately not included -- events that share the same (root_span_id, span_id) refer
+          # to the same trace
           frames.collect(&:hash),
           total_frame_count
         ].hash
@@ -56,7 +56,7 @@ module Datadog
         frames,
         total_frame_count,
         thread_id,
-        trace_id,
+        root_span_id,
         span_id,
         trace_resource,
         cpu_time_interval_ns,
@@ -67,7 +67,7 @@ module Datadog
         frames,
         total_frame_count,
         thread_id,
-        trace_id,
+        root_span_id,
         span_id,
         trace_resource
       )
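The precomputed @hash above is what lets identical samples aggregate: two events with the same thread, trace identifiers, and frames produce the same key, while trace_resource is excluded on purpose. A small sketch of that identity rule, using a simplified stand-in rather than the ddtrace classes:

    Frame = Struct.new(:path, :line, :name) # stand-in for BacktraceLocation

    def sample_key(thread_id, root_span_id, span_id, frames, total_frame_count)
      [
        thread_id,
        root_span_id,
        span_id,
        # trace_resource deliberately left out, mirroring the comment above
        frames.collect(&:hash),
        total_frame_count
      ].hash
    end

    frames = [Frame.new('app.rb', 10, 'work')]
    a = sample_key(1, 42, 43, frames, 1)
    b = sample_key(1, 42, 43, [Frame.new('app.rb', 10, 'work')], 1)
    a == b # => true, so the two samples fold into one aggregation bucket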
data/lib/ddtrace/profiling/native_extension.rb
CHANGED
@@ -2,7 +2,8 @@
 module Datadog
   module Profiling
     # This module contains classes and methods which are implemented using native code in the
-    # ext/ddtrace_profiling_native_extension folder
+    # ext/ddtrace_profiling_native_extension folder, as well as some Ruby-level utilities that don't make sense to
+    # write using C
     module NativeExtension
       private_class_method def self.working?
         native_working?
@@ -13,6 +14,27 @@ module Datadog
           false
         end
       end
+
+      unless singleton_class.method_defined?(:clock_id_for)
+        def self.clock_id_for(_)
+          nil
+        end
+      end
+
+      def self.cpu_time_ns_for(thread)
+        clock_id =
+          begin
+            clock_id_for(thread)
+          rescue Errno::ESRCH
+            nil
+          end
+
+        begin
+          ::Process.clock_gettime(clock_id, :nanosecond) if clock_id
+        rescue Errno::EINVAL
+          nil
+        end
+      end
     end
   end
 end
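cpu_time_ns_for pairs a natively resolved per-thread clock id with the standard Process.clock_gettime call; Errno::ESRCH covers threads that have already died, and Errno::EINVAL covers clock ids the OS rejects. For the current thread only, stock Ruby exposes the same concept without any native helper, which makes the mechanism easy to observe:

    # CLOCK_THREAD_CPUTIME_ID is the current thread's CPU clock; it advances
    # only while this thread is on-CPU, unlike the wall clock. (Available on
    # Linux and macOS builds of Ruby; the native extension generalizes this
    # to *other* threads, per the clock_id_from_pthread.c file in this diff.)
    cpu_before  = Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :nanosecond)
    wall_before = Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond)

    1_000_000.times { Math.sqrt(rand) } # burn some CPU

    cpu_delta  = Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :nanosecond) - cpu_before
    wall_delta = Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond) - wall_before

    puts "cpu: #{cpu_delta}ns, wall: #{wall_delta}ns" # cpu_delta <= wall_delta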
data/lib/ddtrace/profiling/pprof/builder.rb
CHANGED
@@ -4,6 +4,7 @@
 require 'ddtrace/profiling/flush'
 require 'ddtrace/profiling/pprof/message_set'
 require 'ddtrace/profiling/pprof/string_table'
+require 'ddtrace/utils/time'

 module Datadog
   module Profiling
@@ -47,14 +48,19 @@ module Datadog
         Perftools::Profiles::Profile.encode(profile).force_encoding(DEFAULT_ENCODING)
       end

-      def build_profile
+      def build_profile(start:, finish:)
+        start_ns = Datadog::Utils::Time.as_utc_epoch_ns(start)
+        finish_ns = Datadog::Utils::Time.as_utc_epoch_ns(finish)
+
         Perftools::Profiles::Profile.new(
           sample_type: @sample_types.messages,
           sample: @samples,
           mapping: @mappings.messages,
           location: @locations.values,
           function: @functions.messages,
-          string_table: @string_table.strings
+          string_table: @string_table.strings,
+          time_nanos: start_ns,
+          duration_nanos: finish_ns - start_ns,
         )
       end
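time_nanos and duration_nanos are the pprof fields for when the profile started and how long it covers; build_profile now derives both from the start/finish of the flush window. The arithmetic, shown with the as_utc_epoch_ns helper this release adds to data/lib/ddtrace/utils/time.rb (see the last section of this diff):

    # Same conversion as the new Datadog::Utils::Time.as_utc_epoch_ns:
    # Rational math keeps full nanosecond precision where Float would round.
    def as_utc_epoch_ns(time)
      (time.to_r * 1_000_000_000).to_i
    end

    start  = Time.utc(2021, 9, 1, 12, 0, 0)
    finish = start + 60 # a one-minute profiling window

    time_nanos     = as_utc_epoch_ns(start)               # => 1630497600000000000
    duration_nanos = as_utc_epoch_ns(finish) - time_nanos # => 60000000000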
data/lib/ddtrace/profiling/pprof/stack_sample.rb
CHANGED
@@ -32,7 +32,7 @@ module Datadog

         @most_recent_trace_samples = {}
         @processed_unique_stacks = 0
-        @processed_with_trace_ids = 0
+        @processed_with_trace = 0
       end

       def add_events!(stack_samples)
@@ -48,18 +48,18 @@ module Datadog
         stack_sample.hash
       end

-      # Track the most recent sample for each trace
+      # Track the most recent sample for each trace (identified by root span id)
       def update_most_recent_trace_sample(stack_sample)
-        return unless stack_sample.trace_id && stack_sample.trace_resource
+        return unless stack_sample.root_span_id && stack_sample.trace_resource

         # Update trace resource with most recent value
-        if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.trace_id])
+        if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.root_span_id])
           if most_recent_trace_sample.timestamp < stack_sample.timestamp
-            @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
+            @most_recent_trace_samples[stack_sample.root_span_id] = stack_sample
           end
         else
           # Add trace resource
-          @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
+          @most_recent_trace_samples[stack_sample.root_span_id] = stack_sample
         end
       end

@@ -100,15 +100,15 @@ module Datadog
           )
         ]

-        trace_id = stack_sample.trace_id || 0
+        root_span_id = stack_sample.root_span_id || 0
         span_id = stack_sample.span_id || 0

-        if trace_id != 0 && span_id != 0
-          @processed_with_trace_ids += 1
+        if root_span_id != 0 && span_id != 0
+          @processed_with_trace += 1

           labels << Perftools::Profiles::Label.new(
-            key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::LABEL_KEY_TRACE_ID),
-            str: builder.string_table.fetch(trace_id.to_s)
+            key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::LABEL_KEY_LOCAL_ROOT_SPAN_ID),
+            str: builder.string_table.fetch(root_span_id.to_s)
           )

           labels << Perftools::Profiles::Label.new(
@@ -118,10 +118,7 @@ module Datadog

         # Use most up-to-date trace resource, if available.
         # Otherwise, use the trace resource provided.
-        trace_resource = (
-          @most_recent_trace_samples[stack_sample.trace_id] \
-          || stack_sample
-        ).trace_resource
+        trace_resource = @most_recent_trace_samples.fetch(stack_sample.root_span_id, stack_sample).trace_resource

         if trace_resource && !trace_resource.empty?
           labels << Perftools::Profiles::Label.new(
@@ -135,7 +132,7 @@ module Datadog
       end

       def debug_statistics
-        "unique stacks: #{@processed_unique_stacks}, of which had active traces: #{@processed_with_trace_ids}"
+        "unique stacks: #{@processed_unique_stacks}, of which had active traces: #{@processed_with_trace}"
       end
     end
   end
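update_most_recent_trace_sample implements a newest-sample-wins rule per root span, and the Hash#fetch rewrite above collapses the old conditional: use the tracked sample when one exists, otherwise fall back to the sample at hand. A compact sketch of both halves, with a simplified stand-in struct:

    Sample = Struct.new(:root_span_id, :timestamp, :trace_resource)

    # Keep only the newest sample seen for each root span id.
    def track(most_recent, sample)
      current = most_recent[sample.root_span_id]
      most_recent[sample.root_span_id] = sample if current.nil? || current.timestamp < sample.timestamp
    end

    most_recent = {}
    track(most_recent, Sample.new(42, 1.0, 'GET /old'))
    track(most_recent, Sample.new(42, 2.0, 'GET /new'))

    stale = Sample.new(42, 1.0, 'GET /old')
    # Equivalent of @most_recent_trace_samples.fetch(root_span_id, stack_sample):
    most_recent.fetch(stale.root_span_id, stale).trace_resource # => "GET /new"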
data/lib/ddtrace/profiling/pprof/template.rb
CHANGED
@@ -80,8 +80,8 @@ module Datadog
         converters.values.map(&:debug_statistics).join(', ')
       end

-      def to_pprof
-        profile = builder.build_profile
+      def to_pprof(start:, finish:)
+        profile = builder.build_profile(start: start, finish: finish)
         data = builder.encode_profile(profile)
         types = sample_type_mappings.keys

data/lib/ddtrace/profiling/tasks/setup.rb
CHANGED
@@ -1,21 +1,20 @@
 # typed: false
 require 'ddtrace/utils/only_once'
 require 'ddtrace/profiling'
-require 'ddtrace/profiling/ext/cpu'
 require 'ddtrace/profiling/ext/forking'

 module Datadog
   module Profiling
     module Tasks
-      # Takes care of loading our extensions/monkey patches to handle fork() and CPU profiling
+      # Takes care of loading our extensions/monkey patches to handle fork() and validating if CPU-time profiling is usable
       class Setup
         ACTIVATE_EXTENSIONS_ONLY_ONCE = Datadog::Utils::OnlyOnce.new

         def run
           ACTIVATE_EXTENSIONS_ONLY_ONCE.run do
             begin
+              check_if_cpu_time_profiling_is_supported
               activate_forking_extensions
-              activate_cpu_extensions
               setup_at_fork_hooks
             rescue StandardError, ScriptError => e
               Datadog.logger.warn do
@@ -39,19 +38,15 @@ module Datadog
           end
         end

-        def activate_cpu_extensions
-          if Datadog::Profiling::Ext::CPU.supported?
-            Datadog::Profiling::Ext::CPU.apply!
-          else
+        def check_if_cpu_time_profiling_is_supported
+          unsupported = cpu_time_profiling_unsupported_reason
+
+          if unsupported
             Datadog.logger.info do
               'CPU time profiling skipped because native CPU time is not supported: ' \
-              "#{Datadog::Profiling::Ext::CPU.unsupported_reason}. Profiles containing 'Wall time' data will still be reported."
+              "#{unsupported}. Profiles containing 'Wall time' data will still be reported."
             end
           end
-        rescue StandardError, ScriptError => e
-          Datadog.logger.warn do
-            "Profiler CPU profiling extensions unavailable. Cause: #{e.message} Location: #{Array(e.backtrace).first}"
-          end
         end

         def setup_at_fork_hooks
@@ -75,6 +70,20 @@ module Datadog
             end
           end
         end
+
+        def cpu_time_profiling_unsupported_reason
+          # NOTE: Only the first matching reason is returned, so try to keep a nice order on reasons
+
+          if RUBY_ENGINE == 'jruby'
+            'JRuby is not supported'
+          elsif RUBY_PLATFORM.include?('darwin')
+            'Feature requires Linux; macOS is not supported'
+          elsif RUBY_PLATFORM =~ /(mswin|mingw)/
+            'Feature requires Linux; Windows is not supported'
+          elsif !RUBY_PLATFORM.include?('linux')
+            "Feature requires Linux; #{RUBY_PLATFORM} is not supported"
+          end
+        end
       end
     end
   end
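The new helper encodes a nil-means-supported contract: a caller logs only when a reason string comes back, and only the first matching reason is reported. A condensed sketch of consuming that contract (the platform branches collapsed to two for brevity):

    # Returns nil when CPU-time profiling can work, otherwise the first
    # human-readable reason that applies (condensed from the hunk above).
    def cpu_time_profiling_unsupported_reason
      if RUBY_ENGINE == 'jruby'
        'JRuby is not supported'
      elsif !RUBY_PLATFORM.include?('linux')
        "Feature requires Linux; #{RUBY_PLATFORM} is not supported"
      end
    end

    reason = cpu_time_profiling_unsupported_reason
    puts "CPU time profiling skipped: #{reason}" if reason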
data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb
CHANGED
@@ -6,10 +6,10 @@ require 'ddtrace/ext/http'
 module Datadog
   module Profiling
     module TraceIdentifiers
-      # Used by Datadog::Profiling::TraceIdentifiers::Helper to get the trace identifiers (trace id and span id) for a
-      # given thread, if there is an active trace for that thread in the supplied tracer object.
+      # Used by Datadog::Profiling::TraceIdentifiers::Helper to get the trace identifiers (root span id and span id)
+      # for a given thread, if there is an active trace for that thread in the supplied tracer object.
       class Ddtrace
-        def initialize(tracer: nil)
+        def initialize(tracer:)
           @tracer = (tracer if tracer.respond_to?(:call_context))
         end

@@ -19,10 +19,13 @@ module Datadog
         context = @tracer.call_context(thread)
         return unless context

-        trace_id = context.trace_id || 0
-        span_id = context.span_id || 0
+        span, root_span = context.current_span_and_root_span
+        return unless span && root_span

-        [trace_id, span_id, maybe_extract_resource(context.current_root_span)] if trace_id != 0 && span_id != 0
+        root_span_id = root_span.span_id || 0
+        span_id = span.span_id || 0
+
+        [root_span_id, span_id, maybe_extract_resource(root_span)] if root_span_id != 0 && span_id != 0
       end

       private
@@ -31,8 +34,6 @@ module Datadog
       # Resources MUST NOT include personal identifiable information (PII); this should not be the case with
       # ddtrace integrations, but worth mentioning just in case :)
       def maybe_extract_resource(root_span)
-        return unless root_span
-
         root_span.resource if root_span.span_type == Datadog::Ext::HTTP::TYPE_INBOUND
       end
     end
data/lib/ddtrace/profiling/trace_identifiers/helper.rb
CHANGED
@@ -6,7 +6,7 @@ require 'ddtrace/profiling/trace_identifiers/ddtrace'
 module Datadog
   module Profiling
     module TraceIdentifiers
-      # Helper used to retrieve the trace identifiers (trace id and span id) for a given thread,
+      # Helper used to retrieve the trace identifiers (root span id and span id) for a given thread,
       # if there is an active trace for that thread for the supported tracing APIs.
       #
       # This data is used to connect profiles to the traces -- samples in a profile will be tagged with this data and
@@ -28,7 +28,7 @@ module Datadog
       end

       # Expected output of the #trace_identifiers_for
-      # duck type is [trace_id, span_id, (optional trace_resource_container)]
+      # duck type is [root_span_id, span_id, (optional trace_resource_container)]
       def trace_identifiers_for(thread)
         @supported_apis.each do |api|
           trace_identifiers = api.trace_identifiers_for(thread)
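Since the helper depends only on the duck type documented above, any object responding to trace_identifiers_for can back it. A hypothetical provider (not part of ddtrace) that satisfies the contract:

    # Always reports the same identifiers; a real provider would look up the
    # thread's active trace instead. Class name and behavior are invented.
    class StaticTraceIdentifiers
      def initialize(root_span_id:, span_id:, resource: nil)
        @identifiers = [root_span_id, span_id, resource]
      end

      def trace_identifiers_for(_thread)
        @identifiers # or nil when the thread has no active trace
      end
    end

    provider = StaticTraceIdentifiers.new(root_span_id: 42, span_id: 43, resource: 'GET /home')
    provider.trace_identifiers_for(Thread.current) # => [42, 43, "GET /home"]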
data/lib/ddtrace/profiling.rb
CHANGED
@@ -128,9 +128,7 @@ module Datadog
   private_class_method def self.load_profiling
     return false unless supported?

-    require 'ddtrace/profiling/ext/cpu'
     require 'ddtrace/profiling/ext/forking'
-
     require 'ddtrace/profiling/collectors/stack'
     require 'ddtrace/profiling/exporter'
     require 'ddtrace/profiling/recorder'
data/lib/ddtrace/sampler.rb
CHANGED
@@ -194,6 +194,12 @@ module Datadog
     class PrioritySampler
       extend Forwardable

+      # NOTE: We do not advise using a pre-sampler. It can save resources,
+      # but pre-sampling at rates < 100% may result in partial traces, unless
+      # the pre-sampler knows exactly how to drop a span without dropping its ancestors.
+      #
+      # Additionally, as service metrics are calculated in the Datadog Agent,
+      # the service's throughput will be underestimated.
       attr_reader :pre_sampler, :priority_sampler

       SAMPLE_RATE_METRIC_KEY = '_sample_rate'.freeze
@@ -209,17 +215,21 @@ module Datadog

       def sample!(span)
         # If pre-sampling is configured, do it first. (By default, this will sample at 100%.)
-        # NOTE: Pre-sampling at rates < 100% may result in partial traces; not recommended.
         span.sampled = pre_sample?(span) ? @pre_sampler.sample!(span) : true

         if span.sampled
-          # If priority sampling has already been applied upstream, use that, otherwise...
-          unless priority_assigned_upstream?(span)
-            # Roll the dice and determine whether how we set the priority.
-            priority = priority_sample!(span) ? Datadog::Ext::Priority::AUTO_KEEP : Datadog::Ext::Priority::AUTO_REJECT
+          # If priority sampling has already been applied upstream, use that value.
+          return true if priority_assigned?(span)

-            assign_priority!(span, priority)
-          end
+          # Check with post sampler how we set the priority.
+          sample = priority_sample!(span)
+
+          # Check if post sampler has already assigned a priority.
+          return true if priority_assigned?(span)
+
+          # If not, use agent priority values.
+          priority = sample ? Datadog::Ext::Priority::AUTO_KEEP : Datadog::Ext::Priority::AUTO_REJECT
+          assign_priority!(span, priority)
         else
           # If discarded by pre-sampling, set "reject" priority, so other
           # services for the same trace don't sample needlessly.
@@ -244,7 +254,7 @@ module Datadog
         end
       end

-      def priority_assigned_upstream?(span)
+      def priority_assigned?(span)
         span.context && !span.context.sampling_priority.nil?
       end

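The reworked #sample! resolves priority in a fixed order: a priority already assigned upstream wins, then a priority the post sampler assigned itself while sampling, and only then is the boolean decision mapped to the agent values. A condensed restatement of just that decision order (names simplified; the constants stand in for Datadog::Ext::Priority):

    AUTO_KEEP   = 1 # stand-ins for Datadog::Ext::Priority values
    AUTO_REJECT = 0

    def decide_priority(upstream_priority:, post_sampler_decision:, post_sampler_priority: nil)
      # 1. A priority applied upstream always wins.
      return upstream_priority if upstream_priority

      # 2. The post sampler may have assigned a priority itself while sampling
      #    (as the RuleSampler now does; see the next section of this diff).
      return post_sampler_priority if post_sampler_priority

      # 3. Otherwise map the post sampler's boolean decision to agent values.
      post_sampler_decision ? AUTO_KEEP : AUTO_REJECT
    end

    decide_priority(upstream_priority: 2, post_sampler_decision: false)  # => 2
    decide_priority(upstream_priority: nil, post_sampler_decision: true) # => 1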
data/lib/ddtrace/sampling/rule_sampler.rb
CHANGED
@@ -97,11 +97,13 @@ module Datadog
       sampled = rule.sample?(span)
       sample_rate = rule.sample_rate(span)

+      set_priority(span, sampled)
       set_rule_metrics(span, sample_rate)

       return false unless sampled

-      rate_limiter.allow?(1).tap do
+      rate_limiter.allow?(1).tap do |allowed|
+        set_priority(span, allowed)
         set_limiter_metrics(span, rate_limiter.effective_rate)
       end
     rescue StandardError => e
@@ -109,6 +111,16 @@ module Datadog
       yield(span)
     end

+    # Span priority should only be set when the {RuleSampler}
+    # was responsible for the sampling decision.
+    def set_priority(span, sampled)
+      if sampled
+        ForcedTracing.keep(span)
+      else
+        ForcedTracing.drop(span)
+      end
+    end
+
     def set_rule_metrics(span, sample_rate)
       span.set_metric(Ext::Sampling::RULE_SAMPLE_RATE, sample_rate)
     end
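The |allowed| block parameter is the substantive part of the tap change: Object#tap yields its receiver to the block and then returns it, so without naming the parameter the limiter's verdict was not available for the new set_priority call. A minimal illustration of the mechanism:

    verdict = true # stand-in for rate_limiter.allow?(1)

    result = verdict.tap do |allowed|
      # `allowed` carries the receiver (the limiter's verdict) into the block
      puts "limiter allowed? #{allowed}"
    end

    result # => true; #tap always returns its receiver unchanged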
data/lib/ddtrace/utils/time.rb
CHANGED
@@ -40,6 +40,12 @@ module Datadog
       after = get_time
       after - before
     end
+
+    def as_utc_epoch_ns(time)
+      # we use #to_r instead of #to_f because Float doesn't have enough precision to represent exact nanoseconds, see
+      # https://rubyapi.org/3.0/o/time#method-i-to_f
+      (time.to_r * 1_000_000_000).to_i
+    end
   end
 end
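The #to_r comment deserves spelling out: a Float mantissa holds 53 bits, while a current-epoch timestamp in nanoseconds needs about 61, so a #to_f round trip silently rounds away the low digits. Rational arithmetic keeps them exactly:

    # A nanosecond-precise instant: 1630497600.123456789 seconds since epoch.
    time = Time.at(1_630_497_600, 123_456_789, :nsec)

    exact   = (time.to_r * 1_000_000_000).to_i # => 1630497600123456789
    rounded = (time.to_f * 1_000_000_000).to_i # => a nearby value; the low
                                               #    digits get rounded away

    exact == rounded # => false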