ddtrace 0.53.0 → 0.54.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +89 -11
- data/ddtrace.gemspec +5 -2
- data/docs/GettingStarted.md +40 -3
- data/docs/ProfilingDevelopment.md +2 -2
- data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +86 -0
- data/ext/ddtrace_profiling_native_extension/clock_id.h +4 -0
- data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +52 -0
- data/ext/ddtrace_profiling_native_extension/clock_id_noop.c +14 -0
- data/ext/ddtrace_profiling_native_extension/extconf.rb +144 -6
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +35 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +3 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +6 -1
- data/lib/datadog/ci/contrib/cucumber/formatter.rb +1 -0
- data/lib/datadog/ci/contrib/rspec/example.rb +1 -0
- data/lib/datadog/ci/ext/environment.rb +26 -21
- data/lib/datadog/ci/ext/test.rb +1 -0
- data/lib/datadog/ci/test.rb +5 -1
- data/lib/ddtrace/buffer.rb +28 -16
- data/lib/ddtrace/configuration/agent_settings_resolver.rb +27 -16
- data/lib/ddtrace/context.rb +10 -2
- data/lib/ddtrace/contrib/delayed_job/plugin.rb +2 -2
- data/lib/ddtrace/contrib/mongodb/instrumentation.rb +1 -1
- data/lib/ddtrace/contrib/mongodb/integration.rb +5 -0
- data/lib/ddtrace/contrib/rails/configuration/settings.rb +7 -0
- data/lib/ddtrace/contrib/rails/framework.rb +3 -2
- data/lib/ddtrace/contrib/redis/instrumentation.rb +90 -0
- data/lib/ddtrace/contrib/redis/patcher.rb +2 -84
- data/lib/ddtrace/contrib/resque/integration.rb +1 -5
- data/lib/ddtrace/ext/priority.rb +6 -4
- data/lib/ddtrace/ext/profiling.rb +1 -1
- data/lib/ddtrace/metrics.rb +2 -2
- data/lib/ddtrace/profiling/collectors/stack.rb +45 -45
- data/lib/ddtrace/profiling/encoding/profile.rb +1 -1
- data/lib/ddtrace/profiling/events/stack.rb +8 -8
- data/lib/ddtrace/profiling/native_extension.rb +23 -1
- data/lib/ddtrace/profiling/pprof/builder.rb +8 -2
- data/lib/ddtrace/profiling/pprof/stack_sample.rb +13 -16
- data/lib/ddtrace/profiling/pprof/template.rb +2 -2
- data/lib/ddtrace/profiling/tasks/setup.rb +21 -12
- data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb +9 -8
- data/lib/ddtrace/profiling/trace_identifiers/helper.rb +2 -2
- data/lib/ddtrace/profiling.rb +0 -2
- data/lib/ddtrace/sampler.rb +18 -8
- data/lib/ddtrace/sampling/rule_sampler.rb +13 -1
- data/lib/ddtrace/utils/time.rb +6 -0
- data/lib/ddtrace/version.rb +2 -2
- metadata +14 -9
- data/lib/ddtrace/profiling/ext/cpu.rb +0 -67
- data/lib/ddtrace/profiling/ext/cthread.rb +0 -156
data/lib/ddtrace/ext/priority.rb
CHANGED
@@ -4,13 +4,15 @@ module Datadog
 # Priority is a hint given to the backend so that it knows which traces to reject or kept.
 # In a distributed context, it should be set before any context propagation (fork, RPC calls) to be effective.
 module Priority
-# Use this to
+# Use this to explicitly inform the backend that a trace MUST be rejected and not stored.
+# This includes rules and rate limits configured by the user through the {RuleSampler}.
 USER_REJECT = -1
-# Used by the
+# Used by the {PrioritySampler} to inform the backend that a trace should be rejected and not stored.
 AUTO_REJECT = 0
-# Used by the
+# Used by the {PrioritySampler} to inform the backend that a trace should be kept and stored.
 AUTO_KEEP = 1
-# Use this to
+# Use this to explicitly inform the backend that a trace MUST be kept and stored.
+# This includes rules and rate limits configured by the user through the {RuleSampler}.
 USER_KEEP = 2
 end
 end
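For context on how these priority constants are typically applied from application code (an illustrative sketch, not part of this diff; it assumes the ddtrace 0.x API where the active context is reachable through span.context):

require 'ddtrace'

Datadog.tracer.trace('checkout.process') do |span|
  # Force this trace to be kept, overriding the automatic sampling decision.
  span.context.sampling_priority = Datadog::Ext::Priority::USER_KEEP
end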
data/lib/ddtrace/ext/profiling.rb
CHANGED
@@ -9,9 +9,9 @@ module Datadog
 ENV_ENDPOINT_COLLECTION_ENABLED = 'DD_PROFILING_ENDPOINT_COLLECTION_ENABLED'.freeze

 module Pprof
+LABEL_KEY_LOCAL_ROOT_SPAN_ID = 'local root span id'.freeze
 LABEL_KEY_SPAN_ID = 'span id'.freeze
 LABEL_KEY_THREAD_ID = 'thread id'.freeze
-LABEL_KEY_TRACE_ID = 'trace id'.freeze
 LABEL_KEY_TRACE_ENDPOINT = 'trace endpoint'.freeze
 SAMPLE_VALUE_NO_VALUE = 0
 VALUE_TYPE_CPU = 'cpu-time'.freeze
data/lib/ddtrace/metrics.rb
CHANGED
@@ -31,7 +31,7 @@ module Datadog
 !version.nil? && version >= Gem::Version.new('3.3.0') &&
 # dogstatsd-ruby >= 5.0 & < 5.2.0 has known issues with process forks
 # and do not support the single thread mode we use to avoid this problem.
-!(version >= Gem::Version.new('5.0') && version < Gem::Version.new('5.
+!(version >= Gem::Version.new('5.0') && version < Gem::Version.new('5.3'))
 end

 def enabled?
@@ -274,7 +274,7 @@ module Datadog
 IGNORED_STATSD_ONLY_ONCE.run do
 Datadog.logger.warn(
 'Ignoring user-supplied statsd instance as currently-installed version of dogstastd-ruby is incompatible. ' \
-"To fix this, ensure that you have `gem 'dogstatsd-ruby', '~> 5.
+"To fix this, ensure that you have `gem 'dogstatsd-ruby', '~> 5.3'` on your Gemfile or gems.rb file."
 )
 end
 end
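The warning above points users at a Gemfile constraint; for reference, the declaration it asks for (taken directly from the message, as it would appear in a Gemfile or gems.rb) is:

gem 'dogstatsd-ruby', '~> 5.3'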
data/lib/ddtrace/profiling/collectors/stack.rb
CHANGED
@@ -1,4 +1,6 @@
 # typed: true
+
+require 'ddtrace/profiling/native_extension'
 require 'ddtrace/profiling/backtrace_location'
 require 'ddtrace/profiling/events/stack'
 require 'ddtrace/utils/only_once'
@@ -19,6 +21,7 @@ module Datadog
 MIN_INTERVAL = 0.01
 THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
 THREAD_LAST_WALL_CLOCK_KEY = :datadog_profiler_last_wall_clock
+SYNTHETIC_STACK_IN_NATIVE_CODE = [BacktraceLocation.new('', 0, 'In native code').freeze].freeze

 # This default was picked based on the current sampling performance and on expected concurrency on an average
 # Ruby MRI application. Lowering this optimizes for latency (less impact each time we sample), and raising
@@ -31,7 +34,8 @@ module Datadog
 :trace_identifiers_helper,
 :ignore_thread,
 :max_time_usage_pct,
-:thread_api
+:thread_api,
+:cpu_time_provider

 def initialize(
 recorder,
@@ -41,6 +45,7 @@ module Datadog
 max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
 max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED,
 thread_api: Thread,
+cpu_time_provider: Datadog::Profiling::NativeExtension,
 fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
 interval: MIN_INTERVAL,
 enabled: true
@@ -52,6 +57,8 @@ module Datadog
 @max_time_usage_pct = max_time_usage_pct
 @max_threads_sampled = max_threads_sampled
 @thread_api = thread_api
+# Only set the provider if it's able to work in the current Ruby/OS combo
+@cpu_time_provider = cpu_time_provider unless cpu_time_provider.cpu_time_ns_for(thread_api.current).nil?

 # Workers::Async::Thread settings
 self.fork_policy = fork_policy
@@ -62,8 +69,6 @@ module Datadog
 # Workers::Polling settings
 self.enabled = enabled

-@warn_about_missing_cpu_time_instrumentation_only_once = Datadog::Utils::OnlyOnce.new
-
 # Cache this proc, since it's pretty expensive to keep recreating it
 @build_backtrace_location = method(:build_backtrace_location).to_proc
 # Cache this buffer, since it's pretty expensive to keep accessing it
@@ -119,6 +124,26 @@ module Datadog
 locations = thread.backtrace_locations
 return if locations.nil?

+# Having empty locations means that the thread is alive, but we don't know what it's doing:
+#
+# 1. It can be starting up
+# ```
+# > Thread.new { sleep }.backtrace
+# => [] # <-- note the thread hasn't actually started running sleep yet, we got there first
+# ```
+# 2. It can be running native code
+# ```
+# > t = Process.detach(fork { sleep })
+# => #<Process::Waiter:0x00007ffe7285f7a0 run>
+# > t.backtrace
+# => [] # <-- this can happen even minutes later, e.g. it's not a race as in 1.
+# ```
+# This effect has been observed in threads created by the Iodine web server and the ffi gem
+#
+# To give customers visibility into these threads, we replace the empty stack with one containing a
+# synthetic placeholder frame, so that these threads are properly represented in the UX.
+locations = SYNTHETIC_STACK_IN_NATIVE_CODE if locations.empty?
+
 # Get actual stack size then trim the stack
 stack_size = locations.length
 locations = locations[0..(max_frames - 1)]
@@ -126,8 +151,8 @@ module Datadog
 # Convert backtrace locations into structs
 locations = convert_backtrace_locations(locations)

-thread_id = thread.
-
+thread_id = thread.object_id
+root_span_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
 cpu_time = get_cpu_time_interval!(thread)
 wall_time_interval_ns =
 get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_WALL_CLOCK_KEY, current_wall_time_ns)
@@ -137,7 +162,7 @@ module Datadog
 locations,
 stack_size,
 thread_id,
-
+root_span_id,
 span_id,
 trace_resource,
 cpu_time,
@@ -146,17 +171,10 @@ module Datadog
 end

 def get_cpu_time_interval!(thread)
-
-unless thread.respond_to?(:cpu_time_instrumentation_installed?) && thread.cpu_time_instrumentation_installed?
-warn_about_missing_cpu_time_instrumentation(thread)
-return
-end
+return unless cpu_time_provider

-current_cpu_time_ns =
+current_cpu_time_ns = cpu_time_provider.cpu_time_ns_for(thread)

-# NOTE: This can still be nil even when all of the checks above passed because of a race: there's a bit of
-# initialization that needs to be done by the thread itself, and it's possible for us to try to sample
-# *before* the thread had time to finish the initialization
 return unless current_cpu_time_ns

 get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
@@ -205,33 +223,6 @@ module Datadog

 private

-def warn_about_missing_cpu_time_instrumentation(thread)
-@warn_about_missing_cpu_time_instrumentation_only_once.run do
-# Is the profiler thread instrumented? If it is, then we know instrumentation is available, but seems to be
-# missing on this thread we just found.
-#
-# As far as we know, it can be missing due to one the following:
-#
-# a) The thread was started before we installed our instrumentation.
-# In this case, the fix is to make sure ddtrace gets loaded before any other parts of the application.
-#
-# b) The thread was started using the Ruby native APIs (e.g. from a C extension such as ffi).
-# Known cases right now that trigger this are the ethon/typhoeus gems.
-# We currently have no solution for this case; these threads will always be missing our CPU instrumentation.
-#
-# c) The thread was started with `Thread.start`/`Thread.fork` and hasn't yet enabled the instrumentation.
-# When threads are started using these APIs, there's a small time window during which the thread has started
-# but our code to apply the instrumentation hasn't run yet; in these cases it's just a matter of allowing
-# it to run and our instrumentation to be applied.
-#
-if thread_api.current.respond_to?(:cpu_time) && thread_api.current.cpu_time
-Datadog.logger.debug(
-"Thread ('#{thread}') is missing profiling instrumentation; other threads should be unaffected"
-)
-end
-end
-end
-
 # If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
 # clean up any leftover per-thread counters, so that the first sample after starting doesn't end up with:
 #
@@ -253,9 +244,18 @@ module Datadog
 end

 def get_elapsed_since_last_sample_and_set_value(thread, key, current_value)
-#
-#
-#
+# Process::Waiter crash workaround:
+#
+# This is a workaround for a Ruby VM segfault (usually something like
+# "[BUG] Segmentation fault at 0x0000000000000008") in the affected Ruby versions.
+# See https://bugs.ruby-lang.org/issues/17807 for details.
+#
+# In those Ruby versions, there's a very special subclass of `Thread` called `Process::Waiter` that causes VM
+# crashes whenever something tries to read its instance or thread variables. This subclass of thread only
+# shows up when the `Process.detach` API gets used.
+# In the specs you'll find crash regression tests that include a way of reproducing it.
+#
+# As workaround for now we just skip it for the affected Rubies
 return 0 if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)

 last_value = thread.thread_variable_get(key) || current_value
data/lib/ddtrace/profiling/events/stack.rb
CHANGED
@@ -11,7 +11,7 @@ module Datadog
 :frames,
 :total_frame_count,
 :thread_id,
-:
+:root_span_id,
 :span_id,
 :trace_resource

@@ -20,7 +20,7 @@ module Datadog
 frames,
 total_frame_count,
 thread_id,
-
+root_span_id,
 span_id,
 trace_resource
 )
@@ -29,16 +29,16 @@ module Datadog
 @frames = frames
 @total_frame_count = total_frame_count
 @thread_id = thread_id
-@
+@root_span_id = root_span_id
 @span_id = span_id
 @trace_resource = trace_resource

 @hash = [
 thread_id,
-
+root_span_id,
 span_id,
-# trace_resource is deliberately not included -- events that share the same (
-#
+# trace_resource is deliberately not included -- events that share the same (root_span_id, span_id) refer
+# to the same trace
 frames.collect(&:hash),
 total_frame_count
 ].hash
@@ -56,7 +56,7 @@ module Datadog
 frames,
 total_frame_count,
 thread_id,
-
+root_span_id,
 span_id,
 trace_resource,
 cpu_time_interval_ns,
@@ -67,7 +67,7 @@ module Datadog
 frames,
 total_frame_count,
 thread_id,
-
+root_span_id,
 span_id,
 trace_resource
 )
data/lib/ddtrace/profiling/native_extension.rb
CHANGED
@@ -2,7 +2,8 @@
 module Datadog
 module Profiling
 # This module contains classes and methods which are implemented using native code in the
-# ext/ddtrace_profiling_native_extension folder
+# ext/ddtrace_profiling_native_extension folder, as well as some Ruby-level utilities that don't make sense to
+# write using C
 module NativeExtension
 private_class_method def self.working?
 native_working?
@@ -13,6 +14,27 @@ module Datadog
 false
 end
 end
+
+unless singleton_class.method_defined?(:clock_id_for)
+def self.clock_id_for(_)
+nil
+end
+end
+
+def self.cpu_time_ns_for(thread)
+clock_id =
+begin
+clock_id_for(thread)
+rescue Errno::ESRCH
+nil
+end
+
+begin
+::Process.clock_gettime(clock_id, :nanosecond) if clock_id
+rescue Errno::EINVAL
+nil
+end
+end
 end
 end
 end
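As a rough illustration of the mechanism cpu_time_ns_for builds on (a sketch for readers, not part of the gem): Ruby already exposes a CPU-time clock for the calling thread, and the native extension's clock_id_for exists to obtain an equivalent clock id for other threads (presumably via pthread_getcpuclockid, per the clock_id_from_pthread.c file added in this release):

# CPU time consumed by the *current* thread, in nanoseconds (clock available on Linux and macOS).
cpu_ns = Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :nanosecond)
puts "This thread has used #{cpu_ns} ns of CPU time"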
data/lib/ddtrace/profiling/pprof/builder.rb
CHANGED
@@ -4,6 +4,7 @@
 require 'ddtrace/profiling/flush'
 require 'ddtrace/profiling/pprof/message_set'
 require 'ddtrace/profiling/pprof/string_table'
+require 'ddtrace/utils/time'

 module Datadog
 module Profiling
@@ -47,14 +48,19 @@ module Datadog
 Perftools::Profiles::Profile.encode(profile).force_encoding(DEFAULT_ENCODING)
 end

-def build_profile
+def build_profile(start:, finish:)
+start_ns = Datadog::Utils::Time.as_utc_epoch_ns(start)
+finish_ns = Datadog::Utils::Time.as_utc_epoch_ns(finish)
+
 Perftools::Profiles::Profile.new(
 sample_type: @sample_types.messages,
 sample: @samples,
 mapping: @mappings.messages,
 location: @locations.values,
 function: @functions.messages,
-string_table: @string_table.strings
+string_table: @string_table.strings,
+time_nanos: start_ns,
+duration_nanos: finish_ns - start_ns,
 )
 end

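The new time_nanos/duration_nanos fields rely on Datadog::Utils::Time.as_utc_epoch_ns, added in data/lib/ddtrace/utils/time.rb (not shown in this diff). A plausible sketch of such a helper, assuming it simply scales a Time to integer nanoseconds since the Unix epoch:

def as_utc_epoch_ns(time)
  # Rational arithmetic avoids Float precision loss before truncating to integer nanoseconds.
  (time.to_r * 1_000_000_000).to_i
end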
data/lib/ddtrace/profiling/pprof/stack_sample.rb
CHANGED
@@ -32,7 +32,7 @@ module Datadog

 @most_recent_trace_samples = {}
 @processed_unique_stacks = 0
-@
+@processed_with_trace = 0
 end

 def add_events!(stack_samples)
@@ -48,18 +48,18 @@ module Datadog
 stack_sample.hash
 end

-# Track the most recent sample for each trace
+# Track the most recent sample for each trace (identified by root span id)
 def update_most_recent_trace_sample(stack_sample)
-return unless stack_sample.
+return unless stack_sample.root_span_id && stack_sample.trace_resource

 # Update trace resource with most recent value
-if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.
+if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.root_span_id])
 if most_recent_trace_sample.timestamp < stack_sample.timestamp
-@most_recent_trace_samples[stack_sample.
+@most_recent_trace_samples[stack_sample.root_span_id] = stack_sample
 end
 else
 # Add trace resource
-@most_recent_trace_samples[stack_sample.
+@most_recent_trace_samples[stack_sample.root_span_id] = stack_sample
 end
 end

@@ -100,15 +100,15 @@ module Datadog
 )
 ]

-
+root_span_id = stack_sample.root_span_id || 0
 span_id = stack_sample.span_id || 0

-if
-@
+if root_span_id != 0 && span_id != 0
+@processed_with_trace += 1

 labels << Perftools::Profiles::Label.new(
-key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::
-str: builder.string_table.fetch(
+key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::LABEL_KEY_LOCAL_ROOT_SPAN_ID),
+str: builder.string_table.fetch(root_span_id.to_s)
 )

 labels << Perftools::Profiles::Label.new(
@@ -118,10 +118,7 @@ module Datadog

 # Use most up-to-date trace resource, if available.
 # Otherwise, use the trace resource provided.
-trace_resource = (
-@most_recent_trace_samples[stack_sample.trace_id] \
-|| stack_sample
-).trace_resource
+trace_resource = @most_recent_trace_samples.fetch(stack_sample.root_span_id, stack_sample).trace_resource

 if trace_resource && !trace_resource.empty?
 labels << Perftools::Profiles::Label.new(
@@ -135,7 +132,7 @@ module Datadog
 end

 def debug_statistics
-"unique stacks: #{@processed_unique_stacks}, of which had active traces: #{@
+"unique stacks: #{@processed_unique_stacks}, of which had active traces: #{@processed_with_trace}"
 end
 end
 end
data/lib/ddtrace/profiling/pprof/template.rb
CHANGED
@@ -80,8 +80,8 @@ module Datadog
 converters.values.map(&:debug_statistics).join(', ')
 end

-def to_pprof
-profile = builder.build_profile
+def to_pprof(start:, finish:)
+profile = builder.build_profile(start: start, finish: finish)
 data = builder.encode_profile(profile)
 types = sample_type_mappings.keys

data/lib/ddtrace/profiling/tasks/setup.rb
CHANGED
@@ -1,21 +1,20 @@
 # typed: false
 require 'ddtrace/utils/only_once'
 require 'ddtrace/profiling'
-require 'ddtrace/profiling/ext/cpu'
 require 'ddtrace/profiling/ext/forking'

 module Datadog
 module Profiling
 module Tasks
-# Takes care of loading our extensions/monkey patches to handle fork() and CPU profiling
+# Takes care of loading our extensions/monkey patches to handle fork() and validating if CPU-time profiling is usable
 class Setup
 ACTIVATE_EXTENSIONS_ONLY_ONCE = Datadog::Utils::OnlyOnce.new

 def run
 ACTIVATE_EXTENSIONS_ONLY_ONCE.run do
 begin
+check_if_cpu_time_profiling_is_supported
 activate_forking_extensions
-activate_cpu_extensions
 setup_at_fork_hooks
 rescue StandardError, ScriptError => e
 Datadog.logger.warn do
@@ -39,19 +38,15 @@ module Datadog
 end
 end

-def
-
-
-
+def check_if_cpu_time_profiling_is_supported
+unsupported = cpu_time_profiling_unsupported_reason
+
+if unsupported
 Datadog.logger.info do
 'CPU time profiling skipped because native CPU time is not supported: ' \
-"#{
+"#{unsupported}. Profiles containing 'Wall time' data will still be reported."
 end
 end
-rescue StandardError, ScriptError => e
-Datadog.logger.warn do
-"Profiler CPU profiling extensions unavailable. Cause: #{e.message} Location: #{Array(e.backtrace).first}"
-end
 end

 def setup_at_fork_hooks
@@ -75,6 +70,20 @@ module Datadog
 end
 end
 end
+
+def cpu_time_profiling_unsupported_reason
+# NOTE: Only the first matching reason is returned, so try to keep a nice order on reasons
+
+if RUBY_ENGINE == 'jruby'
+'JRuby is not supported'
+elsif RUBY_PLATFORM.include?('darwin')
+'Feature requires Linux; macOS is not supported'
+elsif RUBY_PLATFORM =~ /(mswin|mingw)/
+'Feature requires Linux; Windows is not supported'
+elsif !RUBY_PLATFORM.include?('linux')
+"Feature requires Linux; #{RUBY_PLATFORM} is not supported"
+end
+end
 end
 end
 end
data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb
CHANGED
@@ -6,10 +6,10 @@ require 'ddtrace/ext/http'
 module Datadog
 module Profiling
 module TraceIdentifiers
-# Used by Datadog::Profiling::TraceIdentifiers::Helper to get the trace identifiers (
-# given thread, if there is an active trace for that thread in
+# Used by Datadog::Profiling::TraceIdentifiers::Helper to get the trace identifiers (root span id and span id)
+# for a given thread, if there is an active trace for that thread in the supplied tracer object.
 class Ddtrace
-def initialize(tracer:
+def initialize(tracer:)
 @tracer = (tracer if tracer.respond_to?(:call_context))
 end

@@ -19,10 +19,13 @@ module Datadog
 context = @tracer.call_context(thread)
 return unless context

-
-
+span, root_span = context.current_span_and_root_span
+return unless span && root_span

-
+root_span_id = root_span.span_id || 0
+span_id = span.span_id || 0
+
+[root_span_id, span_id, maybe_extract_resource(root_span)] if root_span_id != 0 && span_id != 0
 end

 private
@@ -31,8 +34,6 @@ module Datadog
 # Resources MUST NOT include personal identifiable information (PII); this should not be the case with
 # ddtrace integrations, but worth mentioning just in case :)
 def maybe_extract_resource(root_span)
-return unless root_span
-
 root_span.resource if root_span.span_type == Datadog::Ext::HTTP::TYPE_INBOUND
 end
 end
data/lib/ddtrace/profiling/trace_identifiers/helper.rb
CHANGED
@@ -6,7 +6,7 @@ require 'ddtrace/profiling/trace_identifiers/ddtrace'
 module Datadog
 module Profiling
 module TraceIdentifiers
-# Helper used to retrieve the trace identifiers (
+# Helper used to retrieve the trace identifiers (root span id and span id) for a given thread,
 # if there is an active trace for that thread for the supported tracing APIs.
 #
 # This data is used to connect profiles to the traces -- samples in a profile will be tagged with this data and
@@ -28,7 +28,7 @@ module Datadog
 end

 # Expected output of the #trace_identifiers_for
-# duck type is [
+# duck type is [root_span_id, span_id, (optional trace_resource_container)]
 def trace_identifiers_for(thread)
 @supported_apis.each do |api|
 trace_identifiers = api.trace_identifiers_for(thread)
data/lib/ddtrace/profiling.rb
CHANGED
@@ -128,9 +128,7 @@ module Datadog
 private_class_method def self.load_profiling
 return false unless supported?

-require 'ddtrace/profiling/ext/cpu'
 require 'ddtrace/profiling/ext/forking'
-
 require 'ddtrace/profiling/collectors/stack'
 require 'ddtrace/profiling/exporter'
 require 'ddtrace/profiling/recorder'
data/lib/ddtrace/sampler.rb
CHANGED
@@ -194,6 +194,12 @@ module Datadog
 class PrioritySampler
 extend Forwardable

+# NOTE: We do not advise using a pre-sampler. It can save resources,
+# but pre-sampling at rates < 100% may result in partial traces, unless
+# the pre-sampler knows exactly how to drop a span without dropping its ancestors.
+#
+# Additionally, as service metrics are calculated in the Datadog Agent,
+# the service's throughput will be underestimated.
 attr_reader :pre_sampler, :priority_sampler

 SAMPLE_RATE_METRIC_KEY = '_sample_rate'.freeze
@@ -209,17 +215,21 @@ module Datadog

 def sample!(span)
 # If pre-sampling is configured, do it first. (By default, this will sample at 100%.)
-# NOTE: Pre-sampling at rates < 100% may result in partial traces; not recommended.
 span.sampled = pre_sample?(span) ? @pre_sampler.sample!(span) : true

 if span.sampled
-# If priority sampling has already been applied upstream, use that
-
-# Roll the dice and determine whether how we set the priority.
-priority = priority_sample!(span) ? Datadog::Ext::Priority::AUTO_KEEP : Datadog::Ext::Priority::AUTO_REJECT
+# If priority sampling has already been applied upstream, use that value.
+return true if priority_assigned?(span)

-
-
+# Check with post sampler how we set the priority.
+sample = priority_sample!(span)
+
+# Check if post sampler has already assigned a priority.
+return true if priority_assigned?(span)
+
+# If not, use agent priority values.
+priority = sample ? Datadog::Ext::Priority::AUTO_KEEP : Datadog::Ext::Priority::AUTO_REJECT
+assign_priority!(span, priority)
 else
 # If discarded by pre-sampling, set "reject" priority, so other
 # services for the same trace don't sample needlessly.
@@ -244,7 +254,7 @@ module Datadog
 end
 end

-def
+def priority_assigned?(span)
 span.context && !span.context.sampling_priority.nil?
 end

data/lib/ddtrace/sampling/rule_sampler.rb
CHANGED
@@ -97,11 +97,13 @@ module Datadog
 sampled = rule.sample?(span)
 sample_rate = rule.sample_rate(span)

+set_priority(span, sampled)
 set_rule_metrics(span, sample_rate)

 return false unless sampled

-rate_limiter.allow?(1).tap do
+rate_limiter.allow?(1).tap do |allowed|
+set_priority(span, allowed)
 set_limiter_metrics(span, rate_limiter.effective_rate)
 end
 rescue StandardError => e
@@ -109,6 +111,16 @@ module Datadog
 yield(span)
 end

+# Span priority should only be set when the {RuleSampler}
+# was responsible for the sampling decision.
+def set_priority(span, sampled)
+if sampled
+ForcedTracing.keep(span)
+else
+ForcedTracing.drop(span)
+end
+end
+
 def set_rule_metrics(span, sample_rate)
 span.set_metric(Ext::Sampling::RULE_SAMPLE_RATE, sample_rate)
 end
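To connect this back to the priority constants earlier in the diff: ForcedTracing.keep and ForcedTracing.drop assign the user-level priorities, conceptually along the lines of this hypothetical sketch (not the gem's actual implementation; names taken from elsewhere in this diff):

def keep(span)
  return if span.nil? || span.context.nil?
  span.context.sampling_priority = Datadog::Ext::Priority::USER_KEEP
end

def drop(span)
  return if span.nil? || span.context.nil?
  span.context.sampling_priority = Datadog::Ext::Priority::USER_REJECT
end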