ddtrace 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -16
- data/CHANGELOG.md +31 -2
- data/LICENSE-3rdparty.csv +3 -2
- data/README.md +2 -2
- data/ddtrace.gemspec +12 -3
- data/docs/GettingStarted.md +19 -2
- data/docs/ProfilingDevelopment.md +8 -8
- data/docs/UpgradeGuide.md +3 -3
- data/ext/ddtrace_profiling_loader/ddtrace_profiling_loader.c +118 -0
- data/ext/ddtrace_profiling_loader/extconf.rb +53 -0
- data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +31 -5
- data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +0 -8
- data/ext/ddtrace_profiling_native_extension/collectors_stack.c +278 -0
- data/ext/ddtrace_profiling_native_extension/extconf.rb +70 -100
- data/ext/ddtrace_profiling_native_extension/libddprof_helpers.h +13 -0
- data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +186 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +579 -7
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +30 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +7 -0
- data/ext/ddtrace_profiling_native_extension/stack_recorder.c +139 -0
- data/ext/ddtrace_profiling_native_extension/stack_recorder.h +28 -0
- data/lib/datadog/appsec/autoload.rb +2 -2
- data/lib/datadog/appsec/configuration/settings.rb +19 -0
- data/lib/datadog/appsec/configuration.rb +8 -0
- data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +76 -33
- data/lib/datadog/appsec/contrib/rack/integration.rb +1 -0
- data/lib/datadog/appsec/contrib/rack/patcher.rb +0 -1
- data/lib/datadog/appsec/contrib/rack/reactive/request_body.rb +64 -0
- data/lib/datadog/appsec/contrib/rack/request.rb +6 -0
- data/lib/datadog/appsec/contrib/rack/request_body_middleware.rb +41 -0
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +60 -5
- data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +81 -0
- data/lib/datadog/appsec/contrib/rails/patcher.rb +34 -1
- data/lib/datadog/appsec/contrib/rails/reactive/action.rb +68 -0
- data/lib/datadog/appsec/contrib/rails/request.rb +33 -0
- data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +124 -0
- data/lib/datadog/appsec/contrib/sinatra/patcher.rb +69 -2
- data/lib/datadog/appsec/contrib/sinatra/reactive/routed.rb +63 -0
- data/lib/datadog/appsec/event.rb +33 -18
- data/lib/datadog/appsec/extensions.rb +0 -3
- data/lib/datadog/appsec/processor.rb +45 -2
- data/lib/datadog/appsec/rate_limiter.rb +5 -0
- data/lib/datadog/appsec/reactive/operation.rb +0 -1
- data/lib/datadog/ci/ext/environment.rb +21 -7
- data/lib/datadog/core/configuration/agent_settings_resolver.rb +1 -1
- data/lib/datadog/core/configuration/components.rb +22 -4
- data/lib/datadog/core/configuration/settings.rb +3 -3
- data/lib/datadog/core/configuration.rb +7 -5
- data/lib/datadog/core/environment/cgroup.rb +3 -1
- data/lib/datadog/core/environment/container.rb +2 -1
- data/lib/datadog/core/environment/variable_helpers.rb +26 -2
- data/lib/datadog/core/logging/ext.rb +11 -0
- data/lib/datadog/core/metrics/client.rb +15 -5
- data/lib/datadog/core/runtime/metrics.rb +1 -1
- data/lib/datadog/core/workers/async.rb +3 -1
- data/lib/datadog/core/workers/runtime_metrics.rb +0 -3
- data/lib/datadog/core.rb +6 -0
- data/lib/datadog/kit/enable_core_dumps.rb +50 -0
- data/lib/datadog/kit/identity.rb +63 -0
- data/lib/datadog/kit.rb +11 -0
- data/lib/datadog/opentracer/tracer.rb +0 -2
- data/lib/datadog/profiling/collectors/old_stack.rb +298 -0
- data/lib/datadog/profiling/collectors/stack.rb +6 -287
- data/lib/datadog/profiling/encoding/profile.rb +0 -1
- data/lib/datadog/profiling/ext.rb +1 -1
- data/lib/datadog/profiling/flush.rb +1 -1
- data/lib/datadog/profiling/load_native_extension.rb +22 -0
- data/lib/datadog/profiling/recorder.rb +1 -1
- data/lib/datadog/profiling/scheduler.rb +1 -1
- data/lib/datadog/profiling/stack_recorder.rb +33 -0
- data/lib/datadog/profiling/tag_builder.rb +48 -0
- data/lib/datadog/profiling/tasks/exec.rb +2 -2
- data/lib/datadog/profiling/tasks/setup.rb +6 -4
- data/lib/datadog/profiling.rb +29 -27
- data/lib/datadog/tracing/buffer.rb +9 -3
- data/lib/datadog/tracing/contrib/action_view/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +2 -2
- data/lib/datadog/tracing/contrib/active_record/utils.rb +1 -1
- data/lib/datadog/tracing/contrib/active_record/vendor/connection_specification.rb +1 -1
- data/lib/datadog/tracing/contrib/active_support/notifications/subscription.rb +4 -2
- data/lib/datadog/tracing/contrib/concurrent_ruby/context_composite_executor_service.rb +10 -3
- data/lib/datadog/tracing/contrib/dalli/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/delayed_job/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/elasticsearch/integration.rb +9 -3
- data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +38 -2
- data/lib/datadog/tracing/contrib/ethon/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/extensions.rb +0 -2
- data/lib/datadog/tracing/contrib/faraday/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/grape/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/graphql/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/grpc/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/kafka/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/lograge/instrumentation.rb +2 -1
- data/lib/datadog/tracing/contrib/qless/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/que/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/racecar/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/rails/log_injection.rb +3 -16
- data/lib/datadog/tracing/contrib/rake/instrumentation.rb +2 -2
- data/lib/datadog/tracing/contrib/rake/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/redis/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/resque/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/rest_client/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/semantic_logger/instrumentation.rb +2 -1
- data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sidekiq/server_tracer.rb +20 -1
- data/lib/datadog/tracing/contrib/sinatra/framework.rb +11 -0
- data/lib/datadog/tracing/contrib/sinatra/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/sneakers/patcher.rb +0 -1
- data/lib/datadog/tracing/contrib/sucker_punch/patcher.rb +0 -1
- data/lib/datadog/tracing/event.rb +2 -1
- data/lib/datadog/tracing/sampling/priority_sampler.rb +4 -5
- data/lib/datadog/tracing/sampling/rule.rb +12 -6
- data/lib/datadog/tracing/sampling/rule_sampler.rb +3 -5
- data/lib/datadog/tracing/span_operation.rb +2 -3
- data/lib/datadog/tracing/trace_operation.rb +0 -1
- data/lib/ddtrace/transport/http/client.rb +2 -1
- data/lib/ddtrace/transport/http/response.rb +34 -4
- data/lib/ddtrace/transport/io/client.rb +3 -1
- data/lib/ddtrace/version.rb +1 -1
- data/lib/ddtrace.rb +1 -0
- metadata +43 -6
@@ -0,0 +1,298 @@
|
|
1
|
+
# typed: true
|
2
|
+
|
3
|
+
require 'datadog/core/utils/only_once'
|
4
|
+
require 'datadog/core/utils/time'
|
5
|
+
require 'datadog/core/worker'
|
6
|
+
require 'datadog/core/workers/polling'
|
7
|
+
require 'datadog/profiling/backtrace_location'
|
8
|
+
require 'datadog/profiling/events/stack'
|
9
|
+
require 'datadog/profiling/native_extension'
|
10
|
+
|
11
|
+
module Datadog
|
12
|
+
module Profiling
|
13
|
+
module Collectors
|
14
|
+
# Collects stack trace samples from Ruby threads for both CPU-time (if available) and wall-clock.
|
15
|
+
# Runs on its own background thread.
|
16
|
+
#
|
17
|
+
# This class has the prefix "Old" because it will be deprecated by the new native CPU Profiler
|
18
|
+
class OldStack < Core::Worker # rubocop:disable Metrics/ClassLength
|
19
|
+
include Core::Workers::Polling
|
20
|
+
|
21
|
+
DEFAULT_MAX_TIME_USAGE_PCT = 2.0
|
22
|
+
MIN_INTERVAL = 0.01
|
23
|
+
THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
|
24
|
+
THREAD_LAST_WALL_CLOCK_KEY = :datadog_profiler_last_wall_clock
|
25
|
+
SYNTHETIC_STACK_IN_NATIVE_CODE = [BacktraceLocation.new('', 0, 'In native code').freeze].freeze
|
26
|
+
|
27
|
+
# This default was picked based on the current sampling performance and on expected concurrency on an average
|
28
|
+
# Ruby MRI application. Lowering this optimizes for latency (less impact each time we sample), and raising
|
29
|
+
# optimizes for coverage (less chance to miss what a given thread is doing).
|
30
|
+
DEFAULT_MAX_THREADS_SAMPLED = 16
|
31
|
+
|
32
|
+
attr_reader \
|
33
|
+
:recorder,
|
34
|
+
:max_frames,
|
35
|
+
:trace_identifiers_helper,
|
36
|
+
:ignore_thread,
|
37
|
+
:max_time_usage_pct,
|
38
|
+
:thread_api,
|
39
|
+
:cpu_time_provider
|
40
|
+
|
41
|
+
def initialize(
|
42
|
+
recorder,
|
43
|
+
max_frames:,
|
44
|
+
trace_identifiers_helper:, # Usually an instance of Profiling::TraceIdentifiers::Helper
|
45
|
+
ignore_thread: nil,
|
46
|
+
max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
|
47
|
+
max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED,
|
48
|
+
thread_api: Thread,
|
49
|
+
cpu_time_provider: Profiling::NativeExtension,
|
50
|
+
fork_policy: Core::Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
|
51
|
+
interval: MIN_INTERVAL,
|
52
|
+
enabled: true
|
53
|
+
)
|
54
|
+
@recorder = recorder
|
55
|
+
@max_frames = max_frames
|
56
|
+
@trace_identifiers_helper = trace_identifiers_helper
|
57
|
+
@ignore_thread = ignore_thread
|
58
|
+
@max_time_usage_pct = max_time_usage_pct
|
59
|
+
@max_threads_sampled = max_threads_sampled
|
60
|
+
@thread_api = thread_api
|
61
|
+
# Only set the provider if it's able to work in the current Ruby/OS combo
|
62
|
+
@cpu_time_provider = cpu_time_provider unless cpu_time_provider.cpu_time_ns_for(thread_api.current).nil?
|
63
|
+
|
64
|
+
# Workers::Async::Thread settings
|
65
|
+
self.fork_policy = fork_policy
|
66
|
+
|
67
|
+
# Workers::IntervalLoop settings
|
68
|
+
self.loop_base_interval = interval
|
69
|
+
|
70
|
+
# Workers::Polling settings
|
71
|
+
self.enabled = enabled
|
72
|
+
|
73
|
+
# Cache this proc, since it's pretty expensive to keep recreating it
|
74
|
+
@build_backtrace_location = method(:build_backtrace_location).to_proc
|
75
|
+
# Cache this buffer, since it's pretty expensive to keep accessing it
|
76
|
+
@stack_sample_event_recorder = recorder[Events::StackSample]
|
77
|
+
# See below for details on why this is needed
|
78
|
+
@needs_process_waiter_workaround =
|
79
|
+
Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.3') &&
|
80
|
+
Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.7')
|
81
|
+
end
|
82
|
+
|
83
|
+
def start
|
84
|
+
reset_cpu_time_tracking
|
85
|
+
perform
|
86
|
+
end
|
87
|
+
|
88
|
+
def perform
|
89
|
+
collect_and_wait
|
90
|
+
end
|
91
|
+
|
92
|
+
def collect_and_wait
|
93
|
+
run_time = Core::Utils::Time.measure do
|
94
|
+
collect_events
|
95
|
+
end
|
96
|
+
|
97
|
+
# Update wait time to throttle profiling
|
98
|
+
self.loop_wait_time = compute_wait_time(run_time)
|
99
|
+
end
|
100
|
+
|
101
|
+
def collect_events
|
102
|
+
events = []
|
103
|
+
current_wall_time_ns = get_current_wall_time_timestamp_ns
|
104
|
+
|
105
|
+
# Collect backtraces from each thread
|
106
|
+
threads_to_sample.each do |thread|
|
107
|
+
next unless thread.alive?
|
108
|
+
next if ignore_thread.is_a?(Proc) && ignore_thread.call(thread)
|
109
|
+
|
110
|
+
event = collect_thread_event(thread, current_wall_time_ns)
|
111
|
+
events << event unless event.nil?
|
112
|
+
end
|
113
|
+
|
114
|
+
# Send events to recorder
|
115
|
+
recorder.push(events) unless events.empty?
|
116
|
+
|
117
|
+
events
|
118
|
+
end
|
119
|
+
|
120
|
+
def collect_thread_event(thread, current_wall_time_ns)
|
121
|
+
locations = thread.backtrace_locations
|
122
|
+
return if locations.nil?
|
123
|
+
|
124
|
+
# Having empty locations means that the thread is alive, but we don't know what it's doing:
|
125
|
+
#
|
126
|
+
# 1. It can be starting up
|
127
|
+
# ```
|
128
|
+
# > Thread.new { sleep }.backtrace
|
129
|
+
# => [] # <-- note the thread hasn't actually started running sleep yet, we got there first
|
130
|
+
# ```
|
131
|
+
# 2. It can be running native code
|
132
|
+
# ```
|
133
|
+
# > t = Process.detach(fork { sleep })
|
134
|
+
# => #<Process::Waiter:0x00007ffe7285f7a0 run>
|
135
|
+
# > t.backtrace
|
136
|
+
# => [] # <-- this can happen even minutes later, e.g. it's not a race as in 1.
|
137
|
+
# ```
|
138
|
+
# This effect has been observed in threads created by the Iodine web server and the ffi gem
|
139
|
+
#
|
140
|
+
# To give customers visibility into these threads, we replace the empty stack with one containing a
|
141
|
+
# synthetic placeholder frame, so that these threads are properly represented in the UX.
|
142
|
+
locations = SYNTHETIC_STACK_IN_NATIVE_CODE if locations.empty?
|
143
|
+
|
144
|
+
# Get actual stack size then trim the stack
|
145
|
+
stack_size = locations.length
|
146
|
+
locations = locations[0..(max_frames - 1)]
|
147
|
+
|
148
|
+
# Convert backtrace locations into structs
|
149
|
+
locations = convert_backtrace_locations(locations)
|
150
|
+
|
151
|
+
thread_id = thread.object_id
|
152
|
+
root_span_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
|
153
|
+
cpu_time = get_cpu_time_interval!(thread)
|
154
|
+
wall_time_interval_ns =
|
155
|
+
get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_WALL_CLOCK_KEY, current_wall_time_ns)
|
156
|
+
|
157
|
+
Events::StackSample.new(
|
158
|
+
nil,
|
159
|
+
locations,
|
160
|
+
stack_size,
|
161
|
+
thread_id,
|
162
|
+
root_span_id,
|
163
|
+
span_id,
|
164
|
+
trace_resource,
|
165
|
+
cpu_time,
|
166
|
+
wall_time_interval_ns
|
167
|
+
)
|
168
|
+
end
|
169
|
+
|
170
|
+
def get_cpu_time_interval!(thread)
|
171
|
+
return unless cpu_time_provider
|
172
|
+
|
173
|
+
current_cpu_time_ns = cpu_time_provider.cpu_time_ns_for(thread)
|
174
|
+
|
175
|
+
return unless current_cpu_time_ns
|
176
|
+
|
177
|
+
get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
|
178
|
+
end
|
179
|
+
|
180
|
+
def compute_wait_time(used_time)
|
181
|
+
# We took used_time to get the last sample.
|
182
|
+
#
|
183
|
+
# What we're computing here is -- if used_time corresponds to max_time_usage_pct of the time we should
|
184
|
+
# spend working, how much is (100% - max_time_usage_pct) of the time?
|
185
|
+
#
|
186
|
+
# For instance, if we took 10ms to sample, and max_time_usage_pct is 1%, then the other 99% is 990ms, which
|
187
|
+
# means we need to sleep for 990ms to guarantee that we don't spend more than 1% of the time working.
|
188
|
+
used_time_ns = used_time * 1e9
|
189
|
+
interval = (used_time_ns / (max_time_usage_pct / 100.0)) - used_time_ns
|
190
|
+
[interval / 1e9, MIN_INTERVAL].max
|
191
|
+
end
|
192
|
+
|
193
|
+
# Convert backtrace locations into structs
|
194
|
+
# Re-use old backtrace location objects if they already exist in the buffer
|
195
|
+
def convert_backtrace_locations(locations)
|
196
|
+
locations.collect do |location|
|
197
|
+
# Re-use existing BacktraceLocation if identical copy, otherwise build a new one.
|
198
|
+
@stack_sample_event_recorder.cache(:backtrace_locations).fetch(
|
199
|
+
# Function name
|
200
|
+
location.base_label,
|
201
|
+
# Line number
|
202
|
+
location.lineno,
|
203
|
+
# Filename
|
204
|
+
location.path,
|
205
|
+
# Build function
|
206
|
+
&@build_backtrace_location
|
207
|
+
)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
def build_backtrace_location(_id, base_label, lineno, path)
|
212
|
+
string_table = @stack_sample_event_recorder.string_table
|
213
|
+
|
214
|
+
Profiling::BacktraceLocation.new(
|
215
|
+
string_table.fetch_string(base_label),
|
216
|
+
lineno,
|
217
|
+
string_table.fetch_string(path)
|
218
|
+
)
|
219
|
+
end
|
220
|
+
|
221
|
+
private
|
222
|
+
|
223
|
+
# If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
|
224
|
+
# clean up any leftover per-thread counters, so that the first sample after starting doesn't end up with:
|
225
|
+
#
|
226
|
+
# a) negative time: At least on my test docker container, and on the reliability environment, after the process
|
227
|
+
# forks, the cpu time reference changes and (old cpu time - new cpu time) can be < 0
|
228
|
+
#
|
229
|
+
# b) large amount of time: if the profiler was started, then stopped for some amount of time, and then
|
230
|
+
# restarted, we don't want the first sample to be "blamed" for multiple minutes of CPU time
|
231
|
+
#
|
232
|
+
# By resetting the last cpu time seen, we start with a clean slate every time we start the stack collector.
|
233
|
+
def reset_cpu_time_tracking
|
234
|
+
thread_api.list.each do |thread|
|
235
|
+
# See below for details on why this is needed
|
236
|
+
next if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)
|
237
|
+
|
238
|
+
thread.thread_variable_set(THREAD_LAST_CPU_TIME_KEY, nil)
|
239
|
+
thread.thread_variable_set(THREAD_LAST_WALL_CLOCK_KEY, nil)
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
def get_elapsed_since_last_sample_and_set_value(thread, key, current_value)
|
244
|
+
# Process::Waiter crash workaround:
|
245
|
+
#
|
246
|
+
# This is a workaround for a Ruby VM segfault (usually something like
|
247
|
+
# "[BUG] Segmentation fault at 0x0000000000000008") in the affected Ruby versions.
|
248
|
+
# See https://bugs.ruby-lang.org/issues/17807 for details.
|
249
|
+
#
|
250
|
+
# In those Ruby versions, there's a very special subclass of `Thread` called `Process::Waiter` that causes VM
|
251
|
+
# crashes whenever something tries to read its instance or thread variables. This subclass of thread only
|
252
|
+
# shows up when the `Process.detach` API gets used.
|
253
|
+
# In the specs you'll find crash regression tests that include a way of reproducing it.
|
254
|
+
#
|
255
|
+
# As workaround for now we just skip it for the affected Rubies
|
256
|
+
return 0 if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)
|
257
|
+
|
258
|
+
last_value = thread.thread_variable_get(key) || current_value
|
259
|
+
thread.thread_variable_set(key, current_value)
|
260
|
+
|
261
|
+
current_value - last_value
|
262
|
+
end
|
263
|
+
|
264
|
+
# Whenever there are more than max_threads_sampled active, we only sample a subset of them.
|
265
|
+
# We do this to avoid impacting the latency of the service being profiled. We want to avoid doing
|
266
|
+
# a big burst of work all at once (sample everything), and instead do a little work each time
|
267
|
+
# (sample a bit by bit).
|
268
|
+
#
|
269
|
+
# Because we pick the threads to sample randomly, we'll eventually sample all threads -- just not at once.
|
270
|
+
# Notice also that this will interact with our dynamic sampling mechanism -- if samples are faster, we take
|
271
|
+
# them more often, if they are slower, we take them less often -- which again means that over a longer period
|
272
|
+
# we should take sample roughly the same samples.
|
273
|
+
#
|
274
|
+
# One downside of this approach is that if there really are many threads, the resulting wall clock times
|
275
|
+
# in a one minute profile may "drift" around the 60 second mark, e.g. maybe we only sampled a thread once per
|
276
|
+
# second and only 59 times, so we'll report 59s, but on the next report we'll include the missing one, so
|
277
|
+
# then the result will be 61s. I've observed 60 +- 1.68 secs for an app with ~65 threads, given the
|
278
|
+
# default maximum of 16 threads. This seems a reasonable enough margin of error given the improvement to
|
279
|
+
# latency (especially on such a large application! -> even bigger latency impact if we tried to sample all
|
280
|
+
# threads).
|
281
|
+
#
|
282
|
+
def threads_to_sample
|
283
|
+
all_threads = thread_api.list
|
284
|
+
|
285
|
+
if all_threads.size > @max_threads_sampled
|
286
|
+
all_threads.sample(@max_threads_sampled)
|
287
|
+
else
|
288
|
+
all_threads
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
def get_current_wall_time_timestamp_ns
|
293
|
+
Core::Utils::Time.get_time(:nanosecond)
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
@@ -1,295 +1,14 @@
|
|
1
|
-
# typed: true
|
2
|
-
|
3
|
-
require 'datadog/core/utils/only_once'
|
4
|
-
require 'datadog/core/utils/time'
|
5
|
-
require 'datadog/core/worker'
|
6
|
-
require 'datadog/core/workers/polling'
|
7
|
-
require 'datadog/profiling/backtrace_location'
|
8
|
-
require 'datadog/profiling/events/stack'
|
9
|
-
require 'datadog/profiling/native_extension'
|
1
|
+
# typed: false
|
10
2
|
|
11
3
|
module Datadog
|
12
4
|
module Profiling
|
13
5
|
module Collectors
|
14
|
-
# Collects stack trace samples from Ruby threads for both CPU-time (if available) and wall-clock.
|
15
|
-
# Runs on its own background thread.
|
6
|
+
# Used to gather a stack trace from a given Ruby thread. Almost all of this class is implemented as native code.
|
16
7
|
#
|
17
|
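-
class Stack < Core::Worker # rubocop:disable Metrics/ClassLength
|
18
|
-
include Core::Workers::Polling
|
19
|
-
|
20
|
-
DEFAULT_MAX_TIME_USAGE_PCT = 2.0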
|
21
|
-
MIN_INTERVAL = 0.01
|
22
|
-
THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
|
23
|
-
THREAD_LAST_WALL_CLOCK_KEY = :datadog_profiler_last_wall_clock
|
24
|
-
SYNTHETIC_STACK_IN_NATIVE_CODE = [BacktraceLocation.new('', 0, 'In native code').freeze].freeze
|
25
|
-
|
26
|
-
# This default was picked based on the current sampling performance and on expected concurrency on an average
|
27
|
-
# Ruby MRI application. Lowering this optimizes for latency (less impact each time we sample), and raising
|
28
|
-
# optimizes for coverage (less chance to miss what a given thread is doing).
|
29
|
-
DEFAULT_MAX_THREADS_SAMPLED = 16
|
30
|
-
|
31
|
-
attr_reader \
|
32
|
-
:recorder,
|
33
|
-
:max_frames,
|
34
|
-
:trace_identifiers_helper,
|
35
|
-
:ignore_thread,
|
36
|
-
:max_time_usage_pct,
|
37
|
-
:thread_api,
|
38
|
-
:cpu_time_provider
|
39
|
-
|
40
|
-
def initialize(
|
41
|
-
recorder,
|
42
|
-
max_frames:,
|
43
|
-
trace_identifiers_helper:, # Usually an instance of Profiling::TraceIdentifiers::Helper
|
44
|
-
ignore_thread: nil,
|
45
|
-
max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
|
46
|
-
max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED,
|
47
|
-
thread_api: Thread,
|
48
|
-
cpu_time_provider: Profiling::NativeExtension,
|
49
|
-
fork_policy: Core::Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
|
50
|
-
interval: MIN_INTERVAL,
|
51
|
-
enabled: true
|
52
|
-
)
|
53
|
-
@recorder = recorder
|
54
|
-
@max_frames = max_frames
|
55
|
-
@trace_identifiers_helper = trace_identifiers_helper
|
56
|
-
@ignore_thread = ignore_thread
|
57
|
-
@max_time_usage_pct = max_time_usage_pct
|
58
|
-
@max_threads_sampled = max_threads_sampled
|
59
|
-
@thread_api = thread_api
|
60
|
-
# Only set the provider if it's able to work in the current Ruby/OS combo
|
61
|
-
@cpu_time_provider = cpu_time_provider unless cpu_time_provider.cpu_time_ns_for(thread_api.current).nil?
|
62
|
-
|
63
|
-
# Workers::Async::Thread settings
|
64
|
-
self.fork_policy = fork_policy
|
65
|
-
|
66
|
-
# Workers::IntervalLoop settings
|
67
|
-
self.loop_base_interval = interval
|
68
|
-
|
69
|
-
# Workers::Polling settings
|
70
|
-
self.enabled = enabled
|
71
|
-
|
72
|
-
# Cache this proc, since it's pretty expensive to keep recreating it
|
73
|
-
@build_backtrace_location = method(:build_backtrace_location).to_proc
|
74
|
-
# Cache this buffer, since it's pretty expensive to keep accessing it
|
75
|
-
@stack_sample_event_recorder = recorder[Events::StackSample]
|
76
|
-
# See below for details on why this is needed
|
77
|
-
@needs_process_waiter_workaround =
|
78
|
-
Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.3') &&
|
79
|
-
Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.7')
|
80
|
-
end
|
81
|
-
|
82
|
-
def start
|
83
|
-
reset_cpu_time_tracking
|
84
|
-
perform
|
85
|
-
end
|
86
|
-
|
87
|
-
def perform
|
88
|
-
collect_and_wait
|
89
|
-
end
|
90
|
-
|
91
|
-
def collect_and_wait
|
92
|
-
run_time = Core::Utils::Time.measure do
|
93
|
-
collect_events
|
94
|
-
end
|
95
|
-
|
96
|
-
# Update wait time to throttle profiling
|
97
|
-
self.loop_wait_time = compute_wait_time(run_time)
|
98
|
-
end
|
99
|
-
|
100
|
-
def collect_events
|
101
|
-
events = []
|
102
|
-
current_wall_time_ns = get_current_wall_time_timestamp_ns
|
103
|
-
|
104
|
-
# Collect backtraces from each thread
|
105
|
-
threads_to_sample.each do |thread|
|
106
|
-
next unless thread.alive?
|
107
|
-
next if ignore_thread.is_a?(Proc) && ignore_thread.call(thread)
|
108
|
-
|
109
|
-
event = collect_thread_event(thread, current_wall_time_ns)
|
110
|
-
events << event unless event.nil?
|
111
|
-
end
|
112
|
-
|
113
|
-
# Send events to recorder
|
114
|
-
recorder.push(events) unless events.empty?
|
115
|
-
|
116
|
-
events
|
117
|
-
end
|
118
|
-
|
119
|
-
def collect_thread_event(thread, current_wall_time_ns)
|
120
|
-
locations = thread.backtrace_locations
|
121
|
-
return if locations.nil?
|
122
|
-
|
123
|
-
# Having empty locations means that the thread is alive, but we don't know what it's doing:
|
124
|
-
#
|
125
|
-
# 1. It can be starting up
|
126
|
-
# ```
|
127
|
-
# > Thread.new { sleep }.backtrace
|
128
|
-
# => [] # <-- note the thread hasn't actually started running sleep yet, we got there first
|
129
|
-
# ```
|
130
|
-
# 2. It can be running native code
|
131
|
-
# ```
|
132
|
-
# > t = Process.detach(fork { sleep })
|
133
|
-
# => #<Process::Waiter:0x00007ffe7285f7a0 run>
|
134
|
-
# > t.backtrace
|
135
|
-
# => [] # <-- this can happen even minutes later, e.g. it's not a race as in 1.
|
136
|
-
# ```
|
137
|
-
# This effect has been observed in threads created by the Iodine web server and the ffi gem
|
138
|
-
#
|
139
|
-
# To give customers visibility into these threads, we replace the empty stack with one containing a
|
140
|
-
# synthetic placeholder frame, so that these threads are properly represented in the UX.
|
141
|
-
locations = SYNTHETIC_STACK_IN_NATIVE_CODE if locations.empty?
|
142
|
-
|
143
|
-
# Get actual stack size then trim the stack
|
144
|
-
stack_size = locations.length
|
145
|
-
locations = locations[0..(max_frames - 1)]
|
146
|
-
|
147
|
-
# Convert backtrace locations into structs
|
148
|
-
locations = convert_backtrace_locations(locations)
|
149
|
-
|
150
|
-
thread_id = thread.object_id
|
151
|
-
root_span_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
|
152
|
-
cpu_time = get_cpu_time_interval!(thread)
|
153
|
-
wall_time_interval_ns =
|
154
|
-
get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_WALL_CLOCK_KEY, current_wall_time_ns)
|
155
|
-
|
156
|
-
Events::StackSample.new(
|
157
|
-
nil,
|
158
|
-
locations,
|
159
|
-
stack_size,
|
160
|
-
thread_id,
|
161
|
-
root_span_id,
|
162
|
-
span_id,
|
163
|
-
trace_resource,
|
164
|
-
cpu_time,
|
165
|
-
wall_time_interval_ns
|
166
|
-
)
|
167
|
-
end
|
168
|
-
|
169
|
-
def get_cpu_time_interval!(thread)
|
170
|
-
return unless cpu_time_provider
|
171
|
-
|
172
|
-
current_cpu_time_ns = cpu_time_provider.cpu_time_ns_for(thread)
|
173
|
-
|
174
|
-
return unless current_cpu_time_ns
|
175
|
-
|
176
|
-
get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
|
177
|
-
end
|
178
|
-
|
179
|
-
def compute_wait_time(used_time)
|
180
|
-
# We took used_time to get the last sample.
|
181
|
-
#
|
182
|
-
# What we're computing here is -- if used_time corresponds to max_time_usage_pct of the time we should
|
183
|
-
# spend working, how much is (100% - max_time_usage_pct) of the time?
|
184
|
-
#
|
185
|
-
# For instance, if we took 10ms to sample, and max_time_usage_pct is 1%, then the other 99% is 990ms, which
|
186
|
-
# means we need to sleep for 990ms to guarantee that we don't spend more than 1% of the time working.
|
187
|
-
used_time_ns = used_time * 1e9
|
188
|
-
interval = (used_time_ns / (max_time_usage_pct / 100.0)) - used_time_ns
|
189
|
-
[interval / 1e9, MIN_INTERVAL].max
|
190
|
-
end
|
191
|
-
|
192
|
-
# Convert backtrace locations into structs
|
193
|
-
# Re-use old backtrace location objects if they already exist in the buffer
|
194
|
-
def convert_backtrace_locations(locations)
|
195
|
-
locations.collect do |location|
|
196
|
-
# Re-use existing BacktraceLocation if identical copy, otherwise build a new one.
|
197
|
-
@stack_sample_event_recorder.cache(:backtrace_locations).fetch(
|
198
|
-
# Function name
|
199
|
-
location.base_label,
|
200
|
-
# Line number
|
201
|
-
location.lineno,
|
202
|
-
# Filename
|
203
|
-
location.path,
|
204
|
-
# Build function
|
205
|
-
&@build_backtrace_location
|
206
|
-
)
|
207
|
-
end
|
208
|
-
end
|
209
|
-
|
210
|
-
def build_backtrace_location(_id, base_label, lineno, path)
|
211
|
-
string_table = @stack_sample_event_recorder.string_table
|
212
|
-
|
213
|
-
Profiling::BacktraceLocation.new(
|
214
|
-
string_table.fetch_string(base_label),
|
215
|
-
lineno,
|
216
|
-
string_table.fetch_string(path)
|
217
|
-
)
|
218
|
-
end
|
219
|
-
|
220
|
-
private
|
221
|
-
|
222
|
-
# If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
|
223
|
-
# clean up any leftover per-thread counters, so that the first sample after starting doesn't end up with:
|
224
|
-
#
|
225
|
-
# a) negative time: At least on my test docker container, and on the reliability environment, after the process
|
226
|
-
# forks, the cpu time reference changes and (old cpu time - new cpu time) can be < 0
|
227
|
-
#
|
228
|
-
# b) large amount of time: if the profiler was started, then stopped for some amount of time, and then
|
229
|
-
# restarted, we don't want the first sample to be "blamed" for multiple minutes of CPU time
|
230
|
-
#
|
231
|
-
# By resetting the last cpu time seen, we start with a clean slate every time we start the stack collector.
|
232
|
-
def reset_cpu_time_tracking
|
233
|
-
thread_api.list.each do |thread|
|
234
|
-
# See below for details on why this is needed
|
235
|
-
next if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)
|
236
|
-
|
237
|
-
thread.thread_variable_set(THREAD_LAST_CPU_TIME_KEY, nil)
|
238
|
-
thread.thread_variable_set(THREAD_LAST_WALL_CLOCK_KEY, nil)
|
239
|
-
end
|
240
|
-
end
|
241
|
-
|
242
|
-
def get_elapsed_since_last_sample_and_set_value(thread, key, current_value)
|
243
|
-
# Process::Waiter crash workaround:
|
244
|
-
#
|
245
|
-
# This is a workaround for a Ruby VM segfault (usually something like
|
246
|
-
# "[BUG] Segmentation fault at 0x0000000000000008") in the affected Ruby versions.
|
247
|
-
# See https://bugs.ruby-lang.org/issues/17807 for details.
|
248
|
-
#
|
249
|
-
# In those Ruby versions, there's a very special subclass of `Thread` called `Process::Waiter` that causes VM
|
250
|
-
# crashes whenever something tries to read its instance or thread variables. This subclass of thread only
|
251
|
-
# shows up when the `Process.detach` API gets used.
|
252
|
-
# In the specs you'll find crash regression tests that include a way of reproducing it.
|
253
|
-
#
|
254
|
-
# As workaround for now we just skip it for the affected Rubies
|
255
|
-
return 0 if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)
|
256
|
-
|
257
|
-
last_value = thread.thread_variable_get(key) || current_value
|
258
|
-
thread.thread_variable_set(key, current_value)
|
259
|
-
|
260
|
-
current_value - last_value
|
261
|
-
end
|
262
|
-
|
263
|
-
# Whenever there are more than max_threads_sampled active, we only sample a subset of them.
|
264
|
-
# We do this to avoid impacting the latency of the service being profiled. We want to avoid doing
|
265
|
-
# a big burst of work all at once (sample everything), and instead do a little work each time
|
266
|
-
# (sample a bit by bit).
|
267
|
-
#
|
268
|
-
# Because we pick the threads to sample randomly, we'll eventually sample all threads -- just not at once.
|
269
|
-
# Notice also that this will interact with our dynamic sampling mechanism -- if samples are faster, we take
|
270
|
-
# them more often, if they are slower, we take them less often -- which again means that over a longer period
|
271
|
-
# we should take sample roughly the same samples.
|
272
|
-
#
|
273
|
-
# One downside of this approach is that if there really are many threads, the resulting wall clock times
|
274
|
-
# in a one minute profile may "drift" around the 60 second mark, e.g. maybe we only sampled a thread once per
|
275
|
-
# second and only 59 times, so we'll report 59s, but on the next report we'll include the missing one, so
|
276
|
-
# then the result will be 61s. I've observed 60 +- 1.68 secs for an app with ~65 threads, given the
|
277
|
-
# default maximum of 16 threads. This seems a reasonable enough margin of error given the improvement to
|
278
|
-
# latency (especially on such a large application! -> even bigger latency impact if we tried to sample all
|
279
|
-
# threads).
|
280
|
-
#
|
281
|
-
def threads_to_sample
|
282
|
-
all_threads = thread_api.list
|
283
|
-
|
284
|
-
if all_threads.size > @max_threads_sampled
|
285
|
-
all_threads.sample(@max_threads_sampled)
|
286
|
-
else
|
287
|
-
all_threads
|
288
|
-
end
|
289
|
-
end
|
290
|
-
|
291
|
-
def get_current_wall_time_timestamp_ns
|
292
|
-
Core::Utils::Time.get_time(:nanosecond)
|
8
|
+
# Methods prefixed with _native_ are implemented in `collectors_stack.c`
|
9
|
+
class Stack
|
10
|
+
def sample(thread, recorder_instance, metric_values_hash, labels_array, max_frames: 400)
|
11
|
+
self.class._native_sample(thread, recorder_instance, metric_values_hash, labels_array, max_frames)
|
293
12
|
end
|
294
13
|
end
|
295
14
|
end
|
@@ -30,7 +30,7 @@ module Datadog
|
|
30
30
|
FORM_FIELD_TAG_ENV = 'env'.freeze
|
31
31
|
FORM_FIELD_TAG_HOST = 'host'.freeze
|
32
32
|
FORM_FIELD_TAG_LANGUAGE = 'language'.freeze
|
33
|
-
FORM_FIELD_TAG_PID = '
|
33
|
+
FORM_FIELD_TAG_PID = 'process_id'.freeze
|
34
34
|
FORM_FIELD_TAG_PROFILER_VERSION = 'profiler_version'.freeze
|
35
35
|
FORM_FIELD_TAG_RUNTIME = 'runtime'.freeze
|
36
36
|
FORM_FIELD_TAG_RUNTIME_ENGINE = 'runtime_engine'.freeze
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# typed: ignore
|
2
|
+
|
3
|
+
# This file is used to load the profiling native extension. It works in two steps:
|
4
|
+
#
|
5
|
+
# 1. Load the ddtrace_profiling_loader extension. This extension will be used to load the actual extension, but in
|
6
|
+
# a special way that avoids exposing native-level code symbols. See `ddtrace_profiling_loader.c` for more details.
|
7
|
+
#
|
8
|
+
# 2. Use the Datadog::Profiling::Loader exposed by the ddtrace_profiling_loader extension to load the actual
|
9
|
+
# profiling native extension.
|
10
|
+
#
|
11
|
+
# All code in this file is deliberately at the top level; this makes it so this file is executed only once,
|
12
|
+
# the first time it gets required, to avoid any issues with the native extension being initialized more than once.
|
13
|
+
|
14
|
+
require "ddtrace_profiling_loader.#{RUBY_VERSION}_#{RUBY_PLATFORM}"
|
15
|
+
|
16
|
+
extension_name = "ddtrace_profiling_native_extension.#{RUBY_VERSION}_#{RUBY_PLATFORM}"
|
17
|
+
full_file_path = "#{__dir__}/../../#{extension_name}.#{RbConfig::CONFIG['DLEXT']}"
|
18
|
+
init_function_name = "Init_#{extension_name.split('.').first}"
|
19
|
+
|
20
|
+
status, result = Datadog::Profiling::Loader._native_load(full_file_path, init_function_name)
|
21
|
+
|
22
|
+
raise "Failure to load #{extension_name} due to #{result}" if status == :error
|
@@ -114,7 +114,7 @@ module Datadog
|
|
114
114
|
# Sleep for a bit to cause misalignment between profilers in multi-process applications
|
115
115
|
#
|
116
116
|
# When not being run in a loop, it means the scheduler has not been started or was stopped, and thus
|
117
|
-
# a) it's being
|
117
|
+
# a) it's being shut down (and is trying to report the last profile)
|
118
118
|
# b) it's being run as a one-shot, usually in a test
|
119
119
|
# ...so in those cases we don't sleep
|
120
120
|
#
|