ddtrace 1.20.0 → 1.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +61 -2
- data/LICENSE-3rdparty.csv +1 -1
- data/bin/ddprofrb +15 -0
- data/bin/ddtracerb +3 -1
- data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
- data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +206 -49
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.c +145 -72
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.h +17 -5
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +92 -2
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +2 -2
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +10 -14
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +4 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +14 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +1 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.c +10 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +2 -0
- data/lib/datadog/core/configuration/components.rb +5 -5
- data/lib/datadog/core/configuration/option.rb +1 -1
- data/lib/datadog/core/configuration/settings.rb +92 -46
- data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
- data/lib/datadog/core/environment/git.rb +25 -0
- data/lib/datadog/core/environment/identity.rb +18 -48
- data/lib/datadog/core/git/ext.rb +2 -23
- data/lib/datadog/core/remote/negotiation.rb +2 -2
- data/lib/datadog/core/remote/worker.rb +7 -4
- data/lib/datadog/core/transport/ext.rb +2 -0
- data/lib/datadog/core/utils/url.rb +25 -0
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -0
- data/lib/datadog/profiling/collectors/info.rb +101 -0
- data/lib/datadog/profiling/component.rb +12 -14
- data/lib/datadog/profiling/exporter.rb +19 -5
- data/lib/datadog/profiling/ext.rb +2 -0
- data/lib/datadog/profiling/flush.rb +6 -3
- data/lib/datadog/profiling/http_transport.rb +5 -1
- data/lib/datadog/profiling/load_native_extension.rb +5 -5
- data/lib/datadog/profiling/native_extension.rb +1 -1
- data/lib/datadog/profiling/tag_builder.rb +5 -0
- data/lib/datadog/profiling/tasks/exec.rb +3 -3
- data/lib/datadog/profiling/tasks/help.rb +3 -3
- data/lib/datadog/profiling.rb +2 -2
- data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
- data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
- data/lib/datadog/tracing/contrib/extensions.rb +6 -2
- data/lib/datadog/tracing/contrib/grape/endpoint.rb +5 -0
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +28 -4
- data/lib/datadog/tracing/contrib/rails/patcher.rb +16 -0
- data/lib/datadog/tracing/contrib/sinatra/tracer.rb +6 -3
- data/lib/datadog/tracing/metadata/ext.rb +2 -0
- data/lib/datadog/tracing/trace_operation.rb +1 -2
- data/lib/datadog/tracing/transport/http.rb +1 -0
- data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
- data/lib/ddtrace/version.rb +1 -1
- metadata +56 -53
- data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
- data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
- data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +0 -0
|
@@ -17,6 +17,8 @@
|
|
|
17
17
|
#include "setup_signal_handler.h"
|
|
18
18
|
#include "time_helpers.h"
|
|
19
19
|
|
|
20
|
+
#define ERR_CLOCK_FAIL "failed to get clock time"
|
|
21
|
+
|
|
20
22
|
// Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
|
|
21
23
|
unsigned int MAX_ALLOC_WEIGHT = 65535;
|
|
22
24
|
|
|
@@ -119,6 +121,7 @@ struct cpu_and_wall_time_worker_state {
|
|
|
119
121
|
bool during_sample;
|
|
120
122
|
|
|
121
123
|
struct stats {
|
|
124
|
+
// # Generic stats
|
|
122
125
|
// How many times we tried to trigger a sample
|
|
123
126
|
unsigned int trigger_sample_attempts;
|
|
124
127
|
// How many times we tried to simulate signal delivery
|
|
@@ -129,25 +132,36 @@ struct cpu_and_wall_time_worker_state {
|
|
|
129
132
|
unsigned int signal_handler_enqueued_sample;
|
|
130
133
|
// How many times the signal handler was called from the wrong thread
|
|
131
134
|
unsigned int signal_handler_wrong_thread;
|
|
132
|
-
// How many times we actually sampled (except GC samples)
|
|
133
|
-
unsigned int sampled;
|
|
134
|
-
// How many times we skipped a sample because of the dynamic sampling rate mechanism
|
|
135
|
-
unsigned int skipped_sample_because_of_dynamic_sampling_rate;
|
|
136
135
|
|
|
137
|
-
// Stats for the results of calling rb_postponed_job_register_one
|
|
138
|
-
|
|
136
|
+
// # Stats for the results of calling rb_postponed_job_register_one
|
|
137
|
+
// The same function was already waiting to be executed
|
|
139
138
|
unsigned int postponed_job_skipped_already_existed;
|
|
140
|
-
|
|
139
|
+
// The function was added to the queue successfully
|
|
141
140
|
unsigned int postponed_job_success;
|
|
142
|
-
|
|
141
|
+
// The queue was full
|
|
143
142
|
unsigned int postponed_job_full;
|
|
144
|
-
|
|
143
|
+
// The function returned an unknown result code
|
|
145
144
|
unsigned int postponed_job_unknown_result;
|
|
146
145
|
|
|
147
|
-
//
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
146
|
+
// # CPU/Walltime sampling stats
|
|
147
|
+
// How many times we actually CPU/wall sampled
|
|
148
|
+
unsigned int cpu_sampled;
|
|
149
|
+
// How many times we skipped a CPU/wall sample because of the dynamic sampling rate mechanism
|
|
150
|
+
unsigned int cpu_skipped;
|
|
151
|
+
// Min/max/total wall-time spent on CPU/wall sampling
|
|
152
|
+
uint64_t cpu_sampling_time_ns_min;
|
|
153
|
+
uint64_t cpu_sampling_time_ns_max;
|
|
154
|
+
uint64_t cpu_sampling_time_ns_total;
|
|
155
|
+
|
|
156
|
+
// # Allocation sampling stats
|
|
157
|
+
// How many times we actually allocation sampled
|
|
158
|
+
uint64_t allocation_sampled;
|
|
159
|
+
// How many times we skipped an allocation sample because of the dynamic sampling rate mechanism
|
|
160
|
+
uint64_t allocation_skipped;
|
|
161
|
+
// Min/max/total wall-time spent on allocation sampling
|
|
162
|
+
uint64_t allocation_sampling_time_ns_min;
|
|
163
|
+
uint64_t allocation_sampling_time_ns_max;
|
|
164
|
+
uint64_t allocation_sampling_time_ns_total;
|
|
151
165
|
// How many times we saw allocations being done inside a sample
|
|
152
166
|
unsigned int allocations_during_sample;
|
|
153
167
|
} stats;
|
|
@@ -169,6 +183,7 @@ static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
|
|
|
169
183
|
static VALUE _native_sampling_loop(VALUE self, VALUE instance);
|
|
170
184
|
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE worker_thread);
|
|
171
185
|
static VALUE stop(VALUE self_instance, VALUE optional_exception);
|
|
186
|
+
static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception);
|
|
172
187
|
static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
|
|
173
188
|
static void *run_sampling_trigger_loop(void *state_ptr);
|
|
174
189
|
static void interrupt_sampling_trigger_loop(void *state_ptr);
|
|
@@ -191,15 +206,18 @@ static VALUE _native_simulate_sample_from_postponed_job(DDTRACE_UNUSED VALUE sel
|
|
|
191
206
|
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
192
207
|
static VALUE _native_is_sigprof_blocked_in_current_thread(DDTRACE_UNUSED VALUE self);
|
|
193
208
|
static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
209
|
+
static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
194
210
|
void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused);
|
|
195
211
|
static void grab_gvl_and_sample(void);
|
|
196
|
-
static void
|
|
212
|
+
static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state);
|
|
197
213
|
static void sleep_for(uint64_t time_ns);
|
|
198
214
|
static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self);
|
|
199
215
|
static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
|
|
200
216
|
static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state);
|
|
201
217
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
|
|
202
218
|
static VALUE rescued_sample_allocation(VALUE tracepoint_data);
|
|
219
|
+
static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error);
|
|
220
|
+
static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg);
|
|
203
221
|
|
|
204
222
|
// Note on sampler global state safety:
|
|
205
223
|
//
|
|
@@ -212,6 +230,11 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data);
|
|
|
212
230
|
static VALUE active_sampler_instance = Qnil;
|
|
213
231
|
static struct cpu_and_wall_time_worker_state *active_sampler_instance_state = NULL;
|
|
214
232
|
|
|
233
|
+
// See handle_sampling_signal for details on what this does
|
|
234
|
+
#ifdef NO_POSTPONED_TRIGGER
|
|
235
|
+
static void *gc_finalize_deferred_workaround;
|
|
236
|
+
#endif
|
|
237
|
+
|
|
215
238
|
// Used to implement CpuAndWallTimeWorker._native_allocation_count . To be able to use cheap thread-local variables
|
|
216
239
|
// (here with `__thread`, see https://gcc.gnu.org/onlinedocs/gcc/Thread-Local.html), this needs to be global.
|
|
217
240
|
//
|
|
@@ -230,6 +253,8 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
|
230
253
|
if (sample_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
|
|
231
254
|
rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
|
|
232
255
|
}
|
|
256
|
+
#else
|
|
257
|
+
gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround();
|
|
233
258
|
#endif
|
|
234
259
|
|
|
235
260
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
|
@@ -252,6 +277,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
|
252
277
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
|
|
253
278
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
|
|
254
279
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats", _native_stats, 1);
|
|
280
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats_reset_not_thread_safe", _native_stats_reset_not_thread_safe, 1);
|
|
255
281
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0);
|
|
256
282
|
rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
|
|
257
283
|
rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
|
|
@@ -263,6 +289,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
|
263
289
|
rb_define_singleton_method(testing_module, "_native_simulate_sample_from_postponed_job", _native_simulate_sample_from_postponed_job, 0);
|
|
264
290
|
rb_define_singleton_method(testing_module, "_native_is_sigprof_blocked_in_current_thread", _native_is_sigprof_blocked_in_current_thread, 0);
|
|
265
291
|
rb_define_singleton_method(testing_module, "_native_with_blocked_sigprof", _native_with_blocked_sigprof, 0);
|
|
292
|
+
rb_define_singleton_method(testing_module, "_native_delayed_error", _native_delayed_error, 2);
|
|
266
293
|
}
|
|
267
294
|
|
|
268
295
|
// This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_worker_state
|
|
@@ -292,7 +319,6 @@ static VALUE _native_new(VALUE klass) {
|
|
|
292
319
|
state->idle_sampling_helper_instance = Qnil;
|
|
293
320
|
state->owner_thread = Qnil;
|
|
294
321
|
dynamic_sampling_rate_init(&state->cpu_dynamic_sampling_rate);
|
|
295
|
-
discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation");
|
|
296
322
|
state->gc_tracepoint = Qnil;
|
|
297
323
|
state->object_allocation_tracepoint = Qnil;
|
|
298
324
|
|
|
@@ -302,7 +328,15 @@ static VALUE _native_new(VALUE klass) {
|
|
|
302
328
|
|
|
303
329
|
state->during_sample = false;
|
|
304
330
|
|
|
305
|
-
|
|
331
|
+
reset_stats_not_thread_safe(state);
|
|
332
|
+
|
|
333
|
+
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
334
|
+
if (now == 0) {
|
|
335
|
+
ruby_xfree(state);
|
|
336
|
+
rb_raise(rb_eRuntimeError, ERR_CLOCK_FAIL);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation", now);
|
|
306
340
|
|
|
307
341
|
return state->self_instance = TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
|
|
308
342
|
}
|
|
@@ -339,7 +373,8 @@ static VALUE _native_initialize(
|
|
|
339
373
|
// TODO: May be nice to offer customization here? Distribute available "overhead" margin with a bias towards one or the other
|
|
340
374
|
// sampler.
|
|
341
375
|
dynamic_sampling_rate_set_overhead_target_percentage(&state->cpu_dynamic_sampling_rate, total_overhead_target_percentage / 2);
|
|
342
|
-
|
|
376
|
+
long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
377
|
+
discrete_dynamic_sampler_set_overhead_target_percentage(&state->allocation_sampler, total_overhead_target_percentage / 2, now);
|
|
343
378
|
}
|
|
344
379
|
|
|
345
380
|
state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
|
|
@@ -368,6 +403,12 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
368
403
|
struct cpu_and_wall_time_worker_state *state;
|
|
369
404
|
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
370
405
|
|
|
406
|
+
// If we already got a delayed exception registered even before starting, raise before starting
|
|
407
|
+
if (state->failure_exception != Qnil) {
|
|
408
|
+
disable_tracepoints(state);
|
|
409
|
+
rb_exc_raise(state->failure_exception);
|
|
410
|
+
}
|
|
411
|
+
|
|
371
412
|
struct cpu_and_wall_time_worker_state *old_state = active_sampler_instance_state;
|
|
372
413
|
if (old_state != NULL) {
|
|
373
414
|
if (is_thread_alive(old_state->owner_thread)) {
|
|
@@ -393,7 +434,8 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
393
434
|
|
|
394
435
|
// Reset the dynamic sampling rate state, if any (reminder: the monotonic clock reference may change after a fork)
|
|
395
436
|
dynamic_sampling_rate_reset(&state->cpu_dynamic_sampling_rate);
|
|
396
|
-
|
|
437
|
+
long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
438
|
+
discrete_dynamic_sampler_reset(&state->allocation_sampler, now);
|
|
397
439
|
|
|
398
440
|
// This write to a global is thread-safe BECAUSE we're still holding on to the global VM lock at this point
|
|
399
441
|
active_sampler_instance_state = state;
|
|
@@ -455,15 +497,19 @@ static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE
|
|
|
455
497
|
return stop(self_instance, /* optional_exception: */ Qnil);
|
|
456
498
|
}
|
|
457
499
|
|
|
458
|
-
static
|
|
459
|
-
struct cpu_and_wall_time_worker_state *state;
|
|
460
|
-
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
461
|
-
|
|
500
|
+
static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception) {
|
|
462
501
|
atomic_store(&state->should_run, false);
|
|
463
502
|
state->failure_exception = optional_exception;
|
|
464
503
|
|
|
465
504
|
// Disable the tracepoints as soon as possible, so the VM doesn't keep on calling them
|
|
466
505
|
disable_tracepoints(state);
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
static VALUE stop(VALUE self_instance, VALUE optional_exception) {
|
|
509
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
510
|
+
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
511
|
+
|
|
512
|
+
stop_state(state, optional_exception);
|
|
467
513
|
|
|
468
514
|
return Qtrue;
|
|
469
515
|
}
|
|
@@ -503,7 +549,32 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
|
|
|
503
549
|
rb_postponed_job_trigger(sample_from_postponed_job_handle);
|
|
504
550
|
state->stats.postponed_job_success++; // Always succeeds
|
|
505
551
|
#else
|
|
506
|
-
|
|
552
|
+
|
|
553
|
+
// This is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
|
|
554
|
+
//
|
|
555
|
+
// TL;DR the `rb_postponed_job_register_one` API is not atomic (which is why it got replaced by `rb_postponed_job_trigger`)
|
|
556
|
+
// and in rare cases can cause VM crashes.
|
|
557
|
+
//
|
|
558
|
+
// Specifically, if we're interrupting `rb_postponed_job_flush` (the function that processes postponed jobs), the way
|
|
559
|
+
// that this function reads the jobs is not atomic, and can cause our call to
|
|
560
|
+
// `rb_postponed_job_register(function, arg)` to clobber an existing job that is getting dequeued.
|
|
561
|
+
// Clobbering an existing job is somewhat annoying, but the worst part is that it can happen that we clobber only
|
|
562
|
+
// the existing job's arguments.
|
|
563
|
+
// As surveyed in https://github.com/ruby/ruby/pull/8949#issuecomment-1821441370 clobbering the arguments turns out
|
|
564
|
+
// to not matter in many cases as usually `rb_postponed_job_register` calls in the VM and ecosystem ignore the argument.
|
|
565
|
+
//
|
|
566
|
+
// https://bugs.ruby-lang.org/issues/19991 is the exception: inside Ruby's `gc.c`, when dealing with object
|
|
567
|
+
// finalizers, Ruby calls `gc_finalize_deferred_register` which internally calls
|
|
568
|
+
// `rb_postponed_job_register_one(gc_finalize_deferred, objspace)`.
|
|
569
|
+
// Clobbering this call means that `gc_finalize_deferred` would get called with `NULL`, causing a segmentation fault.
|
|
570
|
+
//
|
|
571
|
+
// Note that this is quite rare: our signal needs to land at exactly the point where the VM has read the function
|
|
572
|
+
// to execute, but has yet to read the arguments. @ivoanjo: I could only reproduce it by manually changing the VM
|
|
573
|
+
// code to simulate this happening.
|
|
574
|
+
//
|
|
575
|
+
// Thus, our workaround is simple: we pass in objspace as our argument, just in case the clobbering happens.
|
|
576
|
+
// In the happy path, we never use this argument so it makes no difference. In the buggy path, we avoid crashing the VM.
|
|
577
|
+
int result = rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
|
|
507
578
|
|
|
508
579
|
// Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
|
|
509
580
|
// seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
|
|
@@ -607,11 +678,11 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
|
|
|
607
678
|
long wall_time_ns_before_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
608
679
|
|
|
609
680
|
if (state->dynamic_sampling_rate_enabled && !dynamic_sampling_rate_should_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_before_sample)) {
|
|
610
|
-
state->stats.
|
|
681
|
+
state->stats.cpu_skipped++;
|
|
611
682
|
return Qnil;
|
|
612
683
|
}
|
|
613
684
|
|
|
614
|
-
state->stats.
|
|
685
|
+
state->stats.cpu_sampled++;
|
|
615
686
|
|
|
616
687
|
VALUE profiler_overhead_stack_thread = state->owner_thread; // Used to attribute profiler overhead to a different stack
|
|
617
688
|
thread_context_collector_sample(state->thread_context_collector_instance, wall_time_ns_before_sample, profiler_overhead_stack_thread);
|
|
@@ -622,9 +693,9 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
|
|
|
622
693
|
// Guard against wall-time going backwards, see https://github.com/DataDog/dd-trace-rb/pull/2336 for discussion.
|
|
623
694
|
uint64_t sampling_time_ns = delta_ns < 0 ? 0 : delta_ns;
|
|
624
695
|
|
|
625
|
-
state->stats.
|
|
626
|
-
state->stats.
|
|
627
|
-
state->stats.
|
|
696
|
+
state->stats.cpu_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_min);
|
|
697
|
+
state->stats.cpu_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_max);
|
|
698
|
+
state->stats.cpu_sampling_time_ns_total += sampling_time_ns;
|
|
628
699
|
|
|
629
700
|
dynamic_sampling_rate_after_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_after_sample, sampling_time_ns);
|
|
630
701
|
|
|
@@ -823,7 +894,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance)
|
|
|
823
894
|
// Disable all tracepoints, so that there are no more attempts to mutate the profile
|
|
824
895
|
disable_tracepoints(state);
|
|
825
896
|
|
|
826
|
-
|
|
897
|
+
reset_stats_not_thread_safe(state);
|
|
827
898
|
|
|
828
899
|
// Remove all state from the `Collectors::ThreadState` and connected downstream components
|
|
829
900
|
rb_funcall(state->thread_context_collector_instance, rb_intern("reset_after_fork"), 0);
|
|
@@ -839,11 +910,27 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
|
839
910
|
struct cpu_and_wall_time_worker_state *state;
|
|
840
911
|
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
841
912
|
|
|
842
|
-
VALUE
|
|
843
|
-
VALUE
|
|
844
|
-
VALUE
|
|
845
|
-
VALUE
|
|
846
|
-
state->stats.
|
|
913
|
+
VALUE pretty_cpu_sampling_time_ns_min = state->stats.cpu_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_min);
|
|
914
|
+
VALUE pretty_cpu_sampling_time_ns_max = state->stats.cpu_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_max);
|
|
915
|
+
VALUE pretty_cpu_sampling_time_ns_total = state->stats.cpu_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_total);
|
|
916
|
+
VALUE pretty_cpu_sampling_time_ns_avg =
|
|
917
|
+
state->stats.cpu_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampling_time_ns_total) / state->stats.cpu_sampled);
|
|
918
|
+
|
|
919
|
+
VALUE pretty_allocation_sampling_time_ns_min = state->stats.allocation_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_min);
|
|
920
|
+
VALUE pretty_allocation_sampling_time_ns_max = state->stats.allocation_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_max);
|
|
921
|
+
VALUE pretty_allocation_sampling_time_ns_total = state->stats.allocation_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_total);
|
|
922
|
+
VALUE pretty_allocation_sampling_time_ns_avg =
|
|
923
|
+
state->stats.allocation_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampling_time_ns_total) / state->stats.allocation_sampled);
|
|
924
|
+
|
|
925
|
+
unsigned long total_cpu_samples_attempted = state->stats.cpu_sampled + state->stats.cpu_skipped;
|
|
926
|
+
VALUE effective_cpu_sample_rate =
|
|
927
|
+
total_cpu_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampled) / total_cpu_samples_attempted);
|
|
928
|
+
unsigned long total_allocation_samples_attempted = state->stats.allocation_sampled + state->stats.allocation_skipped;
|
|
929
|
+
VALUE effective_allocation_sample_rate =
|
|
930
|
+
total_allocation_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampled) / total_allocation_samples_attempted);
|
|
931
|
+
|
|
932
|
+
VALUE allocation_sampler_snapshot = state->allocation_profiling_enabled && state->dynamic_sampling_rate_enabled ?
|
|
933
|
+
discrete_dynamic_sampler_state_snapshot(&state->allocation_sampler) : Qnil;
|
|
847
934
|
|
|
848
935
|
VALUE stats_as_hash = rb_hash_new();
|
|
849
936
|
VALUE arguments[] = {
|
|
@@ -852,22 +939,42 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
|
852
939
|
ID2SYM(rb_intern("simulated_signal_delivery")), /* => */ UINT2NUM(state->stats.simulated_signal_delivery),
|
|
853
940
|
ID2SYM(rb_intern("signal_handler_enqueued_sample")), /* => */ UINT2NUM(state->stats.signal_handler_enqueued_sample),
|
|
854
941
|
ID2SYM(rb_intern("signal_handler_wrong_thread")), /* => */ UINT2NUM(state->stats.signal_handler_wrong_thread),
|
|
855
|
-
ID2SYM(rb_intern("sampled")), /* => */ UINT2NUM(state->stats.sampled),
|
|
856
|
-
ID2SYM(rb_intern("skipped_sample_because_of_dynamic_sampling_rate")), /* => */ UINT2NUM(state->stats.skipped_sample_because_of_dynamic_sampling_rate),
|
|
857
942
|
ID2SYM(rb_intern("postponed_job_skipped_already_existed")), /* => */ UINT2NUM(state->stats.postponed_job_skipped_already_existed),
|
|
858
943
|
ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
|
|
859
944
|
ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
|
|
860
945
|
ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
ID2SYM(rb_intern("
|
|
864
|
-
ID2SYM(rb_intern("
|
|
865
|
-
ID2SYM(rb_intern("
|
|
946
|
+
|
|
947
|
+
// CPU Stats
|
|
948
|
+
ID2SYM(rb_intern("cpu_sampled")), /* => */ UINT2NUM(state->stats.cpu_sampled),
|
|
949
|
+
ID2SYM(rb_intern("cpu_skipped")), /* => */ UINT2NUM(state->stats.cpu_skipped),
|
|
950
|
+
ID2SYM(rb_intern("cpu_effective_sample_rate")), /* => */ effective_cpu_sample_rate,
|
|
951
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_min")), /* => */ pretty_cpu_sampling_time_ns_min,
|
|
952
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_max")), /* => */ pretty_cpu_sampling_time_ns_max,
|
|
953
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_total")), /* => */ pretty_cpu_sampling_time_ns_total,
|
|
954
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_avg")), /* => */ pretty_cpu_sampling_time_ns_avg,
|
|
955
|
+
|
|
956
|
+
// Allocation stats
|
|
957
|
+
ID2SYM(rb_intern("allocation_sampled")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_sampled) : Qnil,
|
|
958
|
+
ID2SYM(rb_intern("allocation_skipped")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_skipped) : Qnil,
|
|
959
|
+
ID2SYM(rb_intern("allocation_effective_sample_rate")), /* => */ effective_allocation_sample_rate,
|
|
960
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_min")), /* => */ pretty_allocation_sampling_time_ns_min,
|
|
961
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_max")), /* => */ pretty_allocation_sampling_time_ns_max,
|
|
962
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_total")), /* => */ pretty_allocation_sampling_time_ns_total,
|
|
963
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ pretty_allocation_sampling_time_ns_avg,
|
|
964
|
+
ID2SYM(rb_intern("allocation_sampler_snapshot")), /* => */ allocation_sampler_snapshot,
|
|
965
|
+
ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
|
|
866
966
|
};
|
|
867
967
|
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
|
|
868
968
|
return stats_as_hash;
|
|
869
969
|
}
|
|
870
970
|
|
|
971
|
+
static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
972
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
973
|
+
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
974
|
+
reset_stats_not_thread_safe(state);
|
|
975
|
+
return Qnil;
|
|
976
|
+
}
|
|
977
|
+
|
|
871
978
|
void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
|
|
872
979
|
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
|
873
980
|
|
|
@@ -885,9 +992,17 @@ void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
|
|
|
885
992
|
|
|
886
993
|
// Runs simulate_sampling_signal_delivery through rb_thread_call_with_gvl, i.e. with the GVL held.
// No argument is passed to the callback (it ignores its parameter).
static void grab_gvl_and_sample(void) {
  rb_thread_call_with_gvl(simulate_sampling_signal_delivery, NULL);
}
|
|
887
994
|
|
|
888
|
-
static void
|
|
889
|
-
|
|
890
|
-
|
|
995
|
+
static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state) {
|
|
996
|
+
// NOTE: This is not really thread safe so ongoing sampling operations that are concurrent with a reset can have their stats:
|
|
997
|
+
// * Lost (writes after stats retrieval but before reset).
|
|
998
|
+
// * Included in the previous stats window (writes before stats retrieval and reset).
|
|
999
|
+
// * Included in the following stats window (writes after stats retrieval and reset).
|
|
1000
|
+
// Given the expected infrequency of resetting (~once per 60s profile) and the auxiliary/non-critical nature of these stats
|
|
1001
|
+
// this momentary loss of accuracy is deemed acceptable to keep overhead to a minimum.
|
|
1002
|
+
state->stats = (struct stats) {
|
|
1003
|
+
.cpu_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
|
|
1004
|
+
.allocation_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
|
|
1005
|
+
};
|
|
891
1006
|
}
|
|
892
1007
|
|
|
893
1008
|
static void sleep_for(uint64_t time_ns) {
|
|
@@ -937,8 +1052,16 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
|
|
|
937
1052
|
return;
|
|
938
1053
|
}
|
|
939
1054
|
|
|
940
|
-
if (state->dynamic_sampling_rate_enabled
|
|
941
|
-
|
|
1055
|
+
if (state->dynamic_sampling_rate_enabled) {
|
|
1056
|
+
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
1057
|
+
if (now == 0) {
|
|
1058
|
+
delayed_error(state, ERR_CLOCK_FAIL);
|
|
1059
|
+
return;
|
|
1060
|
+
}
|
|
1061
|
+
if (!discrete_dynamic_sampler_should_sample(&state->allocation_sampler, now)) {
|
|
1062
|
+
state->stats.allocation_skipped++;
|
|
1063
|
+
return;
|
|
1064
|
+
}
|
|
942
1065
|
}
|
|
943
1066
|
|
|
944
1067
|
// @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
|
|
@@ -950,14 +1073,32 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
|
|
|
950
1073
|
// Rescue against any exceptions that happen during sampling
|
|
951
1074
|
safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
|
|
952
1075
|
|
|
953
|
-
|
|
1076
|
+
if (state->dynamic_sampling_rate_enabled) {
|
|
1077
|
+
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
1078
|
+
if (now == 0) {
|
|
1079
|
+
delayed_error(state, ERR_CLOCK_FAIL);
|
|
1080
|
+
// NOTE: Not short-circuiting here to make sure cleanup happens
|
|
1081
|
+
}
|
|
1082
|
+
uint64_t sampling_time_ns = discrete_dynamic_sampler_after_sample(&state->allocation_sampler, now);
|
|
1083
|
+
// NOTE: To keep things lean when dynamic sampling rate is disabled we skip clock interactions which is
|
|
1084
|
+
// why we're fine with having this inside this conditional.
|
|
1085
|
+
state->stats.allocation_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_min);
|
|
1086
|
+
state->stats.allocation_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_max);
|
|
1087
|
+
state->stats.allocation_sampling_time_ns_total += sampling_time_ns;
|
|
1088
|
+
}
|
|
1089
|
+
|
|
1090
|
+
state->stats.allocation_sampled++;
|
|
954
1091
|
|
|
955
1092
|
state->during_sample = false;
|
|
956
1093
|
}
|
|
957
1094
|
|
|
958
1095
|
static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) {
|
|
959
|
-
|
|
960
|
-
|
|
1096
|
+
if (state->gc_tracepoint != Qnil) {
|
|
1097
|
+
rb_tracepoint_disable(state->gc_tracepoint);
|
|
1098
|
+
}
|
|
1099
|
+
if (state->object_allocation_tracepoint != Qnil) {
|
|
1100
|
+
rb_tracepoint_disable(state->object_allocation_tracepoint);
|
|
1101
|
+
}
|
|
961
1102
|
}
|
|
962
1103
|
|
|
963
1104
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
|
|
@@ -994,3 +1135,19 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
|
|
|
994
1135
|
// Return a dummy VALUE because we're called from rb_rescue2 which requires it
|
|
995
1136
|
return Qnil;
|
|
996
1137
|
}
|
|
1138
|
+
|
|
1139
|
+
static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error) {
|
|
1140
|
+
// If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop.
|
|
1141
|
+
stop_state(state, rb_exc_new_cstr(rb_eRuntimeError, error));
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
// Ruby-facing wrapper around delayed_error.
//
// `error_msg` is type-checked as T_STRING up front; its C string contents are then passed, together
// with the worker state unwrapped from `instance`, to delayed_error. `self` is unused.
// Always returns nil.
static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) {
  ENFORCE_TYPE(error_msg, T_STRING);

  struct cpu_and_wall_time_worker_state *worker_state;
  TypedData_Get_Struct(
    instance,
    struct cpu_and_wall_time_worker_state,
    &cpu_and_wall_time_worker_typed_data,
    worker_state
  );

  const char *message = rb_string_value_cstr(&error_msg);
  delayed_error(worker_state, message);

  return Qnil;
}
|