ddtrace 1.17.0 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +85 -2
- data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +3 -0
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
- data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
- data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
- data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
- data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
- data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
- data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
- data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +167 -125
- data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
- data/ext/ddtrace_profiling_native_extension/extconf.rb +44 -10
- data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
- data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
- data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
- data/ext/ddtrace_profiling_native_extension/http_transport.c +5 -2
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +83 -18
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +6 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +2 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
- data/ext/ddtrace_profiling_native_extension/stack_recorder.c +330 -13
- data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
- data/lib/datadog/appsec/component.rb +4 -1
- data/lib/datadog/appsec/configuration/settings.rb +4 -0
- data/lib/datadog/appsec/contrib/devise/patcher/registration_controller_patch.rb +2 -0
- data/lib/datadog/appsec/processor/rule_loader.rb +60 -0
- data/lib/datadog/appsec/remote.rb +12 -9
- data/lib/datadog/core/configuration/settings.rb +139 -22
- data/lib/datadog/core/configuration.rb +4 -0
- data/lib/datadog/core/remote/worker.rb +1 -0
- data/lib/datadog/core/telemetry/collector.rb +10 -0
- data/lib/datadog/core/telemetry/event.rb +2 -1
- data/lib/datadog/core/telemetry/ext.rb +3 -0
- data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
- data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
- data/lib/datadog/core/workers/async.rb +1 -0
- data/lib/datadog/kit/enable_core_dumps.rb +5 -6
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +7 -11
- data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
- data/lib/datadog/profiling/component.rb +210 -18
- data/lib/datadog/profiling/scheduler.rb +4 -6
- data/lib/datadog/profiling/stack_recorder.rb +13 -2
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +24 -0
- data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
- data/lib/datadog/tracing/workers.rb +1 -0
- data/lib/ddtrace/version.rb +1 -1
- metadata +11 -6
--- a/data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c
+++ b/data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c
@@ -3,6 +3,7 @@
 #include "collectors_thread_context.h"
 #include "clock_id.h"
 #include "collectors_stack.h"
+#include "collectors_gc_profiling_helper.h"
 #include "helpers.h"
 #include "libdatadog_helpers.h"
 #include "private_vm_api_access.h"
@@ -37,24 +38,29 @@
 // When `thread_context_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
 // context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
 //
-// While
-//
+// While `cpu_time_at_gc_start_ns` is set, regular samples (if any) do not account for cpu-time any time that passes
+// after this timestamp. The idea is that this cpu-time will be blamed separately on GC, and not on the user thread.
+// Wall-time accounting is not affected by this (e.g. we still record 60 seconds every 60 seconds).
 //
-// (Regular samples can still account for the time between the previous sample and the start of GC.)
+// (Regular samples can still account for the cpu-time between the previous sample and the start of GC.)
 //
-// When `thread_context_collector_on_gc_finish` gets called, the
-//
+// When `thread_context_collector_on_gc_finish` gets called, the cpu-time and wall-time spent during GC gets recorded
+// into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns`
+// of the thread that did GC also gets adjusted to avoid double-accounting.)
 //
-// Finally, when `thread_context_collector_sample_after_gc` gets called,
+// Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded with a stack having
+// a single placeholder `Garbage Collection` frame. This sample gets
+// assigned the cpu-time and wall-time that was recorded between calls to `on_gc_start` and `on_gc_finish`, as well
+// as metadata for the last GC.
 //
-//
-//
-//
-//
-//
-//
-//
-//
+// Note that the Ruby GC does not usually do all of the GC work in one go. Instead, it breaks it up into smaller steps
+// so that the application can keep doing user work in between GC steps.
+// The `on_gc_start` / `on_gc_finish` will trigger each time the VM executes these smaller steps, and on a benchmark
+// that executes `Object.new` in a loop, I measured more than 50k of this steps per second (!!).
+// Creating these many events for every GC step is a lot of overhead, so instead `on_gc_finish` coalesces time
+// spent in GC and only flushes it at most every 10 ms/every complete GC collection. This reduces the amount of
+// individual GC events we need to record. We use the latest GC metadata for this event, reflecting the last GC that
+// happened in the coalesced period.
 //
 // In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
 // discovered that we needed to factor the sampling work away from `thread_context_collector_on_gc_finish` and into a
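Note: to make the coalescing policy above concrete, here is a small standalone C sketch (illustrative only, with made-up numbers; the real logic lives in `thread_context_collector_on_gc_finish` further down this diff). It models four GC steps whose times fold into a single flushed event:

    #include <stdbool.h>
    #include <stdio.h>

    #define TIME_BETWEEN_GC_EVENTS_NS (10L * 1000 * 1000) // 10 ms, as in the real code

    static long accumulated_wall_time_ns = 0;
    static long wall_time_at_last_flushed_gc_event_ns = 0;

    // Models on_gc_finish's return value: flush when a major GC completed, or when
    // more than 10ms passed since the last flushed GC event.
    static bool on_gc_step(long step_wall_time_ns, long now_ns, bool finished_major_gc) {
      accumulated_wall_time_ns += step_wall_time_ns;
      return finished_major_gc ||
        (now_ns - wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
    }

    int main(void) {
      long now_ns = 0;
      for (int step = 1; step <= 4; step++) {
        now_ns += 2L * 1000 * 1000; // pretend GC steps happen 2ms apart
        if (on_gc_step(/* step_wall_time_ns: */ 200 * 1000, now_ns, /* major GC done? */ step == 4)) {
          printf("step %d: flush one GC event covering %ld ns\n", step, accumulated_wall_time_ns);
          wall_time_at_last_flushed_gc_event_ns = now_ns;
          accumulated_wall_time_ns = 0;
        }
      }
      return 0;
    }

Steps 1-3 only accumulate; step 4 finishes a major GC, so a single event covering all four steps gets flushed.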
@@ -68,6 +74,7 @@
 #define IS_WALL_TIME true
 #define IS_NOT_WALL_TIME false
 #define MISSING_TRACER_CONTEXT_KEY 0
+#define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)

 static ID at_active_span_id; // id of :@active_span in Ruby
 static ID at_active_trace_id; // id of :@active_trace in Ruby
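Note: `MILLIS_AS_NS` comes from the extension's shared headers and is not part of this diff; assuming the conventional definition, the new constant evaluates to 10,000,000 ns:

    // Assumed definition (not shown in this diff): convert milliseconds to nanoseconds.
    #define MILLIS_AS_NS(value) ((value) * 1000L * 1000L)
    // TIME_BETWEEN_GC_EVENTS_NS == MILLIS_AS_NS(10) == 10 * 1000 * 1000 == 10,000,000 ns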
@@ -114,6 +121,14 @@ struct thread_context_collector_state {
     // See thread_context_collector_on_gc_start for details
     unsigned int gc_samples_missed_due_to_missing_context;
   } stats;
+
+  struct {
+    unsigned long accumulated_cpu_time_ns;
+    unsigned long accumulated_wall_time_ns;
+
+    long wall_time_at_previous_gc_ns; // Will be INVALID_TIME unless there's accumulated time above
+    long wall_time_at_last_flushed_gc_event_ns; // Starts at 0 and then will always be valid
+  } gc_tracking;
 };

 // Tracks per-thread state
@@ -127,15 +142,10 @@ struct per_thread_context {
   long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized

   struct {
-    // Both of these fields are set by on_gc_start and kept until
+    // Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
     // Outside of this window, they will be INVALID_TIME.
     long cpu_time_at_start_ns;
     long wall_time_at_start_ns;
-
-    // Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
-    // Outside of this window, they will be INVALID_TIME.
-    long cpu_time_at_finish_ns;
-    long wall_time_at_finish_ns;
   } gc_tracking;
 };

@@ -180,7 +190,6 @@ static void trigger_sample_for_thread(
   VALUE stack_from_thread,
   struct per_thread_context *thread_context,
   sample_values values,
-  sample_type type,
   long current_monotonic_wall_time_ns,
   ddog_CharSlice *ruby_vm_type,
   ddog_CharSlice *class_name
@@ -193,6 +202,7 @@ static VALUE _native_inspect(VALUE self, VALUE collector_instance);
 static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
 static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
 static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state);
+static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state);
 static void remove_context_for_dead_threads(struct thread_context_collector_state *state);
 static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
 static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
@@ -200,13 +210,14 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
 static long cpu_time_now_ns(struct per_thread_context *thread_context);
 static long thread_id_for(VALUE thread);
 static VALUE _native_stats(VALUE self, VALUE collector_instance);
+static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
 static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
 static bool should_collect_resource(VALUE root_span_type);
 static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
 static VALUE thread_list(struct thread_context_collector_state *state);
 static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
 static VALUE _native_new_empty_thread(VALUE self);
-ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
+static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);

 void collectors_thread_context_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -235,6 +246,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
   rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
   rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
   rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
+  rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
   rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);

   at_active_span_id = rb_intern_const("@active_span");
@@ -243,6 +255,8 @@ void collectors_thread_context_init(VALUE profiling_module) {
   at_resource_id = rb_intern_const("@resource");
   at_root_span_id = rb_intern_const("@root_span");
   at_type_id = rb_intern_const("@type");
+
+  gc_profiling_init();
 }

 // This structure is used to define a Ruby object that stores a pointer to a struct thread_context_collector_state
@@ -320,6 +334,8 @@ static VALUE _native_new(VALUE klass) {
   state->allocation_type_enabled = true;
   state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
   state->main_thread = rb_thread_main();
+  state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
+  state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;

   return TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
 }
@@ -465,7 +481,11 @@ void update_metrics_and_sample(
   long wall_time_elapsed_ns = update_time_since_previous_sample(
     &thread_context->wall_time_at_previous_sample_ns,
     current_monotonic_wall_time_ns,
-
+    // We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
+    // accounting to change during GC.
+    // E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing
+    // GC or not.
+    INVALID_TIME,
     IS_WALL_TIME
   );

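Note: the body of `update_time_since_previous_sample` is not part of this diff, so the following is only a plausible sketch of its contract that makes the `INVALID_TIME` argument meaningful, not the gem's actual implementation:

    // Sketch: returns time elapsed since the previous sample and advances the cursor.
    // When gc_start_time_ns is a valid timestamp, the elapsed time is truncated at the
    // start of GC (used for cpu-time, so GC time can be blamed on GC separately);
    // passing INVALID_TIME, as the wall-time call above does, disables that truncation.
    static long update_time_since_previous_sample_sketch(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns) {
      long end_time_ns = (gc_start_time_ns != INVALID_TIME) ? gc_start_time_ns : current_time_ns;
      long elapsed_ns = end_time_ns - *time_at_previous_sample_ns;
      *time_at_previous_sample_ns = current_time_ns;
      return elapsed_ns > 0 ? elapsed_ns : 0;
    }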
@@ -475,7 +495,6 @@ void update_metrics_and_sample(
     stack_from_thread,
     thread_context,
     (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
-    SAMPLE_REGULAR,
     current_monotonic_wall_time_ns,
     NULL,
     NULL
@@ -484,7 +503,7 @@ void update_metrics_and_sample(

 // This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
 // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
-// create
+// create an event including the cpu/wall time spent in garbage collector work.
 //
 // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
 // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
@@ -509,27 +528,14 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
     return;
   }

-  //
-  //
-  // When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
-  // called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
-  // before we can actually run that method.
-  //
-  // We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
-  // `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
-  // then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
-  // there was a single, longer GC period.
-  if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
-      thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
-
-  // Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
+  // Here we record the wall-time first and in on_gc_finish we record it second to try to avoid having wall-time be slightly < cpu-time
   thread_context->gc_tracking.wall_time_at_start_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
   thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
 }

 // This function gets called when Ruby has finished running the Garbage Collector on the current thread.
-// It
-// create
+// It records the cpu/wall-time observed during GC, which will be used to later
+// create an event including the cpu/wall time spent from the start of garbage collector work until now.
 //
 // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
 // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
@@ -537,9 +543,9 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
 //
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
-void thread_context_collector_on_gc_finish(VALUE self_instance) {
+bool thread_context_collector_on_gc_finish(VALUE self_instance) {
   struct thread_context_collector_state *state;
-  if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
+  if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
   // This should never fail the the above check passes
   TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

@@ -547,29 +553,67 @@ void thread_context_collector_on_gc_finish(VALUE self_instance) {

   // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
   // how often this happens -- see on_gc_start.
-  if (thread_context == NULL) return;
+  if (thread_context == NULL) return false;
+
+  long cpu_time_at_start_ns = thread_context->gc_tracking.cpu_time_at_start_ns;
+  long wall_time_at_start_ns = thread_context->gc_tracking.wall_time_at_start_ns;

-  if (
-      thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) {
+  if (cpu_time_at_start_ns == INVALID_TIME && wall_time_at_start_ns == INVALID_TIME) {
     // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
     // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
     // do nothing.
-    return;
+    return false;
   }

-  //
-  thread_context->gc_tracking.
-  thread_context->gc_tracking.
+  // Mark thread as no longer in GC
+  thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
+  thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+
+  // Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time
+  long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
+  long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+
+  // If our end timestamp is not OK, we bail out
+  if (wall_time_at_finish_ns == 0) return false;
+
+  long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - cpu_time_at_start_ns;
+  long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - wall_time_at_start_ns;
+
+  // Wall-time can go backwards if the system clock gets changed (and we observed spurious jumps back on macOS as well)
+  // so let's ensure we don't get negative values for time deltas.
+  gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0);
+  gc_wall_time_elapsed_ns = long_max_of(gc_wall_time_elapsed_ns, 0);
+
+  if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
+    state->gc_tracking.accumulated_cpu_time_ns = 0;
+    state->gc_tracking.accumulated_wall_time_ns = 0;
+  }
+
+  state->gc_tracking.accumulated_cpu_time_ns += gc_cpu_time_elapsed_ns;
+  state->gc_tracking.accumulated_wall_time_ns += gc_wall_time_elapsed_ns;
+  state->gc_tracking.wall_time_at_previous_gc_ns = wall_time_at_finish_ns;
+
+  // Update cpu-time accounting so it doesn't include the cpu-time spent in GC during the next sample
+  // We don't update the wall-time because we don't subtract the wall-time spent in GC (see call to
+  // `update_time_since_previous_sample` for wall-time in `update_metrics_and_sample`).
+  if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
+    thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
+  }
+
+  // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
+  // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
+  // samples first.
+  bool finished_major_gc = gc_profiling_has_major_gc_finished();
+  bool over_flush_time_treshold =
+    (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
+
+  return finished_major_gc || over_flush_time_treshold;
 }

-// This function gets called
+// This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
 // It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
 // GC-related tracking.
 //
-// Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
-// and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
-// set on their context.
-//
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
 // Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
|
|
578
622
|
struct thread_context_collector_state *state;
|
579
623
|
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
580
624
|
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
const long thread_count = RARRAY_LEN(threads);
|
585
|
-
for (long i = 0; i < thread_count; i++) {
|
586
|
-
VALUE thread = RARRAY_AREF(threads, i);
|
587
|
-
struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
|
625
|
+
if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
|
626
|
+
rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
|
627
|
+
}
|
588
628
|
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
|
593
|
-
thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
|
594
|
-
) continue; // Ignore threads with no/incomplete garbage collection data
|
595
|
-
|
596
|
-
sampled_any_thread = true;
|
597
|
-
|
598
|
-
long gc_cpu_time_elapsed_ns =
|
599
|
-
thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
|
600
|
-
long gc_wall_time_elapsed_ns =
|
601
|
-
thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
|
602
|
-
|
603
|
-
// We don't expect non-wall time to go backwards, so let's flag this as a bug
|
604
|
-
if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
|
605
|
-
// Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
|
606
|
-
// was a bug.
|
607
|
-
// @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
|
608
|
-
// https://github.com/DataDog/dd-trace-rb/pull/2336.
|
609
|
-
if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
|
610
|
-
|
611
|
-
if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
|
612
|
-
// Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
|
613
|
-
// come up with a crazy value for the frame
|
614
|
-
rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
|
615
|
-
}
|
629
|
+
int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
|
630
|
+
ddog_prof_Label labels[max_labels_needed_for_gc];
|
631
|
+
uint8_t label_pos = gc_profiling_set_metadata(labels, max_labels_needed_for_gc);
|
616
632
|
|
617
|
-
|
618
|
-
state,
|
619
|
-
/* thread: */ thread,
|
620
|
-
/* stack_from_thread: */ thread,
|
621
|
-
thread_context,
|
622
|
-
(sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
|
623
|
-
SAMPLE_IN_GC,
|
624
|
-
INVALID_TIME, // For now we're not collecting timestamps for these events
|
625
|
-
NULL,
|
626
|
-
NULL
|
627
|
-
);
|
633
|
+
ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
|
628
634
|
|
629
|
-
|
630
|
-
|
631
|
-
thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
|
632
|
-
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
|
633
|
-
thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
|
635
|
+
// The end_timestamp_ns is treated specially by libdatadog and that's why it's not added as a ddog_prof_Label
|
636
|
+
int64_t end_timestamp_ns = 0;
|
634
637
|
|
635
|
-
|
636
|
-
|
637
|
-
thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
|
638
|
-
}
|
639
|
-
if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
|
640
|
-
thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
|
641
|
-
}
|
638
|
+
if (state->timeline_enabled) {
|
639
|
+
end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns);
|
642
640
|
}
|
643
641
|
|
644
|
-
|
642
|
+
record_placeholder_stack(
|
643
|
+
state->sampling_buffer,
|
644
|
+
state->recorder_instance,
|
645
|
+
(sample_values) {
|
646
|
+
// This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
|
647
|
+
// timeline duration.
|
648
|
+
// This is done to enable two use-cases:
|
649
|
+
// * regular cpu/wall-time makes this event show up as a regular stack in the flamegraph
|
650
|
+
// * the timeline duration is used when the event shows up in the timeline
|
651
|
+
.cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns,
|
652
|
+
.cpu_or_wall_samples = 1,
|
653
|
+
.wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
|
654
|
+
.timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
|
655
|
+
},
|
656
|
+
(sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns},
|
657
|
+
DDOG_CHARSLICE_C("Garbage Collection")
|
658
|
+
);
|
659
|
+
|
660
|
+
state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns;
|
661
|
+
state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
|
662
|
+
|
663
|
+
state->stats.gc_samples++;
|
645
664
|
|
646
665
|
// Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
|
647
666
|
return Qnil;
|
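Note: `monotonic_to_system_epoch_ns` is defined in the extension's time helpers, outside this diff. Conceptually it maps a past monotonic-clock reading onto the unix epoch using the cached `delta_to_epoch_ns` visible in `_native_inspect` below; a rough sketch of that idea, not the actual implementation:

    // Conceptual sketch only: the converter state caches the delta between the
    // system (epoch) clock and the monotonic clock, so a monotonic timestamp
    // captured earlier can be translated without re-reading both clocks.
    static int64_t monotonic_to_system_epoch_ns_sketch(int64_t delta_to_epoch_ns, long monotonic_wall_time_ns) {
      return delta_to_epoch_ns + monotonic_wall_time_ns;
    }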
@@ -653,7 +672,6 @@ static void trigger_sample_for_thread(
   VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
   struct per_thread_context *thread_context,
   sample_values values,
-  sample_type type,
   long current_monotonic_wall_time_ns,
   // These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
   ddog_CharSlice *ruby_vm_type,
@@ -776,8 +794,7 @@
     state->sampling_buffer,
     state->recorder_instance,
     values,
-    (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns},
-    type
+    (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
   );
 }

@@ -874,9 +891,7 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_context

   // These will only be used during a GC operation
   thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
-  thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
   thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
-  thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
 }

 static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -901,6 +916,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance)
     state->time_converter_state.delta_to_epoch_ns
   ));
   rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
+  rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));

   return result;
 }
@@ -927,9 +943,7 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context
     ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),

     ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
-    ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
     ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
-    ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
   };
   for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);

@@ -947,6 +961,19 @@ static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state) {
   return stats_as_hash;
 }

+static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state) {
+  // Update this when modifying state struct (gc_tracking inner struct)
+  VALUE result = rb_hash_new();
+  VALUE arguments[] = {
+    ID2SYM(rb_intern("accumulated_cpu_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_cpu_time_ns),
+    ID2SYM(rb_intern("accumulated_wall_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_wall_time_ns),
+    ID2SYM(rb_intern("wall_time_at_previous_gc_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_previous_gc_ns),
+    ID2SYM(rb_intern("wall_time_at_last_flushed_gc_event_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_last_flushed_gc_event_ns),
+  };
+  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(result, arguments[i], arguments[i+1]);
+  return result;
+}
+
 static void remove_context_for_dead_threads(struct thread_context_collector_state *state) {
   st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
 }
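Note: `VALUE_COUNT` comes from the extension's helpers header (not in this diff); assuming the usual array-length macro, the loop above consumes `arguments` as alternating key/value pairs:

    // Assumed definition: element count of a stack-allocated VALUE array.
    #define VALUE_COUNT(array) (sizeof(array) / sizeof(VALUE))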
@@ -1049,8 +1076,6 @@ VALUE enforce_thread_context_collector_instance(VALUE object) {

 // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
 // It SHOULD NOT be used for other purposes.
-//
-// Returns the whole contents of the per_thread_context structs being tracked.
 static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
   struct thread_context_collector_state *state;
   TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
@@ -1058,6 +1083,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance)
   return stats_as_ruby_hash(state);
 }

+// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
+// It SHOULD NOT be used for other purposes.
+static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
+  struct thread_context_collector_state *state;
+  TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+  return gc_tracking_as_ruby_hash(state);
+}
+
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
   if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
@@ -1150,6 +1184,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object) {
   // Since this is stack allocated, be careful about moving it
   ddog_CharSlice class_name;
   ddog_CharSlice *optional_class_name = NULL;
+  char imemo_type[100];

   if (state->allocation_type_enabled) {
     optional_class_name = &class_name;
@@ -1197,19 +1232,26 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object) {
         class_name = ruby_value_type_to_class_name(type);
       }
     } else if (type == RUBY_T_IMEMO) {
-
+      const char *imemo_string = imemo_kind(new_object);
+      if (imemo_string != NULL) {
+        snprintf(imemo_type, 100, "(VM Internal, T_IMEMO, %s)", imemo_string);
+        class_name = (ddog_CharSlice) {.ptr = imemo_type, .len = strlen(imemo_type)};
+      } else { // Ruby < 3
+        class_name = DDOG_CHARSLICE_C("(VM Internal, T_IMEMO)");
+      }
     } else {
       class_name = ruby_vm_type; // For other weird internal things we just use the VM type
     }
   }

+  track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);
+
   trigger_sample_for_thread(
     state,
     /* thread: */ current_thread,
     /* stack_from_thread: */ current_thread,
     get_or_create_context_for(current_thread, state),
     (sample_values) {.alloc_samples = sample_weight},
-    SAMPLE_REGULAR,
     INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
     &ruby_vm_type,
     optional_class_name
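Note: the reason `imemo_type` is declared near the top of the function is that the resulting `ddog_CharSlice` only borrows the buffer, so the buffer must outlive the sample call. A standalone illustration of the format-then-wrap pattern (the slice type here is a stand-in for `ddog_CharSlice`; "ment" is one of Ruby's imemo kind names):

    #include <stdio.h>
    #include <string.h>

    // Stand-in for ddog_CharSlice: a borrowed pointer + length, no copy made.
    typedef struct { const char *ptr; size_t len; } char_slice;

    int main(void) {
      char imemo_type[100]; // must stay alive for as long as the slice is used
      snprintf(imemo_type, sizeof(imemo_type), "(VM Internal, T_IMEMO, %s)", "ment");
      char_slice class_name = {.ptr = imemo_type, .len = strlen(imemo_type)};
      printf("%.*s\n", (int) class_name.len, class_name.ptr); // (VM Internal, T_IMEMO, ment)
      return 0;
    }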
@@ -1232,7 +1274,7 @@ static VALUE _native_new_empty_thread(DDTRACE_UNUSED VALUE self) {
   return rb_thread_create(new_empty_thread_inner, NULL);
 }

-ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
+static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
   switch (type) {
     case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("Object");
     case(RUBY_T_CLASS  ): return DDOG_CHARSLICE_C("Class");
--- a/data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h
+++ b/data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h
@@ -1,6 +1,7 @@
 #pragma once

 #include <ruby.h>
+#include <stdbool.h>

 void thread_context_collector_sample(
   VALUE self_instance,
@@ -10,5 +11,5 @@ void thread_context_collector_sample(
 void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object);
 VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
 void thread_context_collector_on_gc_start(VALUE self_instance);
-void thread_context_collector_on_gc_finish(VALUE self_instance);
+bool thread_context_collector_on_gc_finish(VALUE self_instance);
 VALUE enforce_thread_context_collector_instance(VALUE object);
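Note: the new `bool` return value exists so the caller can decide when to schedule the (allocation-permitted) flush. The actual wiring lives in the CpuAndWallTimeWorker, outside this diff; below is a hypothetical sketch of how a GC hook could use this API via Ruby's postponed-job mechanism, which runs callbacks at a safe point after GC:

    #include <ruby.h>
    #include <ruby/debug.h>
    #include "collectors_thread_context.h"

    static VALUE collector_instance; // assumed to be initialized and GC-marked elsewhere

    // Runs outside of GC, so thread_context_collector_sample_after_gc may allocate/raise here.
    static void after_gc_from_postponed_job(void *_unused) {
      thread_context_collector_sample_after_gc(collector_instance);
    }

    // Hypothetical hook invoked when a GC step finishes (e.g. from an internal GC-exit event).
    static void on_gc_exit_hook(void) {
      bool should_flush = thread_context_collector_on_gc_finish(collector_instance);
      if (should_flush) rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
    }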