ddtrace 1.17.0 → 1.19.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +85 -2
- data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +3 -0
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
- data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
- data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
- data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
- data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
- data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
- data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
- data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +167 -125
- data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
- data/ext/ddtrace_profiling_native_extension/extconf.rb +44 -10
- data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
- data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
- data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
- data/ext/ddtrace_profiling_native_extension/http_transport.c +5 -2
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +83 -18
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +6 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +2 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
- data/ext/ddtrace_profiling_native_extension/stack_recorder.c +330 -13
- data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
- data/lib/datadog/appsec/component.rb +4 -1
- data/lib/datadog/appsec/configuration/settings.rb +4 -0
- data/lib/datadog/appsec/contrib/devise/patcher/registration_controller_patch.rb +2 -0
- data/lib/datadog/appsec/processor/rule_loader.rb +60 -0
- data/lib/datadog/appsec/remote.rb +12 -9
- data/lib/datadog/core/configuration/settings.rb +139 -22
- data/lib/datadog/core/configuration.rb +4 -0
- data/lib/datadog/core/remote/worker.rb +1 -0
- data/lib/datadog/core/telemetry/collector.rb +10 -0
- data/lib/datadog/core/telemetry/event.rb +2 -1
- data/lib/datadog/core/telemetry/ext.rb +3 -0
- data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
- data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
- data/lib/datadog/core/workers/async.rb +1 -0
- data/lib/datadog/kit/enable_core_dumps.rb +5 -6
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +7 -11
- data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
- data/lib/datadog/profiling/component.rb +210 -18
- data/lib/datadog/profiling/scheduler.rb +4 -6
- data/lib/datadog/profiling/stack_recorder.rb +13 -2
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +24 -0
- data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
- data/lib/datadog/tracing/workers.rb +1 -0
- data/lib/ddtrace/version.rb +1 -1
- metadata +11 -6
data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c:

@@ -3,6 +3,7 @@
 #include "collectors_thread_context.h"
 #include "clock_id.h"
 #include "collectors_stack.h"
+#include "collectors_gc_profiling_helper.h"
 #include "helpers.h"
 #include "libdatadog_helpers.h"
 #include "private_vm_api_access.h"
@@ -37,24 +38,29 @@
 // When `thread_context_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
 // context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
 //
-// While
-//
+// While `cpu_time_at_gc_start_ns` is set, regular samples (if any) do not account for cpu-time any time that passes
+// after this timestamp. The idea is that this cpu-time will be blamed separately on GC, and not on the user thread.
+// Wall-time accounting is not affected by this (e.g. we still record 60 seconds every 60 seconds).
 //
-// (Regular samples can still account for the time between the previous sample and the start of GC.)
+// (Regular samples can still account for the cpu-time between the previous sample and the start of GC.)
 //
-// When `thread_context_collector_on_gc_finish` gets called, the
-//
+// When `thread_context_collector_on_gc_finish` gets called, the cpu-time and wall-time spent during GC gets recorded
+// into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns`
+// of the thread that did GC also gets adjusted to avoid double-accounting.)
 //
-// Finally, when `thread_context_collector_sample_after_gc` gets called,
+// Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded with a stack having
+// a single placeholder `Garbage Collection` frame. This sample gets
+// assigned the cpu-time and wall-time that was recorded between calls to `on_gc_start` and `on_gc_finish`, as well
+// as metadata for the last GC.
 //
-//
-//
-//
-//
-//
-//
-//
-//
+// Note that the Ruby GC does not usually do all of the GC work in one go. Instead, it breaks it up into smaller steps
+// so that the application can keep doing user work in between GC steps.
+// The `on_gc_start` / `on_gc_finish` will trigger each time the VM executes these smaller steps, and on a benchmark
+// that executes `Object.new` in a loop, I measured more than 50k of this steps per second (!!).
+// Creating these many events for every GC step is a lot of overhead, so instead `on_gc_finish` coalesces time
+// spent in GC and only flushes it at most every 10 ms/every complete GC collection. This reduces the amount of
+// individual GC events we need to record. We use the latest GC metadata for this event, reflecting the last GC that
+// happened in the coalesced period.
 //
 // In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
 // discovered that we needed to factor the sampling work away from `thread_context_collector_on_gc_finish` and into a
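The comment block above describes a coalesce-then-flush scheme: every GC step adds its cpu/wall time to the global gc_tracking state, and a single event is flushed at most every 10 ms or when a major GC completes. A minimal standalone C sketch of that bookkeeping, for illustration only (gc_tracking_model, on_gc_step_finish, flush_gc_event, and record_event are hypothetical names, not part of the gem):

    #include <stdbool.h>
    #include <stdint.h>

    // Mirrors the diff's TIME_BETWEEN_GC_EVENTS_NS = MILLIS_AS_NS(10): flush at most every 10 ms.
    #define FLUSH_INTERVAL_NS (10L * 1000 * 1000)

    typedef struct {
      uint64_t accumulated_cpu_time_ns;
      uint64_t accumulated_wall_time_ns;
      int64_t  wall_time_at_last_flushed_gc_event_ns;
    } gc_tracking_model;

    // Called at the end of each GC step: accumulate the step's time, then tell the
    // caller whether to flush one "Garbage Collection" sample now (major GC done,
    // or 10 ms elapsed since the last flushed event).
    static bool on_gc_step_finish(gc_tracking_model *gc, int64_t step_cpu_ns,
                                  int64_t step_wall_ns, int64_t now_wall_ns,
                                  bool finished_major_gc) {
      // Clamp negative deltas to 0, as the diff does via long_max_of(..., 0).
      gc->accumulated_cpu_time_ns  += step_cpu_ns  > 0 ? (uint64_t) step_cpu_ns  : 0;
      gc->accumulated_wall_time_ns += step_wall_ns > 0 ? (uint64_t) step_wall_ns : 0;
      return finished_major_gc ||
        (now_wall_ns - gc->wall_time_at_last_flushed_gc_event_ns) >= FLUSH_INTERVAL_NS;
    }

    // Modeled on sample_after_gc: record one event carrying the accumulated totals,
    // remember when it was flushed, and reset. (The real code resets lazily via
    // wall_time_at_previous_gc_ns = INVALID_TIME; this sketch resets eagerly.)
    static void flush_gc_event(gc_tracking_model *gc, int64_t now_wall_ns,
                               void (*record_event)(uint64_t cpu_ns, uint64_t wall_ns)) {
      record_event(gc->accumulated_cpu_time_ns, gc->accumulated_wall_time_ns);
      gc->wall_time_at_last_flushed_gc_event_ns = now_wall_ns;
      gc->accumulated_cpu_time_ns = 0;
      gc->accumulated_wall_time_ns = 0;
    }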
@@ -68,6 +74,7 @@
 #define IS_WALL_TIME true
 #define IS_NOT_WALL_TIME false
 #define MISSING_TRACER_CONTEXT_KEY 0
+#define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)

 static ID at_active_span_id; // id of :@active_span in Ruby
 static ID at_active_trace_id; // id of :@active_trace in Ruby
@@ -114,6 +121,14 @@ struct thread_context_collector_state {
     // See thread_context_collector_on_gc_start for details
     unsigned int gc_samples_missed_due_to_missing_context;
   } stats;
+
+  struct {
+    unsigned long accumulated_cpu_time_ns;
+    unsigned long accumulated_wall_time_ns;
+
+    long wall_time_at_previous_gc_ns; // Will be INVALID_TIME unless there's accumulated time above
+    long wall_time_at_last_flushed_gc_event_ns; // Starts at 0 and then will always be valid
+  } gc_tracking;
 };

 // Tracks per-thread state
@@ -127,15 +142,10 @@ struct per_thread_context {
   long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized

   struct {
-    // Both of these fields are set by on_gc_start and kept until
+    // Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
     // Outside of this window, they will be INVALID_TIME.
     long cpu_time_at_start_ns;
     long wall_time_at_start_ns;
-
-    // Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
-    // Outside of this window, they will be INVALID_TIME.
-    long cpu_time_at_finish_ns;
-    long wall_time_at_finish_ns;
   } gc_tracking;
 };

@@ -180,7 +190,6 @@ static void trigger_sample_for_thread(
   VALUE stack_from_thread,
   struct per_thread_context *thread_context,
   sample_values values,
-  sample_type type,
   long current_monotonic_wall_time_ns,
   ddog_CharSlice *ruby_vm_type,
   ddog_CharSlice *class_name
@@ -193,6 +202,7 @@ static VALUE _native_inspect(VALUE self, VALUE collector_instance);
 static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
 static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
 static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state);
+static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state);
 static void remove_context_for_dead_threads(struct thread_context_collector_state *state);
 static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
 static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
@@ -200,13 +210,14 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
 static long cpu_time_now_ns(struct per_thread_context *thread_context);
 static long thread_id_for(VALUE thread);
 static VALUE _native_stats(VALUE self, VALUE collector_instance);
+static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
 static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
 static bool should_collect_resource(VALUE root_span_type);
 static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
 static VALUE thread_list(struct thread_context_collector_state *state);
 static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
 static VALUE _native_new_empty_thread(VALUE self);
-ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
+static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);

 void collectors_thread_context_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -235,6 +246,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
   rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
   rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
   rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
+  rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
   rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);

   at_active_span_id = rb_intern_const("@active_span");
@@ -243,6 +255,8 @@ void collectors_thread_context_init(VALUE profiling_module) {
   at_resource_id = rb_intern_const("@resource");
   at_root_span_id = rb_intern_const("@root_span");
   at_type_id = rb_intern_const("@type");
+
+  gc_profiling_init();
 }

 // This structure is used to define a Ruby object that stores a pointer to a struct thread_context_collector_state
@@ -320,6 +334,8 @@ static VALUE _native_new(VALUE klass) {
   state->allocation_type_enabled = true;
   state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
   state->main_thread = rb_thread_main();
+  state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
+  state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;

   return TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
 }
@@ -465,7 +481,11 @@ void update_metrics_and_sample(
   long wall_time_elapsed_ns = update_time_since_previous_sample(
     &thread_context->wall_time_at_previous_sample_ns,
     current_monotonic_wall_time_ns,
-
+    // We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
+    // accounting to change during GC.
+    // E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing
+    // GC or not.
+    INVALID_TIME,
     IS_WALL_TIME
   );

@@ -475,7 +495,6 @@
     stack_from_thread,
     thread_context,
     (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
-    SAMPLE_REGULAR,
     current_monotonic_wall_time_ns,
     NULL,
     NULL
@@ -484,7 +503,7 @@

 // This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
 // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
-// create
+// create an event including the cpu/wall time spent in garbage collector work.
 //
 // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
 // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
@@ -509,27 +528,14 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
     return;
   }

-  //
-  //
-  // When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
-  // called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
-  // before we can actually run that method.
-  //
-  // We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
-  // `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
-  // then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
-  // there was a single, longer GC period.
-  if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
-      thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
-
-  // Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
+  // Here we record the wall-time first and in on_gc_finish we record it second to try to avoid having wall-time be slightly < cpu-time
   thread_context->gc_tracking.wall_time_at_start_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
   thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
 }

 // This function gets called when Ruby has finished running the Garbage Collector on the current thread.
-// It
-// create
+// It records the cpu/wall-time observed during GC, which will be used to later
+// create an event including the cpu/wall time spent from the start of garbage collector work until now.
 //
 // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
 // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
@@ -537,9 +543,9 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
 //
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
-void thread_context_collector_on_gc_finish(VALUE self_instance) {
+bool thread_context_collector_on_gc_finish(VALUE self_instance) {
   struct thread_context_collector_state *state;
-  if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
+  if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
   // This should never fail the the above check passes
   TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

@@ -547,29 +553,67 @@ void thread_context_collector_on_gc_finish(VALUE self_instance) {

   // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
   // how often this happens -- see on_gc_start.
-  if (thread_context == NULL) return;
+  if (thread_context == NULL) return false;
+
+  long cpu_time_at_start_ns = thread_context->gc_tracking.cpu_time_at_start_ns;
+  long wall_time_at_start_ns = thread_context->gc_tracking.wall_time_at_start_ns;

-  if (
-      thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) {
+  if (cpu_time_at_start_ns == INVALID_TIME && wall_time_at_start_ns == INVALID_TIME) {
     // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
     // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
     // do nothing.
-    return;
+    return false;
   }

-  //
-  thread_context->gc_tracking.
-  thread_context->gc_tracking.
+  // Mark thread as no longer in GC
+  thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
+  thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+
+  // Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time
+  long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
+  long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+
+  // If our end timestamp is not OK, we bail out
+  if (wall_time_at_finish_ns == 0) return false;
+
+  long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - cpu_time_at_start_ns;
+  long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - wall_time_at_start_ns;
+
+  // Wall-time can go backwards if the system clock gets changed (and we observed spurious jumps back on macOS as well)
+  // so let's ensure we don't get negative values for time deltas.
+  gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0);
+  gc_wall_time_elapsed_ns = long_max_of(gc_wall_time_elapsed_ns, 0);
+
+  if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
+    state->gc_tracking.accumulated_cpu_time_ns = 0;
+    state->gc_tracking.accumulated_wall_time_ns = 0;
+  }
+
+  state->gc_tracking.accumulated_cpu_time_ns += gc_cpu_time_elapsed_ns;
+  state->gc_tracking.accumulated_wall_time_ns += gc_wall_time_elapsed_ns;
+  state->gc_tracking.wall_time_at_previous_gc_ns = wall_time_at_finish_ns;
+
+  // Update cpu-time accounting so it doesn't include the cpu-time spent in GC during the next sample
+  // We don't update the wall-time because we don't subtract the wall-time spent in GC (see call to
+  // `update_time_since_previous_sample` for wall-time in `update_metrics_and_sample`).
+  if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
+    thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
+  }
+
+  // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
+  // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
+  // samples first.
+  bool finished_major_gc = gc_profiling_has_major_gc_finished();
+  bool over_flush_time_treshold =
+    (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
+
+  return finished_major_gc || over_flush_time_treshold;
 }

-// This function gets called
+// This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
 // It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
 // GC-related tracking.
 //
-// Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
-// and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
-// set on their context.
-//
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
 // Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
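For context on the new bool return above: on_gc_finish runs inside Ruby's GC hooks, where allocation is forbidden, so the actual flush (sample_after_gc, which is allowed to allocate) must be deferred. A hedged sketch of how a caller might consume the return value; schedule_flush() is a hypothetical deferral helper (e.g. a postponed job), and the gem's real wiring lives in collectors_cpu_and_wall_time_worker.c:

    #include <ruby.h>
    #include <stdbool.h>

    // Declared in collectors_thread_context.h (per the header diff below).
    bool thread_context_collector_on_gc_finish(VALUE self_instance);

    // Hypothetical stand-in for deferring work out of the GC hook.
    void schedule_flush(void);

    // Sketch of a GC-end hook body: bookkeeping only, no allocation here.
    static void after_gc_hook(VALUE collector_instance) {
      bool should_flush = thread_context_collector_on_gc_finish(collector_instance);

      // sample_after_gc may allocate (Assumption 3 above), so it cannot run inside
      // the hook; it only gets scheduled when on_gc_finish says enough time has
      // accumulated (major GC finished, or the 10 ms flush threshold passed).
      if (should_flush) schedule_flush();
    }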
@@ -578,70 +622,45 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
   struct thread_context_collector_state *state;
   TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

-
-
-
-  const long thread_count = RARRAY_LEN(threads);
-  for (long i = 0; i < thread_count; i++) {
-    VALUE thread = RARRAY_AREF(threads, i);
-    struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
+  if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
+    rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
+  }

-
-
-
-      thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
-      thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
-    ) continue; // Ignore threads with no/incomplete garbage collection data
-
-    sampled_any_thread = true;
-
-    long gc_cpu_time_elapsed_ns =
-      thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
-    long gc_wall_time_elapsed_ns =
-      thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
-
-    // We don't expect non-wall time to go backwards, so let's flag this as a bug
-    if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
-    // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
-    // was a bug.
-    // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
-    // https://github.com/DataDog/dd-trace-rb/pull/2336.
-    if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
-
-    if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
-      // Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
-      // come up with a crazy value for the frame
-      rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
-    }
+  int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
+  ddog_prof_Label labels[max_labels_needed_for_gc];
+  uint8_t label_pos = gc_profiling_set_metadata(labels, max_labels_needed_for_gc);

-
-      state,
-      /* thread: */ thread,
-      /* stack_from_thread: */ thread,
-      thread_context,
-      (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
-      SAMPLE_IN_GC,
-      INVALID_TIME, // For now we're not collecting timestamps for these events
-      NULL,
-      NULL
-    );
+  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};

-
-
-    thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
-    thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
-    thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
+  // The end_timestamp_ns is treated specially by libdatadog and that's why it's not added as a ddog_prof_Label
+  int64_t end_timestamp_ns = 0;

-
-
-      thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
-    }
-    if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
-      thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
-    }
+  if (state->timeline_enabled) {
+    end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns);
   }

-
+  record_placeholder_stack(
+    state->sampling_buffer,
+    state->recorder_instance,
+    (sample_values) {
+      // This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
+      // timeline duration.
+      // This is done to enable two use-cases:
+      // * regular cpu/wall-time makes this event show up as a regular stack in the flamegraph
+      // * the timeline duration is used when the event shows up in the timeline
+      .cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns,
+      .cpu_or_wall_samples = 1,
+      .wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
+      .timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
+    },
+    (sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns},
+    DDOG_CHARSLICE_C("Garbage Collection")
+  );
+
+  state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns;
+  state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
+
+  state->stats.gc_samples++;

   // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
   return Qnil;
@@ -653,7 +672,6 @@ static void trigger_sample_for_thread(
   VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
   struct per_thread_context *thread_context,
   sample_values values,
-  sample_type type,
   long current_monotonic_wall_time_ns,
   // These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
   ddog_CharSlice *ruby_vm_type,
@@ -776,8 +794,7 @@
     state->sampling_buffer,
     state->recorder_instance,
     values,
-    (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
-    type
+    (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
   );
 }

@@ -874,9 +891,7 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_context) {

   // These will only be used during a GC operation
   thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
-  thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
   thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
-  thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
 }

 static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -901,6 +916,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
     state->time_converter_state.delta_to_epoch_ns
   ));
   rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
+  rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));

   return result;
 }
@@ -927,9 +943,7 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash) {
     ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),

     ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
-    ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
     ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
-    ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
   };
   for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);

@@ -947,6 +961,19 @@ static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state) {
   return stats_as_hash;
 }

+static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state) {
+  // Update this when modifying state struct (gc_tracking inner struct)
+  VALUE result = rb_hash_new();
+  VALUE arguments[] = {
+    ID2SYM(rb_intern("accumulated_cpu_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_cpu_time_ns),
+    ID2SYM(rb_intern("accumulated_wall_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_wall_time_ns),
+    ID2SYM(rb_intern("wall_time_at_previous_gc_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_previous_gc_ns),
+    ID2SYM(rb_intern("wall_time_at_last_flushed_gc_event_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_last_flushed_gc_event_ns),
+  };
+  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(result, arguments[i], arguments[i+1]);
+  return result;
+}
+
 static void remove_context_for_dead_threads(struct thread_context_collector_state *state) {
   st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
 }
@@ -1049,8 +1076,6 @@ VALUE enforce_thread_context_collector_instance(VALUE object) {

 // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
 // It SHOULD NOT be used for other purposes.
-//
-// Returns the whole contents of the per_thread_context structs being tracked.
 static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
   struct thread_context_collector_state *state;
   TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
@@ -1058,6 +1083,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
   return stats_as_ruby_hash(state);
 }

+// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
+// It SHOULD NOT be used for other purposes.
+static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
+  struct thread_context_collector_state *state;
+  TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+  return gc_tracking_as_ruby_hash(state);
+}
+
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
   if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
@@ -1150,6 +1184,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object) {
   // Since this is stack allocated, be careful about moving it
   ddog_CharSlice class_name;
   ddog_CharSlice *optional_class_name = NULL;
+  char imemo_type[100];

   if (state->allocation_type_enabled) {
     optional_class_name = &class_name;
@@ -1197,19 +1232,26 @@
         class_name = ruby_value_type_to_class_name(type);
       }
     } else if (type == RUBY_T_IMEMO) {
-
+      const char *imemo_string = imemo_kind(new_object);
+      if (imemo_string != NULL) {
+        snprintf(imemo_type, 100, "(VM Internal, T_IMEMO, %s)", imemo_string);
+        class_name = (ddog_CharSlice) {.ptr = imemo_type, .len = strlen(imemo_type)};
+      } else { // Ruby < 3
+        class_name = DDOG_CHARSLICE_C("(VM Internal, T_IMEMO)");
+      }
     } else {
       class_name = ruby_vm_type; // For other weird internal things we just use the VM type
     }
   }

+  track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);
+
   trigger_sample_for_thread(
     state,
     /* thread: */ current_thread,
     /* stack_from_thread: */ current_thread,
     get_or_create_context_for(current_thread, state),
     (sample_values) {.alloc_samples = sample_weight},
-    SAMPLE_REGULAR,
     INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
     &ruby_vm_type,
     optional_class_name
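The T_IMEMO branch above formats a per-kind class name into a stack buffer and then wraps it in a pointer+length slice, which is why the earlier comment warns to "be careful about moving it". A tiny standalone illustration of that borrow pattern, with char_slice standing in for ddog_CharSlice:

    #include <stdio.h>
    #include <string.h>

    typedef struct { const char *ptr; size_t len; } char_slice;

    int main(void) {
      // Format into a stack buffer, as the diff does with imemo_type[100]...
      char imemo_type[100];
      snprintf(imemo_type, sizeof(imemo_type), "(VM Internal, T_IMEMO, %s)", "iseq");

      // ...then wrap it in a pointer+length view. The view borrows the buffer,
      // so it is only valid while imemo_type stays in scope.
      char_slice class_name = {.ptr = imemo_type, .len = strlen(imemo_type)};
      printf("%.*s\n", (int) class_name.len, class_name.ptr);
      return 0;
    }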
@@ -1232,7 +1274,7 @@ static VALUE _native_new_empty_thread(DDTRACE_UNUSED VALUE self) {
   return rb_thread_create(new_empty_thread_inner, NULL);
 }

-ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
+static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
   switch (type) {
     case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("Object");
     case(RUBY_T_CLASS  ): return DDOG_CHARSLICE_C("Class");
data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h:

@@ -1,6 +1,7 @@
 #pragma once

 #include <ruby.h>
+#include <stdbool.h>

 void thread_context_collector_sample(
   VALUE self_instance,
@@ -10,5 +11,5 @@ void thread_context_collector_sample(
 void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object);
 VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
 void thread_context_collector_on_gc_start(VALUE self_instance);
-void thread_context_collector_on_gc_finish(VALUE self_instance);
+bool thread_context_collector_on_gc_finish(VALUE self_instance);
 VALUE enforce_thread_context_collector_instance(VALUE object);