ddtrace 1.17.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +85 -2
  3. data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +3 -0
  4. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
  5. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
  6. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
  7. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  9. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
  10. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
  11. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +167 -125
  12. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
  13. data/ext/ddtrace_profiling_native_extension/extconf.rb +44 -10
  14. data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
  15. data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
  16. data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
  17. data/ext/ddtrace_profiling_native_extension/http_transport.c +5 -2
  18. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
  19. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
  20. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +83 -18
  21. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +6 -0
  22. data/ext/ddtrace_profiling_native_extension/profiling.c +2 -0
  23. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
  24. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
  25. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +330 -13
  26. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
  27. data/lib/datadog/appsec/component.rb +4 -1
  28. data/lib/datadog/appsec/configuration/settings.rb +4 -0
  29. data/lib/datadog/appsec/contrib/devise/patcher/registration_controller_patch.rb +2 -0
  30. data/lib/datadog/appsec/processor/rule_loader.rb +60 -0
  31. data/lib/datadog/appsec/remote.rb +12 -9
  32. data/lib/datadog/core/configuration/settings.rb +139 -22
  33. data/lib/datadog/core/configuration.rb +4 -0
  34. data/lib/datadog/core/remote/worker.rb +1 -0
  35. data/lib/datadog/core/telemetry/collector.rb +10 -0
  36. data/lib/datadog/core/telemetry/event.rb +2 -1
  37. data/lib/datadog/core/telemetry/ext.rb +3 -0
  38. data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
  39. data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
  40. data/lib/datadog/core/workers/async.rb +1 -0
  41. data/lib/datadog/kit/enable_core_dumps.rb +5 -6
  42. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +7 -11
  43. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
  44. data/lib/datadog/profiling/component.rb +210 -18
  45. data/lib/datadog/profiling/scheduler.rb +4 -6
  46. data/lib/datadog/profiling/stack_recorder.rb +13 -2
  47. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
  48. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  49. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +5 -0
  50. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +24 -0
  51. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  52. data/lib/datadog/tracing/workers.rb +1 -0
  53. data/lib/ddtrace/version.rb +1 -1
  54. metadata +11 -6
data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c

@@ -3,6 +3,7 @@
  #include "collectors_thread_context.h"
  #include "clock_id.h"
  #include "collectors_stack.h"
+ #include "collectors_gc_profiling_helper.h"
  #include "helpers.h"
  #include "libdatadog_helpers.h"
  #include "private_vm_api_access.h"
@@ -37,24 +38,29 @@
  // When `thread_context_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
  // context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
  //
- // While these fields are set, regular samples (if any) do not account for any time that passes after these two
- // timestamps.
+ // While `cpu_time_at_gc_start_ns` is set, regular samples (if any) do not account for any cpu-time that passes
+ // after this timestamp. The idea is that this cpu-time will be blamed separately on GC, and not on the user thread.
+ // Wall-time accounting is not affected by this (e.g. we still record 60 seconds every 60 seconds).
  //
- // (Regular samples can still account for the time between the previous sample and the start of GC.)
+ // (Regular samples can still account for the cpu-time between the previous sample and the start of GC.)
  //
- // When `thread_context_collector_on_gc_finish` gets called, the current cpu and wall-time again get recorded to the
- // thread context: `cpu_time_at_gc_finish_ns` and `wall_time_at_gc_finish_ns`.
+ // When `thread_context_collector_on_gc_finish` gets called, the cpu-time and wall-time spent during GC get recorded
+ // into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns`
+ // of the thread that did GC also gets adjusted to avoid double-accounting.)
  //
- // Finally, when `thread_context_collector_sample_after_gc` gets called, the following happens:
+ // Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded with a stack having
+ // a single placeholder `Garbage Collection` frame. This sample gets assigned the cpu-time and wall-time that was
+ // recorded between calls to `on_gc_start` and `on_gc_finish`, as well as metadata for the last GC.
  //
- // 1. A sample gets taken, using the special `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder
- // `Garbage Collection` frame as the latest frame. This sample gets assigned the cpu-time and wall-time period that was
- // recorded between calls to `on_gc_start` and `on_gc_finish`.
- //
- // 2. The thread is no longer marked as being in gc (all gc tracking fields get reset back to `INVALID_TIME`).
- //
- // 3. The `cpu_time_at_previous_sample_ns` and `wall_time_at_previous_sample_ns` get updated with the elapsed time in
- // GC, so that all time is accounted for -- e.g. the next sample will not get "blamed" by time spent in GC.
+ // Note that the Ruby GC does not usually do all of the GC work in one go. Instead, it breaks it up into smaller steps
+ // so that the application can keep doing user work in between GC steps.
+ // `on_gc_start` / `on_gc_finish` will trigger each time the VM executes these smaller steps, and on a benchmark
+ // that executes `Object.new` in a loop, I measured more than 50k of these steps per second (!!).
+ // Creating this many events for every GC step is a lot of overhead, so instead `on_gc_finish` coalesces the time
+ // spent in GC and only flushes it at most every 10 ms/every complete GC collection. This reduces the number of
+ // individual GC events we need to record. We use the latest GC metadata for this event, reflecting the last GC that
+ // happened in the coalesced period.
  //
  // In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
  // discovered that we needed to factor the sampling work away from `thread_context_collector_on_gc_finish` and into a
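To make the coalescing flow above concrete, here is a minimal editor's sketch of the accounting this comment block describes. It is illustrative rather than the gem's actual code; the names and the flush callback are simplified stand-ins:

// Editor's sketch of the coalescing described above -- illustrative, not the
// gem's actual code. Each small GC step accumulates time; a flush is only
// requested after a complete (major) GC or once 10 ms of wall-time have passed
// since the last flushed event.
#include <stdbool.h>

#define TIME_BETWEEN_GC_EVENTS_NS (10 * 1000L * 1000L) // 10 ms, as in the real file

static long accumulated_gc_wall_time_ns = 0;
static long wall_time_at_last_flushed_gc_event_ns = 0;

// Called at the end of every small GC step; returns whether the caller should
// schedule a flush (which must happen later, outside of GC, where allocation is allowed).
static bool on_gc_step_finished(long step_wall_time_ns, long now_ns, bool major_gc_finished) {
  accumulated_gc_wall_time_ns += step_wall_time_ns;
  bool over_threshold = (now_ns - wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
  return major_gc_finished || over_threshold;
}

// Emits one "Garbage Collection" event covering all steps coalesced since the last flush.
static void flush_gc_event(long now_ns) {
  // record_placeholder_stack(...accumulated_gc_wall_time_ns...) happens here in the real code
  accumulated_gc_wall_time_ns = 0;
  wall_time_at_last_flushed_gc_event_ns = now_ns;
}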
@@ -68,6 +74,7 @@
  #define IS_WALL_TIME true
  #define IS_NOT_WALL_TIME false
  #define MISSING_TRACER_CONTEXT_KEY 0
+ #define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)

  static ID at_active_span_id; // id of :@active_span in Ruby
  static ID at_active_trace_id; // id of :@active_trace in Ruby
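`MILLIS_AS_NS` comes from the gem's time helpers and is assumed here to be the usual milliseconds-to-nanoseconds conversion, making the new constant 10 ms expressed in nanoseconds:

// Sketch of the conversion the new #define relies on; the exact macro lives in
// the gem's time helpers, this is the assumed equivalent.
#define MILLIS_AS_NS(millis) ((millis) * 1000L * 1000L)
// TIME_BETWEEN_GC_EVENTS_NS == MILLIS_AS_NS(10) == 10,000,000 ns == 10 ms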
@@ -114,6 +121,14 @@ struct thread_context_collector_state {
  // See thread_context_collector_on_gc_start for details
  unsigned int gc_samples_missed_due_to_missing_context;
  } stats;
+
+ struct {
+ unsigned long accumulated_cpu_time_ns;
+ unsigned long accumulated_wall_time_ns;
+
+ long wall_time_at_previous_gc_ns; // Will be INVALID_TIME unless there's accumulated time above
+ long wall_time_at_last_flushed_gc_event_ns; // Starts at 0 and then will always be valid
+ } gc_tracking;
  };

  // Tracks per-thread state
@@ -127,15 +142,10 @@ struct per_thread_context {
  long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized

  struct {
- // Both of these fields are set by on_gc_start and kept until sample_after_gc is called.
+ // Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
  // Outside of this window, they will be INVALID_TIME.
  long cpu_time_at_start_ns;
  long wall_time_at_start_ns;
-
- // Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
- // Outside of this window, they will be INVALID_TIME.
- long cpu_time_at_finish_ns;
- long wall_time_at_finish_ns;
  } gc_tracking;
  };

@@ -180,7 +190,6 @@ static void trigger_sample_for_thread(
  VALUE stack_from_thread,
  struct per_thread_context *thread_context,
  sample_values values,
- sample_type type,
  long current_monotonic_wall_time_ns,
  ddog_CharSlice *ruby_vm_type,
  ddog_CharSlice *class_name
@@ -193,6 +202,7 @@ static VALUE _native_inspect(VALUE self, VALUE collector_instance);
  static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
  static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
  static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state);
+ static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state);
  static void remove_context_for_dead_threads(struct thread_context_collector_state *state);
  static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
  static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
@@ -200,13 +210,14 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
  static long cpu_time_now_ns(struct per_thread_context *thread_context);
  static long thread_id_for(VALUE thread);
  static VALUE _native_stats(VALUE self, VALUE collector_instance);
+ static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
  static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
  static bool should_collect_resource(VALUE root_span_type);
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
  static VALUE thread_list(struct thread_context_collector_state *state);
  static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
  static VALUE _native_new_empty_thread(VALUE self);
- ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
+ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);

  void collectors_thread_context_init(VALUE profiling_module) {
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -235,6 +246,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
  rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
  rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
  rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
+ rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
  rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);

  at_active_span_id = rb_intern_const("@active_span");
@@ -243,6 +255,8 @@ void collectors_thread_context_init(VALUE profiling_module) {
  at_resource_id = rb_intern_const("@resource");
  at_root_span_id = rb_intern_const("@root_span");
  at_type_id = rb_intern_const("@type");
+
+ gc_profiling_init();
  }

  // This structure is used to define a Ruby object that stores a pointer to a struct thread_context_collector_state
@@ -320,6 +334,8 @@ static VALUE _native_new(VALUE klass) {
  state->allocation_type_enabled = true;
  state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
  state->main_thread = rb_thread_main();
+ state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
+ state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;

  return TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
  }
@@ -465,7 +481,11 @@ void update_metrics_and_sample(
  long wall_time_elapsed_ns = update_time_since_previous_sample(
  &thread_context->wall_time_at_previous_sample_ns,
  current_monotonic_wall_time_ns,
- thread_context->gc_tracking.wall_time_at_start_ns,
+ // We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
+ // accounting to change during GC.
+ // E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing
+ // GC or not.
+ INVALID_TIME,
  IS_WALL_TIME
  );

@@ -475,7 +495,6 @@
  stack_from_thread,
  thread_context,
  (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
- SAMPLE_REGULAR,
  current_monotonic_wall_time_ns,
  NULL,
  NULL
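For context on the `INVALID_TIME` being passed above: the sketch below captures the contract this code relies on from `update_time_since_previous_sample`, as described by the new comment. This is a simplified editor's reconstruction of the assumed behavior, not the gem's actual helper (which also handles cpu-time and failure cases):

// Editor's sketch of the assumed update_time_since_previous_sample contract;
// simplified and illustrative, not the gem's implementation.
#define INVALID_TIME -1 // matches the definition used elsewhere in this file

static long update_time_since_previous_sample_sketch(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns) {
  // Count only up to the start of GC when a valid gc_start_time_ns is given;
  // otherwise count all the way up to current_time_ns.
  long cap_ns = (gc_start_time_ns != INVALID_TIME) ? gc_start_time_ns : current_time_ns;
  long elapsed_ns = cap_ns - *time_at_previous_sample_ns;
  *time_at_previous_sample_ns = current_time_ns;
  return elapsed_ns > 0 ? elapsed_ns : 0;
}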
@@ -484,7 +503,7 @@

  // This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
  // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
- // create a stack sample that blames the cpu/wall time spent from now until the end of the garbage collector work.
+ // create an event including the cpu/wall time spent in garbage collector work.
  //
  // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
  // *NO ALLOCATION* is allowed. This function, and anything it calls, must never trigger memory or object allocation.
@@ -509,27 +528,14 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
  return;
  }

- // If these fields are set, there's an existing GC sample that still needs to be written out by `sample_after_gc`.
- //
- // When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
- // called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
- // before we can actually run that method.
- //
- // We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
- // `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
- // then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
- // there was a single, longer GC period.
- if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
- thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
-
- // Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
+ // Here we record the wall-time first and in on_gc_finish we record it second to try to avoid having wall-time be slightly < cpu-time
  thread_context->gc_tracking.wall_time_at_start_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
  thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
  }

  // This function gets called when Ruby has finished running the Garbage Collector on the current thread.
- // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
- // create a stack sample that blames the cpu/wall time spent from the start of garbage collector work until now.
+ // It records the cpu/wall-time observed during GC, which will be used to later
+ // create an event including the cpu/wall time spent from the start of garbage collector work until now.
  //
  // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
  // *NO ALLOCATION* is allowed. This function, and anything it calls, must never trigger memory or object allocation.
@@ -537,9 +543,9 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
  //
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
  // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
- void thread_context_collector_on_gc_finish(VALUE self_instance) {
+ bool thread_context_collector_on_gc_finish(VALUE self_instance) {
  struct thread_context_collector_state *state;
- if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
+ if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
  // This should never fail if the above check passes
  TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

@@ -547,29 +553,67 @@ void thread_context_collector_on_gc_finish(VALUE self_instance) {

  // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
  // how often this happens -- see on_gc_start.
- if (thread_context == NULL) return;
+ if (thread_context == NULL) return false;
+
+ long cpu_time_at_start_ns = thread_context->gc_tracking.cpu_time_at_start_ns;
+ long wall_time_at_start_ns = thread_context->gc_tracking.wall_time_at_start_ns;

- if (thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME &&
- thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) {
+ if (cpu_time_at_start_ns == INVALID_TIME && wall_time_at_start_ns == INVALID_TIME) {
  // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
  // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
  // do nothing.
- return;
+ return false;
  }

- // Here we record the wall-time second and in on_gc_start we record it first to avoid having wall-time be slightly < cpu-time
- thread_context->gc_tracking.cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
- thread_context->gc_tracking.wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+ // Mark thread as no longer in GC
+ thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
+ thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+
+ // Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time
+ long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
+ long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+
+ // If our end timestamp is not OK, we bail out
+ if (wall_time_at_finish_ns == 0) return false;
+
+ long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - cpu_time_at_start_ns;
+ long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - wall_time_at_start_ns;
+
+ // Wall-time can go backwards if the system clock gets changed (and we observed spurious jumps back on macOS as well)
+ // so let's ensure we don't get negative values for time deltas.
+ gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0);
+ gc_wall_time_elapsed_ns = long_max_of(gc_wall_time_elapsed_ns, 0);
+
+ if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
+ state->gc_tracking.accumulated_cpu_time_ns = 0;
+ state->gc_tracking.accumulated_wall_time_ns = 0;
+ }
+
+ state->gc_tracking.accumulated_cpu_time_ns += gc_cpu_time_elapsed_ns;
+ state->gc_tracking.accumulated_wall_time_ns += gc_wall_time_elapsed_ns;
+ state->gc_tracking.wall_time_at_previous_gc_ns = wall_time_at_finish_ns;
+
+ // Update cpu-time accounting so it doesn't include the cpu-time spent in GC during the next sample
+ // We don't update the wall-time because we don't subtract the wall-time spent in GC (see call to
+ // `update_time_since_previous_sample` for wall-time in `update_metrics_and_sample`).
+ if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
+ thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
+ }
+
+ // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
+ // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
+ // samples first.
+ bool finished_major_gc = gc_profiling_has_major_gc_finished();
+ bool over_flush_time_treshold =
+ (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
+
+ return finished_major_gc || over_flush_time_treshold;
  }

- // This function gets called shortly after Ruby has finished running the Garbage Collector.
+ // This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
  // It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
  // GC-related tracking.
  //
- // Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
- // and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
- // set on their context.
- //
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
  // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
  // Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
@@ -578,70 +622,45 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
  struct thread_context_collector_state *state;
  TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

- VALUE threads = thread_list(state);
- bool sampled_any_thread = false;
-
- const long thread_count = RARRAY_LEN(threads);
- for (long i = 0; i < thread_count; i++) {
- VALUE thread = RARRAY_AREF(threads, i);
- struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
+ if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
+ rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
+ }

- if (
- thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME ||
- thread_context->gc_tracking.cpu_time_at_finish_ns == INVALID_TIME ||
- thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
- thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
- ) continue; // Ignore threads with no/incomplete garbage collection data
-
- sampled_any_thread = true;
-
- long gc_cpu_time_elapsed_ns =
- thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
- long gc_wall_time_elapsed_ns =
- thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
-
- // We don't expect non-wall time to go backwards, so let's flag this as a bug
- if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
- // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
- // was a bug.
- // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
- // https://github.com/DataDog/dd-trace-rb/pull/2336.
- if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
-
- if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
- // Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
- // come up with a crazy value for the frame
- rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
- }
+ int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
+ ddog_prof_Label labels[max_labels_needed_for_gc];
+ uint8_t label_pos = gc_profiling_set_metadata(labels, max_labels_needed_for_gc);

- trigger_sample_for_thread(
- state,
- /* thread: */ thread,
- /* stack_from_thread: */ thread,
- thread_context,
- (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
- SAMPLE_IN_GC,
- INVALID_TIME, // For now we're not collecting timestamps for these events
- NULL,
- NULL
- );
+ ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};

- // Mark thread as no longer in GC
- thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
- thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
- thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
- thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
+ // The end_timestamp_ns is treated specially by libdatadog and that's why it's not added as a ddog_prof_Label
+ int64_t end_timestamp_ns = 0;

- // Update counters so that they won't include the time in GC during the next sample
- if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
- thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
- }
- if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
- thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
- }
+ if (state->timeline_enabled) {
+ end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns);
  }

- if (sampled_any_thread) state->stats.gc_samples++;
+ record_placeholder_stack(
+ state->sampling_buffer,
+ state->recorder_instance,
+ (sample_values) {
+ // This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
+ // timeline duration.
+ // This is done to enable two use-cases:
+ // * regular cpu/wall-time makes this event show up as a regular stack in the flamegraph
+ // * the timeline duration is used when the event shows up in the timeline
+ .cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns,
+ .cpu_or_wall_samples = 1,
+ .wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
+ .timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
+ },
+ (sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns},
+ DDOG_CHARSLICE_C("Garbage Collection")
+ );
+
+ state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns;
+ state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
+
+ state->stats.gc_samples++;

  // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
  return Qnil;
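The switch to a `bool` return value exists because `on_gc_finish` runs inside GC, where allocation (and therefore flushing a sample) is forbidden, so the caller is expected to defer `sample_after_gc` to a safe point. Below is a simplified editor's sketch of such a caller; the real wiring lives in collectors_cpu_and_wall_time_worker.c and the names here are illustrative:

// Editor's sketch: consuming the new bool return value from a GC tracepoint.
// Illustrative only -- the real integration is in collectors_cpu_and_wall_time_worker.c.
#include <ruby.h>
#include <ruby/debug.h>
#include <stdbool.h>
#include "collectors_thread_context.h"

static VALUE thread_context_collector_instance; // assumed to be initialized elsewhere

// Runs at a safe point after GC, where allocating (and thus flushing) is allowed again
static void after_gc_from_postponed_job(void *_unused) {
  thread_context_collector_sample_after_gc(thread_context_collector_instance);
}

static void on_gc_event(VALUE tracepoint_data, void *_unused) {
  rb_event_flag_t event = rb_tracearg_event_flag(rb_tracearg_from_tracepoint(tracepoint_data));

  if (event == RUBY_INTERNAL_EVENT_GC_ENTER) {
    thread_context_collector_on_gc_start(thread_context_collector_instance);
  } else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) {
    bool should_flush = thread_context_collector_on_gc_finish(thread_context_collector_instance);
    // Can't allocate here, so defer the flush instead of sampling inline
    if (should_flush) rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
  }
}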
@@ -653,7 +672,6 @@ static void trigger_sample_for_thread(
  VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
  struct per_thread_context *thread_context,
  sample_values values,
- sample_type type,
  long current_monotonic_wall_time_ns,
  // These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
  ddog_CharSlice *ruby_vm_type,
@@ -776,8 +794,7 @@
  state->sampling_buffer,
  state->recorder_instance,
  values,
- (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns},
- type
+ (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
  );
  }

@@ -874,9 +891,7 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_c

  // These will only be used during a GC operation
  thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
- thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
  thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
- thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
  }

  static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -901,6 +916,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
  state->time_converter_state.delta_to_epoch_ns
  ));
  rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
+ rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));

  return result;
  }
@@ -927,9 +943,7 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
  ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),

  ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
- ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
  ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
- ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
  };
  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);

@@ -947,6 +961,19 @@ static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state) {
  return stats_as_hash;
  }

+ static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state) {
+ // Update this when modifying state struct (gc_tracking inner struct)
+ VALUE result = rb_hash_new();
+ VALUE arguments[] = {
+ ID2SYM(rb_intern("accumulated_cpu_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_cpu_time_ns),
+ ID2SYM(rb_intern("accumulated_wall_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_wall_time_ns),
+ ID2SYM(rb_intern("wall_time_at_previous_gc_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_previous_gc_ns),
+ ID2SYM(rb_intern("wall_time_at_last_flushed_gc_event_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_last_flushed_gc_event_ns),
+ };
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(result, arguments[i], arguments[i+1]);
+ return result;
+ }
+
  static void remove_context_for_dead_threads(struct thread_context_collector_state *state) {
  st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
  }
@@ -1049,8 +1076,6 @@ VALUE enforce_thread_context_collector_instance(VALUE object) {

  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
  // It SHOULD NOT be used for other purposes.
- //
- // Returns the whole contents of the per_thread_context structs being tracked.
  static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
  struct thread_context_collector_state *state;
  TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
@@ -1058,6 +1083,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance)
  return stats_as_ruby_hash(state);
  }

+ // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
+ // It SHOULD NOT be used for other purposes.
+ static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
+ struct thread_context_collector_state *state;
+ TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+ return gc_tracking_as_ruby_hash(state);
+ }
+
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
  static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
  if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
@@ -1150,6 +1184,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
  // Since this is stack allocated, be careful about moving it
  ddog_CharSlice class_name;
  ddog_CharSlice *optional_class_name = NULL;
+ char imemo_type[100];

  if (state->allocation_type_enabled) {
  optional_class_name = &class_name;
@@ -1197,19 +1232,26 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
  class_name = ruby_value_type_to_class_name(type);
  }
  } else if (type == RUBY_T_IMEMO) {
- class_name = DDOG_CHARSLICE_C("(VM Internal, T_IMEMO)");
+ const char *imemo_string = imemo_kind(new_object);
+ if (imemo_string != NULL) {
+ snprintf(imemo_type, 100, "(VM Internal, T_IMEMO, %s)", imemo_string);
+ class_name = (ddog_CharSlice) {.ptr = imemo_type, .len = strlen(imemo_type)};
+ } else { // Ruby < 3
+ class_name = DDOG_CHARSLICE_C("(VM Internal, T_IMEMO)");
+ }
  } else {
  class_name = ruby_vm_type; // For other weird internal things we just use the VM type
  }
  }

+ track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);
+
  trigger_sample_for_thread(
  state,
  /* thread: */ current_thread,
  /* stack_from_thread: */ current_thread,
  get_or_create_context_for(current_thread, state),
  (sample_values) {.alloc_samples = sample_weight},
- SAMPLE_REGULAR,
  INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
  &ruby_vm_type,
  optional_class_name
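A note on the `imemo_type` buffer used above: `ddog_CharSlice` borrows its pointer rather than copying the string, which is why the buffer is declared at the top of the function (next to the "be careful about moving it" comment) and must outlive the recorded sample. A self-contained sketch of the pattern, using a hypothetical `char_slice` stand-in for the libdatadog type:

// Editor's sketch of the borrowed-buffer pattern; char_slice is a hypothetical
// stand-in for ddog_CharSlice. Both just point at memory they do not own.
#include <stdio.h>
#include <string.h>

typedef struct { const char *ptr; size_t len; } char_slice;

static void record(char_slice name) { printf("%.*s\n", (int) name.len, name.ptr); }

static void sample_imemo(const char *imemo_string) {
  char buffer[100]; // must stay in scope until record() has consumed the slice
  snprintf(buffer, sizeof(buffer), "(VM Internal, T_IMEMO, %s)", imemo_string);
  char_slice name = {.ptr = buffer, .len = strlen(buffer)};
  record(name); // safe: buffer is still alive here
} // using `name` after this function returns would read a dangling pointer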
@@ -1232,7 +1274,7 @@ static VALUE _native_new_empty_thread(DDTRACE_UNUSED VALUE self) {
  return rb_thread_create(new_empty_thread_inner, NULL);
  }

- ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
+ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
  switch (type) {
  case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("Object");
  case(RUBY_T_CLASS ): return DDOG_CHARSLICE_C("Class");
data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h

@@ -1,6 +1,7 @@
  #pragma once

  #include <ruby.h>
+ #include <stdbool.h>

  void thread_context_collector_sample(
  VALUE self_instance,
@@ -10,5 +11,5 @@ void thread_context_collector_sample(
  void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object);
  VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
  void thread_context_collector_on_gc_start(VALUE self_instance);
- void thread_context_collector_on_gc_finish(VALUE self_instance);
+ bool thread_context_collector_on_gc_finish(VALUE self_instance);
  VALUE enforce_thread_context_collector_instance(VALUE object);