ddtrace 1.17.0 → 1.19.0

Files changed (54)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +85 -2
  3. data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +3 -0
  4. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
  5. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
  6. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
  7. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  9. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
  10. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
  11. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +167 -125
  12. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
  13. data/ext/ddtrace_profiling_native_extension/extconf.rb +44 -10
  14. data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
  15. data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
  16. data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
  17. data/ext/ddtrace_profiling_native_extension/http_transport.c +5 -2
  18. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
  19. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
  20. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +83 -18
  21. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +6 -0
  22. data/ext/ddtrace_profiling_native_extension/profiling.c +2 -0
  23. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
  24. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
  25. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +330 -13
  26. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
  27. data/lib/datadog/appsec/component.rb +4 -1
  28. data/lib/datadog/appsec/configuration/settings.rb +4 -0
  29. data/lib/datadog/appsec/contrib/devise/patcher/registration_controller_patch.rb +2 -0
  30. data/lib/datadog/appsec/processor/rule_loader.rb +60 -0
  31. data/lib/datadog/appsec/remote.rb +12 -9
  32. data/lib/datadog/core/configuration/settings.rb +139 -22
  33. data/lib/datadog/core/configuration.rb +4 -0
  34. data/lib/datadog/core/remote/worker.rb +1 -0
  35. data/lib/datadog/core/telemetry/collector.rb +10 -0
  36. data/lib/datadog/core/telemetry/event.rb +2 -1
  37. data/lib/datadog/core/telemetry/ext.rb +3 -0
  38. data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
  39. data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
  40. data/lib/datadog/core/workers/async.rb +1 -0
  41. data/lib/datadog/kit/enable_core_dumps.rb +5 -6
  42. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +7 -11
  43. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
  44. data/lib/datadog/profiling/component.rb +210 -18
  45. data/lib/datadog/profiling/scheduler.rb +4 -6
  46. data/lib/datadog/profiling/stack_recorder.rb +13 -2
  47. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
  48. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  49. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +5 -0
  50. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +24 -0
  51. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  52. data/lib/datadog/tracing/workers.rb +1 -0
  53. data/lib/ddtrace/version.rb +1 -1
  54. metadata +11 -6
data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c
@@ -3,6 +3,7 @@
 #include "collectors_thread_context.h"
 #include "clock_id.h"
 #include "collectors_stack.h"
+#include "collectors_gc_profiling_helper.h"
 #include "helpers.h"
 #include "libdatadog_helpers.h"
 #include "private_vm_api_access.h"
@@ -37,24 +38,29 @@
 // When `thread_context_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
 // context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
 //
-// While these fields are set, regular samples (if any) do not account for any time that passes after these two
-// timestamps.
+// While `cpu_time_at_gc_start_ns` is set, regular samples (if any) do not account for any cpu-time that passes
+// after this timestamp. The idea is that this cpu-time will be blamed separately on GC, and not on the user thread.
+// Wall-time accounting is not affected by this (e.g. we still record 60 seconds every 60 seconds).
 //
-// (Regular samples can still account for the time between the previous sample and the start of GC.)
+// (Regular samples can still account for the cpu-time between the previous sample and the start of GC.)
 //
-// When `thread_context_collector_on_gc_finish` gets called, the current cpu and wall-time again get recorded to the
-// thread context: `cpu_time_at_gc_finish_ns` and `wall_time_at_gc_finish_ns`.
+// When `thread_context_collector_on_gc_finish` gets called, the cpu-time and wall-time spent during GC get recorded
+// into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns`
+// of the thread that did GC also gets adjusted to avoid double-accounting.)
 //
-// Finally, when `thread_context_collector_sample_after_gc` gets called, the following happens:
+// Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded with a stack having
+// a single placeholder `Garbage Collection` frame. This sample gets assigned the cpu-time and wall-time that was
+// recorded between calls to `on_gc_start` and `on_gc_finish`, as well as metadata for the last GC.
 //
-// 1. A sample gets taken, using the special `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder
-// `Garbage Collection` frame as the latest frame. This sample gets assigned the cpu-time and wall-time period that was
-// recorded between calls to `on_gc_start` and `on_gc_finish`.
-//
-// 2. The thread is no longer marked as being in gc (all gc tracking fields get reset back to `INVALID_TIME`).
-//
-// 3. The `cpu_time_at_previous_sample_ns` and `wall_time_at_previous_sample_ns` get updated with the elapsed time in
-// GC, so that all time is accounted for -- e.g. the next sample will not get "blamed" by time spent in GC.
+// Note that the Ruby GC does not usually do all of the GC work in one go. Instead, it breaks it up into smaller steps
+// so that the application can keep doing user work in between GC steps.
+// The `on_gc_start` / `on_gc_finish` calls will trigger each time the VM executes these smaller steps, and on a benchmark
+// that executes `Object.new` in a loop, I measured more than 50k of these steps per second (!!).
+// Creating this many events for every GC step is a lot of overhead, so instead `on_gc_finish` coalesces the time
+// spent in GC and only flushes it at most every 10 ms or on every complete GC collection. This reduces the amount of
+// individual GC events we need to record. We use the latest GC metadata for this event, reflecting the last GC that
+// happened in the coalesced period.
 //
 // In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
 // discovered that we needed to factor the sampling work away from `thread_context_collector_on_gc_finish` and into a
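To make the coalescing just described concrete, here is a minimal standalone sketch of the accumulate-and-flush decision. All names in it (`gc_tracking_sketch`, `on_gc_step`, the caller-supplied step timings) are illustrative stand-ins, not the gem's API; the real logic lives in `thread_context_collector_on_gc_finish` further down in this diff:

```c
// Sketch: accumulate GC time on every GC step, but only ask the caller to
// flush an event on a finished major GC or at most once every 10 ms.
#include <stdbool.h>

#define TIME_BETWEEN_GC_EVENTS_NS (10L * 1000 * 1000) // 10 ms in nanoseconds

typedef struct {
  unsigned long accumulated_cpu_time_ns;
  unsigned long accumulated_wall_time_ns;
  long wall_time_at_last_flushed_gc_event_ns;
} gc_tracking_sketch;

// Returns true when the caller should schedule a sample_after_gc-style flush.
static bool on_gc_step(
  gc_tracking_sketch *tracking,
  long step_cpu_time_ns,   // cpu-time spent in this GC step
  long step_wall_time_ns,  // wall-time spent in this GC step
  long wall_time_now_ns,
  bool major_gc_finished   // stand-in for gc_profiling_has_major_gc_finished()
) {
  tracking->accumulated_cpu_time_ns += step_cpu_time_ns;
  tracking->accumulated_wall_time_ns += step_wall_time_ns;

  bool over_flush_time_threshold =
    (wall_time_now_ns - tracking->wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
  return major_gc_finished || over_flush_time_threshold;
}
```

When the flush does happen, `sample_after_gc` emits a single event carrying the accumulated totals and resets the accumulators, which is how tens of thousands of GC steps per second collapse into on the order of a hundred events per second.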
@@ -68,6 +74,7 @@
 #define IS_WALL_TIME true
 #define IS_NOT_WALL_TIME false
 #define MISSING_TRACER_CONTEXT_KEY 0
+#define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)
 
 static ID at_active_span_id; // id of :@active_span in Ruby
 static ID at_active_trace_id; // id of :@active_trace in Ruby
@@ -114,6 +121,14 @@ struct thread_context_collector_state {
     // See thread_context_collector_on_gc_start for details
     unsigned int gc_samples_missed_due_to_missing_context;
   } stats;
+
+  struct {
+    unsigned long accumulated_cpu_time_ns;
+    unsigned long accumulated_wall_time_ns;
+
+    long wall_time_at_previous_gc_ns; // Will be INVALID_TIME unless there's accumulated time above
+    long wall_time_at_last_flushed_gc_event_ns; // Starts at 0 and then will always be valid
+  } gc_tracking;
 };
 
 // Tracks per-thread state
@@ -127,15 +142,10 @@ struct per_thread_context {
   long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
 
   struct {
-    // Both of these fields are set by on_gc_start and kept until sample_after_gc is called.
+    // Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
     // Outside of this window, they will be INVALID_TIME.
     long cpu_time_at_start_ns;
     long wall_time_at_start_ns;
-
-    // Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
-    // Outside of this window, they will be INVALID_TIME.
-    long cpu_time_at_finish_ns;
-    long wall_time_at_finish_ns;
   } gc_tracking;
 };
 
@@ -180,7 +190,6 @@ static void trigger_sample_for_thread(
   VALUE stack_from_thread,
   struct per_thread_context *thread_context,
   sample_values values,
-  sample_type type,
   long current_monotonic_wall_time_ns,
   ddog_CharSlice *ruby_vm_type,
   ddog_CharSlice *class_name
@@ -193,6 +202,7 @@ static VALUE _native_inspect(VALUE self, VALUE collector_instance);
 static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
 static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
 static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state);
+static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state);
 static void remove_context_for_dead_threads(struct thread_context_collector_state *state);
 static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
 static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
@@ -200,13 +210,14 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
 static long cpu_time_now_ns(struct per_thread_context *thread_context);
 static long thread_id_for(VALUE thread);
 static VALUE _native_stats(VALUE self, VALUE collector_instance);
+static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
 static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
 static bool should_collect_resource(VALUE root_span_type);
 static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
 static VALUE thread_list(struct thread_context_collector_state *state);
 static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
 static VALUE _native_new_empty_thread(VALUE self);
-ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
+static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
 
 void collectors_thread_context_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -235,6 +246,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
   rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
   rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
   rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
+  rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
   rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
 
   at_active_span_id = rb_intern_const("@active_span");
@@ -243,6 +255,8 @@ void collectors_thread_context_init(VALUE profiling_module) {
   at_resource_id = rb_intern_const("@resource");
   at_root_span_id = rb_intern_const("@root_span");
   at_type_id = rb_intern_const("@type");
+
+  gc_profiling_init();
 }
 
 // This structure is used to define a Ruby object that stores a pointer to a struct thread_context_collector_state
@@ -320,6 +334,8 @@ static VALUE _native_new(VALUE klass) {
   state->allocation_type_enabled = true;
   state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
   state->main_thread = rb_thread_main();
+  state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
+  state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
 
   return TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
 }
@@ -465,7 +481,11 @@ void update_metrics_and_sample(
   long wall_time_elapsed_ns = update_time_since_previous_sample(
     &thread_context->wall_time_at_previous_sample_ns,
     current_monotonic_wall_time_ns,
-    thread_context->gc_tracking.wall_time_at_start_ns,
+    // We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
+    // accounting to change during GC.
+    // E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of whether the
+    // thread is doing GC or not.
+    INVALID_TIME,
     IS_WALL_TIME
   );
 
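As a worked example of the comment above: `update_time_since_previous_sample` takes the GC start time as a cap on the elapsed period, and passing `INVALID_TIME` disables that cap. A simplified, hypothetical stand-in (the real function also updates the previous-sample timestamp through its pointer argument; that part is omitted here) behaves like this:

```c
// Illustrative only: wall-time passes INVALID_TIME so all 60 units get
// recorded, while cpu-time is capped at the GC start and stops at 40;
// the remaining 20 units get blamed on the GC event instead.
#include <assert.h>

#define INVALID_TIME -1L

static long elapsed(long time_at_previous_sample, long now, long gc_start_time) {
  long end = (gc_start_time != INVALID_TIME && gc_start_time < now) ? gc_start_time : now;
  return end - time_at_previous_sample;
}

int main(void) {
  // Previous sample at t=0, current time t=60, GC started at t=40.
  assert(elapsed(0, 60, INVALID_TIME) == 60); // wall-time accounting
  assert(elapsed(0, 60, 40) == 40);           // cpu-time accounting
  return 0;
}
```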
@@ -475,7 +495,6 @@
     stack_from_thread,
     thread_context,
     (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
-    SAMPLE_REGULAR,
     current_monotonic_wall_time_ns,
     NULL,
     NULL
@@ -484,7 +503,7 @@
 
 // This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
 // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
-// create a stack sample that blames the cpu/wall time spent from now until the end of the garbage collector work.
+// create an event including the cpu/wall time spent in garbage collector work.
 //
 // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
 // *NO ALLOCATION* is allowed. This function, and any functions it calls, must never trigger memory or object allocation.
@@ -509,27 +528,14 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
     return;
   }
 
-  // If these fields are set, there's an existing GC sample that still needs to be written out by `sample_after_gc`.
-  //
-  // When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
-  // called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
-  // before we can actually run that method.
-  //
-  // We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
-  // `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
-  // then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
-  // there was a single, longer GC period.
-  if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
-      thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
-
-  // Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
+  // Here we record the wall-time first and in on_gc_finish we record it second to try to avoid having wall-time be slightly < cpu-time
   thread_context->gc_tracking.wall_time_at_start_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
   thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
 }
 
 // This function gets called when Ruby has finished running the Garbage Collector on the current thread.
-// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
-// create a stack sample that blames the cpu/wall time spent from the start of garbage collector work until now.
+// It records the cpu/wall-time observed during GC, which will be used to later
+// create an event including the cpu/wall time spent from the start of garbage collector work until now.
 //
 // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
 // *NO ALLOCATION* is allowed. This function, and any functions it calls, must never trigger memory or object allocation.
@@ -537,9 +543,9 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
 //
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
-void thread_context_collector_on_gc_finish(VALUE self_instance) {
+bool thread_context_collector_on_gc_finish(VALUE self_instance) {
   struct thread_context_collector_state *state;
-  if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
+  if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
   // This should never fail if the above check passes
   TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
 
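For context, the new boolean return is what lets the caller decide when to schedule the (allocation-allowed) flush, since `sample_after_gc` cannot run during GC itself. A hypothetical caller sketch (the real caller lives in `collectors_cpu_and_wall_time_worker.c`, also changed in this release; `schedule_sample_after_gc` is a made-up stand-in for its deferral mechanism):

```c
#include <ruby.h>
#include <stdbool.h>

bool thread_context_collector_on_gc_finish(VALUE self_instance);
void schedule_sample_after_gc(void); // hypothetical deferral, e.g. a postponed job

// Called when the VM reports the end of a GC step: defer the flush only when
// on_gc_finish reports enough coalesced GC time (or a finished major GC).
static void on_gc_finish_event(VALUE thread_context_collector_instance) {
  bool should_flush = thread_context_collector_on_gc_finish(thread_context_collector_instance);
  if (should_flush) schedule_sample_after_gc();
}
```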
@@ -547,29 +553,67 @@
 
   // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
   // how often this happens -- see on_gc_start.
-  if (thread_context == NULL) return;
+  if (thread_context == NULL) return false;
+
+  long cpu_time_at_start_ns = thread_context->gc_tracking.cpu_time_at_start_ns;
+  long wall_time_at_start_ns = thread_context->gc_tracking.wall_time_at_start_ns;
 
-  if (thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME &&
-      thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) {
+  if (cpu_time_at_start_ns == INVALID_TIME && wall_time_at_start_ns == INVALID_TIME) {
     // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
     // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
     // do nothing.
-    return;
+    return false;
   }
 
-  // Here we record the wall-time second and in on_gc_start we record it first to avoid having wall-time be slightly < cpu-time
-  thread_context->gc_tracking.cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
-  thread_context->gc_tracking.wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+  // Mark thread as no longer in GC
+  thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
+  thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+
+  // Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time
+  long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
+  long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+
+  // If our end timestamp is not OK, we bail out
+  if (wall_time_at_finish_ns == 0) return false;
+
+  long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - cpu_time_at_start_ns;
+  long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - wall_time_at_start_ns;
+
+  // Wall-time can go backwards if the system clock gets changed (and we observed spurious jumps back on macOS as well)
+  // so let's ensure we don't get negative values for time deltas.
+  gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0);
+  gc_wall_time_elapsed_ns = long_max_of(gc_wall_time_elapsed_ns, 0);
+
+  if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
+    state->gc_tracking.accumulated_cpu_time_ns = 0;
+    state->gc_tracking.accumulated_wall_time_ns = 0;
+  }
+
+  state->gc_tracking.accumulated_cpu_time_ns += gc_cpu_time_elapsed_ns;
+  state->gc_tracking.accumulated_wall_time_ns += gc_wall_time_elapsed_ns;
+  state->gc_tracking.wall_time_at_previous_gc_ns = wall_time_at_finish_ns;
+
+  // Update cpu-time accounting so it doesn't include the cpu-time spent in GC during the next sample
+  // We don't update the wall-time because we don't subtract the wall-time spent in GC (see call to
+  // `update_time_since_previous_sample` for wall-time in `update_metrics_and_sample`).
+  if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
+    thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
+  }
+
+  // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
+  // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
+  // samples first.
+  bool finished_major_gc = gc_profiling_has_major_gc_finished();
+  bool over_flush_time_threshold =
+    (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
+
+  return finished_major_gc || over_flush_time_threshold;
 }
 
-// This function gets called shortly after Ruby has finished running the Garbage Collector.
+// This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
 // It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
 // GC-related tracking.
 //
-// Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
-// and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
-// set on their context.
-//
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
 // Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
@@ -578,70 +622,45 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
   struct thread_context_collector_state *state;
   TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
 
-  VALUE threads = thread_list(state);
-  bool sampled_any_thread = false;
-
-  const long thread_count = RARRAY_LEN(threads);
-  for (long i = 0; i < thread_count; i++) {
-    VALUE thread = RARRAY_AREF(threads, i);
-    struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
+  if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
+    rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
+  }
 
-    if (
-      thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME ||
-      thread_context->gc_tracking.cpu_time_at_finish_ns == INVALID_TIME ||
-      thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
-      thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
-    ) continue; // Ignore threads with no/incomplete garbage collection data
-
-    sampled_any_thread = true;
-
-    long gc_cpu_time_elapsed_ns =
-      thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
-    long gc_wall_time_elapsed_ns =
-      thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
-
-    // We don't expect non-wall time to go backwards, so let's flag this as a bug
-    if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
-    // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
-    // was a bug.
-    // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
-    // https://github.com/DataDog/dd-trace-rb/pull/2336.
-    if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
-
-    if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
-      // Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
-      // come up with a crazy value for the frame
-      rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
-    }
+  int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
+  ddog_prof_Label labels[max_labels_needed_for_gc];
+  uint8_t label_pos = gc_profiling_set_metadata(labels, max_labels_needed_for_gc);
 
-    trigger_sample_for_thread(
-      state,
-      /* thread: */ thread,
-      /* stack_from_thread: */ thread,
-      thread_context,
-      (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
-      SAMPLE_IN_GC,
-      INVALID_TIME, // For now we're not collecting timestamps for these events
-      NULL,
-      NULL
-    );
+  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
 
-    // Mark thread as no longer in GC
-    thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
-    thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
-    thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
-    thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
+  // The end_timestamp_ns is treated specially by libdatadog and that's why it's not added as a ddog_prof_Label
+  int64_t end_timestamp_ns = 0;
 
-    // Update counters so that they won't include the time in GC during the next sample
-    if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
-      thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
-    }
-    if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
-      thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
-    }
+  if (state->timeline_enabled) {
+    end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns);
   }
 
-  if (sampled_any_thread) state->stats.gc_samples++;
+  record_placeholder_stack(
+    state->sampling_buffer,
+    state->recorder_instance,
+    (sample_values) {
+      // This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
+      // timeline duration.
+      // This is done to enable two use-cases:
+      // * regular cpu/wall-time makes this event show up as a regular stack in the flamegraph
+      // * the timeline duration is used when the event shows up in the timeline
+      .cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns,
+      .cpu_or_wall_samples = 1,
+      .wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
+      .timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
+    },
+    (sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns},
+    DDOG_CHARSLICE_C("Garbage Collection")
+  );
+
+  state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns;
+  state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
+
+  state->stats.gc_samples++;
 
   // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
   return Qnil;
@@ -653,7 +672,6 @@ static void trigger_sample_for_thread(
   VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
   struct per_thread_context *thread_context,
   sample_values values,
-  sample_type type,
   long current_monotonic_wall_time_ns,
   // These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
   ddog_CharSlice *ruby_vm_type,
@@ -776,8 +794,7 @@ static void trigger_sample_for_thread(
     state->sampling_buffer,
     state->recorder_instance,
     values,
-    (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns},
-    type
+    (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
   );
 }
 
@@ -874,9 +891,7 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_c
 
   // These will only be used during a GC operation
   thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
-  thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
   thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
-  thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
 }
 
 static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -901,6 +916,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
     state->time_converter_state.delta_to_epoch_ns
   ));
   rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
+  rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
 
   return result;
 }
@@ -927,9 +943,7 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
     ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),
 
     ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
-    ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
     ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
-    ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
   };
   for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);
 
@@ -947,6 +961,19 @@ static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state) {
   return stats_as_hash;
 }
 
+static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state) {
+  // Update this when modifying state struct (gc_tracking inner struct)
+  VALUE result = rb_hash_new();
+  VALUE arguments[] = {
+    ID2SYM(rb_intern("accumulated_cpu_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_cpu_time_ns),
+    ID2SYM(rb_intern("accumulated_wall_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_wall_time_ns),
+    ID2SYM(rb_intern("wall_time_at_previous_gc_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_previous_gc_ns),
+    ID2SYM(rb_intern("wall_time_at_last_flushed_gc_event_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_last_flushed_gc_event_ns),
+  };
+  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(result, arguments[i], arguments[i+1]);
+  return result;
+}
+
 static void remove_context_for_dead_threads(struct thread_context_collector_state *state) {
   st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
 }
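`gc_tracking_as_ruby_hash` above uses this file's pairs-array idiom: even indices hold keys, odd indices hold values. Stripped down (with `VALUE_COUNT` written out as a stand-in for the gem's helper macro), the pattern is:

```c
#include <ruby.h>

#define VALUE_COUNT(array) (sizeof(array) / sizeof(VALUE)) // stand-in for the gem's helper

static VALUE example_as_ruby_hash(void) {
  VALUE result = rb_hash_new();
  VALUE arguments[] = {
    // Key at even index, value at odd index; the /* => */ marker is just visual.
    ID2SYM(rb_intern("example_key")), /* => */ LONG2NUM(42),
  };
  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(result, arguments[i], arguments[i + 1]);
  return result;
}
```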
@@ -1049,8 +1076,6 @@ VALUE enforce_thread_context_collector_instance(VALUE object) {
 
 // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
 // It SHOULD NOT be used for other purposes.
-//
-// Returns the whole contents of the per_thread_context structs being tracked.
 static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
   struct thread_context_collector_state *state;
   TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
@@ -1058,6 +1083,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance)
   return stats_as_ruby_hash(state);
 }
 
+// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
+// It SHOULD NOT be used for other purposes.
+static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
+  struct thread_context_collector_state *state;
+  TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+  return gc_tracking_as_ruby_hash(state);
+}
+
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
   if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
@@ -1150,6 +1184,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
   // Since this is stack allocated, be careful about moving it
   ddog_CharSlice class_name;
   ddog_CharSlice *optional_class_name = NULL;
+  char imemo_type[100];
 
   if (state->allocation_type_enabled) {
     optional_class_name = &class_name;
@@ -1197,19 +1232,26 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
         class_name = ruby_value_type_to_class_name(type);
       }
     } else if (type == RUBY_T_IMEMO) {
-      class_name = DDOG_CHARSLICE_C("(VM Internal, T_IMEMO)");
+      const char *imemo_string = imemo_kind(new_object);
+      if (imemo_string != NULL) {
+        snprintf(imemo_type, 100, "(VM Internal, T_IMEMO, %s)", imemo_string);
+        class_name = (ddog_CharSlice) {.ptr = imemo_type, .len = strlen(imemo_type)};
+      } else { // Ruby < 3
+        class_name = DDOG_CHARSLICE_C("(VM Internal, T_IMEMO)");
+      }
     } else {
       class_name = ruby_vm_type; // For other weird internal things we just use the VM type
     }
   }
 
+  track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);
+
   trigger_sample_for_thread(
     state,
     /* thread: */ current_thread,
     /* stack_from_thread: */ current_thread,
    get_or_create_context_for(current_thread, state),
     (sample_values) {.alloc_samples = sample_weight},
-    SAMPLE_REGULAR,
     INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
     &ruby_vm_type,
     optional_class_name
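The `T_IMEMO` class-name formatting above can be exercised in isolation. In this sketch, `char_slice` stands in for `ddog_CharSlice`, and `"iseq"` is one example of what `imemo_kind` might report on Ruby 3+:

```c
#include <stdio.h>
#include <string.h>

typedef struct { const char *ptr; size_t len; } char_slice; // stand-in for ddog_CharSlice

int main(void) {
  char imemo_type[100];
  const char *imemo_string = "iseq"; // hypothetical imemo_kind() result
  snprintf(imemo_type, sizeof(imemo_type), "(VM Internal, T_IMEMO, %s)", imemo_string);
  char_slice class_name = {.ptr = imemo_type, .len = strlen(imemo_type)};
  printf("%.*s\n", (int) class_name.len, class_name.ptr); // => (VM Internal, T_IMEMO, iseq)
  return 0;
}
```

Note how the diff declares `char imemo_type[100]` at function scope (see the hunk before this one) rather than inside the `else if` branch, so the `ddog_CharSlice` keeps pointing at live stack memory when the sample is recorded later in the function.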
@@ -1232,7 +1274,7 @@ static VALUE _native_new_empty_thread(DDTRACE_UNUSED VALUE self) {
   return rb_thread_create(new_empty_thread_inner, NULL);
 }
 
-ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
+static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
   switch (type) {
     case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("Object");
     case(RUBY_T_CLASS  ): return DDOG_CHARSLICE_C("Class");
data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <ruby.h>
+#include <stdbool.h>
 
 void thread_context_collector_sample(
   VALUE self_instance,
@@ -10,5 +11,5 @@ void thread_context_collector_sample(
 void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object);
 VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
 void thread_context_collector_on_gc_start(VALUE self_instance);
-void thread_context_collector_on_gc_finish(VALUE self_instance);
+bool thread_context_collector_on_gc_finish(VALUE self_instance);
 VALUE enforce_thread_context_collector_instance(VALUE object);