ddtrace 1.18.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -1
  3. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
  4. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
  5. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
  6. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  7. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
  9. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
  10. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +159 -124
  11. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
  12. data/ext/ddtrace_profiling_native_extension/extconf.rb +16 -0
  13. data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
  14. data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
  15. data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
  16. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
  17. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
  18. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +5 -0
  19. data/ext/ddtrace_profiling_native_extension/profiling.c +1 -0
  20. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
  21. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
  22. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +329 -10
  23. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
  24. data/lib/datadog/core/configuration/settings.rb +139 -22
  25. data/lib/datadog/core/telemetry/collector.rb +10 -0
  26. data/lib/datadog/core/telemetry/event.rb +2 -1
  27. data/lib/datadog/core/telemetry/ext.rb +3 -0
  28. data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
  29. data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
  30. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -11
  31. data/lib/datadog/profiling/component.rb +197 -13
  32. data/lib/datadog/profiling/scheduler.rb +4 -6
  33. data/lib/datadog/profiling/stack_recorder.rb +13 -2
  34. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
  35. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  36. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  37. data/lib/ddtrace/version.rb +1 -1
  38. metadata +12 -7
@@ -7,6 +7,7 @@
7
7
  #include "libdatadog_helpers.h"
8
8
  #include "ruby_helpers.h"
9
9
  #include "time_helpers.h"
10
+ #include "heap_recorder.h"
10
11
 
11
12
  // Used to wrap a ddog_prof_Profile in a Ruby object and expose Ruby-level serialization APIs
12
13
  // This file implements the native bits of the Datadog::Profiling::StackRecorder class
@@ -150,18 +151,29 @@ static VALUE error_symbol = Qnil; // :error in Ruby
150
151
  #define WALL_TIME_VALUE_ID 2
151
152
  #define ALLOC_SAMPLES_VALUE {.type_ = VALUE_STRING("alloc-samples"), .unit = VALUE_STRING("count")}
152
153
  #define ALLOC_SAMPLES_VALUE_ID 3
154
+ #define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
155
+ #define HEAP_SAMPLES_VALUE_ID 4
156
+ #define HEAP_SIZE_VALUE {.type_ = VALUE_STRING("heap-live-size"), .unit = VALUE_STRING("bytes")}
157
+ #define HEAP_SIZE_VALUE_ID 5
158
+ #define TIMELINE_VALUE {.type_ = VALUE_STRING("timeline"), .unit = VALUE_STRING("nanoseconds")}
159
+ #define TIMELINE_VALUE_ID 6
153
160
 
154
- static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE};
161
+ static const ddog_prof_ValueType all_value_types[] =
162
+ {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};
155
163
 
156
164
  // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
157
165
  // occupies on the all_value_types array.
158
166
  // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
159
- static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID};
167
+ static const uint8_t all_value_types_positions[] =
168
+ {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};
160
169
 
161
170
  #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))
162
171
 
163
172
  // Contains native state for each instance
164
173
  struct stack_recorder_state {
174
+ // Heap recorder instance
175
+ heap_recorder *heap_recorder;
176
+
165
177
  pthread_mutex_t slot_one_mutex;
166
178
  ddog_prof_Profile slot_one_profile;
167
179
 
@@ -184,6 +196,7 @@ struct call_serialize_without_gvl_arguments {
184
196
  // Set by caller
185
197
  struct stack_recorder_state *state;
186
198
  ddog_Timespec finish_timestamp;
199
+ size_t gc_count_before_serialize;
187
200
 
188
201
  // Set by callee
189
202
  ddog_prof_Profile *profile;
@@ -197,11 +210,20 @@ static VALUE _native_new(VALUE klass);
197
210
  static void initialize_slot_concurrency_control(struct stack_recorder_state *state);
198
211
  static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Slice_ValueType sample_types);
199
212
  static void stack_recorder_typed_data_free(void *data);
200
- static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled);
213
+ static VALUE _native_initialize(
214
+ DDTRACE_UNUSED VALUE _self,
215
+ VALUE recorder_instance,
216
+ VALUE cpu_time_enabled,
217
+ VALUE alloc_samples_enabled,
218
+ VALUE heap_samples_enabled,
219
+ VALUE heap_size_enabled,
220
+ VALUE heap_sample_every,
221
+ VALUE timeline_enabled
222
+ );
201
223
  static VALUE _native_serialize(VALUE self, VALUE recorder_instance);
202
224
  static VALUE ruby_time_from(ddog_Timespec ddprof_time);
203
225
  static void *call_serialize_without_gvl(void *call_args);
204
- static struct active_slot_pair sampler_lock_active_profile();
226
+ static struct active_slot_pair sampler_lock_active_profile(struct stack_recorder_state *state);
205
227
  static void sampler_unlock_active_profile(struct active_slot_pair active_slot);
206
228
  static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state);
207
229
  static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
@@ -213,6 +235,14 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
213
235
  static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time);
214
236
  static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint);
215
237
  static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */);
238
+ static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class);
239
+ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations);
240
+ static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
241
+ static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
242
+ static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
243
+ static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj);
244
+ static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj);
245
+
216
246
 
217
247
  void stack_recorder_init(VALUE profiling_module) {
218
248
  VALUE stack_recorder_class = rb_define_class_under(profiling_module, "StackRecorder", rb_cObject);
@@ -229,13 +259,25 @@ void stack_recorder_init(VALUE profiling_module) {
229
259
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
230
260
  rb_define_alloc_func(stack_recorder_class, _native_new);
231
261
 
232
- rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 3);
262
+ rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 7);
233
263
  rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1);
234
264
  rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
235
265
  rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1);
236
266
  rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1);
237
267
  rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1);
238
268
  rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3);
269
+ rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 4);
270
+ rb_define_singleton_method(testing_module, "_native_check_heap_hashes", _native_check_heap_hashes, 1);
271
+ rb_define_singleton_method(testing_module, "_native_start_fake_slow_heap_serialization",
272
+ _native_start_fake_slow_heap_serialization, 1);
273
+ rb_define_singleton_method(testing_module, "_native_end_fake_slow_heap_serialization",
274
+ _native_end_fake_slow_heap_serialization, 1);
275
+ rb_define_singleton_method(testing_module, "_native_debug_heap_recorder",
276
+ _native_debug_heap_recorder, 1);
277
+ rb_define_singleton_method(testing_module, "_native_gc_force_recycle",
278
+ _native_gc_force_recycle, 1);
279
+ rb_define_singleton_method(testing_module, "_native_has_seen_id_flag",
280
+ _native_has_seen_id_flag, 1);
239
281
 
240
282
  ok_symbol = ID2SYM(rb_intern_const("ok"));
241
283
  error_symbol = ID2SYM(rb_intern_const("error"));
@@ -270,6 +312,12 @@ static VALUE _native_new(VALUE klass) {
270
312
 
271
313
  VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state);
272
314
 
315
+ // NOTE: We initialize this because we want a new recorder to be operational even without initialization and our
316
+ // default is everything enabled. However, if during recording initialization it turns out we don't want
317
+ // heap samples, we will free and reset heap_recorder to NULL, effectively disabling all behaviour specific
318
+ // to heap profiling (all calls to heap_recorder_* with a NULL heap recorder are noops).
319
+ state->heap_recorder = heap_recorder_new();
320
+
273
321
  // Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking!
274
322
 
275
323
  initialize_profiles(state, sample_types);
@@ -318,27 +366,51 @@ static void stack_recorder_typed_data_free(void *state_ptr) {
318
366
  pthread_mutex_destroy(&state->slot_two_mutex);
319
367
  ddog_prof_Profile_drop(&state->slot_two_profile);
320
368
 
369
+ heap_recorder_free(state->heap_recorder);
370
+
321
371
  ruby_xfree(state);
322
372
  }
323
373
 
324
- static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled) {
374
+ static VALUE _native_initialize(
375
+ DDTRACE_UNUSED VALUE _self,
376
+ VALUE recorder_instance,
377
+ VALUE cpu_time_enabled,
378
+ VALUE alloc_samples_enabled,
379
+ VALUE heap_samples_enabled,
380
+ VALUE heap_size_enabled,
381
+ VALUE heap_sample_every,
382
+ VALUE timeline_enabled
383
+ ) {
325
384
  ENFORCE_BOOLEAN(cpu_time_enabled);
326
385
  ENFORCE_BOOLEAN(alloc_samples_enabled);
386
+ ENFORCE_BOOLEAN(heap_samples_enabled);
387
+ ENFORCE_BOOLEAN(heap_size_enabled);
388
+ ENFORCE_TYPE(heap_sample_every, T_FIXNUM);
389
+ ENFORCE_BOOLEAN(timeline_enabled);
327
390
 
328
391
  struct stack_recorder_state *state;
329
392
  TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
330
393
 
331
- if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default
394
+ heap_recorder_set_sample_rate(state->heap_recorder, NUM2INT(heap_sample_every));
395
+
396
+ uint8_t requested_values_count = ALL_VALUE_TYPES_COUNT -
397
+ (cpu_time_enabled == Qtrue ? 0 : 1) -
398
+ (alloc_samples_enabled == Qtrue? 0 : 1) -
399
+ (heap_samples_enabled == Qtrue ? 0 : 1) -
400
+ (heap_size_enabled == Qtrue ? 0 : 1) -
401
+ (timeline_enabled == Qtrue ? 0 : 1);
402
+
403
+ if (requested_values_count == ALL_VALUE_TYPES_COUNT) return Qtrue; // Nothing to do, this is the default
332
404
 
333
405
  // When some sample types are disabled, we need to reconfigure libdatadog to record less types,
334
406
  // as well as reconfigure the position_for array to push the disabled types to the end so they don't get recorded.
335
407
  // See record_sample for details on the use of position_for.
336
408
 
337
- state->enabled_values_count = ALL_VALUE_TYPES_COUNT - (cpu_time_enabled == Qtrue ? 0 : 1) - (alloc_samples_enabled == Qtrue? 0 : 1);
409
+ state->enabled_values_count = requested_values_count;
338
410
 
339
411
  ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT];
340
412
  uint8_t next_enabled_pos = 0;
341
- uint8_t next_disabled_pos = state->enabled_values_count;
413
+ uint8_t next_disabled_pos = requested_values_count;
342
414
 
343
415
  // CPU_SAMPLES_VALUE is always enabled
344
416
  enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) CPU_SAMPLES_VALUE;
@@ -362,6 +434,35 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta
362
434
  state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++;
363
435
  }
364
436
 
437
+ if (heap_samples_enabled == Qtrue) {
438
+ enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE;
439
+ state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++;
440
+ } else {
441
+ state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++;
442
+ }
443
+
444
+ if (heap_size_enabled == Qtrue) {
445
+ enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SIZE_VALUE;
446
+ state->position_for[HEAP_SIZE_VALUE_ID] = next_enabled_pos++;
447
+ } else {
448
+ state->position_for[HEAP_SIZE_VALUE_ID] = next_disabled_pos++;
449
+ }
450
+ heap_recorder_set_size_enabled(state->heap_recorder, heap_size_enabled);
451
+
452
+ if (heap_samples_enabled == Qfalse && heap_size_enabled == Qfalse) {
453
+ // Turns out heap sampling is disabled but we initialized everything in _native_new
454
+ // assuming all samples were enabled. We need to deinitialize the heap recorder.
455
+ heap_recorder_free(state->heap_recorder);
456
+ state->heap_recorder = NULL;
457
+ }
458
+
459
+ if (timeline_enabled == Qtrue) {
460
+ enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) TIMELINE_VALUE;
461
+ state->position_for[TIMELINE_VALUE_ID] = next_enabled_pos++;
462
+ } else {
463
+ state->position_for[TIMELINE_VALUE_ID] = next_disabled_pos++;
464
+ }
465
+
365
466
  ddog_prof_Profile_drop(&state->slot_one_profile);
366
467
  ddog_prof_Profile_drop(&state->slot_two_profile);
367
468
 
@@ -379,9 +480,18 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
379
480
  // Need to do this while still holding on to the Global VM Lock; see comments on method for why
380
481
  serializer_set_start_timestamp_for_next_profile(state, finish_timestamp);
381
482
 
483
+ // Prepare the iteration on heap recorder we'll be doing outside the GVL. The preparation needs to
484
+ // happen while holding on to the GVL.
485
+ heap_recorder_prepare_iteration(state->heap_recorder);
486
+
382
487
  // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this
383
488
  // is pending
384
- struct call_serialize_without_gvl_arguments args = {.state = state, .finish_timestamp = finish_timestamp, .serialize_ran = false};
489
+ struct call_serialize_without_gvl_arguments args = {
490
+ .state = state,
491
+ .finish_timestamp = finish_timestamp,
492
+ .gc_count_before_serialize = rb_gc_count(),
493
+ .serialize_ran = false
494
+ };
385
495
 
386
496
  while (!args.serialize_ran) {
387
497
  // Give the Ruby VM an opportunity to process any pending interruptions (including raising exceptions).
@@ -397,6 +507,9 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
397
507
  rb_thread_call_without_gvl2(call_serialize_without_gvl, &args, NULL /* No interruption function needed in this case */, NULL /* Not needed */);
398
508
  }
399
509
 
510
+ // Cleanup after heap recorder iteration. This needs to happen while holding on to the GVL.
511
+ heap_recorder_finish_iteration(state->heap_recorder);
512
+
400
513
  ddog_prof_Profile_SerializeResult serialized_profile = args.result;
401
514
 
402
515
  if (serialized_profile.tag == DDOG_PROF_PROFILE_SERIALIZE_RESULT_ERR) {
@@ -439,6 +552,15 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
439
552
  metric_values[position_for[CPU_SAMPLES_VALUE_ID]] = values.cpu_or_wall_samples;
440
553
  metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns;
441
554
  metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples;
555
+ metric_values[position_for[TIMELINE_VALUE_ID]] = values.timeline_wall_time_ns;
556
+
557
+ if (values.alloc_samples != 0) {
558
+ // If we got an allocation sample end the heap allocation recording to commit the heap sample.
559
+ // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening
560
+ // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can
561
+ // be fixed with some refactoring but for now this leads to a less impactful change.
562
+ end_heap_allocation_recording(state->heap_recorder, locations);
563
+ }
442
564
 
443
565
  ddog_prof_Profile_Result result = ddog_prof_Profile_add(
444
566
  active_slot.profile,
@@ -457,6 +579,15 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
457
579
  }
458
580
  }
459
581
 
582
+ void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class) {
583
+ struct stack_recorder_state *state;
584
+ TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
585
+ // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening
586
+ // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can
587
+ // be fixed with some refactoring but for now this leads to a less impactful change.
588
+ start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight, alloc_class);
589
+ }
590
+
460
591
  void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) {
461
592
  struct stack_recorder_state *state;
462
593
  TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
@@ -472,10 +603,103 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
472
603
  }
473
604
  }
474
605
 
606
+ #define MAX_LEN_HEAP_ITERATION_ERROR_MSG 256
607
+
608
+ // Heap recorder iteration context allows us access to stack recorder state and profile being serialized
609
+ // during iteration of heap recorder live objects.
610
+ typedef struct heap_recorder_iteration_context {
611
+ struct stack_recorder_state *state;
612
+ ddog_prof_Profile *profile;
613
+
614
+ bool error;
615
+ char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG];
616
+
617
+ size_t profile_gen;
618
+ } heap_recorder_iteration_context;
619
+
620
+ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) {
621
+ heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg;
622
+
623
+ live_object_data *object_data = &iteration_data.object_data;
624
+
625
+ int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0};
626
+ uint8_t *position_for = context->state->position_for;
627
+
628
+ metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight;
629
+ metric_values[position_for[HEAP_SIZE_VALUE_ID]] = object_data->size * object_data->weight;
630
+
631
+ ddog_prof_Label labels[2];
632
+ size_t label_offset = 0;
633
+
634
+ if (object_data->class != NULL) {
635
+ labels[label_offset++] = (ddog_prof_Label) {
636
+ .key = DDOG_CHARSLICE_C("allocation class"),
637
+ .str = (ddog_CharSlice) {
638
+ .ptr = object_data->class,
639
+ .len = strlen(object_data->class),
640
+ },
641
+ .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
642
+ };
643
+ }
644
+ labels[label_offset++] = (ddog_prof_Label) {
645
+ .key = DDOG_CHARSLICE_C("gc gen age"),
646
+ .num = context->profile_gen - object_data->alloc_gen,
647
+ };
648
+
649
+ ddog_prof_Profile_Result result = ddog_prof_Profile_add(
650
+ context->profile,
651
+ (ddog_prof_Sample) {
652
+ .locations = iteration_data.locations,
653
+ .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count},
654
+ .labels = (ddog_prof_Slice_Label) {
655
+ .ptr = labels,
656
+ .len = label_offset,
657
+ }
658
+ },
659
+ 0
660
+ );
661
+
662
+ if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
663
+ read_ddogerr_string_and_drop(&result.err, context->error_msg, MAX_LEN_HEAP_ITERATION_ERROR_MSG);
664
+ context->error = true;
665
+ // By returning false we cancel the iteration
666
+ return false;
667
+ }
668
+
669
+ // Keep on iterating to next item!
670
+ return true;
671
+ }
672
+
673
+ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile, size_t gc_count_before_serialize) {
674
+ heap_recorder_iteration_context iteration_context = {
675
+ .state = state,
676
+ .profile = profile,
677
+ .error = false,
678
+ .error_msg = {0},
679
+ .profile_gen = gc_count_before_serialize,
680
+ };
681
+ bool iterated = heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context);
682
+ // We wait until we're out of the iteration to grab the gvl and raise. This is important because during
683
+ // iteration we may potentially acquire locks in the heap recorder and we could reach a deadlock if the
684
+ // same locks are acquired by the heap recorder while holding the gvl (since we'd be operating on the
685
+ // same locks but acquiring them in different order).
686
+ if (!iterated) {
687
+ grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: iteration cancelled");
688
+ }
689
+ else if (iteration_context.error) {
690
+ grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.error_msg);
691
+ }
692
+ }
693
+
475
694
  static void *call_serialize_without_gvl(void *call_args) {
476
695
  struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args;
477
696
 
478
697
  args->profile = serializer_flip_active_and_inactive_slots(args->state);
698
+
699
+ // Now that we have the inactive profile with all but heap samples, lets fill it with heap data
700
+ // without needing to race with the active sampler
701
+ build_heap_profile_without_gvl(args->state, args->profile, args->gc_count_before_serialize);
702
+
479
703
  // Note: The profile gets reset by the serialize call
480
704
  args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */);
481
705
  args->serialize_ran = true;
@@ -597,6 +821,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
597
821
  reset_profile(&state->slot_one_profile, /* start_time: */ NULL);
598
822
  reset_profile(&state->slot_two_profile, /* start_time: */ NULL);
599
823
 
824
+ heap_recorder_after_fork(state->heap_recorder);
825
+
600
826
  return Qtrue;
601
827
  }
602
828
 
@@ -614,9 +840,102 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_
614
840
  return Qtrue;
615
841
  }
616
842
 
843
+ static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class) {
844
+ ENFORCE_TYPE(weight, T_FIXNUM);
845
+ ddog_CharSlice alloc_class_slice = char_slice_from_ruby_string(alloc_class);
846
+ track_object(recorder_instance, new_obj, NUM2UINT(weight), &alloc_class_slice);
847
+ return Qtrue;
848
+ }
849
+
850
+ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations) {
851
+ ENFORCE_TYPE(locations, T_ARRAY);
852
+ size_t locations_len = rb_array_len(locations);
853
+ ddog_prof_Location locations_arr[locations_len];
854
+ for (size_t i = 0; i < locations_len; i++) {
855
+ VALUE location = rb_ary_entry(locations, i);
856
+ ENFORCE_TYPE(location, T_ARRAY);
857
+ VALUE name = rb_ary_entry(location, 0);
858
+ VALUE filename = rb_ary_entry(location, 1);
859
+ VALUE line = rb_ary_entry(location, 2);
860
+ ENFORCE_TYPE(name, T_STRING);
861
+ ENFORCE_TYPE(filename, T_STRING);
862
+ ENFORCE_TYPE(line, T_FIXNUM);
863
+ locations_arr[i] = (ddog_prof_Location) {
864
+ .line = line,
865
+ .function = (ddog_prof_Function) {
866
+ .name = char_slice_from_ruby_string(name),
867
+ .filename = char_slice_from_ruby_string(filename),
868
+ }
869
+ };
870
+ }
871
+ ddog_prof_Slice_Location ddog_locations = {
872
+ .len = locations_len,
873
+ .ptr = locations_arr,
874
+ };
875
+ heap_recorder_testonly_assert_hash_matches(ddog_locations);
876
+
877
+ return Qnil;
878
+ }
879
+
617
880
  static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */) {
618
881
  ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile, start_time);
619
882
  if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
620
883
  rb_raise(rb_eRuntimeError, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err));
621
884
  }
622
885
  }
886
+
887
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
888
+ // It SHOULD NOT be used for other purposes.
889
+ static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) {
890
+ struct stack_recorder_state *state;
891
+ TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
892
+
893
+ heap_recorder_prepare_iteration(state->heap_recorder);
894
+
895
+ return Qnil;
896
+ }
897
+
898
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
899
+ // It SHOULD NOT be used for other purposes.
900
+ static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) {
901
+ struct stack_recorder_state *state;
902
+ TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
903
+
904
+ heap_recorder_finish_iteration(state->heap_recorder);
905
+
906
+ return Qnil;
907
+ }
908
+
909
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
910
+ // It SHOULD NOT be used for other purposes.
911
+ static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) {
912
+ struct stack_recorder_state *state;
913
+ TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
914
+
915
+ return heap_recorder_testonly_debug(state->heap_recorder);
916
+ }
917
+
918
+ #pragma GCC diagnostic push
919
+ // rb_gc_force_recycle was deprecated in latest versions of Ruby and is a noop.
920
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
921
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
922
+ // It SHOULD NOT be used for other purposes.
923
+ static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj) {
924
+ rb_gc_force_recycle(obj);
925
+ return Qnil;
926
+ }
927
+ #pragma GCC diagnostic pop
928
+
929
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
930
+ // It SHOULD NOT be used for other purposes.
931
+ static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj) {
932
+ #ifndef NO_SEEN_OBJ_ID_FLAG
933
+ if (RB_FL_TEST(obj, RUBY_FL_SEEN_OBJ_ID)) {
934
+ return Qtrue;
935
+ } else {
936
+ return Qfalse;
937
+ }
938
+ #else
939
+ return Qfalse;
940
+ #endif
941
+ }
@@ -1,12 +1,14 @@
1
1
  #pragma once
2
2
 
3
3
  #include <datadog/profiling.h>
4
+ #include <ruby.h>
4
5
 
5
6
  typedef struct {
6
7
  int64_t cpu_time_ns;
7
8
  int64_t wall_time_ns;
8
9
  uint32_t cpu_or_wall_samples;
9
10
  uint32_t alloc_samples;
11
+ int64_t timeline_wall_time_ns;
10
12
  } sample_values;
11
13
 
12
14
  typedef struct sample_labels {
@@ -21,4 +23,5 @@ typedef struct sample_labels {
21
23
 
22
24
  void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels);
23
25
  void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint);
26
+ void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class);
24
27
  VALUE enforce_recorder_instance(VALUE object);