ddtrace 1.18.0 → 1.19.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -1
  3. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
  4. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
  5. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
  6. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  7. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
  9. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
  10. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +159 -124
  11. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
  12. data/ext/ddtrace_profiling_native_extension/extconf.rb +16 -0
  13. data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
  14. data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
  15. data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
  16. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
  17. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
  18. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +5 -0
  19. data/ext/ddtrace_profiling_native_extension/profiling.c +1 -0
  20. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
  21. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
  22. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +329 -10
  23. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
  24. data/lib/datadog/core/configuration/settings.rb +139 -22
  25. data/lib/datadog/core/telemetry/collector.rb +10 -0
  26. data/lib/datadog/core/telemetry/event.rb +2 -1
  27. data/lib/datadog/core/telemetry/ext.rb +3 -0
  28. data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
  29. data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
  30. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -11
  31. data/lib/datadog/profiling/component.rb +197 -13
  32. data/lib/datadog/profiling/scheduler.rb +4 -6
  33. data/lib/datadog/profiling/stack_recorder.rb +13 -2
  34. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
  35. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  36. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  37. data/lib/ddtrace/version.rb +1 -1
  38. metadata +12 -7
@@ -7,6 +7,7 @@
7
7
  #include "libdatadog_helpers.h"
8
8
  #include "ruby_helpers.h"
9
9
  #include "time_helpers.h"
10
+ #include "heap_recorder.h"
10
11
 
11
12
  // Used to wrap a ddog_prof_Profile in a Ruby object and expose Ruby-level serialization APIs
12
13
  // This file implements the native bits of the Datadog::Profiling::StackRecorder class
@@ -150,18 +151,29 @@ static VALUE error_symbol = Qnil; // :error in Ruby
150
151
  #define WALL_TIME_VALUE_ID 2
151
152
  #define ALLOC_SAMPLES_VALUE {.type_ = VALUE_STRING("alloc-samples"), .unit = VALUE_STRING("count")}
152
153
  #define ALLOC_SAMPLES_VALUE_ID 3
154
+ #define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
155
+ #define HEAP_SAMPLES_VALUE_ID 4
156
+ #define HEAP_SIZE_VALUE {.type_ = VALUE_STRING("heap-live-size"), .unit = VALUE_STRING("bytes")}
157
+ #define HEAP_SIZE_VALUE_ID 5
158
+ #define TIMELINE_VALUE {.type_ = VALUE_STRING("timeline"), .unit = VALUE_STRING("nanoseconds")}
159
+ #define TIMELINE_VALUE_ID 6
153
160
 
154
- static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE};
161
+ static const ddog_prof_ValueType all_value_types[] =
162
+ {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};
155
163
 
156
164
  // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
157
165
  // occupies on the all_value_types array.
158
166
  // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
159
- static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID};
167
+ static const uint8_t all_value_types_positions[] =
168
+ {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};
160
169
 
161
170
  #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))
162
171
 
163
172
  // Contains native state for each instance
164
173
  struct stack_recorder_state {
174
+ // Heap recorder instance
175
+ heap_recorder *heap_recorder;
176
+
165
177
  pthread_mutex_t slot_one_mutex;
166
178
  ddog_prof_Profile slot_one_profile;
167
179
 
@@ -184,6 +196,7 @@ struct call_serialize_without_gvl_arguments {
184
196
  // Set by caller
185
197
  struct stack_recorder_state *state;
186
198
  ddog_Timespec finish_timestamp;
199
+ size_t gc_count_before_serialize;
187
200
 
188
201
  // Set by callee
189
202
  ddog_prof_Profile *profile;
@@ -197,11 +210,20 @@ static VALUE _native_new(VALUE klass);
197
210
  static void initialize_slot_concurrency_control(struct stack_recorder_state *state);
198
211
  static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Slice_ValueType sample_types);
199
212
  static void stack_recorder_typed_data_free(void *data);
200
- static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled);
213
+ static VALUE _native_initialize(
214
+ DDTRACE_UNUSED VALUE _self,
215
+ VALUE recorder_instance,
216
+ VALUE cpu_time_enabled,
217
+ VALUE alloc_samples_enabled,
218
+ VALUE heap_samples_enabled,
219
+ VALUE heap_size_enabled,
220
+ VALUE heap_sample_every,
221
+ VALUE timeline_enabled
222
+ );
201
223
  static VALUE _native_serialize(VALUE self, VALUE recorder_instance);
202
224
  static VALUE ruby_time_from(ddog_Timespec ddprof_time);
203
225
  static void *call_serialize_without_gvl(void *call_args);
204
- static struct active_slot_pair sampler_lock_active_profile();
226
+ static struct active_slot_pair sampler_lock_active_profile(struct stack_recorder_state *state);
205
227
  static void sampler_unlock_active_profile(struct active_slot_pair active_slot);
206
228
  static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state);
207
229
  static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
@@ -213,6 +235,14 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
213
235
  static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time);
214
236
  static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint);
215
237
  static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */);
238
+ static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class);
239
+ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations);
240
+ static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
241
+ static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
242
+ static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
243
+ static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj);
244
+ static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj);
245
+
216
246
 
217
247
  void stack_recorder_init(VALUE profiling_module) {
218
248
  VALUE stack_recorder_class = rb_define_class_under(profiling_module, "StackRecorder", rb_cObject);
@@ -229,13 +259,25 @@ void stack_recorder_init(VALUE profiling_module) {
229
259
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
230
260
  rb_define_alloc_func(stack_recorder_class, _native_new);
231
261
 
232
- rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 3);
262
+ rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 7);
233
263
  rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1);
234
264
  rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
235
265
  rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1);
236
266
  rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1);
237
267
  rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1);
238
268
  rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3);
269
+ rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 4);
270
+ rb_define_singleton_method(testing_module, "_native_check_heap_hashes", _native_check_heap_hashes, 1);
271
+ rb_define_singleton_method(testing_module, "_native_start_fake_slow_heap_serialization",
272
+ _native_start_fake_slow_heap_serialization, 1);
273
+ rb_define_singleton_method(testing_module, "_native_end_fake_slow_heap_serialization",
274
+ _native_end_fake_slow_heap_serialization, 1);
275
+ rb_define_singleton_method(testing_module, "_native_debug_heap_recorder",
276
+ _native_debug_heap_recorder, 1);
277
+ rb_define_singleton_method(testing_module, "_native_gc_force_recycle",
278
+ _native_gc_force_recycle, 1);
279
+ rb_define_singleton_method(testing_module, "_native_has_seen_id_flag",
280
+ _native_has_seen_id_flag, 1);
239
281
 
240
282
  ok_symbol = ID2SYM(rb_intern_const("ok"));
241
283
  error_symbol = ID2SYM(rb_intern_const("error"));
@@ -270,6 +312,12 @@ static VALUE _native_new(VALUE klass) {
270
312
 
271
313
  VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state);
272
314
 
315
+ // NOTE: We initialize this because we want a new recorder to be operational even without initialization and our
316
+ // default is everything enabled. However, if during recording initialization it turns out we don't want
317
+ // heap samples, we will free and reset heap_recorder to NULL, effectively disabling all behaviour specific
318
+ // to heap profiling (all calls to heap_recorder_* with a NULL heap recorder are noops).
319
+ state->heap_recorder = heap_recorder_new();
320
+
273
321
  // Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking!
274
322
 
275
323
  initialize_profiles(state, sample_types);
@@ -318,27 +366,51 @@ static void stack_recorder_typed_data_free(void *state_ptr) {
318
366
  pthread_mutex_destroy(&state->slot_two_mutex);
319
367
  ddog_prof_Profile_drop(&state->slot_two_profile);
320
368
 
369
+ heap_recorder_free(state->heap_recorder);
370
+
321
371
  ruby_xfree(state);
322
372
  }
323
373
 
324
- static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled) {
374
+ static VALUE _native_initialize(
375
+ DDTRACE_UNUSED VALUE _self,
376
+ VALUE recorder_instance,
377
+ VALUE cpu_time_enabled,
378
+ VALUE alloc_samples_enabled,
379
+ VALUE heap_samples_enabled,
380
+ VALUE heap_size_enabled,
381
+ VALUE heap_sample_every,
382
+ VALUE timeline_enabled
383
+ ) {
325
384
  ENFORCE_BOOLEAN(cpu_time_enabled);
326
385
  ENFORCE_BOOLEAN(alloc_samples_enabled);
386
+ ENFORCE_BOOLEAN(heap_samples_enabled);
387
+ ENFORCE_BOOLEAN(heap_size_enabled);
388
+ ENFORCE_TYPE(heap_sample_every, T_FIXNUM);
389
+ ENFORCE_BOOLEAN(timeline_enabled);
327
390
 
328
391
  struct stack_recorder_state *state;
329
392
  TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
330
393
 
331
- if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default
394
+ heap_recorder_set_sample_rate(state->heap_recorder, NUM2INT(heap_sample_every));
395
+
396
+ uint8_t requested_values_count = ALL_VALUE_TYPES_COUNT -
397
+ (cpu_time_enabled == Qtrue ? 0 : 1) -
398
+ (alloc_samples_enabled == Qtrue? 0 : 1) -
399
+ (heap_samples_enabled == Qtrue ? 0 : 1) -
400
+ (heap_size_enabled == Qtrue ? 0 : 1) -
401
+ (timeline_enabled == Qtrue ? 0 : 1);
402
+
403
+ if (requested_values_count == ALL_VALUE_TYPES_COUNT) return Qtrue; // Nothing to do, this is the default
332
404
 
333
405
  // When some sample types are disabled, we need to reconfigure libdatadog to record less types,
334
406
  // as well as reconfigure the position_for array to push the disabled types to the end so they don't get recorded.
335
407
  // See record_sample for details on the use of position_for.
336
408
 
337
- state->enabled_values_count = ALL_VALUE_TYPES_COUNT - (cpu_time_enabled == Qtrue ? 0 : 1) - (alloc_samples_enabled == Qtrue? 0 : 1);
409
+ state->enabled_values_count = requested_values_count;
338
410
 
339
411
  ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT];
340
412
  uint8_t next_enabled_pos = 0;
341
- uint8_t next_disabled_pos = state->enabled_values_count;
413
+ uint8_t next_disabled_pos = requested_values_count;
342
414
 
343
415
  // CPU_SAMPLES_VALUE is always enabled
344
416
  enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) CPU_SAMPLES_VALUE;
@@ -362,6 +434,35 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta
362
434
  state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++;
363
435
  }
364
436
 
437
+ if (heap_samples_enabled == Qtrue) {
438
+ enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE;
439
+ state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++;
440
+ } else {
441
+ state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++;
442
+ }
443
+
444
+ if (heap_size_enabled == Qtrue) {
445
+ enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SIZE_VALUE;
446
+ state->position_for[HEAP_SIZE_VALUE_ID] = next_enabled_pos++;
447
+ } else {
448
+ state->position_for[HEAP_SIZE_VALUE_ID] = next_disabled_pos++;
449
+ }
450
+ heap_recorder_set_size_enabled(state->heap_recorder, heap_size_enabled);
451
+
452
+ if (heap_samples_enabled == Qfalse && heap_size_enabled == Qfalse) {
453
+ // Turns out heap sampling is disabled but we initialized everything in _native_new
454
+ // assuming all samples were enabled. We need to deinitialize the heap recorder.
455
+ heap_recorder_free(state->heap_recorder);
456
+ state->heap_recorder = NULL;
457
+ }
458
+
459
+ if (timeline_enabled == Qtrue) {
460
+ enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) TIMELINE_VALUE;
461
+ state->position_for[TIMELINE_VALUE_ID] = next_enabled_pos++;
462
+ } else {
463
+ state->position_for[TIMELINE_VALUE_ID] = next_disabled_pos++;
464
+ }
465
+
365
466
  ddog_prof_Profile_drop(&state->slot_one_profile);
366
467
  ddog_prof_Profile_drop(&state->slot_two_profile);
367
468
 
@@ -379,9 +480,18 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
379
480
  // Need to do this while still holding on to the Global VM Lock; see comments on method for why
380
481
  serializer_set_start_timestamp_for_next_profile(state, finish_timestamp);
381
482
 
483
+ // Prepare the iteration on heap recorder we'll be doing outside the GVL. The preparation needs to
484
+ // happen while holding on to the GVL.
485
+ heap_recorder_prepare_iteration(state->heap_recorder);
486
+
382
487
  // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this
383
488
  // is pending
384
- struct call_serialize_without_gvl_arguments args = {.state = state, .finish_timestamp = finish_timestamp, .serialize_ran = false};
489
+ struct call_serialize_without_gvl_arguments args = {
490
+ .state = state,
491
+ .finish_timestamp = finish_timestamp,
492
+ .gc_count_before_serialize = rb_gc_count(),
493
+ .serialize_ran = false
494
+ };
385
495
 
386
496
  while (!args.serialize_ran) {
387
497
  // Give the Ruby VM an opportunity to process any pending interruptions (including raising exceptions).
@@ -397,6 +507,9 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
397
507
  rb_thread_call_without_gvl2(call_serialize_without_gvl, &args, NULL /* No interruption function needed in this case */, NULL /* Not needed */);
398
508
  }
399
509
 
510
+ // Cleanup after heap recorder iteration. This needs to happen while holding on to the GVL.
511
+ heap_recorder_finish_iteration(state->heap_recorder);
512
+
400
513
  ddog_prof_Profile_SerializeResult serialized_profile = args.result;
401
514
 
402
515
  if (serialized_profile.tag == DDOG_PROF_PROFILE_SERIALIZE_RESULT_ERR) {
@@ -439,6 +552,15 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
439
552
  metric_values[position_for[CPU_SAMPLES_VALUE_ID]] = values.cpu_or_wall_samples;
440
553
  metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns;
441
554
  metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples;
555
+ metric_values[position_for[TIMELINE_VALUE_ID]] = values.timeline_wall_time_ns;
556
+
557
+ if (values.alloc_samples != 0) {
558
+ // If we got an allocation sample end the heap allocation recording to commit the heap sample.
559
+ // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening
560
+ // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can
561
+ // be fixed with some refactoring but for now this leads to a less impactful change.
562
+ end_heap_allocation_recording(state->heap_recorder, locations);
563
+ }
442
564
 
443
565
  ddog_prof_Profile_Result result = ddog_prof_Profile_add(
444
566
  active_slot.profile,
@@ -457,6 +579,15 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
457
579
  }
458
580
  }
459
581
 
// Starts the first half of a two-part heap sample for `new_object`: looks up the native state wrapped by
// `recorder_instance` and hands the object, its sample weight and its allocation class to
// start_heap_allocation_recording. The second half (end_heap_allocation_recording) is invoked from
// record_sample when the sample carries a non-zero alloc_samples value.
582
+ void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class) {
583
+ struct stack_recorder_state *state;
584
+ TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
585
+ // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening
586
+ // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can
587
+ // be fixed with some refactoring but for now this leads to a less impactful change.
588
+ start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight, alloc_class);
589
+ }
590
+
460
591
  void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) {
461
592
  struct stack_recorder_state *state;
462
593
  TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
@@ -472,10 +603,103 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
472
603
  }
473
604
  }
474
605
 
606
+ #define MAX_LEN_HEAP_ITERATION_ERROR_MSG 256
607
+
608
+ // Heap recorder iteration context allows us access to stack recorder state and profile being serialized
609
+ // during iteration of heap recorder live objects.
610
+ typedef struct heap_recorder_iteration_context {
// Recorder state; the iteration callback reads position_for and enabled_values_count from it.
611
+ struct stack_recorder_state *state;
// Profile that each heap sample gets added to (via ddog_prof_Profile_add).
612
+ ddog_prof_Profile *profile;
613
+
// Set to true by the iteration callback when adding a sample fails; error_msg then receives the failure text.
614
+ bool error;
615
+ char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG];
616
+
// Initialised from the GC count captured before serialization; the callback reports
// `profile_gen - alloc_gen` as the "gc gen age" label of each sample.
617
+ size_t profile_gen;
618
+ } heap_recorder_iteration_context;
619
+
// Iteration callback handed to heap_recorder_for_each_live_object (see build_heap_profile_without_gvl).
// For the live object described by `iteration_data` it adds one sample to context->profile, where
// `extra_arg` must point at a heap_recorder_iteration_context.
// Returns true to keep iterating; returns false (after filling context->error / context->error_msg)
// when ddog_prof_Profile_add reports an error.
620
+ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) {
621
+ heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg;
622
+
623
+ live_object_data *object_data = &iteration_data.object_data;
624
+
// Zero-initialised, so every value type other than the two heap ones is reported as 0 for this sample.
625
+ int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0};
626
+ uint8_t *position_for = context->state->position_for;
627
+
628
+ metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight;
// NOTE(review): the types of `size` and `weight` are declared elsewhere (live_object_data); confirm this
// product cannot overflow before it is stored into the int64_t slot.
629
+ metric_values[position_for[HEAP_SIZE_VALUE_ID]] = object_data->size * object_data->weight;
630
+
// At most two labels are emitted: the optional "allocation class" and the always-present "gc gen age".
631
+ ddog_prof_Label labels[2];
632
+ size_t label_offset = 0;
633
+
634
+ if (object_data->class != NULL) {
635
+ labels[label_offset++] = (ddog_prof_Label) {
636
+ .key = DDOG_CHARSLICE_C("allocation class"),
637
+ .str = (ddog_CharSlice) {
638
+ .ptr = object_data->class,
639
+ .len = strlen(object_data->class),
640
+ },
641
+ .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
642
+ };
643
+ }
644
+ labels[label_offset++] = (ddog_prof_Label) {
645
+ .key = DDOG_CHARSLICE_C("gc gen age"),
646
+ .num = context->profile_gen - object_data->alloc_gen,
647
+ };
648
+
// Only the first enabled_values_count entries of metric_values are passed along; label_offset is the
// number of labels actually filled in above.
649
+ ddog_prof_Profile_Result result = ddog_prof_Profile_add(
650
+ context->profile,
651
+ (ddog_prof_Sample) {
652
+ .locations = iteration_data.locations,
653
+ .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count},
654
+ .labels = (ddog_prof_Slice_Label) {
655
+ .ptr = labels,
656
+ .len = label_offset,
657
+ }
658
+ },
659
+ 0
660
+ );
661
+
662
+ if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
663
+ read_ddogerr_string_and_drop(&result.err, context->error_msg, MAX_LEN_HEAP_ITERATION_ERROR_MSG);
664
+ context->error = true;
665
+ // By returning false we cancel the iteration
666
+ return false;
667
+ }
668
+
669
+ // Keep on iterating to next item!
670
+ return true;
671
+ }
672
+
// Adds the heap samples to `profile` by iterating the recorder's live objects with
// add_heap_sample_to_active_profile_without_gvl. `gc_count_before_serialize` becomes the context's
// profile_gen, which the callback uses to compute the "gc gen age" label.
// Raises a Ruby RuntimeError (through grab_gvl_and_raise) when the iteration reports failure or when the
// callback flagged an error.
// NOTE(review): state->heap_recorder may have been set to NULL by _native_initialize; this relies on
// heap_recorder_for_each_live_object returning true for a NULL recorder — confirm in heap_recorder.c.
673
+ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile, size_t gc_count_before_serialize) {
674
+ heap_recorder_iteration_context iteration_context = {
675
+ .state = state,
676
+ .profile = profile,
677
+ .error = false,
678
+ .error_msg = {0},
679
+ .profile_gen = gc_count_before_serialize,
680
+ };
681
+ bool iterated = heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context);
682
+ // We wait until we're out of the iteration to grab the gvl and raise. This is important because during
683
+ // iteration we may potentially acquire locks in the heap recorder and we could reach a deadlock if the
684
+ // same locks are acquired by the heap recorder while holding the gvl (since we'd be operating on the
685
+ // same locks but acquiring them in different order).
// NOTE(review): if heap_recorder_for_each_live_object returns false whenever the callback cancels the
// iteration, this first branch wins and the more detailed error_msg below is never reported — verify.
686
+ if (!iterated) {
687
+ grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: iteration cancelled");
688
+ }
689
+ else if (iteration_context.error) {
690
+ grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.error_msg);
691
+ }
692
+ }
693
+
475
694
  static void *call_serialize_without_gvl(void *call_args) {
476
695
  struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args;
477
696
 
478
697
  args->profile = serializer_flip_active_and_inactive_slots(args->state);
698
+
699
+ // Now that we have the inactive profile with all but heap samples, let's fill it with heap data
700
+ // without needing to race with the active sampler
701
+ build_heap_profile_without_gvl(args->state, args->profile, args->gc_count_before_serialize);
702
+
479
703
  // Note: The profile gets reset by the serialize call
480
704
  args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */);
481
705
  args->serialize_ran = true;
@@ -597,6 +821,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
597
821
  reset_profile(&state->slot_one_profile, /* start_time: */ NULL);
598
822
  reset_profile(&state->slot_two_profile, /* start_time: */ NULL);
599
823
 
824
+ heap_recorder_after_fork(state->heap_recorder);
825
+
600
826
  return Qtrue;
601
827
  }
602
828
 
@@ -614,9 +840,102 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_
614
840
  return Qtrue;
615
841
  }
616
842
 
// Ruby-facing wrapper around track_object, registered on the testing module with 4 arguments.
// Requires `weight` to be a Fixnum, converts `alloc_class` to a ddog_CharSlice and always returns Qtrue.
// NOTE(review): `alloc_class` is not type-checked here; whether char_slice_from_ruby_string validates it
// is not visible from this file — confirm.
843
+ static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class) {
844
+ ENFORCE_TYPE(weight, T_FIXNUM);
845
+ ddog_CharSlice alloc_class_slice = char_slice_from_ruby_string(alloc_class);
// The slice lives on this stack frame and is passed by pointer.
// NOTE(review): assumes the callee does not retain the pointer past this call — confirm.
846
+ track_object(recorder_instance, new_obj, NUM2UINT(weight), &alloc_class_slice);
847
+ return Qtrue;
848
+ }
849
+
850
+ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations) {
851
+ ENFORCE_TYPE(locations, T_ARRAY);
852
+ size_t locations_len = rb_array_len(locations);
853
+ ddog_prof_Location locations_arr[locations_len];
854
+ for (size_t i = 0; i < locations_len; i++) {
855
+ VALUE location = rb_ary_entry(locations, i);
856
+ ENFORCE_TYPE(location, T_ARRAY);
857
+ VALUE name = rb_ary_entry(location, 0);
858
+ VALUE filename = rb_ary_entry(location, 1);
859
+ VALUE line = rb_ary_entry(location, 2);
860
+ ENFORCE_TYPE(name, T_STRING);
861
+ ENFORCE_TYPE(filename, T_STRING);
862
+ ENFORCE_TYPE(line, T_FIXNUM);
863
+ locations_arr[i] = (ddog_prof_Location) {
864
+ .line = line,
865
+ .function = (ddog_prof_Function) {
866
+ .name = char_slice_from_ruby_string(name),
867
+ .filename = char_slice_from_ruby_string(filename),
868
+ }
869
+ };
870
+ }
871
+ ddog_prof_Slice_Location ddog_locations = {
872
+ .len = locations_len,
873
+ .ptr = locations_arr,
874
+ };
875
+ heap_recorder_testonly_assert_hash_matches(ddog_locations);
876
+
877
+ return Qnil;
878
+ }
879
+
// Resets `profile` through ddog_prof_Profile_reset, forwarding the optional `start_time`.
// Raises a Ruby RuntimeError carrying the libdatadog error details when the reset reports an error.
617
880
  static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */) {
618
881
  ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile, start_time);
619
882
  if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
620
883
  rb_raise(rb_eRuntimeError, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err));
621
884
  }
622
885
  }
886
+
887
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
888
+ // It SHOULD NOT be used for other purposes.
// It calls heap_recorder_prepare_iteration on the recorder's heap recorder — the same call _native_serialize
// makes before releasing the GVL — without performing any serialization. Returns Qnil.
889
+ static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) {
890
+ struct stack_recorder_state *state;
891
+ TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
892
+
893
+ heap_recorder_prepare_iteration(state->heap_recorder);
894
+
895
+ return Qnil;
896
+ }
897
+
898
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
899
+ // It SHOULD NOT be used for other purposes.
// It calls heap_recorder_finish_iteration on the recorder's heap recorder — the same call _native_serialize
// makes once serialization is done. Returns Qnil.
900
+ static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) {
901
+ struct stack_recorder_state *state;
902
+ TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
903
+
904
+ heap_recorder_finish_iteration(state->heap_recorder);
905
+
906
+ return Qnil;
907
+ }
908
+
909
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
910
+ // It SHOULD NOT be used for other purposes.
// Returns whatever heap_recorder_testonly_debug produces for this recorder's heap recorder.
911
+ static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) {
912
+ struct stack_recorder_state *state;
913
+ TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
914
+
915
+ return heap_recorder_testonly_debug(state->heap_recorder);
916
+ }
917
+
// The push/pop pair below limits the suppressed -Wdeprecated-declarations diagnostic to this one function.
918
+ #pragma GCC diagnostic push
919
+ // rb_gc_force_recycle was deprecated in latest versions of Ruby and is a noop.
920
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
921
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
922
+ // It SHOULD NOT be used for other purposes.
// Passes `obj` to rb_gc_force_recycle and returns Qnil.
923
+ static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj) {
924
+ rb_gc_force_recycle(obj);
925
+ return Qnil;
926
+ }
927
+ #pragma GCC diagnostic pop
928
+
929
+ // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
930
+ // It SHOULD NOT be used for other purposes.
// Returns Qtrue when `obj` has RUBY_FL_SEEN_OBJ_ID set and Qfalse otherwise. When NO_SEEN_OBJ_ID_FLAG is
// defined the flag is not consulted and the result is always Qfalse.
// NOTE(review): NO_SEEN_OBJ_ID_FLAG is presumably set by the build configuration (extconf.rb) — confirm.
931
+ static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj) {
932
+ #ifndef NO_SEEN_OBJ_ID_FLAG
933
+ if (RB_FL_TEST(obj, RUBY_FL_SEEN_OBJ_ID)) {
934
+ return Qtrue;
935
+ } else {
936
+ return Qfalse;
937
+ }
938
+ #else
939
+ return Qfalse;
940
+ #endif
941
+ }
@@ -1,12 +1,14 @@
1
1
  #pragma once
2
2
 
3
3
  #include <datadog/profiling.h>
4
+ #include <ruby.h>
4
5
 
// Per-sample metric values handed to record_sample, which copies them into the slot of the matching
// value type (via position_for) before adding the sample to the active profile.
5
6
  typedef struct {
6
7
  int64_t cpu_time_ns;
// Stored in the WALL_TIME_VALUE_ID slot.
7
8
  int64_t wall_time_ns;
// Stored in the CPU_SAMPLES_VALUE_ID slot.
8
9
  uint32_t cpu_or_wall_samples;
// Stored in the ALLOC_SAMPLES_VALUE_ID slot; a non-zero value also makes record_sample call
// end_heap_allocation_recording to commit the pending heap sample.
9
10
  uint32_t alloc_samples;
// Stored in the TIMELINE_VALUE_ID slot.
11
+ int64_t timeline_wall_time_ns;
10
12
  } sample_values;
11
13
 
12
14
  typedef struct sample_labels {
@@ -21,4 +23,5 @@ typedef struct sample_labels {
21
23
 
22
24
  void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels);
23
25
  void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint);
26
+ void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class);
24
27
  VALUE enforce_recorder_instance(VALUE object);