ddtrace 1.18.0 → 1.19.0

Files changed (38)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -1
  3. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
  4. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
  5. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
  6. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  7. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
  9. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
  10. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +159 -124
  11. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
  12. data/ext/ddtrace_profiling_native_extension/extconf.rb +16 -0
  13. data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
  14. data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
  15. data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
  16. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
  17. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
  18. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +5 -0
  19. data/ext/ddtrace_profiling_native_extension/profiling.c +1 -0
  20. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
  21. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
  22. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +329 -10
  23. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
  24. data/lib/datadog/core/configuration/settings.rb +139 -22
  25. data/lib/datadog/core/telemetry/collector.rb +10 -0
  26. data/lib/datadog/core/telemetry/event.rb +2 -1
  27. data/lib/datadog/core/telemetry/ext.rb +3 -0
  28. data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
  29. data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
  30. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -11
  31. data/lib/datadog/profiling/component.rb +197 -13
  32. data/lib/datadog/profiling/scheduler.rb +4 -6
  33. data/lib/datadog/profiling/stack_recorder.rb +13 -2
  34. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
  35. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  36. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  37. data/lib/ddtrace/version.rb +1 -1
  38. metadata +12 -7
data/ext/ddtrace_profiling_native_extension/heap_recorder.c
@@ -0,0 +1,970 @@
+ #include "heap_recorder.h"
+ #include <pthread.h>
+ #include "ruby/st.h"
+ #include "ruby_helpers.h"
+ #include <errno.h>
+ #include "collectors_stack.h"
+ #include "libdatadog_helpers.h"
+
+ #if (defined(HAVE_WORKING_RB_GC_FORCE_RECYCLE) && ! defined(NO_SEEN_OBJ_ID_FLAG))
+   #define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
+ #endif
+
+ // A compact representation of a stacktrace frame for a heap allocation.
+ typedef struct {
+   char *name;
+   char *filename;
+   int32_t line;
+ } heap_frame;
+ static st_index_t heap_frame_hash(heap_frame*, st_index_t seed);
+
+ // A compact representation of a stacktrace for a heap allocation.
+ //
+ // We could use a ddog_prof_Slice_Location instead but it has a lot of
+ // unused fields. Because we have to keep these stacks around for at
+ // least the lifetime of the objects allocated therein, we would be
+ // incurring a non-negligible memory overhead for little purpose.
+ typedef struct {
+   uint16_t frames_len;
+   heap_frame frames[];
+ } heap_stack;
+ static heap_stack* heap_stack_new(ddog_prof_Slice_Location);
+ static void heap_stack_free(heap_stack*);
+ static st_index_t heap_stack_hash(heap_stack*, st_index_t);
+
+ #if MAX_FRAMES_LIMIT > UINT16_MAX
+   #error Frames len type not compatible with MAX_FRAMES_LIMIT
+ #endif
+
+ enum heap_record_key_type {
+   HEAP_STACK,
+   LOCATION_SLICE
+ };
+ // This struct allows us to use two different types of stacks when
+ // interacting with a heap_record hash.
+ //
+ // The idea is that we'll always want to use heap_stack-keys when
+ // adding new entries to the hash since that's the compact stack
+ // representation we rely on internally.
+ //
+ // However, when querying for an existing heap record, we'd save a
+ // lot of allocations if we could query with the
+ // ddog_prof_Slice_Location we receive in our external API.
+ //
+ // To allow this interchange, we need a union and need to ensure
+ // that, whatever the shape of the union, the heap_record_key_cmp_st
+ // and heap_record_key_hash_st functions return the same results for
+ // equivalent stacktraces.
+ typedef struct {
+   enum heap_record_key_type type;
+   union {
+     // key never owns this if set
+     heap_stack *heap_stack;
+     // key never owns this if set
+     ddog_prof_Slice_Location *location_slice;
+   };
+ } heap_record_key;
+ static heap_record_key* heap_record_key_new(heap_stack*);
+ static void heap_record_key_free(heap_record_key*);
+ static int heap_record_key_cmp_st(st_data_t, st_data_t);
+ static st_index_t heap_record_key_hash_st(st_data_t);
+ static const struct st_hash_type st_hash_type_heap_record_key = {
+   heap_record_key_cmp_st,
+   heap_record_key_hash_st,
+ };
+
+ // Need to implement these functions to support the location-slice based keys
+ static st_index_t ddog_location_hash(ddog_prof_Location, st_index_t seed);
+ static st_index_t ddog_location_slice_hash(ddog_prof_Slice_Location, st_index_t seed);
+
+ // A heap record is used for deduping heap allocation stacktraces across multiple
+ // objects sharing the same allocation location.
+ typedef struct {
+   // How many objects are currently tracked by the heap recorder for this heap record.
+   uint32_t num_tracked_objects;
+   // stack is owned by the associated record and gets cleaned up alongside it
+   heap_stack *stack;
+ } heap_record;
+ static heap_record* heap_record_new(heap_stack*);
+ static void heap_record_free(heap_record*);
+
+ // An object record is used for storing data about currently tracked live objects
+ typedef struct {
+   long obj_id;
+   heap_record *heap_record;
+   live_object_data object_data;
+ } object_record;
+ static object_record* object_record_new(long, heap_record*, live_object_data);
+ static void object_record_free(object_record*);
+ static VALUE object_record_inspect(object_record*);
+ static object_record SKIPPED_RECORD = {0};
+
+ // A wrapper around an object record that is in the process of being recorded and was not
+ // yet committed.
+ typedef struct {
+   // Pointer to the (potentially partial) object_record containing metadata about an ongoing recording.
+   // When NULL, this symbolizes an unstarted/invalid recording.
+   object_record *object_record;
+   // A flag to track whether we had to force set the RUBY_FL_SEEN_OBJ_ID flag on this object
+   // as part of our workaround for rb_gc_force_recycle issues.
+   bool did_recycle_workaround;
+ } recording;
+
+ struct heap_recorder {
+   // Config
+   // Whether the recorder should try to determine approximate sizes for tracked objects.
+   bool size_enabled;
+   uint sample_rate;
+
+   // Map[key: heap_record_key*, record: heap_record*]
+   // NOTE: We always use heap_record_key.type == HEAP_STACK for storage but support lookups
+   //       via heap_record_key.type == LOCATION_SLICE to allow for allocation-free fast-paths.
+   // NOTE: This table is currently only protected by the GVL since we never interact with it
+   //       outside the GVL.
+   // NOTE: This table has ownership of both its heap_record_keys and heap_records.
+   st_table *heap_records;
+
+   // Map[obj_id: long, record: object_record*]
+   // NOTE: This table is currently only protected by the GVL since we never interact with it
+   //       outside the GVL.
+   // NOTE: This table has ownership of its object_records. The keys are longs and so are
+   //       passed as values.
+   st_table *object_records;
+
+   // Map[obj_id: long, record: object_record*]
+   // NOTE: This is a snapshot of object_records built ahead of an iteration. Outside of an
+   //       iteration context, this table will be NULL. During an iteration, there will be no
+   //       mutation of the data so iteration can occur without acquiring a lock.
+   // NOTE: Contrary to object_records, this table has no ownership of its data.
+   st_table *object_records_snapshot;
+
+   // Data for a heap recording that was started but not yet ended
+   recording active_recording;
+
+   // Reusable location array, implementing a flyweight pattern for things like iteration.
+   ddog_prof_Location *reusable_locations;
+
+   // Sampling state
+   uint num_recordings_skipped;
+ };
+ static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Location);
+ static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
+ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, object_record *record);
+ static int st_heap_record_entry_free(st_data_t, st_data_t, st_data_t);
+ static int st_object_record_entry_free(st_data_t, st_data_t, st_data_t);
+ static int st_object_record_update(st_data_t, st_data_t, st_data_t);
+ static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
+ static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra);
+ static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
+ static void commit_recording(heap_recorder*, heap_record*, recording);
+
+ // ==========================
+ // Heap Recorder External API
+ //
+ // WARN: All these APIs should support receiving a NULL heap_recorder, resulting in a noop.
+ //
+ // WARN: Except for ::heap_recorder_for_each_live_object, we always assume interaction with these APIs
+ //       happens under the GVL.
+ //
+ // ==========================
+ heap_recorder* heap_recorder_new(void) {
+   heap_recorder *recorder = ruby_xcalloc(1, sizeof(heap_recorder));
+
+   recorder->heap_records = st_init_table(&st_hash_type_heap_record_key);
+   recorder->object_records = st_init_numtable();
+   recorder->object_records_snapshot = NULL;
+   recorder->reusable_locations = ruby_xcalloc(MAX_FRAMES_LIMIT, sizeof(ddog_prof_Location));
+   recorder->active_recording = (recording) {0};
+   recorder->size_enabled = true;
+   recorder->sample_rate = 1; // By default do no sampling on top of what allocation profiling already does
+
+   return recorder;
+ }
+
+ void heap_recorder_free(heap_recorder *heap_recorder) {
+   if (heap_recorder == NULL) {
+     return;
+   }
+
+   if (heap_recorder->object_records_snapshot != NULL) {
+     // if there's an unfinished iteration, clean it up now
+     // before we clean up any other state it might depend on
+     heap_recorder_finish_iteration(heap_recorder);
+   }
+
+   // Clean-up all object records
+   st_foreach(heap_recorder->object_records, st_object_record_entry_free, 0);
+   st_free_table(heap_recorder->object_records);
+
+   // Clean-up all heap records (this includes those only referred to by queued_samples)
+   st_foreach(heap_recorder->heap_records, st_heap_record_entry_free, 0);
+   st_free_table(heap_recorder->heap_records);
+
+   if (heap_recorder->active_recording.object_record != NULL) {
+     // If there's a partial object record, clean it up as well
+     object_record_free(heap_recorder->active_recording.object_record);
+   }
+
+   ruby_xfree(heap_recorder->reusable_locations);
+
+   ruby_xfree(heap_recorder);
+ }
+
+ void heap_recorder_set_size_enabled(heap_recorder *heap_recorder, bool size_enabled) {
+   if (heap_recorder == NULL) {
+     return;
+   }
+
+   heap_recorder->size_enabled = size_enabled;
+ }
+
+ void heap_recorder_set_sample_rate(heap_recorder *heap_recorder, int sample_rate) {
+   if (heap_recorder == NULL) {
+     return;
+   }
+
+   if (sample_rate <= 0) {
+     rb_raise(rb_eArgError, "Heap sample rate must be a positive integer value but was %d", sample_rate);
+   }
+
+   heap_recorder->sample_rate = sample_rate;
+   heap_recorder->num_recordings_skipped = 0;
+ }
+
+ // WARN: Assumes this gets called before the profiler is reinitialized on the fork
+ void heap_recorder_after_fork(heap_recorder *heap_recorder) {
+   if (heap_recorder == NULL) {
+     return;
+   }
+
+   // When forking, the child process gets a copy of the entire state of the parent process, minus
+   // threads.
+   //
+   // This means anything the heap recorder is tracking will still be alive after the fork and
+   // should thus be kept. Because this heap recorder implementation does not rely on free
+   // tracepoints to track liveness, any frees that happen until we fully reinitialize will
+   // simply be noticed on the next heap_recorder_prepare_iteration.
+   //
+   // There is one small caveat though: fork only preserves one thread and in a Ruby app, that
+   // will be the thread holding on to the GVL. Since we support iteration on the heap recorder
+   // outside of the GVL, any state specific to that interaction may be inconsistent after fork
+   // (e.g. an acquired lock for thread safety). Iteration operates on object_records_snapshot
+   // though and that one will be updated on the next heap_recorder_prepare_iteration so we really
+   // only need to finish any iteration that might have been left unfinished.
+   if (heap_recorder->object_records_snapshot != NULL) {
+     heap_recorder_finish_iteration(heap_recorder);
+   }
+ }
+
+ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice *alloc_class) {
+   if (heap_recorder == NULL) {
+     return;
+   }
+
+   if (heap_recorder->active_recording.object_record != NULL) {
+     rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end.");
+   }
+
+   if (heap_recorder->num_recordings_skipped + 1 < heap_recorder->sample_rate) {
+     heap_recorder->active_recording.object_record = &SKIPPED_RECORD;
+     heap_recorder->num_recordings_skipped++;
+     return;
+   }
+
+   heap_recorder->num_recordings_skipped = 0;
+
+   VALUE ruby_obj_id = rb_obj_id(new_obj);
+   if (!FIXNUM_P(ruby_obj_id)) {
+     rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling.");
+   }
+
+   bool did_recycle_workaround = false;
+
+   #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
+     // If we are in a ruby version that has a working rb_gc_force_recycle implementation,
+     // its usage may lead to an object being re-used outside of the typical GC cycle.
+     //
+     // This re-use is in theory invisible to us unless we're lucky enough to sample both
+     // the original object and the replacement that uses the recycled slot.
+     //
+     // In practice, we've observed (https://github.com/DataDog/dd-trace-rb/pull/3366)
+     // that non-noop implementations of rb_gc_force_recycle have an implementation bug
+     // which results in the object that re-used the recycled slot inheriting the same
+     // object id without setting the FL_SEEN_OBJ_ID flag. We rely on this knowledge to
+     // "observe" implicit frees when an object we are tracking is force-recycled.
+     //
+     // However, it may happen that we start tracking a new object and that object was
+     // allocated on a recycled slot. Due to the bug, this object would be missing the
+     // FL_SEEN_OBJ_ID flag even though it was not recycled itself. If we left it be,
+     // when we're doing our liveness check, the absence of the flag would trigger our
+     // implicit free workaround and the object would be inferred as recycled even though
+     // it might still be alive.
+     //
+     // Thus, if we detect that this new allocation is already missing the flag at the start
+     // of the heap allocation recording, we force-set it. This should be safe since we
+     // just called rb_obj_id on it above and the expectation is that any flaggable object
+     // that goes through it ends up with the flag set (as evidenced by the GC_ASSERT
+     // lines in https://github.com/ruby/ruby/blob/4a8d7246d15b2054eacb20f8ab3d29d39a3e7856/gc.c#L4050C14-L4050C14).
+     if (RB_FL_ABLE(new_obj) && !RB_FL_TEST(new_obj, RUBY_FL_SEEN_OBJ_ID)) {
+       RB_FL_SET(new_obj, RUBY_FL_SEEN_OBJ_ID);
+       did_recycle_workaround = true;
+     }
+   #endif
+
+   heap_recorder->active_recording = (recording) {
+     .object_record = object_record_new(FIX2LONG(ruby_obj_id), NULL, (live_object_data) {
+       .weight = weight * heap_recorder->sample_rate,
+       .class = alloc_class != NULL ? string_from_char_slice(*alloc_class) : NULL,
+       .alloc_gen = rb_gc_count(),
+     }),
+     .did_recycle_workaround = did_recycle_workaround,
+   };
+ }
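The skip counter plus the weight multiplication above is what keeps the sampled data unbiased: with sample_rate N, only one of every N candidate allocations gets a full recording, but it is recorded with N times the weight, so aggregate weights still approximate the total. A small self-contained sketch of that arithmetic (illustrative, not ddtrace code):

// Illustrative sketch: with sample_rate 10 over 1000 candidates of weight 1,
// 100 recordings of weight 10 are made, so recorded and true totals both equal 1000.
#include <stdio.h>

int main(void) {
  unsigned int sample_rate = 10, skipped = 0;
  unsigned long recorded_weight = 0, true_weight = 0;

  for (int i = 0; i < 1000; i++) {
    unsigned int weight = 1;           // each candidate allocation weighted equally
    true_weight += weight;
    if (skipped + 1 < sample_rate) {   // same skip test as the recorder
      skipped++;
      continue;
    }
    skipped = 0;
    recorded_weight += weight * sample_rate; // scale up to compensate for the skips
  }

  printf("true=%lu recorded=%lu\n", true_weight, recorded_weight); // both print 1000
  return 0;
}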
+
+ void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
+   if (heap_recorder == NULL) {
+     return;
+   }
+
+   recording active_recording = heap_recorder->active_recording;
+
+   if (active_recording.object_record == NULL) {
+     // Recording ended without having been started?
+     rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started");
+   }
+   // From now on, mark the global active recording as invalid so we can short-circuit at any point
+   // and not end up with a still active recording. The local active_recording still holds the
+   // data required for committing though.
+   heap_recorder->active_recording = (recording) {0};
+
+   if (active_recording.object_record == &SKIPPED_RECORD) {
+     // special marker when we decided to skip due to sampling
+     return;
+   }
+
+   heap_record *heap_record = get_or_create_heap_record(heap_recorder, locations);
+
+   // And then commit the new allocation.
+   commit_recording(heap_recorder, heap_record, active_recording);
+ }
+
+ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
+   if (heap_recorder == NULL) {
+     return;
+   }
+
+   if (heap_recorder->object_records_snapshot != NULL) {
+     // we could trivially handle this but we raise to highlight and catch unexpected usages.
+     rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished.");
+   }
+
+   st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder);
+
+   heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records);
+   if (heap_recorder->object_records_snapshot == NULL) {
+     rb_raise(rb_eRuntimeError, "Failed to create heap snapshot.");
+   }
+ }
+
+ void heap_recorder_finish_iteration(heap_recorder *heap_recorder) {
+   if (heap_recorder == NULL) {
+     return;
+   }
+
+   if (heap_recorder->object_records_snapshot == NULL) {
+     // we could trivially handle this but we raise to highlight and catch unexpected usages.
+     rb_raise(rb_eRuntimeError, "Heap recorder iteration finished without having been prepared.");
+   }
+
+   st_free_table(heap_recorder->object_records_snapshot);
+   heap_recorder->object_records_snapshot = NULL;
+ }
+
+ // Internal data we need while performing iteration over live objects.
+ typedef struct {
+   // The callback we need to call for each object.
+   bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg);
+   // The extra arg to pass as the second parameter to the callback.
+   void *for_each_callback_extra_arg;
+   // A reference to the heap recorder so we can access extra stuff like reusable_locations.
+   heap_recorder *heap_recorder;
+ } iteration_context;
+
+ // WARN: Assume iterations can run without the GVL for performance reasons. Do not raise, allocate or
+ //       do NoGVL-unsafe interactions with the Ruby runtime. Any such interactions should be done during
+ //       heap_recorder_prepare_iteration or heap_recorder_finish_iteration.
+ bool heap_recorder_for_each_live_object(
+     heap_recorder *heap_recorder,
+     bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg),
+     void *for_each_callback_extra_arg) {
+   if (heap_recorder == NULL) {
+     return true;
+   }
+
+   if (heap_recorder->object_records_snapshot == NULL) {
+     return false;
+   }
+
+   iteration_context context;
+   context.for_each_callback = for_each_callback;
+   context.for_each_callback_extra_arg = for_each_callback_extra_arg;
+   context.heap_recorder = heap_recorder;
+   st_foreach(heap_recorder->object_records_snapshot, st_object_records_iterate, (st_data_t) &context);
+   return true;
+ }
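Taken together, the external API is meant to be driven roughly as follows. This is an illustrative sketch, not ddtrace code: it assumes the types declared in heap_recorder.h (plus <stdio.h>) and elides error handling and the GVL hand-off around the iteration itself.

// Illustrative sketch of driving the recorder end to end.
static bool print_object(heap_recorder_iteration_data data, void *extra_arg) {
  (void) extra_arg;
  // NoGVL-safe work only: read the pre-resolved locations and object data.
  fprintf(stderr, "weight=%u frames=%zu\n",
      (unsigned) data.object_data.weight, (size_t) data.locations.len);
  return true; // returning false would stop the iteration early
}

static void example_usage(VALUE some_new_object, ddog_prof_Slice_Location its_stack) {
  heap_recorder *recorder = heap_recorder_new();
  heap_recorder_set_sample_rate(recorder, 10);

  // Around each sampled allocation (under the GVL):
  start_heap_allocation_recording(recorder, some_new_object, 1, NULL);
  end_heap_allocation_recording(recorder, its_stack);

  // At serialization time: snapshot under the GVL, then iterate (possibly without it).
  heap_recorder_prepare_iteration(recorder);
  heap_recorder_for_each_live_object(recorder, print_object, NULL);
  heap_recorder_finish_iteration(recorder);

  heap_recorder_free(recorder);
}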
+
+ void heap_recorder_testonly_assert_hash_matches(ddog_prof_Slice_Location locations) {
+   heap_stack *stack = heap_stack_new(locations);
+   heap_record_key stack_based_key = (heap_record_key) {
+     .type = HEAP_STACK,
+     .heap_stack = stack,
+   };
+   heap_record_key location_based_key = (heap_record_key) {
+     .type = LOCATION_SLICE,
+     .location_slice = &locations,
+   };
+
+   st_index_t stack_hash = heap_record_key_hash_st((st_data_t) &stack_based_key);
+   st_index_t location_hash = heap_record_key_hash_st((st_data_t) &location_based_key);
+
+   heap_stack_free(stack);
+
+   if (stack_hash != location_hash) {
+     rb_raise(rb_eRuntimeError, "Heap record key hashes built from the same locations differ. stack_based_hash=%"PRI_VALUE_PREFIX"u location_based_hash=%"PRI_VALUE_PREFIX"u", stack_hash, location_hash);
+   }
+ }
+
+ VALUE heap_recorder_testonly_debug(heap_recorder *heap_recorder) {
+   if (heap_recorder == NULL) {
+     return rb_str_new2("NULL heap_recorder");
+   }
+
+   VALUE debug_str = rb_str_new2("object records:\n");
+   st_foreach(heap_recorder->object_records, st_object_records_debug, (st_data_t) debug_str);
+   return debug_str;
+ }
+
+ // ==========================
+ // Heap Recorder Internal API
+ // ==========================
+ static int st_heap_record_entry_free(st_data_t key, st_data_t value, DDTRACE_UNUSED st_data_t extra_arg) {
+   heap_record_key *record_key = (heap_record_key*) key;
+   heap_record_key_free(record_key);
+   heap_record_free((heap_record *) value);
+   return ST_DELETE;
+ }
+
+ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t value, DDTRACE_UNUSED st_data_t extra_arg) {
+   object_record_free((object_record *) value);
+   return ST_DELETE;
+ }
+
+ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
+   long obj_id = (long) key;
+   object_record *record = (object_record*) value;
+   heap_recorder *recorder = (heap_recorder*) extra_arg;
+
+   VALUE ref;
+
+   if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
+     // Id no longer associated with a valid ref. Need to delete this object record!
+     on_committed_object_record_cleanup(recorder, record);
+     return ST_DELETE;
+   }
+
+   // If we got this far, then we found a valid live object for the tracked id.
+
+   #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
+     // If we are in a ruby version that has a working rb_gc_force_recycle implementation,
+     // its usage may lead to an object being re-used outside of the typical GC cycle.
+     //
+     // This re-use is in theory invisible to us and would mean that the ref from which we
+     // collected the object_record metadata may not be the same as the current ref and
+     // thus any further reporting would be inaccurately attributed to stale metadata.
+     //
+     // In practice, there is a way for us to notice that this happened because of a bug
+     // in the implementation of rb_gc_force_recycle. Our heap profiler relies on object
+     // ids and id2ref to detect whether objects are still alive. Turns out that when an
+     // object with an id is re-used via rb_gc_force_recycle, it will "inherit" the ID
+     // of the old object but it will NOT have the FL_SEEN_OBJ_ID as per the experiment
+     // in https://github.com/DataDog/dd-trace-rb/pull/3360#discussion_r1442823517
+     //
+     // Thus, if we detect that the ref we just resolved above is missing this flag, we can
+     // safely say re-use happened and thus treat it as an implicit free of the object
+     // we were tracking (the original one which got recycled).
+     if (RB_FL_ABLE(ref) && !RB_FL_TEST(ref, RUBY_FL_SEEN_OBJ_ID)) {
+
+       // NOTE: We don't really need to set this flag for heap recorder to work correctly
+       //       but doing so partially mitigates a bug in runtimes with working rb_gc_force_recycle
+       //       which leads to broken invariants and leaking of entries in obj_to_id and id_to_obj
+       //       tables in objspace. We already do the same thing when we sample a recycled object,
+       //       here we apply it as well to objects that replace recycled objects that were being
+       //       tracked. More details in https://github.com/DataDog/dd-trace-rb/pull/3366
+       RB_FL_SET(ref, RUBY_FL_SEEN_OBJ_ID);
+
+       on_committed_object_record_cleanup(recorder, record);
+       return ST_DELETE;
+     }
+
+   #endif
+
+   if (recorder->size_enabled && !record->object_data.is_frozen) {
+     // if we were asked to update sizes and this object was not already seen as being frozen,
+     // update size again.
+     record->object_data.size = ruby_obj_memsize_of(ref);
+     // Check if it's now frozen so we skip a size update next time
+     record->object_data.is_frozen = RB_OBJ_FROZEN(ref);
+   }
+
+   return ST_CONTINUE;
+ }
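The liveness pass above leans on a detail of st_foreach worth calling out: the callback's return value controls the traversal, with ST_CONTINUE moving on, ST_DELETE removing the current entry in place, and ST_STOP aborting the walk. A minimal sketch of the same prune-while-iterating pattern (illustrative, not ddtrace code; the drop_odd_keys name is made up):

// Minimal sketch of pruning entries during st_foreach, as done for dead objects above.
#include <ruby.h>
#include <ruby/st.h>

static int drop_odd_keys(st_data_t key, st_data_t value, st_data_t extra_arg) {
  (void) value; (void) extra_arg; // extra_arg is the third argument given to st_foreach
  return (((long) key) % 2 != 0) ? ST_DELETE : ST_CONTINUE; // ST_STOP would end the walk early
}

static size_t prune_example(void) {
  st_table *table = st_init_numtable(); // numeric keys, like object_records
  for (long i = 0; i < 10; i++) st_insert(table, (st_data_t) i, (st_data_t) 0);

  st_foreach(table, drop_odd_keys, 0);

  size_t remaining = table->num_entries; // the 5 even keys are left
  st_free_table(table);
  return remaining;
}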
+
+ // WARN: This can get called outside the GVL. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED.
+ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra) {
+   object_record *record = (object_record*) value;
+   const heap_stack *stack = record->heap_record->stack;
+   iteration_context *context = (iteration_context*) extra;
+
+   ddog_prof_Location *locations = context->heap_recorder->reusable_locations;
+
+   for (uint16_t i = 0; i < stack->frames_len; i++) {
+     const heap_frame *frame = &stack->frames[i];
+     ddog_prof_Location *location = &locations[i];
+     location->function.name.ptr = frame->name;
+     location->function.name.len = strlen(frame->name);
+     location->function.filename.ptr = frame->filename;
+     location->function.filename.len = strlen(frame->filename);
+     location->line = frame->line;
+   }
+
+   heap_recorder_iteration_data iteration_data;
+   iteration_data.object_data = record->object_data;
+   iteration_data.locations = (ddog_prof_Slice_Location) {.ptr = locations, .len = stack->frames_len};
+
+   if (!context->for_each_callback(iteration_data, context->for_each_callback_extra_arg)) {
+     return ST_STOP;
+   }
+
+   return ST_CONTINUE;
+ }
+
+ static int st_object_records_debug(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra) {
+   VALUE debug_str = (VALUE) extra;
+
+   object_record *record = (object_record*) value;
+
+   rb_str_catf(debug_str, "%"PRIsVALUE"\n", object_record_inspect(record));
+
+   return ST_CONTINUE;
+ }
+
+ // Struct holding data required for an update operation on heap_records
+ typedef struct {
+   // [in] The recording containing the new object record we want to add.
+   // NOTE: Transfer of ownership of the contained object record is assumed, do not re-use it after the call to ::update_object_record_entry
+   recording recording;
+
+   // [in] The heap recorder where the update is happening.
+   heap_recorder *heap_recorder;
+ } object_record_update_data;
+
+ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t *value, st_data_t data, int existing) {
+   object_record_update_data *update_data = (object_record_update_data*) data;
+   recording recording = update_data->recording;
+   object_record *new_object_record = recording.object_record;
+   if (existing) {
+     object_record *existing_record = (object_record*) (*value);
+     if (recording.did_recycle_workaround) {
+       // In this case, it's possible for an object id to be re-used and we were lucky enough to have
+       // sampled both the original object and the replacement so cleanup the old one and replace it with
+       // the new object_record (i.e. treat this as a combined free+allocation).
+       on_committed_object_record_cleanup(update_data->heap_recorder, existing_record);
+     } else {
+       // This is not supposed to happen, raising...
+       VALUE existing_inspect = object_record_inspect(existing_record);
+       VALUE new_inspect = object_record_inspect(new_object_record);
+       rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
+           "the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect);
+     }
+   }
+   // Always carry on with the update, we want the new record to be there at the end
+   (*value) = (st_data_t) new_object_record;
+   return ST_CONTINUE;
+ }
+
+ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_record, recording recording) {
+   // Link the object record with the corresponding heap record. This was the last remaining thing we
+   // needed to fully build the object_record.
+   recording.object_record->heap_record = heap_record;
+   if (heap_record->num_tracked_objects == UINT32_MAX) {
+     rb_raise(rb_eRuntimeError, "Reached maximum number of tracked objects for heap record");
+   }
+   heap_record->num_tracked_objects++;
+
+   // Update object_records with the data for this new recording
+   object_record_update_data update_data = (object_record_update_data) {
+     .heap_recorder = heap_recorder,
+     .recording = recording,
+   };
+   st_update(heap_recorder->object_records, recording.object_record->obj_id, update_object_record_entry, (st_data_t) &update_data);
+ }
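Both commit_recording above and get_or_create_heap_record below rely on st_update, which does lookup and insert in a single pass: the callback receives pointers to the key and value slots plus an `existing` flag, and whatever it writes through those pointers is what the table keeps. A minimal upsert sketch (illustrative, not ddtrace code; increment_entry and count_event are made-up names):

// Minimal sketch of the st_update upsert pattern: one traversal that both
// observes whether an entry existed and (re)writes the stored value.
#include <ruby.h>
#include <ruby/st.h>

static int increment_entry(st_data_t *key, st_data_t *value, st_data_t arg, int existing) {
  (void) key; (void) arg;
  // `existing` says whether the slot was already populated; either way,
  // whatever we store through *value is what the table ends up holding.
  *value = existing ? (*value + 1) : 1;
  return ST_CONTINUE; // returning ST_DELETE here would drop the entry instead
}

// `counters` is expected to be a numeric-keyed table, e.g. from st_init_numtable().
static unsigned long count_event(st_table *counters, long event_id) {
  st_update(counters, (st_data_t) event_id, increment_entry, 0);

  st_data_t count = 0;
  st_lookup(counters, (st_data_t) event_id, &count);
  return (unsigned long) count;
}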
+
+ // Struct holding data required for an update operation on heap_records
+ typedef struct {
+   // [in] The locations we did this update with
+   ddog_prof_Slice_Location locations;
+   // [out] Pointer that will be updated to point to the affected heap record, saving
+   //       another lookup to access the updated heap record.
+   heap_record **record;
+ } heap_record_update_data;
+
+ // This function assumes ownership of stack_data is passed on to it so it'll either transfer ownership or clean-up.
+ static int update_heap_record_entry_with_new_allocation(st_data_t *key, st_data_t *value, st_data_t data, int existing) {
+   heap_record_update_data *update_data = (heap_record_update_data*) data;
+
+   if (!existing) {
+     // there was no matching heap record so let's create a new one...
+     // we need to initialize a heap_record_key with a new stack and use that for the key storage. We can't use the
+     // locations-based key we used for the update call because we don't own its lifecycle. So we create a new
+     // heap stack and will pass ownership of it to the heap_record.
+     heap_stack *stack = heap_stack_new(update_data->locations);
+     (*key) = (st_data_t) heap_record_key_new(stack);
+     (*value) = (st_data_t) heap_record_new(stack);
+   }
+
+   heap_record *record = (heap_record*) (*value);
+   (*update_data->record) = record;
+
+   return ST_CONTINUE;
+ }
+
+ static heap_record* get_or_create_heap_record(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
+   // For performance reasons we use a stack-allocated location-slice based key. This allows us
+   // to do allocation-free lookups and reuse of a matching existing heap record.
+   // NOTE: If we end up creating a new record, we'll create a heap-allocated key we own and use that for storage
+   //       instead of this one.
+   heap_record_key lookup_key = (heap_record_key) {
+     .type = LOCATION_SLICE,
+     .location_slice = &locations,
+   };
+
+   heap_record *heap_record = NULL;
+   heap_record_update_data update_data = (heap_record_update_data) {
+     .locations = locations,
+     .record = &heap_record,
+   };
+   st_update(heap_recorder->heap_records, (st_data_t) &lookup_key, update_heap_record_entry_with_new_allocation, (st_data_t) &update_data);
+
+   return heap_record;
+ }
+
+ static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_record *heap_record) {
+   if (heap_record->num_tracked_objects > 0) {
+     // still being used! do nothing...
+     return;
+   }
+
+   heap_record_key heap_key = (heap_record_key) {
+     .type = HEAP_STACK,
+     .heap_stack = heap_record->stack,
+   };
+   // We need to access the deleted key to free it since we gave ownership of the keys to the hash.
+   // st_delete will change this pointer to point to the removed key if one is found.
+   heap_record_key *deleted_key = &heap_key;
+   if (!st_delete(heap_recorder->heap_records, (st_data_t*) &deleted_key, NULL)) {
+     rb_raise(rb_eRuntimeError, "Attempted to cleanup an untracked heap_record");
+   };
+   heap_record_key_free(deleted_key);
+   heap_record_free(heap_record);
+ }
+
+ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, object_record *record) {
+   // Starting with the associated heap record. There will now be one less tracked object pointing to it
+   heap_record *heap_record = record->heap_record;
+   heap_record->num_tracked_objects--;
+
+   // One less object using this heap record, it may have become unused...
+   cleanup_heap_record_if_unused(heap_recorder, heap_record);
+
+   object_record_free(record);
+ }
+
+ // ===============
+ // Heap Record API
+ // ===============
+ heap_record* heap_record_new(heap_stack *stack) {
+   heap_record *record = ruby_xcalloc(1, sizeof(heap_record));
+   record->num_tracked_objects = 0;
+   record->stack = stack;
+   return record;
+ }
+
+ void heap_record_free(heap_record *record) {
+   heap_stack_free(record->stack);
+   ruby_xfree(record);
+ }
+
+
+ // =================
+ // Object Record API
+ // =================
+ object_record* object_record_new(long obj_id, heap_record *heap_record, live_object_data object_data) {
+   object_record *record = ruby_xcalloc(1, sizeof(object_record));
+   record->obj_id = obj_id;
+   record->heap_record = heap_record;
+   record->object_data = object_data;
+   return record;
+ }
+
+ void object_record_free(object_record *record) {
+   if (record->object_data.class != NULL) {
+     ruby_xfree(record->object_data.class);
+   }
+   ruby_xfree(record);
+ }
+
+ VALUE object_record_inspect(object_record *record) {
+   heap_frame top_frame = record->heap_record->stack->frames[0];
+   VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu ",
+       record->obj_id, record->object_data.weight, record->object_data.size, top_frame.filename,
+       (int) top_frame.line, record->object_data.alloc_gen);
+
+   const char *class = record->object_data.class;
+   if (class != NULL) {
+     rb_str_catf(inspect, "class=%s ", class);
+   }
+   VALUE ref;
+
+   if (!ruby_ref_from_id(LONG2NUM(record->obj_id), &ref)) {
+     rb_str_catf(inspect, "object=<invalid>");
+   } else {
+     VALUE ruby_inspect = ruby_safe_inspect(ref);
+     if (ruby_inspect != Qnil) {
+       rb_str_catf(inspect, "object=%"PRIsVALUE, ruby_inspect);
+     } else {
+       rb_str_catf(inspect, "object=%s", ruby_value_type_to_string(rb_type(ref)));
+     }
+   }
+
+   return inspect;
+ }
+
+ // ==============
+ // Heap Frame API
+ // ==============
+ int heap_frame_cmp(heap_frame *f1, heap_frame *f2) {
+   int line_diff = (int) (f1->line - f2->line);
+   if (line_diff != 0) {
+     return line_diff;
+   }
+   int cmp = strcmp(f1->name, f2->name);
+   if (cmp != 0) {
+     return cmp;
+   }
+   return strcmp(f1->filename, f2->filename);
+ }
+
+ // TODO: Research potential performance improvements around hashing stuff here
+ //       once we have a benchmarking suite.
+ //       Example: Each call to st_hash is calling murmur_finish and we may want
+ //       to only finish once per structure, not per field?
+ //       Example: There may be a more efficient hashing for line that is not the
+ //       generic st_hash algorithm?
+
+ // WARN: Must be kept in-sync with ::char_slice_hash
+ st_index_t string_hash(char *str, st_index_t seed) {
+   return st_hash(str, strlen(str), seed);
+ }
+
+ // WARN: Must be kept in-sync with ::string_hash
+ st_index_t char_slice_hash(ddog_CharSlice char_slice, st_index_t seed) {
+   return st_hash(char_slice.ptr, char_slice.len, seed);
+ }
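The "Must be kept in-sync" warnings encode the invariant that lookup keys and storage keys hash identically even though they carry the data in different shapes; heap_recorder_testonly_assert_hash_matches earlier exercises exactly this. A minimal sketch of the invariant (illustrative, not ddtrace code; the anonymous slice struct stands in for ddog_CharSlice):

// Minimal sketch: hashing the same bytes must give the same result whether they
// arrive as a NUL-terminated string or as a pointer+length slice.
#include <assert.h>
#include <string.h>
#include <ruby/st.h>

static void check_hash_consistency(void) {
  const char *str = "heap_recorder.c";
  st_index_t seed = 0;

  st_index_t from_cstring = st_hash(str, strlen(str), seed);

  struct { const char *ptr; size_t len; } slice = { str, strlen(str) }; // stand-in for ddog_CharSlice
  st_index_t from_slice = st_hash(slice.ptr, slice.len, seed);

  assert(from_cstring == from_slice); // both key shapes land in the same bucket
}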
+
+ // WARN: Must be kept in-sync with ::ddog_location_hash
+ st_index_t heap_frame_hash(heap_frame *frame, st_index_t seed) {
+   st_index_t hash = string_hash(frame->name, seed);
+   hash = string_hash(frame->filename, hash);
+   hash = st_hash(&frame->line, sizeof(frame->line), hash);
+   return hash;
+ }
+
+ // WARN: Must be kept in-sync with ::heap_frame_hash
+ st_index_t ddog_location_hash(ddog_prof_Location location, st_index_t seed) {
+   st_index_t hash = char_slice_hash(location.function.name, seed);
+   hash = char_slice_hash(location.function.filename, hash);
+   // Convert the ddog_prof line type to the same type we use for our heap_frames to
+   // ensure we have compatible hashes
+   int32_t line_as_int32 = (int32_t) location.line;
+   hash = st_hash(&line_as_int32, sizeof(line_as_int32), hash);
+   return hash;
+ }
+
+ // ==============
+ // Heap Stack API
+ // ==============
+ heap_stack* heap_stack_new(ddog_prof_Slice_Location locations) {
+   uint16_t frames_len = locations.len;
+   if (frames_len > MAX_FRAMES_LIMIT) {
+     // This should not be happening anyway since MAX_FRAMES_LIMIT should be shared with
+     // the stacktrace construction mechanism. If it happens, let's just raise. This should
+     // be safe since we only allocate with the GVL anyway.
+     rb_raise(rb_eRuntimeError, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len);
+   }
+   heap_stack *stack = ruby_xcalloc(1, sizeof(heap_stack) + frames_len * sizeof(heap_frame));
+   stack->frames_len = frames_len;
+   for (uint16_t i = 0; i < stack->frames_len; i++) {
+     const ddog_prof_Location *location = &locations.ptr[i];
+     stack->frames[i] = (heap_frame) {
+       .name = string_from_char_slice(location->function.name),
+       .filename = string_from_char_slice(location->function.filename),
+       // The line in a ddog_prof_Location is an int64_t. We don't expect to have to profile
+       // files with more than 2M lines so this cast should be fairly safe?
+       .line = (int32_t) location->line,
+     };
+   }
+   return stack;
+ }
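heap_stack uses a C99 flexible array member, so the frame count and all frames live in a single allocation sized when the stack is created. A minimal sketch of that allocation pattern (illustrative, not ddtrace code; int_list is a made-up type):

// Minimal sketch of the flexible-array-member pattern behind heap_stack:
// header plus N trailing elements in one allocation, sized as
// sizeof(header) + N * sizeof(element).
#include <stdlib.h>
#include <stdint.h>

typedef struct {
  uint16_t items_len;
  int32_t items[]; // flexible array member, must be the last field
} int_list;

static int_list *int_list_new(uint16_t len) {
  int_list *list = calloc(1, sizeof(int_list) + len * sizeof(int32_t));
  if (list == NULL) return NULL;
  list->items_len = len;
  for (uint16_t i = 0; i < len; i++) list->items[i] = i; // items address like a normal array
  return list;
}

// A single free(list) releases the header and all items at once, mirroring how
// heap_stack_free only needs one ruby_xfree for the stack itself (after freeing
// the strings each frame owns).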
+
+ void heap_stack_free(heap_stack *stack) {
+   for (uint64_t i = 0; i < stack->frames_len; i++) {
+     heap_frame *frame = &stack->frames[i];
+     ruby_xfree(frame->name);
+     ruby_xfree(frame->filename);
+   }
+   ruby_xfree(stack);
+ }
+
+ // WARN: Must be kept in-sync with ::ddog_location_slice_hash
+ st_index_t heap_stack_hash(heap_stack *stack, st_index_t seed) {
+   st_index_t hash = seed;
+   for (uint64_t i = 0; i < stack->frames_len; i++) {
+     hash = heap_frame_hash(&stack->frames[i], hash);
+   }
+   return hash;
+ }
+
+ // WARN: Must be kept in-sync with ::heap_stack_hash
+ st_index_t ddog_location_slice_hash(ddog_prof_Slice_Location locations, st_index_t seed) {
+   st_index_t hash = seed;
+   for (uint64_t i = 0; i < locations.len; i++) {
+     hash = ddog_location_hash(locations.ptr[i], hash);
+   }
+   return hash;
+ }
+
+ // ===================
+ // Heap Record Key API
+ // ===================
+ heap_record_key* heap_record_key_new(heap_stack *stack) {
+   heap_record_key *key = ruby_xmalloc(sizeof(heap_record_key));
+   key->type = HEAP_STACK;
+   key->heap_stack = stack;
+   return key;
+ }
+
+ void heap_record_key_free(heap_record_key *key) {
+   ruby_xfree(key);
+ }
+
+ static inline size_t heap_record_key_len(heap_record_key *key) {
+   if (key->type == HEAP_STACK) {
+     return key->heap_stack->frames_len;
+   } else {
+     return key->location_slice->len;
+   }
+ }
+
+ static inline int64_t heap_record_key_entry_line(heap_record_key *key, size_t entry_i) {
+   if (key->type == HEAP_STACK) {
+     return key->heap_stack->frames[entry_i].line;
+   } else {
+     return key->location_slice->ptr[entry_i].line;
+   }
+ }
+
+ static inline size_t heap_record_key_entry_name(heap_record_key *key, size_t entry_i, const char **name_ptr) {
+   if (key->type == HEAP_STACK) {
+     char *name = key->heap_stack->frames[entry_i].name;
+     (*name_ptr) = name;
+     return strlen(name);
+   } else {
+     ddog_CharSlice name = key->location_slice->ptr[entry_i].function.name;
+     (*name_ptr) = name.ptr;
+     return name.len;
+   }
+ }
+
+ static inline size_t heap_record_key_entry_filename(heap_record_key *key, size_t entry_i, const char **filename_ptr) {
+   if (key->type == HEAP_STACK) {
+     char *filename = key->heap_stack->frames[entry_i].filename;
+     (*filename_ptr) = filename;
+     return strlen(filename);
+   } else {
+     ddog_CharSlice filename = key->location_slice->ptr[entry_i].function.filename;
+     (*filename_ptr) = filename.ptr;
+     return filename.len;
+   }
+ }
+
+ int heap_record_key_cmp_st(st_data_t key1, st_data_t key2) {
+   heap_record_key *key_record1 = (heap_record_key*) key1;
+   heap_record_key *key_record2 = (heap_record_key*) key2;
+
+   // Fast path, check if lengths differ
+   size_t key_record1_len = heap_record_key_len(key_record1);
+   size_t key_record2_len = heap_record_key_len(key_record2);
+
+   if (key_record1_len != key_record2_len) {
+     return ((int) key_record1_len) - ((int) key_record2_len);
+   }
+
+   // If we got this far, we have same lengths so need to check item-by-item
+   for (size_t i = 0; i < key_record1_len; i++) {
+     // Lines are faster to compare, let's do that first
+     size_t line1 = heap_record_key_entry_line(key_record1, i);
+     size_t line2 = heap_record_key_entry_line(key_record2, i);
+     if (line1 != line2) {
+       return ((int) line1) - ((int) line2);
+     }
+
+     // Then come names, they are usually smaller than filenames
+     const char *name1, *name2;
+     size_t name1_len = heap_record_key_entry_name(key_record1, i, &name1);
+     size_t name2_len = heap_record_key_entry_name(key_record2, i, &name2);
+     if (name1_len != name2_len) {
+       return ((int) name1_len) - ((int) name2_len);
+     }
+     int name_cmp_result = strncmp(name1, name2, name1_len);
+     if (name_cmp_result != 0) {
+       return name_cmp_result;
+     }
+
+     // Then come filenames
+     const char *filename1, *filename2;
+     int64_t filename1_len = heap_record_key_entry_filename(key_record1, i, &filename1);
+     int64_t filename2_len = heap_record_key_entry_filename(key_record2, i, &filename2);
+     if (filename1_len != filename2_len) {
+       return ((int) filename1_len) - ((int) filename2_len);
+     }
+     int filename_cmp_result = strncmp(filename1, filename2, filename1_len);
+     if (filename_cmp_result != 0) {
+       return filename_cmp_result;
+     }
+   }
+
+   // If we survived the above for loop, then everything matched
+   return 0;
+ }
+
+ // Initial seed for hash functions
+ #define FNV1_32A_INIT 0x811c9dc5
+
+ st_index_t heap_record_key_hash_st(st_data_t key) {
+   heap_record_key *record_key = (heap_record_key*) key;
+   if (record_key->type == HEAP_STACK) {
+     return heap_stack_hash(record_key->heap_stack, FNV1_32A_INIT);
+   } else {
+     return ddog_location_slice_hash(*record_key->location_slice, FNV1_32A_INIT);
+   }
+ }