datadog 2.35.0 → 2.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -1
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +68 -31
  4. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +1 -1
  5. data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +1 -1
  6. data/ext/datadog_profiling_native_extension/collectors_stack.c +37 -18
  7. data/ext/datadog_profiling_native_extension/collectors_stack.h +8 -2
  8. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +434 -300
  9. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +9 -7
  10. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +7 -8
  11. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +0 -12
  12. data/ext/datadog_profiling_native_extension/extconf.rb +2 -2
  13. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +4 -43
  14. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +15 -47
  15. data/ext/datadog_profiling_native_extension/heap_recorder.c +44 -26
  16. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +14 -35
  17. data/ext/datadog_profiling_native_extension/profiling.c +41 -4
  18. data/ext/datadog_profiling_native_extension/ruby_helpers.c +33 -34
  19. data/ext/datadog_profiling_native_extension/stack_recorder.c +24 -3
  20. data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
  21. data/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h +4 -2
  22. data/ext/libdatadog_api/datadog_ruby_common.c +7 -8
  23. data/ext/libdatadog_api/datadog_ruby_common.h +0 -12
  24. data/ext/libdatadog_extconf_helpers.rb +1 -1
  25. data/lib/datadog/appsec/api_security/route_extractor.rb +6 -0
  26. data/lib/datadog/appsec/component.rb +1 -1
  27. data/lib/datadog/appsec/configuration.rb +7 -0
  28. data/lib/datadog/appsec/contrib/aws_lambda/waf_addresses.rb +37 -4
  29. data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +64 -19
  30. data/lib/datadog/appsec/contrib/graphql/integration.rb +1 -0
  31. data/lib/datadog/appsec/contrib/rack/buffered_input.rb +83 -0
  32. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +41 -3
  33. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +20 -7
  34. data/lib/datadog/appsec/contrib/rack/input_peeker.rb +92 -0
  35. data/lib/datadog/appsec/contrib/rails/gateway/request.rb +33 -0
  36. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +17 -1
  37. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +20 -3
  38. data/lib/datadog/appsec/default_header_tags.rb +10 -6
  39. data/lib/datadog/core/configuration/components.rb +1 -0
  40. data/lib/datadog/core/configuration/settings.rb +1 -2
  41. data/lib/datadog/core/configuration/supported_configurations.rb +2 -0
  42. data/lib/datadog/core/remote/component.rb +1 -1
  43. data/lib/datadog/core/telemetry/event/app_started.rb +0 -21
  44. data/lib/datadog/core/utils/at_fork_monkey_patch.rb +1 -1
  45. data/lib/datadog/core/utils/forking.rb +3 -1
  46. data/lib/datadog/core/utils/spawn_monkey_patch.rb +3 -1
  47. data/lib/datadog/core.rb +3 -0
  48. data/lib/datadog/di/base.rb +4 -1
  49. data/lib/datadog/di/component.rb +1 -1
  50. data/lib/datadog/error_tracking/collector.rb +2 -1
  51. data/lib/datadog/error_tracking/component.rb +2 -2
  52. data/lib/datadog/kit/tracing/method_tracer.rb +4 -1
  53. data/lib/datadog/opentelemetry/sdk/propagator.rb +9 -3
  54. data/lib/datadog/opentelemetry/sdk/span_processor.rb +4 -1
  55. data/lib/datadog/profiling/collectors/thread_context.rb +1 -0
  56. data/lib/datadog/profiling/component.rb +13 -15
  57. data/lib/datadog/profiling/ext/dir_monkey_patches.rb +3 -3
  58. data/lib/datadog/ruby_version.rb +25 -0
  59. data/lib/datadog/symbol_database/component.rb +306 -98
  60. data/lib/datadog/symbol_database/extractor.rb +223 -84
  61. data/lib/datadog/tracing/configuration/ext.rb +13 -0
  62. data/lib/datadog/tracing/configuration/settings.rb +17 -0
  63. data/lib/datadog/tracing/contrib/configuration/resolver.rb +7 -0
  64. data/lib/datadog/tracing/contrib/grpc/distributed/propagation.rb +2 -0
  65. data/lib/datadog/tracing/contrib/grpc.rb +1 -0
  66. data/lib/datadog/tracing/contrib/http/distributed/propagation.rb +2 -0
  67. data/lib/datadog/tracing/contrib/http.rb +1 -0
  68. data/lib/datadog/tracing/contrib/karafka/distributed/propagation.rb +2 -0
  69. data/lib/datadog/tracing/contrib/karafka.rb +1 -0
  70. data/lib/datadog/tracing/contrib/rack/middlewares.rb +3 -1
  71. data/lib/datadog/tracing/contrib/rack/route_inference.rb +3 -1
  72. data/lib/datadog/tracing/contrib/sidekiq/distributed/propagation.rb +2 -0
  73. data/lib/datadog/tracing/contrib/sidekiq.rb +1 -0
  74. data/lib/datadog/tracing/contrib/waterdrop/distributed/propagation.rb +2 -0
  75. data/lib/datadog/tracing/contrib/waterdrop.rb +1 -0
  76. data/lib/datadog/tracing/distributed/propagation.rb +33 -1
  77. data/lib/datadog/tracing/distributed/trace_context.rb +11 -2
  78. data/lib/datadog/tracing/trace_digest.rb +7 -0
  79. data/lib/datadog/tracing/trace_operation.rb +4 -1
  80. data/lib/datadog/tracing/tracer.rb +1 -0
  81. data/lib/datadog/version.rb +1 -1
  82. data/lib/datadog.rb +4 -1
  83. metadata +8 -5
@@ -76,14 +76,15 @@
76
76
  #define THREAD_ID_LIMIT_CHARS 44 // Why 44? "#{2**64} (#{2**64})".size + 1 for \0
77
77
  #define THREAD_INVOKE_LOCATION_LIMIT_CHARS 512
78
78
  #define IS_WALL_TIME true
79
- #define IS_NOT_WALL_TIME false
79
+ #define IS_CPU_TIME false
80
80
  #define MISSING_TRACER_CONTEXT_KEY 0
81
81
  #define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)
82
+ #define GVL_SUSPENDED ((uint64_t)1)
83
+ #define GVL_RUNNING ((uint64_t)0)
82
84
 
83
- // This is used as a placeholder to mark threads that are allowed to be profiled (enabled)
84
- // (e.g. to avoid trying to gvl profile threads that are not from the main Ractor)
85
- // and for which there's no data yet
86
- #define GVL_WAITING_ENABLED_EMPTY RUBY_FIXNUM_MAX
85
+ #define MAX(a, b) ((a) < (b) ? (b) : (a))
86
+
87
+ static ID dd_per_thread_context_id; // Hidden ivar (no @ prefix, inaccessible from Ruby)
87
88
 
88
89
  static ID at_active_span_id; // id of :@active_span in Ruby
89
90
  static ID at_active_trace_id; // id of :@active_trace in Ruby
@@ -107,12 +108,6 @@ static ID server_id; // id of :server in Ruby
107
108
  static ID otel_context_storage_id; // id of :__opentelemetry_context_storage__ in Ruby
108
109
  static ID otel_fiber_context_storage_id; // id of :@opentelemetry_context in Ruby
109
110
 
110
- // This is used by `thread_context_collector_on_gvl_running`. Because when that method gets called we're not sure if
111
- // it's safe to access the state of the thread context collector, we store this setting as a global value. This does
112
- // mean this setting is shared among all thread context collectors, and thus it's "last writer wins".
113
- // In production this should not be a problem: there should only be one profiler, which is the last one created,
114
- // and that'll be the one that last wrote this setting.
115
- static uint32_t global_waiting_for_gvl_threshold_ns = MILLIS_AS_NS(10);
116
111
 
117
112
  typedef enum { OTEL_CONTEXT_ENABLED_FALSE, OTEL_CONTEXT_ENABLED_ONLY, OTEL_CONTEXT_ENABLED_BOTH } otel_context_enabled;
118
113
  typedef enum { OTEL_CONTEXT_SOURCE_UNKNOWN, OTEL_CONTEXT_SOURCE_FIBER_IVAR, OTEL_CONTEXT_SOURCE_FIBER_LOCAL } otel_context_source;
@@ -122,22 +117,14 @@ typedef struct {
122
117
  // Note: Places in this file that usually need to be changed when this struct is changed are tagged with
123
118
  // "Update this when modifying state struct"
124
119
 
125
- // Required by Datadog::Profiling::Collectors::Stack as a scratch buffer during sampling
126
- ddog_prof_Location *locations;
127
- uint16_t max_frames;
128
- // Hashmap <Thread Object, per_thread_context>
129
- // Note: Be very careful when mutating this map, as it gets read e.g. in the middle of GC and signal handlers.
130
- st_table *hash_map_per_thread_context;
120
+ // Output buffer for stack traces, passed to sample_thread()
121
+ sample_locations locations;
131
122
  // Datadog::Profiling::StackRecorder instance
132
123
  VALUE recorder_instance;
133
124
  // If the tracer is available and enabled, this will be the fiber-local symbol for accessing its running context,
134
125
  // to enable code hotspots and endpoint aggregation.
135
126
  // When not available, this is set to MISSING_TRACER_CONTEXT_KEY.
136
127
  ID tracer_context_key;
137
- // Track how many regular samples we've taken. Does not include garbage collection samples.
138
- // Currently **outside** of stats struct because we also use it to decide when to clean the contexts, and thus this
139
- // is not (just) a stat.
140
- unsigned int sample_count;
141
128
  // Reusable array to get list of threads
142
129
  VALUE thread_list_buffer;
143
130
  // Used to omit endpoint names (retrieved from tracer) from collected data
@@ -158,12 +145,21 @@ typedef struct {
158
145
  bool native_filenames_enabled;
159
146
  // Used to cache native filename lookup results (Map[void *function_pointer, char *filename])
160
147
  st_table *native_filenames_cache;
148
+ // Used to attribute overhead during sampling to this component
149
+ VALUE overhead_filename;
150
+ // Minimum duration of a "Waiting for GVL" period to trigger a sample
151
+ uint32_t waiting_for_gvl_threshold_ns;
161
152
 
162
153
  struct stats {
154
+ // Track how many regular samples we've taken. Does not include garbage collection samples.
155
+ unsigned int sample_count;
163
156
  // Track how many garbage collection samples we've taken.
164
157
  unsigned int gc_samples;
165
158
  // See thread_context_collector_on_gc_start for details
166
159
  unsigned int gc_samples_missed_due_to_missing_context;
160
+ // How many per-thread samples were skipped because the thread has been continuously suspended
161
+ // (no GVL) since its previous sample, so its Ruby stack cannot have changed.
162
+ unsigned int inactive_thread_samples_skipped;
167
163
  } stats;
168
164
 
169
165
  struct {
@@ -176,7 +172,7 @@ typedef struct {
176
172
  } thread_context_collector_state;
177
173
 
178
174
  // Tracks per-thread state
179
- typedef struct {
175
+ struct per_thread_context {
180
176
  sampling_buffer sampling_buffer;
181
177
  char thread_id[THREAD_ID_LIMIT_CHARS];
182
178
  ddog_CharSlice thread_id_char_slice;
@@ -186,13 +182,65 @@ typedef struct {
186
182
  long cpu_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized or if getting it fails for another reason
187
183
  long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
188
184
 
185
+ // There are 3 possible states for the GVL (per thread), and 3 transitions for which we receive GVL events:
186
+ // Thread holds the GVL
187
+ // on_gvl_released() the thread releases the GVL (RUBY_INTERNAL_THREAD_EVENT_SUSPENDED)
188
+ // Thread runs without the GVL
189
+ // on_gvl_waiting() the thread wants the GVL (RUBY_INTERNAL_THREAD_EVENT_READY)
190
+ // Thread is "Waiting for GVL"
191
+ // on_gvl_running() the thread now got the GVL (RUBY_INTERNAL_THREAD_EVENT_RESUMED)
192
+ // ... and the cycle restarts
193
+
194
+
195
+ // --- GVL waiting tracking state machine ---
196
+ //
197
+ // gvl_waiting_at tracks the GVL wait state for each profiled thread:
198
+ //
199
+ //          ┌───────────────────────────────────┐
200
+ //          │          on_gvl_waiting           │
201
+ //          │                                   ▼
202
+ //    Not Waiting (0) ◀────────────────── Waiting (> 0)
203
+ //          ▲           on_gvl_running          │
204
+ //          │          (below threshold)        │ on_gvl_running (above threshold)
205
+ //          │                                   ▼
206
+ //          └─────────────────────────── Sample Pending (< 0)
207
+ //             sample / sample_after_gvl_running
208
+ //
209
+ // Not Waiting (0): thread is running or not waiting for the GVL
210
+ // Waiting (> 0): monotonic wall time (ns) when the thread started waiting
211
+ // Sample Pending (< 0): negated timestamp; the wait ended and a sample is pending
212
+ //
213
+ // The field is accessed under the GVL for most functions EXCEPT on_gvl_waiting() which writes to it without the GVL.
214
+ // So we need to pack the above state in a single long to ensure atomicity.
215
+ long gvl_waiting_at;
216
+
217
+ // Per-thread "state + version" word, updated on every GVL state transition. The encoding is:
218
+ // - low bit: current state (1 = currently suspended, 0 = currently running)
219
+ // - bits 1+: monotonic event counter (incremented on every RESUMED)
220
+ // The hooks set the state bit explicitly rather than relying on parity, so the encoding stays
221
+ // correct even when events are not paired properly (as in tests).
222
+ //
223
+ // Note that SUSPENDED can happen multiple times in a row on Ruby 3.2,
224
+ // see https://github.com/DataDog/dd-trace-rb/pull/5777#discussion_r3388560254,
225
+ // the encoding is designed to naturally not change the field in such a case.
226
+ uint64_t gvl_state_change_count;
227
+ // Snapshot of the thread's gvl_state_change_count at the moment we last sampled it.
228
+ // Equality with this snapshot means no GVL transition since the last sample.
229
+ uint64_t gvl_state_change_count_at_previous_sample;
230
+ // True when the previous per-tick sample was skipped by the SUSPENDED-skip optimization, so the
231
+ // flush-before-serialize pass knows it needs to report this thread.
232
+ // As a result, we will accumulate all wall & CPU time as a single batch per reporting period,
233
+ // but this is deemed worth it for this optimization. In any case we don't know exactly
234
+ // at what time a thread was doing CPU work (unless it's on CPU 100% of the time).
235
+ bool was_skipped_at_last_sample;
236
+
189
237
  struct {
190
238
  // Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
191
239
  // Outside of this window, they will be INVALID_TIME.
192
240
  long cpu_time_at_start_ns;
193
241
  long wall_time_at_start_ns;
194
242
  } gc_tracking;
195
- } per_thread_context;
243
+ };
196
244
 
197
245
  // Used to correlate profiles with traces
198
246
  typedef struct {
@@ -210,27 +258,26 @@ typedef struct {
210
258
 
211
259
  static void thread_context_collector_typed_data_mark(void *state_ptr);
212
260
  static void thread_context_collector_typed_data_free(void *state_ptr);
213
- static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t value_thread_context, DDTRACE_UNUSED st_data_t _argument);
214
- static int hash_map_per_thread_context_free_values(st_data_t _thread, st_data_t value_per_thread_context, st_data_t _argument);
261
+ static void per_thread_context_typed_data_mark(void *ctx_ptr);
262
+ static void per_thread_context_typed_data_free(void *ctx_ptr);
215
263
  static VALUE _native_new(VALUE klass);
216
264
  static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
217
- static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE profiler_overhead_stack_thread, VALUE allow_exception);
265
+ static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE allow_exception);
218
266
  static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
219
267
  static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
220
268
  static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE allow_exception);
221
269
  static void update_metrics_and_sample(
222
270
  thread_context_collector_state *state,
223
271
  VALUE thread_being_sampled,
224
- VALUE stack_from_thread,
225
272
  per_thread_context *thread_context,
226
273
  sampling_buffer* sampling_buffer,
227
274
  long current_cpu_time_ns,
228
- long current_monotonic_wall_time_ns
275
+ long current_monotonic_wall_time_ns,
276
+ bool force_sample
229
277
  );
230
278
  static void trigger_sample_for_thread(
231
279
  thread_context_collector_state *state,
232
- VALUE thread,
233
- VALUE stack_from_thread,
280
+ VALUE thread_being_sampled,
234
281
  per_thread_context *thread_context,
235
282
  sampling_buffer* sampling_buffer,
236
283
  sample_values values,
@@ -242,16 +289,11 @@ static void trigger_sample_for_thread(
242
289
  );
243
290
  static VALUE _native_thread_list(VALUE self);
244
291
  static per_thread_context *get_or_create_context_for(VALUE thread, thread_context_collector_state *state);
245
- static per_thread_context *get_context_for(VALUE thread, thread_context_collector_state *state);
246
292
  static void initialize_context(VALUE thread, per_thread_context *thread_context, thread_context_collector_state *state);
247
- static void free_context(per_thread_context* thread_context);
248
293
  static VALUE _native_inspect(VALUE self, VALUE collector_instance);
249
- static VALUE per_thread_context_st_table_as_ruby_hash(thread_context_collector_state *state);
250
- static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
251
- static VALUE stats_as_ruby_hash(thread_context_collector_state *state);
294
+ static VALUE per_thread_context_to_ruby_hash(per_thread_context *thread_context);
295
+ static VALUE stats_to_ruby_hash(thread_context_collector_state *state, VALUE hash);
252
296
  static VALUE gc_tracking_as_ruby_hash(thread_context_collector_state *state);
253
- static void remove_context_for_dead_threads(thread_context_collector_state *state);
254
- static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
255
297
  static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
256
298
  static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time);
257
299
  static long cpu_time_now_ns(per_thread_context *thread_context);
@@ -283,7 +325,6 @@ static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self
283
325
  static bool handle_gvl_waiting(
284
326
  thread_context_collector_state *state,
285
327
  VALUE thread_being_sampled,
286
- VALUE stack_from_thread,
287
328
  per_thread_context *thread_context,
288
329
  sampling_buffer* sampling_buffer,
289
330
  long current_cpu_time_ns
@@ -291,9 +332,10 @@ static bool handle_gvl_waiting(
291
332
  #ifndef NO_GVL_INSTRUMENTATION
292
333
  static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread);
293
334
  static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread);
294
- static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread);
335
+ static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread);
336
+ static VALUE _native_on_gvl_released(DDTRACE_UNUSED VALUE self, VALUE thread);
295
337
  static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE allow_exception);
296
- static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns);
338
+ static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE thread, VALUE delta_ns);
297
339
  #endif
298
340
  static void otel_without_ddtrace_trace_identifiers_for(
299
341
  thread_context_collector_state *state,
@@ -305,7 +347,9 @@ static otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key)
305
347
  static uint64_t otel_span_id_to_uint(VALUE otel_span_id);
306
348
  static VALUE safely_lookup_hash_without_going_into_ruby_code(VALUE hash, VALUE key);
307
349
  static VALUE _native_system_epoch_time_now_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
308
- static VALUE _native_prepare_sample_inside_signal_handler(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
350
+ static VALUE _native_prepare_sample_inside_signal_handler(DDTRACE_UNUSED VALUE self);
351
+ static VALUE _native_clear_per_thread_context_for(DDTRACE_UNUSED VALUE self, VALUE thread);
352
+ static bool skip_sample(thread_context_collector_state *state, per_thread_context *thread_context, bool is_gvl_waiting_state, bool force_sample_suspended);
309
353
 
310
354
  void collectors_thread_context_init(VALUE profiling_module) {
311
355
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -326,7 +370,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
326
370
  rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, -1);
327
371
  rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1);
328
372
  rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
329
- rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 3);
373
+ rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2);
330
374
  rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 3);
331
375
  rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
332
376
  rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
@@ -338,13 +382,15 @@ void collectors_thread_context_init(VALUE profiling_module) {
338
382
  rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
339
383
  rb_define_singleton_method(testing_module, "_native_sample_skipped_allocation_samples", _native_sample_skipped_allocation_samples, 2);
340
384
  rb_define_singleton_method(testing_module, "_native_system_epoch_time_now_ns", _native_system_epoch_time_now_ns, 1);
341
- rb_define_singleton_method(testing_module, "_native_prepare_sample_inside_signal_handler", _native_prepare_sample_inside_signal_handler, 1);
385
+ rb_define_singleton_method(testing_module, "_native_prepare_sample_inside_signal_handler", _native_prepare_sample_inside_signal_handler, 0);
386
+ rb_define_singleton_method(testing_module, "_native_clear_per_thread_context_for", _native_clear_per_thread_context_for, 1);
342
387
  #ifndef NO_GVL_INSTRUMENTATION
343
388
  rb_define_singleton_method(testing_module, "_native_on_gvl_waiting", _native_on_gvl_waiting, 1);
344
389
  rb_define_singleton_method(testing_module, "_native_gvl_waiting_at_for", _native_gvl_waiting_at_for, 1);
345
- rb_define_singleton_method(testing_module, "_native_on_gvl_running", _native_on_gvl_running, 1);
390
+ rb_define_singleton_method(testing_module, "_native_on_gvl_running", _native_on_gvl_running, 2);
391
+ rb_define_singleton_method(testing_module, "_native_on_gvl_released", _native_on_gvl_released, 1);
346
392
  rb_define_singleton_method(testing_module, "_native_sample_after_gvl_running", _native_sample_after_gvl_running, 3);
347
- rb_define_singleton_method(testing_module, "_native_apply_delta_to_cpu_time_at_previous_sample_ns", _native_apply_delta_to_cpu_time_at_previous_sample_ns, 3);
393
+ rb_define_singleton_method(testing_module, "_native_apply_delta_to_cpu_time_at_previous_sample_ns", _native_apply_delta_to_cpu_time_at_previous_sample_ns, 2);
348
394
  #endif
349
395
 
350
396
  at_active_span_id = rb_intern_const("@active_span");
@@ -366,10 +412,10 @@ void collectors_thread_context_init(VALUE profiling_module) {
366
412
  otel_context_storage_id = rb_intern_const("__opentelemetry_context_storage__");
367
413
  otel_fiber_context_storage_id = rb_intern_const("@opentelemetry_context");
368
414
 
369
- #ifndef NO_GVL_INSTRUMENTATION
370
- // This will raise if Ruby already ran out of thread-local keys
371
- gvl_profiling_init();
372
- #endif
415
+ dd_per_thread_context_id = rb_intern_const("dd_per_thread_context");
416
+
417
+ // This will raise if Ruby already ran out of thread-local keys
418
+ per_thread_context_tls_init();
373
419
 
374
420
  gc_profiling_init();
375
421
  }
@@ -394,10 +440,10 @@ static void thread_context_collector_typed_data_mark(void *state_ptr) {
394
440
 
395
441
  // Update this when modifying state struct
396
442
  rb_gc_mark(state->recorder_instance);
397
- st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
398
443
  rb_gc_mark(state->thread_list_buffer);
399
444
  rb_gc_mark(state->main_thread);
400
445
  rb_gc_mark(state->otel_current_span_key);
446
+ rb_gc_mark(state->overhead_filename);
401
447
  }
402
448
 
403
449
  static void thread_context_collector_typed_data_free(void *state_ptr) {
@@ -407,36 +453,47 @@ static void thread_context_collector_typed_data_free(void *state_ptr) {
407
453
 
408
454
  // Important: Remember that we're only guaranteed to see here what's been set in _native_new, aka
409
455
  // pointers that have been set NULL there may still be NULL here.
410
- if (state->locations != NULL) ruby_xfree(state->locations);
411
-
412
- // Free each entry in the map
413
- st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
414
- // ...and then the map
415
- st_free_table(state->hash_map_per_thread_context);
456
+ if (state->locations.ptr != NULL) ruby_xfree(state->locations.ptr);
416
457
 
417
458
  st_free_table(state->native_filenames_cache);
418
459
 
419
460
  ruby_xfree(state);
420
461
  }
421
462
 
422
- // Mark Ruby thread references we keep as keys in hash_map_per_thread_context
423
- static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t value_thread_context, DDTRACE_UNUSED st_data_t _argument) {
424
- VALUE thread = (VALUE) key_thread;
425
- per_thread_context *thread_context = (per_thread_context *) value_thread_context;
463
+ // per_thread_context is wrapped in a TypedData Ruby object stored as an ivar on each Ruby Thread.
464
+ // This gives us automatic GC marking (for sampling_buffer iseq VALUEs) and lifecycle management.
465
+ static const rb_data_type_t per_thread_context_typed_data = {
466
+ .wrap_struct_name = "Datadog::Profiling::PerThreadContext",
467
+ .function = {
468
+ .dmark = per_thread_context_typed_data_mark,
469
+ .dfree = per_thread_context_typed_data_free,
470
+ .dsize = NULL,
471
+ },
472
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
473
+ };
426
474
 
427
- rb_gc_mark(thread);
428
- if (sampling_buffer_needs_marking(&thread_context->sampling_buffer)) {
429
- sampling_buffer_mark(&thread_context->sampling_buffer);
475
+ static void per_thread_context_typed_data_mark(void *ctx_ptr) {
476
+ per_thread_context *ctx = (per_thread_context *) ctx_ptr;
477
+ if (sampling_buffer_needs_marking(&ctx->sampling_buffer)) {
478
+ sampling_buffer_mark(&ctx->sampling_buffer);
430
479
  }
480
+ }
431
481
 
432
- return ST_CONTINUE;
482
+ static void per_thread_context_typed_data_free(void *ctx_ptr) {
483
+ per_thread_context *ctx = (per_thread_context *) ctx_ptr;
484
+ sampling_buffer_free(&ctx->sampling_buffer);
485
+ free(ctx);
433
486
  }
434
487
 
435
- // Used to clear each of the per_thread_contexts inside the hash_map_per_thread_context
436
- static int hash_map_per_thread_context_free_values(DDTRACE_UNUSED st_data_t _thread, st_data_t value_per_thread_context, DDTRACE_UNUSED st_data_t _argument) {
437
- per_thread_context *thread_context = (per_thread_context*) value_per_thread_context;
438
- free_context(thread_context);
439
- return ST_CONTINUE;
488
+ static VALUE _native_clear_per_thread_context_for(DDTRACE_UNUSED VALUE self, VALUE thread) {
489
+ per_thread_context *ctx = get_per_thread_context(thread);
490
+ if (ctx != NULL) {
491
+ set_per_thread_context(thread, NULL);
492
+ if (!RB_OBJ_FROZEN(thread)) {
493
+ rb_ivar_set(thread, dd_per_thread_context_id, Qnil);
494
+ }
495
+ }
496
+ return Qnil;
440
497
  }
441
498
 
442
499
  static VALUE _native_new(VALUE klass) {
@@ -446,11 +503,8 @@ static VALUE _native_new(VALUE klass) {
446
503
  // being leaked.
447
504
 
448
505
  // Update this when modifying state struct
449
- state->locations = NULL;
450
- state->max_frames = 0;
451
- state->hash_map_per_thread_context =
452
- // "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s.
453
- st_init_numtable();
506
+ state->locations.ptr = NULL;
507
+ state->locations.len = 0;
454
508
  state->recorder_instance = Qnil;
455
509
  state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;
456
510
  VALUE thread_list_buffer = rb_ary_new();
@@ -492,22 +546,25 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
492
546
  VALUE waiting_for_gvl_threshold_ns = rb_hash_fetch(options, ID2SYM(rb_intern("waiting_for_gvl_threshold_ns")));
493
547
  VALUE otel_context_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("otel_context_enabled")));
494
548
  VALUE native_filenames_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("native_filenames_enabled")));
549
+ VALUE overhead_filename = rb_hash_fetch(options, ID2SYM(rb_intern("overhead_filename")));
495
550
 
496
551
  ENFORCE_TYPE(max_frames, T_FIXNUM);
497
552
  ENFORCE_BOOLEAN(endpoint_collection_enabled);
498
553
  ENFORCE_TYPE(waiting_for_gvl_threshold_ns, T_FIXNUM);
499
554
  ENFORCE_BOOLEAN(native_filenames_enabled);
555
+ ENFORCE_TYPE(overhead_filename, T_STRING);
500
556
 
501
557
  thread_context_collector_state *state;
502
558
  TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
503
559
 
504
560
  // Update this when modifying state struct
505
- state->max_frames = sampling_buffer_check_max_frames(NUM2INT(max_frames));
506
- state->locations = ruby_xcalloc(state->max_frames, sizeof(ddog_prof_Location));
507
- // hash_map_per_thread_context is already initialized, nothing to do here
561
+ state->locations.len = sampling_buffer_check_max_frames(NUM2INT(max_frames));
562
+ state->locations.ptr = ruby_xcalloc(state->locations.len, sizeof(ddog_prof_Location));
508
563
  state->recorder_instance = enforce_recorder_instance(recorder_instance);
564
+ recorder_install_on_serialize(recorder_instance, self_instance);
509
565
  state->endpoint_collection_enabled = (endpoint_collection_enabled == Qtrue);
510
566
  state->native_filenames_enabled = (native_filenames_enabled == Qtrue);
567
+ state->overhead_filename = overhead_filename;
511
568
  if (otel_context_enabled == Qfalse || otel_context_enabled == Qnil) {
512
569
  state->otel_context_enabled = OTEL_CONTEXT_ENABLED_FALSE;
513
570
  } else if (otel_context_enabled == ID2SYM(rb_intern("only"))) {
@@ -518,7 +575,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
518
575
  raise_error(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled);
519
576
  }
520
577
 
521
- global_waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns);
578
+ state->waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns);
522
579
 
523
580
  if (RTEST(tracer_context_key)) {
524
581
  ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
@@ -533,14 +590,12 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
533
590
 
534
591
  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
535
592
  // It SHOULD NOT be used for other purposes.
536
- static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE profiler_overhead_stack_thread, VALUE allow_exception) {
593
+ static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE allow_exception) {
537
594
  ENFORCE_BOOLEAN(allow_exception);
538
595
 
539
- if (!is_thread_alive(profiler_overhead_stack_thread)) raise_error(rb_eArgError, "Unexpected: profiler_overhead_stack_thread is not alive");
540
-
541
596
  if (allow_exception == Qfalse) debug_enter_unsafe_context();
542
597
 
543
- thread_context_collector_sample(collector_instance, monotonic_wall_time_now_ns(RAISE_ON_FAILURE), profiler_overhead_stack_thread);
598
+ thread_context_collector_sample(collector_instance, monotonic_wall_time_now_ns(RAISE_ON_FAILURE));
544
599
 
545
600
  if (allow_exception == Qfalse) debug_leave_unsafe_context();
546
601
 
@@ -583,6 +638,53 @@ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_
583
638
  return Qtrue;
584
639
  }
585
640
 
641
+ // Record profiler sampling overhead as a placeholder stack
642
+ static void record_sampling_overhead(thread_context_collector_state *state, per_thread_context *current_thread_context) {
643
+ long wall_time_after_sampling = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
644
+ long cpu_time_after_sampling = cpu_time_now_ns(current_thread_context);
645
+
646
+ long overhead_cpu_time_ns = update_time_since_previous_sample(
647
+ &current_thread_context->cpu_time_at_previous_sample_ns,
648
+ cpu_time_after_sampling,
649
+ current_thread_context->gc_tracking.cpu_time_at_start_ns,
650
+ IS_CPU_TIME);
651
+
652
+ long overhead_wall_time_ns = update_time_since_previous_sample(
653
+ &current_thread_context->wall_time_at_previous_sample_ns,
654
+ wall_time_after_sampling,
655
+ INVALID_TIME,
656
+ IS_WALL_TIME);
657
+
658
+ ddog_prof_Label overhead_labels[] = {
659
+ {.key = DDOG_CHARSLICE_C("thread id"), .str = DDOG_CHARSLICE_C("0"), .num = 0},
660
+ {.key = DDOG_CHARSLICE_C("thread name"), .str = DDOG_CHARSLICE_C("Datadog::Profiling::Sampling"), .num = 0},
661
+ {.key = DDOG_CHARSLICE_C("state"), .str = DDOG_CHARSLICE_C("had cpu"), .num = 0},
662
+ {.key = DDOG_CHARSLICE_C("profiler overhead"), .num = 1},
663
+ };
664
+
665
+ int64_t end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, wall_time_after_sampling);
666
+
667
+ ddog_prof_Location overhead_location = {
668
+ .mapping = {.filename = DDOG_CHARSLICE_C(""), .build_id = DDOG_CHARSLICE_C(""), .build_id_id = {}},
669
+ .function = {
670
+ .name = DDOG_CHARSLICE_C("sampling"),
671
+ .filename = char_slice_from_ruby_string(state->overhead_filename),
672
+ },
673
+ .line = 0,
674
+ };
675
+
676
+ record_sample(
677
+ state->recorder_instance,
678
+ (ddog_prof_Slice_Location) {.ptr = &overhead_location, .len = 1},
679
+ (sample_values) {.cpu_time_ns = overhead_cpu_time_ns, .cpu_or_wall_samples = 1, .wall_time_ns = overhead_wall_time_ns},
680
+ (sample_labels) {
681
+ .labels = (ddog_prof_Slice_Label) {.ptr = overhead_labels, .len = sizeof(overhead_labels) / sizeof(overhead_labels[0])},
682
+ .state_label = NULL,
683
+ .end_timestamp_ns = end_timestamp_ns,
684
+ }
685
+ );
686
+ }
687
+
586
688
  // This function gets called from the Collectors::CpuAndWallTimeWorker to trigger the actual sampling.
587
689
  //
588
690
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
@@ -591,9 +693,7 @@ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_
591
693
  // Assumption 4: This function IS NOT called in a reentrant way.
592
694
  // Assumption 5: This function is called from the main Ractor (if Ruby has support for Ractors).
593
695
  //
594
- // The `profiler_overhead_stack_thread` is used to attribute the profiler overhead to a stack borrowed from a different thread
595
- // (belonging to ddtrace), so that the overhead is visible in the profile rather than blamed on user code.
596
- void thread_context_collector_sample(VALUE self_instance, long current_monotonic_wall_time_ns, VALUE profiler_overhead_stack_thread) {
696
+ void thread_context_collector_sample(VALUE self_instance, long current_monotonic_wall_time_ns) {
597
697
  thread_context_collector_state *state;
598
698
  TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
599
699
 
@@ -608,66 +708,50 @@ void thread_context_collector_sample(VALUE self_instance, long current_monotonic
608
708
  VALUE thread = RARRAY_AREF(threads, i);
609
709
  per_thread_context *thread_context = get_or_create_context_for(thread, state);
610
710
 
611
- // We account for cpu-time for the current thread in a different way -- we use the cpu-time at sampling start, to avoid
612
- // blaming the time the profiler took on whatever's running on the thread right now
613
- long current_cpu_time_ns = thread != current_thread ? cpu_time_now_ns(thread_context) : cpu_time_at_sample_start_for_current_thread;
711
+ // We account for cpu-time for the current thread in a different way: we use the cpu-time at sampling start,
712
+ // to avoid blaming the time the profiler took on whatever is currently running on the thread,
713
+ // and instead we report that time the profiler took as sampling overhead below.
714
+ long current_cpu_time_ns = (thread == current_thread) ? cpu_time_at_sample_start_for_current_thread : cpu_time_now_ns(thread_context);
614
715
 
615
716
  update_metrics_and_sample(
616
717
  state,
617
- /* thread_being_sampled: */ thread,
618
- /* stack_from_thread: */ thread,
718
+ thread,
619
719
  thread_context,
620
720
  &thread_context->sampling_buffer,
621
721
  current_cpu_time_ns,
622
- current_monotonic_wall_time_ns
623
- );
722
+ current_monotonic_wall_time_ns,
723
+ false);
624
724
  }
625
725
 
626
- state->sample_count++;
627
-
628
- // TODO: This seems somewhat overkill and inefficient to do often; right now we just do it every few samples
629
- // but there's probably a better way to do this if we actually track when threads finish
630
- if (state->sample_count % 100 == 0) remove_context_for_dead_threads(state);
631
-
632
- update_metrics_and_sample(
633
- state,
634
- /* thread_being_sampled: */ current_thread,
635
- /* stack_from_thread: */ profiler_overhead_stack_thread,
636
- current_thread_context,
637
- // Here we use the overhead thread's sampling buffer so as to not invalidate the cache in the buffer of the thread being sampled
638
- &get_or_create_context_for(profiler_overhead_stack_thread, state)->sampling_buffer,
639
- cpu_time_now_ns(current_thread_context),
640
- monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
641
- );
726
+ state->stats.sample_count++;
727
+ record_sampling_overhead(state, current_thread_context);
642
728
  }
643
729
 
644
730
  static void update_metrics_and_sample(
645
731
  thread_context_collector_state *state,
646
732
  VALUE thread_being_sampled,
647
- VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
648
733
  per_thread_context *thread_context,
649
734
  sampling_buffer* sampling_buffer,
650
735
  long current_cpu_time_ns,
651
- long current_monotonic_wall_time_ns
736
+ long current_monotonic_wall_time_ns,
737
+ bool force_sample_suspended
652
738
  ) {
653
739
  bool is_gvl_waiting_state =
654
- handle_gvl_waiting(state, thread_being_sampled, stack_from_thread, thread_context, sampling_buffer, current_cpu_time_ns);
740
+ handle_gvl_waiting(state, thread_being_sampled, thread_context, sampling_buffer, current_cpu_time_ns);
741
+
742
+ if (skip_sample(state, thread_context, is_gvl_waiting_state, force_sample_suspended)) return;
655
743
 
656
744
  // Don't assign/update cpu during "Waiting for GVL"
657
745
  long cpu_time_elapsed_ns = is_gvl_waiting_state ? 0 : update_time_since_previous_sample(
658
746
  &thread_context->cpu_time_at_previous_sample_ns,
659
747
  current_cpu_time_ns,
660
748
  thread_context->gc_tracking.cpu_time_at_start_ns,
661
- IS_NOT_WALL_TIME
749
+ IS_CPU_TIME
662
750
  );
663
751
 
664
752
  long wall_time_elapsed_ns = update_time_since_previous_sample(
665
753
  &thread_context->wall_time_at_previous_sample_ns,
666
754
  current_monotonic_wall_time_ns,
667
- // We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
668
- // accounting to change during GC.
669
- // E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing
670
- // GC or not.
671
755
  INVALID_TIME,
672
756
  IS_WALL_TIME
673
757
  );
@@ -675,7 +759,7 @@ static void update_metrics_and_sample(
675
759
  // A thread enters "Waiting for GVL", well, as the name implies, without the GVL.
676
760
  //
677
761
  // As a consequence, it's possible that a thread enters "Waiting for GVL" in parallel with the current thread working
678
- // on sampling, and thus for the `current_monotonic_wall_time_ns` (which is recorded at the start of sampling)
762
+ // on sampling, and thus for the `current_monotonic_wall_time_ns` (which is recorded at the start of sampling)
679
763
  // to be < the time at which we started Waiting for GVL.
680
764
  //
681
765
  // All together, this means that when `handle_gvl_waiting` creates an extra sample (see comments on that function for
@@ -690,7 +774,6 @@ static void update_metrics_and_sample(
690
774
  trigger_sample_for_thread(
691
775
  state,
692
776
  thread_being_sampled,
693
- stack_from_thread,
694
777
  thread_context,
695
778
  sampling_buffer,
696
779
  (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
@@ -702,6 +785,32 @@ static void update_metrics_and_sample(
702
785
  );
703
786
  }
704
787
 
788
+ static bool skip_sample(thread_context_collector_state *state, per_thread_context *thread_context, bool is_gvl_waiting_state, bool force_sample_suspended) {
789
+ // Racy read but harmless, can only cause an extra sample
790
+ uint64_t gvl_state_change_count = thread_context->gvl_state_change_count;
791
+
792
+ // Skip this per-tick sample entirely when the thread does not have the GVL and did not acquire
793
+ // it since the previous sample: its Ruby-level stack has not changed. The skipped wall-time will
794
+ be picked up either by an extra sample when the thread acquires the GVL, or by
795
+ // the on-serialize flush in the stack recorder (using was_skipped_at_last_sample).
796
+ // The check is gated by `!is_gvl_waiting_state` so the existing "Waiting for GVL" machinery
797
+ // in handle_gvl_waiting (situation 1 extra sample, situation 2 regular sample) keeps running.
798
+ // TODO: we could probably also skip while "Waiting for GVL"
799
+ if (!is_gvl_waiting_state &&
800
+ !force_sample_suspended &&
801
+ (gvl_state_change_count & GVL_SUSPENDED) &&
802
+ gvl_state_change_count == thread_context->gvl_state_change_count_at_previous_sample) {
803
+ state->stats.inactive_thread_samples_skipped++;
804
+ thread_context->was_skipped_at_last_sample = true;
805
+ return true; // Do NOT update wall_time_at_previous_sample_ns or cpu_time_at_previous_sample_ns
806
+ } else {
807
+ // We are going to sample, update the state accordingly:
808
+ thread_context->gvl_state_change_count_at_previous_sample = gvl_state_change_count;
809
+ thread_context->was_skipped_at_last_sample = false;
810
+ return false;
811
+ }
812
+ }
813
+
705
814
  // This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
706
815
  // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
707
816
  // create an event including the cpu/wall time spent in garbage collector work.
@@ -715,10 +824,10 @@ static void update_metrics_and_sample(
715
824
  void thread_context_collector_on_gc_start(VALUE self_instance) {
716
825
  thread_context_collector_state *state;
717
826
  if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
718
- // This should never fail the the above check passes
827
+ // This should never fail when the above check passes
719
828
  TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
720
829
 
721
- per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
830
+ per_thread_context *thread_context = get_per_thread_context(rb_thread_current());
722
831
 
723
832
  // If there was no previously-existing context for this thread, we won't allocate one (see safety). For now we just drop
724
833
  // the GC sample, under the assumption that "a thread that is so new that we never sampled it even once before it triggers
@@ -748,10 +857,10 @@ __attribute__((warn_unused_result))
748
857
  bool thread_context_collector_on_gc_finish(VALUE self_instance) {
749
858
  thread_context_collector_state *state;
750
859
  if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
751
- // This should never fail the the above check passes
860
+ // This should never fail when the above check passes
752
861
  TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
753
862
 
754
- per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
863
+ per_thread_context *thread_context = get_per_thread_context(rb_thread_current());
755
864
 
756
865
  // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
757
866
  // how often this happens -- see on_gc_start.
@@ -871,8 +980,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
871
980
 
872
981
  static void trigger_sample_for_thread(
873
982
  thread_context_collector_state *state,
874
- VALUE thread,
875
- VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
983
+ VALUE thread_being_sampled,
876
984
  per_thread_context *thread_context,
877
985
  sampling_buffer* sampling_buffer,
878
986
  sample_values values,
@@ -888,7 +996,6 @@ static void trigger_sample_for_thread(
888
996
  int max_label_count =
889
997
  1 + // thread id
890
998
  1 + // thread name
891
- 1 + // profiler overhead
892
999
  2 + // ruby vm type and allocation class
893
1000
  1 + // state (only set for cpu/wall-time samples)
894
1001
  2; // local root span id and span id
@@ -900,13 +1007,13 @@ static void trigger_sample_for_thread(
900
1007
  .str = thread_context->thread_id_char_slice
901
1008
  };
902
1009
 
903
- VALUE thread_name = thread_name_for(thread);
1010
+ VALUE thread_name = thread_name_for(thread_being_sampled);
904
1011
  if (thread_name != Qnil) {
905
1012
  labels[label_pos++] = (ddog_prof_Label) {
906
1013
  .key = DDOG_CHARSLICE_C("thread name"),
907
1014
  .str = char_slice_from_ruby_string(thread_name)
908
1015
  };
909
- } else if (thread == state->main_thread) { // Threads are often not named, but we can have a nice fallback for this special thread
1016
+ } else if (thread_being_sampled == state->main_thread) { // Threads are often not named, but we can have a nice fallback for this special thread
910
1017
  ddog_CharSlice main_thread_name = DDOG_CHARSLICE_C("main");
911
1018
  labels[label_pos++] = (ddog_prof_Label) {
912
1019
  .key = DDOG_CHARSLICE_C("thread name"),
@@ -922,11 +1029,11 @@ static void trigger_sample_for_thread(
922
1029
  }
923
1030
 
924
1031
  trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil};
925
- trace_identifiers_for(state, thread, &trace_identifiers_result, is_safe_to_allocate_objects);
1032
+ trace_identifiers_for(state, thread_being_sampled, &trace_identifiers_result, is_safe_to_allocate_objects);
926
1033
 
927
1034
  if (!trace_identifiers_result.valid && state->otel_context_enabled != OTEL_CONTEXT_ENABLED_FALSE) {
928
1035
  // If we couldn't get something with ddtrace, let's see if we can get some trace identifiers from opentelemetry directly
929
- otel_without_ddtrace_trace_identifiers_for(state, thread, &trace_identifiers_result, is_safe_to_allocate_objects);
1036
+ otel_without_ddtrace_trace_identifiers_for(state, thread_being_sampled, &trace_identifiers_result, is_safe_to_allocate_objects);
930
1037
  }
931
1038
 
932
1039
  if (trace_identifiers_result.valid) {
@@ -951,13 +1058,6 @@ static void trigger_sample_for_thread(
951
1058
  }
952
1059
  }
953
1060
 
954
- if (thread != stack_from_thread) {
955
- labels[label_pos++] = (ddog_prof_Label) {
956
- .key = DDOG_CHARSLICE_C("profiler overhead"),
957
- .num = 1
958
- };
959
- }
960
-
961
1061
  if (ruby_vm_type != NULL) {
962
1062
  labels[label_pos++] = (ddog_prof_Label) {
963
1063
  .key = DDOG_CHARSLICE_C("ruby vm type"),
@@ -1003,8 +1103,9 @@ static void trigger_sample_for_thread(
1003
1103
  }
1004
1104
 
1005
1105
  sample_thread(
1006
- stack_from_thread,
1106
+ thread_being_sampled,
1007
1107
  sampling_buffer,
1108
+ state->locations,
1008
1109
  state->recorder_instance,
1009
1110
  values,
1010
1111
  (sample_labels) {
@@ -1032,29 +1133,22 @@ static VALUE _native_thread_list(DDTRACE_UNUSED VALUE _self) {
1032
1133
  return result;
1033
1134
  }
1034
1135
 
1136
+ // This allocates a Ruby object and therefore needs the GVL and is not safe to call from RUBY_INTERNAL_EVENT_* hooks.
1035
1137
  static per_thread_context *get_or_create_context_for(VALUE thread, thread_context_collector_state *state) {
1036
- per_thread_context* thread_context = NULL;
1037
- st_data_t value_context = 0;
1138
+ per_thread_context *thread_context = get_per_thread_context(thread);
1139
+ if (thread_context != NULL) return thread_context;
1038
1140
 
1039
- if (st_lookup(state->hash_map_per_thread_context, (st_data_t) thread, &value_context)) {
1040
- thread_context = (per_thread_context*) value_context;
1041
- } else {
1042
- thread_context = calloc(1, sizeof(per_thread_context)); // See "note on calloc vs ruby_xcalloc use" in heap_recorder.c
1043
- initialize_context(thread, thread_context, state);
1044
- st_insert(state->hash_map_per_thread_context, (st_data_t) thread, (st_data_t) thread_context);
1141
+ if (RB_OBJ_FROZEN(thread)) {
1142
+ raise_error(rb_eFrozenError, "Cannot setup profiler state for Thread %"PRIsVALUE" because it is frozen. Please avoid freezing Thread instances and/or report the issue to dd-trace-rb", thread);
1045
1143
  }
1046
1144
 
1047
- return thread_context;
1048
- }
1145
+ thread_context = calloc(1, sizeof(per_thread_context)); // See "note on calloc vs ruby_xcalloc use" in heap_recorder.c
1146
+ initialize_context(thread, thread_context, state);
1049
1147
 
1050
- static per_thread_context *get_context_for(VALUE thread, thread_context_collector_state *state) {
1051
- per_thread_context* thread_context = NULL;
1052
- st_data_t value_context = 0;
1053
-
1054
- if (st_lookup(state->hash_map_per_thread_context, (st_data_t) thread, &value_context)) {
1055
- thread_context = (per_thread_context*) value_context;
1056
- }
1148
+ VALUE wrapper = TypedData_Wrap_Struct(rb_cObject, &per_thread_context_typed_data, thread_context);
1149
+ rb_ivar_set(thread, dd_per_thread_context_id, wrapper);
1057
1150
 
1151
+ set_per_thread_context(thread, thread_context);
1058
1152
  return thread_context;
1059
1153
  }
1060
1154
 
@@ -1080,7 +1174,7 @@ static bool is_logging_gem_monkey_patch(VALUE invoke_file_location) {
1080
1174
  }
1081
1175
 
1082
1176
  static void initialize_context(VALUE thread, per_thread_context *thread_context, thread_context_collector_state *state) {
1083
- sampling_buffer_initialize(&thread_context->sampling_buffer, state->max_frames, state->locations);
1177
+ sampling_buffer_initialize(&thread_context->sampling_buffer, state->locations.len);
1084
1178
 
1085
1179
  snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%"PRIu64" (%lu)", native_thread_id_for(thread), (unsigned long) thread_id_for(thread));
1086
1180
  thread_context->thread_id_char_slice = (ddog_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};
@@ -1121,24 +1215,8 @@ static void initialize_context(VALUE thread, per_thread_context *thread_context,
1121
1215
  thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
1122
1216
  thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
1123
1217
 
1124
- #ifndef NO_GVL_INSTRUMENTATION
1125
- // We use this special location to store data that can be accessed without any
1126
- // kind of synchronization (e.g. by threads without the GVL).
1127
- //
1128
- // We set this marker here for two purposes:
1129
- // * To make sure there's no stale data from a previous execution of the profiler.
1130
- // * To mark threads that are actually being profiled
1131
- //
1132
- // (Setting this is potentially a race, but what we want is to avoid _stale_ data, so
1133
- // if this gets set concurrently with context initialization, then such a value will belong
1134
- // to the current profiler instance, so that's OK)
1135
- gvl_profiling_state_thread_object_set(thread, GVL_WAITING_ENABLED_EMPTY);
1136
- #endif
1137
- }
1138
-
1139
- static void free_context(per_thread_context* thread_context) {
1140
- sampling_buffer_free(&thread_context->sampling_buffer);
1141
- free(thread_context); // See "note on calloc vs ruby_xcalloc use" in heap_recorder.c
1218
+ thread_context->gvl_waiting_at = 0;
1219
+ thread_context->gvl_state_change_count = 0;
1142
1220
  }
1143
1221
 
1144
1222
  static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -1148,13 +1226,11 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
1148
1226
  VALUE result = rb_str_new2(" (native state)");
1149
1227
 
1150
1228
  // Update this when modifying state struct
1151
- rb_str_concat(result, rb_sprintf(" max_frames=%d", state->max_frames));
1152
- rb_str_concat(result, rb_sprintf(" hash_map_per_thread_context=%"PRIsVALUE, per_thread_context_st_table_as_ruby_hash(state)));
1229
+ rb_str_concat(result, rb_sprintf(" max_frames=%d", state->locations.len));
1153
1230
  rb_str_concat(result, rb_sprintf(" recorder_instance=%"PRIsVALUE, state->recorder_instance));
1154
1231
  VALUE tracer_context_key = state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY ? Qnil : ID2SYM(state->tracer_context_key);
1155
1232
  rb_str_concat(result, rb_sprintf(" tracer_context_key=%+"PRIsVALUE, tracer_context_key));
1156
- rb_str_concat(result, rb_sprintf(" sample_count=%u", state->sample_count));
1157
- rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state)));
1233
+ rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_to_ruby_hash(state, rb_hash_new())));
1158
1234
  rb_str_concat(result, rb_sprintf(" endpoint_collection_enabled=%"PRIsVALUE, state->endpoint_collection_enabled ? Qtrue : Qfalse));
1159
1235
  rb_str_concat(result, rb_sprintf(" native_filenames_enabled=%"PRIsVALUE, state->native_filenames_enabled ? Qtrue : Qfalse));
1160
1236
  // Note: `st_table_size()` is available from Ruby 3.2+ but not before
@@ -1168,23 +1244,13 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
1168
1244
  rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
1169
1245
  rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
1170
1246
  rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
1171
- rb_str_concat(result, rb_sprintf(" global_waiting_for_gvl_threshold_ns=%u", global_waiting_for_gvl_threshold_ns));
1172
-
1173
- return result;
1174
- }
1247
+ rb_str_concat(result, rb_sprintf(" waiting_for_gvl_threshold_ns=%u", state->waiting_for_gvl_threshold_ns));
1175
1248
 
1176
- static VALUE per_thread_context_st_table_as_ruby_hash(thread_context_collector_state *state) {
1177
- VALUE result = rb_hash_new();
1178
- st_foreach(state->hash_map_per_thread_context, per_thread_context_as_ruby_hash, result);
1179
1249
  return result;
1180
1250
  }
1181
1251
 
1182
- static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash) {
1183
- VALUE thread = (VALUE) key_thread;
1184
- per_thread_context *thread_context = (per_thread_context*) value_context;
1185
- VALUE result = (VALUE) result_hash;
1252
+ static VALUE per_thread_context_to_ruby_hash(per_thread_context *thread_context) {
1186
1253
  VALUE context_as_hash = rb_hash_new();
1187
- rb_hash_aset(result, thread, context_as_hash);
1188
1254
 
1189
1255
  VALUE arguments[] = {
1190
1256
  ID2SYM(rb_intern("thread_id")), /* => */ rb_str_new2(thread_context->thread_id),
@@ -1201,24 +1267,26 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
1201
1267
  ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
1202
1268
  ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
1203
1269
 
1204
- #ifndef NO_GVL_INSTRUMENTATION
1205
- ID2SYM(rb_intern("gvl_waiting_at")), /* => */ LONG2NUM(gvl_profiling_state_thread_object_get(thread)),
1206
- #endif
1270
+ ID2SYM(rb_intern("gvl_waiting_at")), /* => */ LONG2NUM(thread_context->gvl_waiting_at),
1271
+ ID2SYM(rb_intern("gvl_state_change_count")), /* => */ ULL2NUM(thread_context->gvl_state_change_count),
1272
+ ID2SYM(rb_intern("gvl_state_change_count_at_previous_sample")), /* => */ ULL2NUM(thread_context->gvl_state_change_count_at_previous_sample),
1273
+ ID2SYM(rb_intern("was_skipped_at_last_sample")), /* => */ thread_context->was_skipped_at_last_sample ? Qtrue : Qfalse,
1207
1274
  };
1208
1275
  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);
1209
1276
 
1210
- return ST_CONTINUE;
1277
+ return context_as_hash;
1211
1278
  }
1212
1279
 
1213
- static VALUE stats_as_ruby_hash(thread_context_collector_state *state) {
1280
+ static VALUE stats_to_ruby_hash(thread_context_collector_state *state, VALUE hash) {
1214
1281
  // Update this when modifying state struct (stats inner struct)
1215
- VALUE stats_as_hash = rb_hash_new();
1216
1282
  VALUE arguments[] = {
1283
+ ID2SYM(rb_intern("sample_count")), /* => */ UINT2NUM(state->stats.sample_count),
1217
1284
  ID2SYM(rb_intern("gc_samples")), /* => */ UINT2NUM(state->stats.gc_samples),
1218
1285
  ID2SYM(rb_intern("gc_samples_missed_due_to_missing_context")), /* => */ UINT2NUM(state->stats.gc_samples_missed_due_to_missing_context),
1286
+ ID2SYM(rb_intern("inactive_thread_samples_skipped")), /* => */ UINT2NUM(state->stats.inactive_thread_samples_skipped),
1219
1287
  };
1220
- for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
1221
- return stats_as_hash;
1288
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(hash, arguments[i], arguments[i+1]);
1289
+ return hash;
1222
1290
  }
1223
1291
 
1224
1292
  static VALUE gc_tracking_as_ruby_hash(thread_context_collector_state *state) {
@@ -1234,36 +1302,35 @@ static VALUE gc_tracking_as_ruby_hash(thread_context_collector_state *state) {
1234
1302
  return result;
1235
1303
  }
1236
1304
 
1237
- static void remove_context_for_dead_threads(thread_context_collector_state *state) {
1238
- st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
1239
- }
1240
-
1241
- static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, DDTRACE_UNUSED st_data_t _argument) {
1242
- VALUE thread = (VALUE) key_thread;
1243
- per_thread_context* thread_context = (per_thread_context*) value_context;
1244
-
1245
- if (is_thread_alive(thread)) return ST_CONTINUE;
1246
-
1247
- free_context(thread_context);
1248
- return ST_DELETE;
1249
- }
1250
-
1251
1305
  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
1252
1306
  // It SHOULD NOT be used for other purposes.
1253
1307
  //
1254
- // Returns the whole contents of the per_thread_context structs being tracked.
1308
+ // Returns the whole contents of the per_thread_context structs being tracked, by iterating all live threads.
1255
1309
  static VALUE _native_per_thread_context(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
1256
1310
  thread_context_collector_state *state;
1257
1311
  TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1258
1312
 
1259
- return per_thread_context_st_table_as_ruby_hash(state);
1313
+ VALUE result = rb_hash_new();
1314
+ VALUE threads = thread_list(state);
1315
+ const long thread_count = RARRAY_LEN(threads);
1316
+ for (long i = 0; i < thread_count; i++) {
1317
+ VALUE thread = RARRAY_AREF(threads, i);
1318
+ per_thread_context *thread_context = get_per_thread_context(thread);
1319
+ if (thread_context != NULL) {
1320
+ rb_hash_aset(result, thread, per_thread_context_to_ruby_hash(thread_context));
1321
+ }
1322
+ }
1323
+ return result;
1260
1324
  }
1261
1325
 
1326
+ // gc_start_time_ns should only be passed if IS_CPU_TIME
1262
1327
  static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time) {
1263
1328
  // If we didn't have a time for the previous sample, we use the current one
1264
1329
  if (*time_at_previous_sample_ns == INVALID_TIME) *time_at_previous_sample_ns = current_time_ns;
1265
1330
 
1266
- bool is_thread_doing_gc = gc_start_time_ns != INVALID_TIME;
1331
+ // We don't want wall-time accounting to change during GC.
1332
+ // E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing GC or not.
1333
+ bool is_thread_doing_gc = !is_wall_time && gc_start_time_ns != INVALID_TIME;
1267
1334
  long elapsed_time_ns = -1;
1268
1335
 
1269
1336
  if (is_thread_doing_gc) {
@@ -1350,7 +1417,7 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance)
1350
1417
  thread_context_collector_state *state;
1351
1418
  TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1352
1419
 
1353
- return stats_as_ruby_hash(state);
1420
+ return stats_to_ruby_hash(state, rb_hash_new());
1354
1421
  }
1355
1422
 
1356
1423
  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
@@ -1445,17 +1512,18 @@ static bool should_collect_resource(VALUE root_span) {
1445
1512
  //
1446
1513
  // Assumption: This method gets called BEFORE restarting profiling -- e.g. there are no components attempting to
1447
1514
  // trigger samples at the same time.
1515
+ //
1516
+ // Note that tests call this method directly in the same process without forking,
1517
+ // and in such a case non-current Threads keep running.
1448
1518
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
1449
1519
  thread_context_collector_state *state;
1450
1520
  TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1451
1521
 
1452
- // Release all context memory before clearing the existing context
1453
- st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
1454
-
1455
- st_clear(state->hash_map_per_thread_context);
1456
-
1457
1522
  state->stats = (struct stats) {}; // Resets all stats back to zero
1458
1523
 
1524
+ // Clear any leftover state from parent process in the current thread; all other threads are assumed dead
1525
+ _native_clear_per_thread_context_for(Qnil, rb_thread_current());
1526
+
1459
1527
  rb_funcall(state->recorder_instance, rb_intern("reset_after_fork"), 0);
1460
1528
 
1461
1529
  return Qtrue;
@@ -1475,14 +1543,9 @@ static VALUE thread_list(thread_context_collector_state *state) {
1475
1543
  // expected to be called from a signal handler and to be async-signal-safe.
1476
1544
  //
1477
1545
  // Also, no allocation (Ruby or malloc) can happen.
1478
- bool thread_context_collector_prepare_sample_inside_signal_handler(VALUE self_instance) {
1479
- thread_context_collector_state *state;
1480
- if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
1481
- // This should never fail if the above check passes
1482
- TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1483
-
1546
+ bool thread_context_collector_prepare_sample_inside_signal_handler(void) {
1484
1547
  VALUE current_thread = rb_thread_current();
1485
- per_thread_context *thread_context = get_context_for(current_thread, state);
1548
+ per_thread_context *thread_context = get_per_thread_context(current_thread);
1486
1549
  if (thread_context == NULL) return false;
1487
1550
 
1488
1551
  return prepare_sample_thread(current_thread, &thread_context->sampling_buffer);
@@ -1493,12 +1556,12 @@ bool thread_context_collector_prepare_sample_inside_signal_handler(VALUE self_in
1493
1556
  //
1494
1557
  // Returns true if the after_allocation needs to be called (to do work that can't be done from inside the
1495
1558
  // tracepoint, such as allocate new objects), and false if it doesn't
1496
- bool thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object) {
1559
+ //
1560
+ // The callers must ensure thread_context is non-NULL.
1561
+ bool thread_context_collector_sample_allocation(VALUE self_instance, per_thread_context *thread_context, unsigned int sample_weight, VALUE new_object) {
1497
1562
  thread_context_collector_state *state;
1498
1563
  TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1499
1564
 
1500
- VALUE current_thread = rb_thread_current();
1501
-
1502
1565
  enum ruby_value_type type = rb_type(new_object);
1503
1566
 
1504
1567
  // Tag samples with the VM internal types
@@ -1565,12 +1628,11 @@ bool thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
1565
1628
 
1566
1629
  bool needs_after_allocation = track_object(state->recorder_instance, new_object, sample_weight, class_name);
1567
1630
 
1568
- per_thread_context *thread_context = get_or_create_context_for(current_thread, state);
1631
+ VALUE current_thread = rb_thread_current();
1569
1632
 
1570
1633
  trigger_sample_for_thread(
1571
1634
  state,
1572
- /* thread: */ current_thread,
1573
- /* stack_from_thread: */ current_thread,
1635
+ current_thread,
1574
1636
  thread_context,
1575
1637
  &thread_context->sampling_buffer,
1576
1638
  (sample_values) {.alloc_samples = sample_weight, .alloc_samples_unscaled = 1, .heap_sample = true},
@@ -1587,9 +1649,13 @@ bool thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
1587
1649
  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
1588
1650
  // It SHOULD NOT be used for other purposes.
1589
1651
  static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object) {
1652
+ thread_context_collector_state *state;
1653
+ TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1654
+ per_thread_context *thread_context = get_or_create_context_for(rb_thread_current(), state);
1655
+
1590
1656
  debug_enter_unsafe_context();
1591
1657
 
1592
- bool needs_after_allocation = thread_context_collector_sample_allocation(collector_instance, NUM2UINT(sample_weight), new_object);
1658
+ bool needs_after_allocation = thread_context_collector_sample_allocation(collector_instance, thread_context, NUM2UINT(sample_weight), new_object);
1593
1659
 
1594
1660
  debug_leave_unsafe_context();
1595
1661
 
@@ -1598,7 +1664,10 @@ static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collecto
1598
1664
  return needs_after_allocation ? Qtrue : Qfalse;
1599
1665
  }
1600
1666
 
1601
- static VALUE new_empty_thread_inner(DDTRACE_UNUSED void *arg) { return Qnil; }
1667
+ static VALUE new_empty_thread_inner(DDTRACE_UNUSED void *arg) {
1668
+ rb_thread_sleep(INT_MAX);
1669
+ return Qnil;
1670
+ }
1602
1671
 
1603
1672
  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
1604
1673
  // It SHOULD NOT be used for other purposes.
@@ -1891,36 +1960,76 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1891
1960
  ((uint64_t)span_bytes[7]);
1892
1961
  }
1893
1962
 
1963
+ void thread_context_collector_stats(VALUE self_instance, VALUE stats_hash) {
1964
+ thread_context_collector_state *state;
1965
+ TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1966
+ stats_to_ruby_hash(state, stats_hash);
1967
+ }
1968
+
1969
+ void thread_context_collector_stats_reset_not_thread_safe(VALUE self_instance) {
1970
+ thread_context_collector_state *state;
1971
+ TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1972
+ state->stats = (struct stats) {};
1973
+ }
1974
+
1894
1975
  #ifndef NO_GVL_INSTRUMENTATION
1895
- // This function can get called from outside the GVL and even on non-main Ractors
1896
- void thread_context_collector_on_gvl_waiting(gvl_profiling_thread thread) {
1897
- // Because this function gets called from a thread that is NOT holding the GVL, we avoid touching the
1898
- // per-thread context directly.
1899
- //
1900
- // Instead, we ask Ruby to hold the data we need in Ruby's own special per-thread context area
1901
- // that's thread-safe and built for this kind of use
1902
- //
1903
- // Also, this function can get called on the non-main Ractor. We deal with this by checking if the value in the context
1904
- // is non-zero, since only `initialize_context` ever sets the value from 0 to non-zero for threads it sees.
1905
- intptr_t thread_being_profiled = gvl_profiling_state_get(thread);
1906
- if (!thread_being_profiled) return;
1976
+ void thread_context_collector_on_gvl_released(per_thread_context *thread_context) {
1977
+ thread_context->gvl_state_change_count |= GVL_SUSPENDED;
1978
+ }
1907
1979
 
1980
+ // Called by the stack recorder at the start of _native_serialize, so that threads whose last
1981
+ // per-tick sample was skipped by the SUSPENDED-skip optimization still get their accumulated
1982
+ // time recorded in this reporting period. Without this, a thread that sleeps across the whole
1983
+ // period would not be reported at all.
1984
+ void thread_context_collector_on_serialize(VALUE self_instance) {
1985
+ thread_context_collector_state *state;
1986
+ TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1987
+
1988
+ long current_monotonic_wall_time_ns = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
1989
+ VALUE threads = thread_list(state);
1990
+ const long thread_count = RARRAY_LEN(threads);
1991
+
1992
+ for (long i = 0; i < thread_count; i++) {
1993
+ VALUE thread = RARRAY_AREF(threads, i);
1994
+ per_thread_context *thread_context = get_per_thread_context(thread);
1995
+
1996
+ if (thread_context != NULL && thread_context->was_skipped_at_last_sample) {
1997
+ long current_cpu_time_ns = cpu_time_now_ns(thread_context);
1998
+ // We need to force_sample_suspended=true otherwise this sample would be skipped too
1999
+ update_metrics_and_sample(
2000
+ state,
2001
+ thread,
2002
+ thread_context,
2003
+ &thread_context->sampling_buffer,
2004
+ current_cpu_time_ns,
2005
+ current_monotonic_wall_time_ns,
2006
+ true);
2007
+ }
2008
+ }
2009
+ }
2010
+
2011
+ void thread_context_collector_on_gvl_waiting(per_thread_context *thread_context) {
1908
2012
  long current_monotonic_wall_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
1909
- if (current_monotonic_wall_time_ns <= 0 || current_monotonic_wall_time_ns > GVL_WAITING_ENABLED_EMPTY) return;
2013
+ if (current_monotonic_wall_time_ns <= 0) return;
1910
2014
 
1911
- gvl_profiling_state_set(thread, current_monotonic_wall_time_ns);
2015
+ thread_context->gvl_waiting_at = current_monotonic_wall_time_ns;
1912
2016
  }
1913
2017
 
1914
- // This function can get called from outside the GVL and even on non-main Ractors
2018
+ // This function runs on the passed thread and has the GVL because it gets called just after the Ruby thread acquired the GVL
1915
2019
  __attribute__((warn_unused_result))
1916
- on_gvl_running_result thread_context_collector_on_gvl_running_with_threshold(gvl_profiling_thread thread, uint32_t waiting_for_gvl_threshold_ns) {
1917
- intptr_t gvl_waiting_at = gvl_profiling_state_get(thread);
2020
+ on_gvl_running_result thread_context_collector_on_gvl_running(VALUE self_instance, VALUE thread, per_thread_context *thread_context) {
2021
+ thread_context_collector_state *state;
2022
+ TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
2023
+
2024
+ // Bump the event counter and clears the state bit to "running"
2025
+ uint64_t counter_portion = thread_context->gvl_state_change_count >> 1;
2026
+ thread_context->gvl_state_change_count = ((counter_portion + 1) << 1) | GVL_RUNNING;
1918
2027
 
1919
- // Thread was not being profiled / not waiting on gvl
1920
- if (gvl_waiting_at == 0 || gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) {
2028
+ long gvl_waiting_at = thread_context->gvl_waiting_at;
2029
+ // Thread was not waiting on gvl
2030
+ if (gvl_waiting_at == 0) {
1921
2031
  return (on_gvl_running_result) {.action = ON_GVL_RUNNING_UNKNOWN, .waiting_for_gvl_duration_ns = 0};
1922
2032
  }
1923
-
1924
2033
  // @ivoanjo: I'm not sure if this can happen -- It means we should've sampled already but haven't gotten the chance yet?
1925
2034
  if (gvl_waiting_at < 0) {
1926
2035
  return (on_gvl_running_result) {.action = ON_GVL_RUNNING_SAMPLE, .waiting_for_gvl_duration_ns = 0};
@@ -1928,16 +2037,30 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1928
2037
 
1929
2038
  long waiting_for_gvl_duration_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - gvl_waiting_at;
1930
2039
 
1931
- bool should_sample = waiting_for_gvl_duration_ns >= waiting_for_gvl_threshold_ns;
2040
+ bool should_sample = waiting_for_gvl_duration_ns >= state->waiting_for_gvl_threshold_ns;
1932
2041
 
1933
2042
  if (should_sample) {
1934
2043
  // We flip the gvl_waiting_at to negative to mark that the thread is now running and no longer waiting
1935
- intptr_t gvl_waiting_at_is_now_running = -gvl_waiting_at;
2044
+ long gvl_waiting_at_is_now_running = -gvl_waiting_at;
1936
2045
 
1937
- gvl_profiling_state_set(thread, gvl_waiting_at_is_now_running);
2046
+ thread_context->gvl_waiting_at = gvl_waiting_at_is_now_running;
1938
2047
  } else {
1939
- // We decided not to sample. Let's mark the thread back to the initial "enabled but empty" state
1940
- gvl_profiling_state_set(thread, GVL_WAITING_ENABLED_EMPTY);
2048
+ thread_context->gvl_waiting_at = 0;
2049
+
2050
+ // Even though the GVL wait itself was below threshold, if the thread had skipped samples
2051
+ // (was suspended for a long time without the GVL), we still need to force a sample now.
2052
+ // Otherwise, the accumulated idle wall-time would be reported against whatever stack the
2053
+ // thread runs next, misrepresenting the time spent idle.
2054
+ if (thread_context->was_skipped_at_last_sample) {
2055
+ should_sample = true;
2056
+ }
2057
+ }
2058
+
2059
+ if (should_sample) {
2060
+ // We prepare the sample here because the postponed job might be called some time later,
2061
+ // possibly after some Ruby calls which change the Ruby stack,
2062
+ // and we want to attribute the time acquiring or without the GVL to the correct Ruby stack.
2063
+ prepare_sample_thread(thread, &thread_context->sampling_buffer);
1941
2064
  }
1942
2065
 
1943
2066
  return (on_gvl_running_result) {
@@ -1946,11 +2069,6 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1946
2069
  };
1947
2070
  }
1948
2071
 
1949
- __attribute__((warn_unused_result))
1950
- on_gvl_running_result thread_context_collector_on_gvl_running(gvl_profiling_thread thread) {
1951
- return thread_context_collector_on_gvl_running_with_threshold(thread, global_waiting_for_gvl_threshold_ns);
1952
- }
1953
-
1954
2072
  // Why does this method need to exist?
1955
2073
  //
1956
2074
  // You may be surprised to see that if we never call this function (from cpu_and_wall_time_worker), Waiting for GVL
@@ -1968,7 +2086,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1968
2086
  //
1969
2087
  // Stack:
1970
2088
  // If the thread starts working without the end of the Waiting for GVL sample, then by the time the thread is sampled
1971
- // via the regular cpu/wall-time samples mechanism, the stack can be be inaccurate (e.g. does not correctly pinpoint
2089
+ // via the regular cpu/wall-time samples mechanism, the stack can be inaccurate (e.g. does not correctly pinpoint
1972
2090
  // where the waiting happened).
1973
2091
  //
1974
2092
  // Arguably, the last sample after Waiting for GVL ended (when gvl_waiting_at < 0) should always come from this method
@@ -1977,15 +2095,19 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1977
2095
  //
1978
2096
  // ---
1979
2097
  //
2098
+ // Always called with the GVL, either from a postponed_job or from tests.
2099
+ //
1980
2100
  // NOTE: In normal use, current_thread is expected to be == rb_thread_current(); the `current_thread` parameter only
1981
2101
  // exists to enable testing.
1982
2102
  VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance, VALUE current_thread, long current_monotonic_wall_time_ns) {
1983
2103
  thread_context_collector_state *state;
1984
2104
  TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
1985
2105
 
1986
- intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread);
2106
+ per_thread_context *thread_context = get_or_create_context_for(current_thread, state);
1987
2107
 
1988
- if (gvl_waiting_at >= 0) {
2108
+ long gvl_waiting_at = thread_context->gvl_waiting_at;
2109
+
2110
+ if (gvl_waiting_at >= 0 && !thread_context->was_skipped_at_last_sample) {
1989
2111
  // @ivoanjo: I'm not sure if this can ever happen. This means that we're not on the same thread
1990
2112
  // that ran `thread_context_collector_on_gvl_running` and made the decision to sample OR a regular sample was
1991
2113
  // triggered ahead of us.
@@ -1993,9 +2115,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1993
2115
  return Qfalse;
1994
2116
  }
1995
2117
 
1996
- per_thread_context *thread_context = get_or_create_context_for(current_thread, state);
1997
-
1998
- // We don't actually account for cpu-time during Waiting for GVL. BUT, we may chose to push an
2118
+ // We don't actually account for cpu-time during Waiting for GVL. BUT, we may choose to push an
1999
2119
  // extra sample to represent the period prior to Waiting for GVL. To support that, we retrieve the current
2000
2120
  // cpu-time of the thread and let `update_metrics_and_sample` decide what to do with it.
2001
2121
  long cpu_time_for_thread = cpu_time_now_ns(thread_context);
@@ -2004,13 +2124,12 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2004
2124
 
2005
2125
  update_metrics_and_sample(
2006
2126
  state,
2007
- /* thread_being_sampled: */ current_thread,
2008
- /* stack_from_thread: */ current_thread,
2127
+ current_thread,
2009
2128
  thread_context,
2010
2129
  &thread_context->sampling_buffer,
2011
2130
  cpu_time_for_thread,
2012
- current_monotonic_wall_time_ns
2013
- );
2131
+ current_monotonic_wall_time_ns,
2132
+ false);
2014
2133
 
2015
2134
  return Qtrue;
2016
2135
  }
@@ -2021,14 +2140,13 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2021
2140
  static bool handle_gvl_waiting(
2022
2141
  thread_context_collector_state *state,
2023
2142
  VALUE thread_being_sampled,
2024
- VALUE stack_from_thread,
2025
2143
  per_thread_context *thread_context,
2026
2144
  sampling_buffer* sampling_buffer,
2027
2145
  long current_cpu_time_ns
2028
2146
  ) {
2029
- intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(thread_being_sampled);
2147
+ long gvl_waiting_at = thread_context->gvl_waiting_at;
2030
2148
 
2031
- bool is_gvl_waiting_state = gvl_waiting_at != 0 && gvl_waiting_at != GVL_WAITING_ENABLED_EMPTY;
2149
+ bool is_gvl_waiting_state = gvl_waiting_at != 0;
2032
2150
 
2033
2151
  if (!is_gvl_waiting_state) return false;
2034
2152
 
@@ -2041,17 +2159,17 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2041
2159
  // ...──────────────┬───────────────────...
2042
2160
  // Other state │ Waiting for GVL
2043
2161
  // ...──────────────┴───────────────────...
2044
- // ▲
2162
+ // ▲
2045
2163
  // └─ Previous sample └─ Regular sample (caller)
2046
2164
  //
2047
2165
  // In this case, we'll want to push two samples: a) one for the current time (handled by the caller), b) an extra sample
2048
- // to represent the remaining cpu/wall time before the "Waiting for GVL" started:
2166
+ // to represent the remaining cpu/wall time before the "Waiting for GVL" started (for timeline purposes):
2049
2167
  //
2050
2168
  // time ─────►
2051
2169
  // ...──────────────┬───────────────────...
2052
2170
  // Other state │ Waiting for GVL
2053
2171
  // ...──────────────┴───────────────────...
2054
- // ▲
2172
+ // ▲
2055
2173
  // └─ Prev... └─ Extra sample └─ Regular sample (caller)
2056
2174
  //
2057
2175
  // 2. The current sample is the n-th one after we entered the "Waiting for GVL" state
@@ -2061,7 +2179,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2061
2179
  // ...──────────────┬───────────────────────────────────────────────...
2062
2180
  // Other state │ Waiting for GVL
2063
2181
  // ...──────────────┴───────────────────────────────────────────────...
2064
- // ▲ ▲ ▲
2182
+ // ▲ ▲ ▲
2065
2183
  // └─ Previous sample └─ Previous sample └─ Regular sample (caller)
2066
2184
  //
2067
2185
  // In this case, we just report back to the caller that the thread is in the "Waiting for GVL" state.
@@ -2076,7 +2194,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2076
2194
 
2077
2195
  if (gvl_waiting_at < 0) {
2078
2196
  // Negative means the waiting for GVL just ended, so we clear the state, so next samples no longer represent waiting
2079
- gvl_profiling_state_thread_object_set(thread_being_sampled, GVL_WAITING_ENABLED_EMPTY);
2197
+ thread_context->gvl_waiting_at = 0;
2080
2198
  }
2081
2199
 
2082
2200
  long gvl_waiting_started_wall_time_ns = labs(gvl_waiting_at);
@@ -2086,7 +2204,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2086
2204
  &thread_context->cpu_time_at_previous_sample_ns,
2087
2205
  current_cpu_time_ns,
2088
2206
  thread_context->gc_tracking.cpu_time_at_start_ns,
2089
- IS_NOT_WALL_TIME
2207
+ IS_CPU_TIME
2090
2208
  );
2091
2209
 
2092
2210
  long duration_until_start_of_gvl_waiting_ns = update_time_since_previous_sample(
@@ -2100,7 +2218,6 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2100
2218
  trigger_sample_for_thread(
2101
2219
  state,
2102
2220
  thread_being_sampled,
2103
- stack_from_thread,
2104
2221
  thread_context,
2105
2222
  sampling_buffer,
2106
2223
  (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = duration_until_start_of_gvl_waiting_ns},
@@ -2120,7 +2237,8 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2120
2237
 
2121
2238
  debug_enter_unsafe_context();
2122
2239
 
2123
- thread_context_collector_on_gvl_waiting(thread_from_thread_object(thread));
2240
+ per_thread_context *thread_context = get_per_thread_context(thread);
2241
+ if (thread_context) thread_context_collector_on_gvl_waiting(thread_context);
2124
2242
 
2125
2243
  debug_leave_unsafe_context();
2126
2244
 
@@ -2132,30 +2250,48 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2132
2250
 
2133
2251
  debug_enter_unsafe_context();
2134
2252
 
2135
- intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(thread);
2253
+ per_thread_context *thread_context = get_per_thread_context(thread);
2254
+ VALUE result = thread_context ? LONG2NUM(thread_context->gvl_waiting_at) : Qnil;
2136
2255
 
2137
2256
  debug_leave_unsafe_context();
2138
2257
 
2139
- return LONG2NUM(gvl_waiting_at);
2258
+ return result;
2140
2259
  }
2141
2260
 
2142
- static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread) {
2261
+ static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread) {
2143
2262
  ENFORCE_THREAD(thread);
2144
2263
 
2145
2264
  debug_enter_unsafe_context();
2146
2265
 
2147
- VALUE result = thread_context_collector_on_gvl_running(thread_from_thread_object(thread)).action == ON_GVL_RUNNING_SAMPLE ? Qtrue : Qfalse;
2266
+ per_thread_context *thread_context = get_per_thread_context(thread);
2267
+ VALUE result;
2268
+ if (thread_context) {
2269
+ result = thread_context_collector_on_gvl_running(collector_instance, thread, thread_context).action == ON_GVL_RUNNING_SAMPLE ? Qtrue : Qfalse;
2270
+ } else {
2271
+ result = Qfalse;
2272
+ }
2148
2273
 
2149
2274
  debug_leave_unsafe_context();
2150
2275
 
2151
2276
  return result;
2152
2277
  }
2153
2278
 
2154
- static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE allow_exception) {
2279
+ static VALUE _native_on_gvl_released(DDTRACE_UNUSED VALUE self, VALUE thread) {
2155
2280
  ENFORCE_THREAD(thread);
2156
- ENFORCE_BOOLEAN(allow_exception);
2157
2281
 
2282
+ debug_enter_unsafe_context();
2283
+
2284
+ per_thread_context *thread_context = get_per_thread_context(thread);
2285
+ if (thread_context) thread_context_collector_on_gvl_released(thread_context);
2286
+
2287
+ debug_leave_unsafe_context();
2158
2288
 
2289
+ return Qnil;
2290
+ }
2291
+
2292
+ static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE allow_exception) {
2293
+ ENFORCE_THREAD(thread);
2294
+ ENFORCE_BOOLEAN(allow_exception);
2159
2295
 
2160
2296
  if (allow_exception == Qfalse) debug_enter_unsafe_context();
2161
2297
 
@@ -2170,13 +2306,10 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2170
2306
  return result;
2171
2307
  }
2172
2308
 
2173
- static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns) {
2309
+ static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE thread, VALUE delta_ns) {
2174
2310
  ENFORCE_THREAD(thread);
2175
2311
 
2176
- thread_context_collector_state *state;
2177
- TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
2178
-
2179
- per_thread_context *thread_context = get_context_for(thread, state);
2312
+ per_thread_context *thread_context = get_per_thread_context(thread);
2180
2313
  if (thread_context == NULL) raise_error(rb_eArgError, "Unexpected: This method cannot be used unless the per-thread context for the thread already exists");
2181
2314
 
2182
2315
  thread_context->cpu_time_at_previous_sample_ns += NUM2LONG(delta_ns);
@@ -2188,11 +2321,12 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
2188
2321
  static bool handle_gvl_waiting(
2189
2322
  DDTRACE_UNUSED thread_context_collector_state *state,
2190
2323
  DDTRACE_UNUSED VALUE thread_being_sampled,
2191
- DDTRACE_UNUSED VALUE stack_from_thread,
2192
2324
  DDTRACE_UNUSED per_thread_context *thread_context,
2193
2325
  DDTRACE_UNUSED sampling_buffer* sampling_buffer,
2194
2326
  DDTRACE_UNUSED long current_cpu_time_ns
2195
2327
  ) { return false; }
2328
+
2329
+ void thread_context_collector_on_serialize(DDTRACE_UNUSED VALUE self_instance) { }
2196
2330
  #endif // NO_GVL_INSTRUMENTATION
2197
2331
 
2198
2332
  #define MAX_SAFE_LOOKUP_SIZE 16
@@ -2239,6 +2373,6 @@ static VALUE _native_system_epoch_time_now_ns(DDTRACE_UNUSED VALUE self, VALUE c
2239
2373
  return LONG2NUM(system_epoch_time_ns);
2240
2374
  }
2241
2375
 
2242
- static VALUE _native_prepare_sample_inside_signal_handler(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
2243
- return thread_context_collector_prepare_sample_inside_signal_handler(collector_instance) ? Qtrue : Qfalse;
2376
+ static VALUE _native_prepare_sample_inside_signal_handler(DDTRACE_UNUSED VALUE self) {
2377
+ return thread_context_collector_prepare_sample_inside_signal_handler() ? Qtrue : Qfalse;
2244
2378
  }