ddtrace 1.20.0 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +115 -1
  3. data/LICENSE-3rdparty.csv +1 -1
  4. data/bin/ddprofrb +15 -0
  5. data/bin/ddtracerb +3 -1
  6. data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
  7. data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
  8. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +238 -61
  9. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.c +145 -72
  10. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.h +17 -5
  11. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +97 -4
  12. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +2 -2
  13. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.c +45 -3
  14. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.h +7 -1
  15. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
  16. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +4 -4
  17. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +14 -0
  18. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
  19. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +1 -1
  20. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.c +10 -0
  21. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +2 -0
  22. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +7 -9
  23. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +43 -13
  24. data/lib/datadog/appsec/event.rb +1 -1
  25. data/lib/datadog/auto_instrument.rb +3 -0
  26. data/lib/datadog/core/configuration/components.rb +7 -6
  27. data/lib/datadog/core/configuration/option.rb +8 -6
  28. data/lib/datadog/core/configuration/settings.rb +130 -63
  29. data/lib/datadog/core/configuration.rb +20 -4
  30. data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
  31. data/lib/datadog/core/environment/git.rb +25 -0
  32. data/lib/datadog/core/environment/identity.rb +18 -48
  33. data/lib/datadog/core/environment/platform.rb +7 -1
  34. data/lib/datadog/core/git/ext.rb +2 -23
  35. data/lib/datadog/core/remote/client/capabilities.rb +1 -1
  36. data/lib/datadog/core/remote/negotiation.rb +2 -2
  37. data/lib/datadog/core/remote/transport/http/config.rb +1 -1
  38. data/lib/datadog/core/remote/worker.rb +7 -4
  39. data/lib/datadog/core/telemetry/client.rb +18 -10
  40. data/lib/datadog/core/telemetry/emitter.rb +9 -13
  41. data/lib/datadog/core/telemetry/event.rb +247 -57
  42. data/lib/datadog/core/telemetry/ext.rb +1 -0
  43. data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
  44. data/lib/datadog/core/telemetry/http/ext.rb +4 -1
  45. data/lib/datadog/core/telemetry/http/transport.rb +9 -4
  46. data/lib/datadog/core/telemetry/request.rb +59 -0
  47. data/lib/datadog/core/transport/ext.rb +2 -0
  48. data/lib/datadog/core/utils/url.rb +25 -0
  49. data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
  50. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +31 -0
  51. data/lib/datadog/profiling/collectors/info.rb +101 -0
  52. data/lib/datadog/profiling/component.rb +34 -28
  53. data/lib/datadog/profiling/exporter.rb +19 -5
  54. data/lib/datadog/profiling/ext.rb +2 -0
  55. data/lib/datadog/profiling/flush.rb +6 -3
  56. data/lib/datadog/profiling/http_transport.rb +5 -1
  57. data/lib/datadog/profiling/load_native_extension.rb +19 -6
  58. data/lib/datadog/profiling/native_extension.rb +1 -1
  59. data/lib/datadog/profiling/tag_builder.rb +5 -0
  60. data/lib/datadog/profiling/tasks/exec.rb +3 -3
  61. data/lib/datadog/profiling/tasks/help.rb +3 -3
  62. data/lib/datadog/profiling.rb +13 -2
  63. data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
  64. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
  65. data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
  66. data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
  67. data/lib/datadog/tracing/contrib/configurable.rb +1 -1
  68. data/lib/datadog/tracing/contrib/extensions.rb +6 -2
  69. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
  70. data/lib/datadog/tracing/sampling/matcher.rb +23 -3
  71. data/lib/datadog/tracing/sampling/rule.rb +7 -2
  72. data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
  73. data/lib/datadog/tracing/trace_operation.rb +1 -2
  74. data/lib/datadog/tracing/transport/http.rb +1 -0
  75. data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
  76. data/lib/ddtrace/version.rb +1 -1
  77. metadata +55 -62
  78. data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
  79. data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
  80. data/lib/datadog/core/telemetry/collector.rb +0 -250
  81. data/lib/datadog/core/telemetry/v1/app_event.rb +0 -59
  82. data/lib/datadog/core/telemetry/v1/application.rb +0 -92
  83. data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
  84. data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
  85. data/lib/datadog/core/telemetry/v1/host.rb +0 -59
  86. data/lib/datadog/core/telemetry/v1/install_signature.rb +0 -38
  87. data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
  88. data/lib/datadog/core/telemetry/v1/product.rb +0 -36
  89. data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
  90. data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
  91. data/lib/datadog/core/telemetry/v2/request.rb +0 -29
  92. data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
  93. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
  94. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
  95. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
  96. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
  97. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +0 -0
  98. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +0 -0
  99. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.c +0 -0
  100. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.h +0 -0
  101. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
  102. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
  103. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +0 -0
  104. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +0 -0
  105. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +0 -0
  106. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +0 -0
  107. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +0 -0
  108. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +0 -0
  109. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
  110. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
  111. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +0 -0
  112. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
  113. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +0 -0
@@ -17,6 +17,8 @@
17
17
  #include "setup_signal_handler.h"
18
18
  #include "time_helpers.h"
19
19
 
20
+ #define ERR_CLOCK_FAIL "failed to get clock time"
21
+
20
22
  // Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
21
23
  unsigned int MAX_ALLOC_WEIGHT = 65535;
22
24
 
@@ -94,6 +96,7 @@ struct cpu_and_wall_time_worker_state {
94
96
  bool no_signals_workaround_enabled;
95
97
  bool dynamic_sampling_rate_enabled;
96
98
  bool allocation_profiling_enabled;
99
+ bool skip_idle_samples_for_testing;
97
100
  VALUE self_instance;
98
101
  VALUE thread_context_collector_instance;
99
102
  VALUE idle_sampling_helper_instance;
@@ -119,6 +122,7 @@ struct cpu_and_wall_time_worker_state {
119
122
  bool during_sample;
120
123
 
121
124
  struct stats {
125
+ // # Generic stats
122
126
  // How many times we tried to trigger a sample
123
127
  unsigned int trigger_sample_attempts;
124
128
  // How many times we tried to simulate signal delivery
@@ -129,25 +133,38 @@ struct cpu_and_wall_time_worker_state {
129
133
  unsigned int signal_handler_enqueued_sample;
130
134
  // How many times the signal handler was called from the wrong thread
131
135
  unsigned int signal_handler_wrong_thread;
132
- // How many times we actually sampled (except GC samples)
133
- unsigned int sampled;
134
- // How many times we skipped a sample because of the dynamic sampling rate mechanism
135
- unsigned int skipped_sample_because_of_dynamic_sampling_rate;
136
+ // How many times we actually tried to interrupt a thread for sampling
137
+ unsigned int interrupt_thread_attempts;
136
138
 
137
- // Stats for the results of calling rb_postponed_job_register_one
138
- // The same function was already waiting to be executed
139
+ // # Stats for the results of calling rb_postponed_job_register_one
140
+ // The same function was already waiting to be executed
139
141
  unsigned int postponed_job_skipped_already_existed;
140
- // The function was added to the queue successfully
142
+ // The function was added to the queue successfully
141
143
  unsigned int postponed_job_success;
142
- // The queue was full
144
+ // The queue was full
143
145
  unsigned int postponed_job_full;
144
- // The function returned an unknown result code
146
+ // The function returned an unknown result code
145
147
  unsigned int postponed_job_unknown_result;
146
148
 
147
- // Min/max/total wall-time spent sampling (except GC samples)
148
- uint64_t sampling_time_ns_min;
149
- uint64_t sampling_time_ns_max;
150
- uint64_t sampling_time_ns_total;
149
+ // # CPU/Walltime sampling stats
150
+ // How many times we actually CPU/wall sampled
151
+ unsigned int cpu_sampled;
152
+ // How many times we skipped a CPU/wall sample because of the dynamic sampling rate mechanism
153
+ unsigned int cpu_skipped;
154
+ // Min/max/total wall-time spent on CPU/wall sampling
155
+ uint64_t cpu_sampling_time_ns_min;
156
+ uint64_t cpu_sampling_time_ns_max;
157
+ uint64_t cpu_sampling_time_ns_total;
158
+
159
+ // # Allocation sampling stats
160
+ // How many times we actually allocation sampled
161
+ uint64_t allocation_sampled;
162
+ // How many times we skipped an allocation sample because of the dynamic sampling rate mechanism
163
+ uint64_t allocation_skipped;
164
+ // Min/max/total wall-time spent on allocation sampling
165
+ uint64_t allocation_sampling_time_ns_min;
166
+ uint64_t allocation_sampling_time_ns_max;
167
+ uint64_t allocation_sampling_time_ns_total;
151
168
  // How many times we saw allocations being done inside a sample
152
169
  unsigned int allocations_during_sample;
153
170
  } stats;
@@ -163,12 +180,14 @@ static VALUE _native_initialize(
163
180
  VALUE no_signals_workaround_enabled,
164
181
  VALUE dynamic_sampling_rate_enabled,
165
182
  VALUE dynamic_sampling_rate_overhead_target_percentage,
166
- VALUE allocation_profiling_enabled
183
+ VALUE allocation_profiling_enabled,
184
+ VALUE skip_idle_samples_for_testing
167
185
  );
168
186
  static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
169
187
  static VALUE _native_sampling_loop(VALUE self, VALUE instance);
170
188
  static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE worker_thread);
171
189
  static VALUE stop(VALUE self_instance, VALUE optional_exception);
190
+ static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception);
172
191
  static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
173
192
  static void *run_sampling_trigger_loop(void *state_ptr);
174
193
  static void interrupt_sampling_trigger_loop(void *state_ptr);
@@ -191,15 +210,18 @@ static VALUE _native_simulate_sample_from_postponed_job(DDTRACE_UNUSED VALUE sel
191
210
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance);
192
211
  static VALUE _native_is_sigprof_blocked_in_current_thread(DDTRACE_UNUSED VALUE self);
193
212
  static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance);
213
+ static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance);
194
214
  void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused);
195
215
  static void grab_gvl_and_sample(void);
196
- static void reset_stats(struct cpu_and_wall_time_worker_state *state);
216
+ static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state);
197
217
  static void sleep_for(uint64_t time_ns);
198
218
  static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self);
199
219
  static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
200
220
  static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state);
201
221
  static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
202
222
  static VALUE rescued_sample_allocation(VALUE tracepoint_data);
223
+ static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error);
224
+ static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg);
203
225
 
204
226
  // Note on sampler global state safety:
205
227
  //
@@ -212,6 +234,11 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data);
212
234
  static VALUE active_sampler_instance = Qnil;
213
235
  static struct cpu_and_wall_time_worker_state *active_sampler_instance_state = NULL;
214
236
 
237
+ // See handle_sampling_signal for details on what this does
238
+ #ifdef NO_POSTPONED_TRIGGER
239
+ static void *gc_finalize_deferred_workaround;
240
+ #endif
241
+
215
242
  // Used to implement CpuAndWallTimeWorker._native_allocation_count . To be able to use cheap thread-local variables
216
243
  // (here with `__thread`, see https://gcc.gnu.org/onlinedocs/gcc/Thread-Local.html), this needs to be global.
217
244
  //
@@ -230,6 +257,8 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
230
257
  if (sample_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
231
258
  rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
232
259
  }
260
+ #else
261
+ gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround();
233
262
  #endif
234
263
 
235
264
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -247,13 +276,16 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
247
276
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
248
277
  rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
249
278
 
250
- rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8);
279
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
251
280
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
252
281
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
253
282
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
254
283
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats", _native_stats, 1);
284
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats_reset_not_thread_safe", _native_stats_reset_not_thread_safe, 1);
255
285
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0);
286
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_is_running?", _native_is_running, 1);
256
287
  rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
288
+ // TODO: Remove `_native_is_running` from `testing_module` once `prof-correctness` has been updated to not need it
257
289
  rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
258
290
  rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
259
291
  rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
@@ -263,6 +295,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
263
295
  rb_define_singleton_method(testing_module, "_native_simulate_sample_from_postponed_job", _native_simulate_sample_from_postponed_job, 0);
264
296
  rb_define_singleton_method(testing_module, "_native_is_sigprof_blocked_in_current_thread", _native_is_sigprof_blocked_in_current_thread, 0);
265
297
  rb_define_singleton_method(testing_module, "_native_with_blocked_sigprof", _native_with_blocked_sigprof, 0);
298
+ rb_define_singleton_method(testing_module, "_native_delayed_error", _native_delayed_error, 2);
266
299
  }
267
300
 
268
301
  // This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_worker_state
@@ -288,11 +321,11 @@ static VALUE _native_new(VALUE klass) {
288
321
  state->no_signals_workaround_enabled = false;
289
322
  state->dynamic_sampling_rate_enabled = true;
290
323
  state->allocation_profiling_enabled = false;
324
+ state->skip_idle_samples_for_testing = false;
291
325
  state->thread_context_collector_instance = Qnil;
292
326
  state->idle_sampling_helper_instance = Qnil;
293
327
  state->owner_thread = Qnil;
294
328
  dynamic_sampling_rate_init(&state->cpu_dynamic_sampling_rate);
295
- discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation");
296
329
  state->gc_tracepoint = Qnil;
297
330
  state->object_allocation_tracepoint = Qnil;
298
331
 
@@ -302,7 +335,15 @@ static VALUE _native_new(VALUE klass) {
302
335
 
303
336
  state->during_sample = false;
304
337
 
305
- reset_stats(state);
338
+ reset_stats_not_thread_safe(state);
339
+
340
+ long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
341
+ if (now == 0) {
342
+ ruby_xfree(state);
343
+ rb_raise(rb_eRuntimeError, ERR_CLOCK_FAIL);
344
+ }
345
+
346
+ discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation", now);
306
347
 
307
348
  return state->self_instance = TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
308
349
  }
@@ -316,13 +357,15 @@ static VALUE _native_initialize(
316
357
  VALUE no_signals_workaround_enabled,
317
358
  VALUE dynamic_sampling_rate_enabled,
318
359
  VALUE dynamic_sampling_rate_overhead_target_percentage,
319
- VALUE allocation_profiling_enabled
360
+ VALUE allocation_profiling_enabled,
361
+ VALUE skip_idle_samples_for_testing
320
362
  ) {
321
363
  ENFORCE_BOOLEAN(gc_profiling_enabled);
322
364
  ENFORCE_BOOLEAN(no_signals_workaround_enabled);
323
365
  ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
324
366
  ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
325
367
  ENFORCE_BOOLEAN(allocation_profiling_enabled);
368
+ ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
326
369
 
327
370
  struct cpu_and_wall_time_worker_state *state;
328
371
  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
@@ -331,6 +374,7 @@ static VALUE _native_initialize(
331
374
  state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
332
375
  state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
333
376
  state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
377
+ state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
334
378
 
335
379
  double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
336
380
  if (!state->allocation_profiling_enabled) {
@@ -339,7 +383,8 @@ static VALUE _native_initialize(
339
383
  // TODO: May be nice to offer customization here? Distribute available "overhead" margin with a bias towards one or the other
340
384
  // sampler.
341
385
  dynamic_sampling_rate_set_overhead_target_percentage(&state->cpu_dynamic_sampling_rate, total_overhead_target_percentage / 2);
342
- discrete_dynamic_sampler_set_overhead_target_percentage(&state->allocation_sampler, total_overhead_target_percentage / 2);
386
+ long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
387
+ discrete_dynamic_sampler_set_overhead_target_percentage(&state->allocation_sampler, total_overhead_target_percentage / 2, now);
343
388
  }
344
389
 
345
390
  state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
@@ -368,6 +413,12 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
368
413
  struct cpu_and_wall_time_worker_state *state;
369
414
  TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
370
415
 
416
+ // If we already got a delayed exception registered even before starting, raise before starting
417
+ if (state->failure_exception != Qnil) {
418
+ disable_tracepoints(state);
419
+ rb_exc_raise(state->failure_exception);
420
+ }
421
+
371
422
  struct cpu_and_wall_time_worker_state *old_state = active_sampler_instance_state;
372
423
  if (old_state != NULL) {
373
424
  if (is_thread_alive(old_state->owner_thread)) {
@@ -393,7 +444,8 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
393
444
 
394
445
  // Reset the dynamic sampling rate state, if any (reminder: the monotonic clock reference may change after a fork)
395
446
  dynamic_sampling_rate_reset(&state->cpu_dynamic_sampling_rate);
396
- discrete_dynamic_sampler_reset(&state->allocation_sampler);
447
+ long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
448
+ discrete_dynamic_sampler_reset(&state->allocation_sampler, now);
397
449
 
398
450
  // This write to a global is thread-safe BECAUSE we're still holding on to the global VM lock at this point
399
451
  active_sampler_instance_state = state;
@@ -455,15 +507,19 @@ static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE
455
507
  return stop(self_instance, /* optional_exception: */ Qnil);
456
508
  }
457
509
 
458
- static VALUE stop(VALUE self_instance, VALUE optional_exception) {
459
- struct cpu_and_wall_time_worker_state *state;
460
- TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
461
-
510
+ static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception) {
462
511
  atomic_store(&state->should_run, false);
463
512
  state->failure_exception = optional_exception;
464
513
 
465
514
  // Disable the tracepoints as soon as possible, so the VM doesn't keep on calling them
466
515
  disable_tracepoints(state);
516
+ }
517
+
518
+ static VALUE stop(VALUE self_instance, VALUE optional_exception) {
519
+ struct cpu_and_wall_time_worker_state *state;
520
+ TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
521
+
522
+ stop_state(state, optional_exception);
467
523
 
468
524
  return Qtrue;
469
525
  }
@@ -503,7 +559,32 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
503
559
  rb_postponed_job_trigger(sample_from_postponed_job_handle);
504
560
  state->stats.postponed_job_success++; // Always succeeds
505
561
  #else
506
- int result = rb_postponed_job_register_one(0, sample_from_postponed_job, NULL);
562
+
563
+ // This is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
564
+ //
565
+ // TL;DR the `rb_postponed_job_register_one` API is not atomic (which is why it got replaced by `rb_postponed_job_trigger`)
566
+ // and in rare cases can cause VM crashes.
567
+ //
568
+ // Specifically, if we're interrupting `rb_postponed_job_flush` (the function that processes postponed jobs), the way
569
+ // that this function reads the jobs is not atomic, and can cause our call to
570
+ // `rb_postponed_job_register(function, arg)` to clobber an existing job that is getting dequeued.
571
+ // Clobbering an existing job is somewhat annoying, but the worst part is that it can happen that we clobber only
572
+ // the existing job's arguments.
573
+ // As surveyed in https://github.com/ruby/ruby/pull/8949#issuecomment-1821441370 clobbering the arguments turns out
574
+ // to not matter in many cases as usually `rb_postponed_job_register` calls in the VM and ecosystem ignore the argument.
575
+ //
576
+ // https://bugs.ruby-lang.org/issues/19991 is the exception: inside Ruby's `gc.c`, when dealing with object
577
+ // finalizers, Ruby calls `gc_finalize_deferred_register` which internally calls
578
+ // `rb_postponed_job_register_one(gc_finalize_deferred, objspace)`.
579
+ // Clobbering this call means that `gc_finalize_deferred` would get called with `NULL`, causing a segmentation fault.
580
+ //
581
+ // Note that this is quite rare: our signal needs to land at exactly the point where the VM has read the function
582
+ // to execute, but has yet to read the arguments. @ivoanjo: I could only reproduce it by manually changing the VM
583
+ // code to simulate this happening.
584
+ //
585
+ // Thus, our workaround is simple: we pass in objspace as our argument, just in case the clobbering happens.
586
+ // In the happy path, we never use this argument so it makes no difference. In the buggy path, we avoid crashing the VM.
587
+ int result = rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
507
588
 
508
589
  // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
509
590
  // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
@@ -545,17 +626,23 @@ static void *run_sampling_trigger_loop(void *state_ptr) {
545
626
  // Note that reading the GVL owner and sending them a signal is a race -- the Ruby VM keeps on executing while
546
627
  // we're doing this, so we may still not signal the correct thread from time to time, but our signal handler
547
628
  // includes a check to see if it got called in the right thread
629
+ state->stats.interrupt_thread_attempts++;
548
630
  pthread_kill(owner.owner, SIGPROF);
549
631
  } else {
550
- // If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
551
- // so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
552
- //
553
- // In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
554
- // Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
555
- // for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
556
- // CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
557
- state->stats.trigger_simulated_signal_delivery_attempts++;
558
- idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
632
+ if (state->skip_idle_samples_for_testing) {
633
+ // This was added to make sure our tests don't accidentally pass due to idle samples. Specifically, if we
634
+ // comment out the thread interruption code inside `if (owner.valid)` above, our tests should not pass!
635
+ } else {
636
+ // If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
637
+ // so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
638
+ //
639
+ // In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
640
+ // Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
641
+ // for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
642
+ // CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
643
+ state->stats.trigger_simulated_signal_delivery_attempts++;
644
+ idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
645
+ }
559
646
  }
560
647
  }
561
648
 
@@ -607,11 +694,11 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
607
694
  long wall_time_ns_before_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
608
695
 
609
696
  if (state->dynamic_sampling_rate_enabled && !dynamic_sampling_rate_should_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_before_sample)) {
610
- state->stats.skipped_sample_because_of_dynamic_sampling_rate++;
697
+ state->stats.cpu_skipped++;
611
698
  return Qnil;
612
699
  }
613
700
 
614
- state->stats.sampled++;
701
+ state->stats.cpu_sampled++;
615
702
 
616
703
  VALUE profiler_overhead_stack_thread = state->owner_thread; // Used to attribute profiler overhead to a different stack
617
704
  thread_context_collector_sample(state->thread_context_collector_instance, wall_time_ns_before_sample, profiler_overhead_stack_thread);
@@ -622,9 +709,9 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
622
709
  // Guard against wall-time going backwards, see https://github.com/DataDog/dd-trace-rb/pull/2336 for discussion.
623
710
  uint64_t sampling_time_ns = delta_ns < 0 ? 0 : delta_ns;
624
711
 
625
- state->stats.sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.sampling_time_ns_min);
626
- state->stats.sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.sampling_time_ns_max);
627
- state->stats.sampling_time_ns_total += sampling_time_ns;
712
+ state->stats.cpu_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_min);
713
+ state->stats.cpu_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_max);
714
+ state->stats.cpu_sampling_time_ns_total += sampling_time_ns;
628
715
 
629
716
  dynamic_sampling_rate_after_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_after_sample, sampling_time_ns);
630
717
 
@@ -666,6 +753,9 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
666
753
  if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
667
754
  if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
668
755
 
756
+ // Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
757
+ rb_funcall(instance, rb_intern("signal_running"), 0);
758
+
669
759
  rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);
670
760
 
671
761
  // If we stopped sampling due to an exception, re-raise it (now in the worker thread)
@@ -823,7 +913,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance)
823
913
  // Disable all tracepoints, so that there are no more attempts to mutate the profile
824
914
  disable_tracepoints(state);
825
915
 
826
- reset_stats(state);
916
+ reset_stats_not_thread_safe(state);
827
917
 
828
918
  // Remove all state from the `Collectors::ThreadState` and connected downstream components
829
919
  rb_funcall(state->thread_context_collector_instance, rb_intern("reset_after_fork"), 0);
@@ -839,11 +929,27 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
839
929
  struct cpu_and_wall_time_worker_state *state;
840
930
  TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
841
931
 
842
- VALUE pretty_sampling_time_ns_min = state->stats.sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.sampling_time_ns_min);
843
- VALUE pretty_sampling_time_ns_max = state->stats.sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.sampling_time_ns_max);
844
- VALUE pretty_sampling_time_ns_total = state->stats.sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.sampling_time_ns_total);
845
- VALUE pretty_sampling_time_ns_avg =
846
- state->stats.sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.sampling_time_ns_total) / state->stats.sampled);
932
+ VALUE pretty_cpu_sampling_time_ns_min = state->stats.cpu_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_min);
933
+ VALUE pretty_cpu_sampling_time_ns_max = state->stats.cpu_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_max);
934
+ VALUE pretty_cpu_sampling_time_ns_total = state->stats.cpu_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_total);
935
+ VALUE pretty_cpu_sampling_time_ns_avg =
936
+ state->stats.cpu_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampling_time_ns_total) / state->stats.cpu_sampled);
937
+
938
+ VALUE pretty_allocation_sampling_time_ns_min = state->stats.allocation_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_min);
939
+ VALUE pretty_allocation_sampling_time_ns_max = state->stats.allocation_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_max);
940
+ VALUE pretty_allocation_sampling_time_ns_total = state->stats.allocation_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_total);
941
+ VALUE pretty_allocation_sampling_time_ns_avg =
942
+ state->stats.allocation_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampling_time_ns_total) / state->stats.allocation_sampled);
943
+
944
+ unsigned long total_cpu_samples_attempted = state->stats.cpu_sampled + state->stats.cpu_skipped;
945
+ VALUE effective_cpu_sample_rate =
946
+ total_cpu_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampled) / total_cpu_samples_attempted);
947
+ unsigned long total_allocation_samples_attempted = state->stats.allocation_sampled + state->stats.allocation_skipped;
948
+ VALUE effective_allocation_sample_rate =
949
+ total_allocation_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampled) / total_allocation_samples_attempted);
950
+
951
+ VALUE allocation_sampler_snapshot = state->allocation_profiling_enabled && state->dynamic_sampling_rate_enabled ?
952
+ discrete_dynamic_sampler_state_snapshot(&state->allocation_sampler) : Qnil;
847
953
 
848
954
  VALUE stats_as_hash = rb_hash_new();
849
955
  VALUE arguments[] = {
@@ -852,22 +958,43 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
852
958
  ID2SYM(rb_intern("simulated_signal_delivery")), /* => */ UINT2NUM(state->stats.simulated_signal_delivery),
853
959
  ID2SYM(rb_intern("signal_handler_enqueued_sample")), /* => */ UINT2NUM(state->stats.signal_handler_enqueued_sample),
854
960
  ID2SYM(rb_intern("signal_handler_wrong_thread")), /* => */ UINT2NUM(state->stats.signal_handler_wrong_thread),
855
- ID2SYM(rb_intern("sampled")), /* => */ UINT2NUM(state->stats.sampled),
856
- ID2SYM(rb_intern("skipped_sample_because_of_dynamic_sampling_rate")), /* => */ UINT2NUM(state->stats.skipped_sample_because_of_dynamic_sampling_rate),
857
961
  ID2SYM(rb_intern("postponed_job_skipped_already_existed")), /* => */ UINT2NUM(state->stats.postponed_job_skipped_already_existed),
858
962
  ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
859
963
  ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
860
964
  ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
861
- ID2SYM(rb_intern("sampling_time_ns_min")), /* => */ pretty_sampling_time_ns_min,
862
- ID2SYM(rb_intern("sampling_time_ns_max")), /* => */ pretty_sampling_time_ns_max,
863
- ID2SYM(rb_intern("sampling_time_ns_total")), /* => */ pretty_sampling_time_ns_total,
864
- ID2SYM(rb_intern("sampling_time_ns_avg")), /* => */ pretty_sampling_time_ns_avg,
865
- ID2SYM(rb_intern("allocations_during_sample")), /* => */ UINT2NUM(state->stats.allocations_during_sample),
965
+ ID2SYM(rb_intern("interrupt_thread_attempts")), /* => */ UINT2NUM(state->stats.interrupt_thread_attempts),
966
+
967
+ // CPU Stats
968
+ ID2SYM(rb_intern("cpu_sampled")), /* => */ UINT2NUM(state->stats.cpu_sampled),
969
+ ID2SYM(rb_intern("cpu_skipped")), /* => */ UINT2NUM(state->stats.cpu_skipped),
970
+ ID2SYM(rb_intern("cpu_effective_sample_rate")), /* => */ effective_cpu_sample_rate,
971
+ ID2SYM(rb_intern("cpu_sampling_time_ns_min")), /* => */ pretty_cpu_sampling_time_ns_min,
972
+ ID2SYM(rb_intern("cpu_sampling_time_ns_max")), /* => */ pretty_cpu_sampling_time_ns_max,
973
+ ID2SYM(rb_intern("cpu_sampling_time_ns_total")), /* => */ pretty_cpu_sampling_time_ns_total,
974
+ ID2SYM(rb_intern("cpu_sampling_time_ns_avg")), /* => */ pretty_cpu_sampling_time_ns_avg,
975
+
976
+ // Allocation stats
977
+ ID2SYM(rb_intern("allocation_sampled")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_sampled) : Qnil,
978
+ ID2SYM(rb_intern("allocation_skipped")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_skipped) : Qnil,
979
+ ID2SYM(rb_intern("allocation_effective_sample_rate")), /* => */ effective_allocation_sample_rate,
980
+ ID2SYM(rb_intern("allocation_sampling_time_ns_min")), /* => */ pretty_allocation_sampling_time_ns_min,
981
+ ID2SYM(rb_intern("allocation_sampling_time_ns_max")), /* => */ pretty_allocation_sampling_time_ns_max,
982
+ ID2SYM(rb_intern("allocation_sampling_time_ns_total")), /* => */ pretty_allocation_sampling_time_ns_total,
983
+ ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ pretty_allocation_sampling_time_ns_avg,
984
+ ID2SYM(rb_intern("allocation_sampler_snapshot")), /* => */ allocation_sampler_snapshot,
985
+ ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
866
986
  };
867
987
  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
868
988
  return stats_as_hash;
869
989
  }
870
990
 
991
+ static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance) {
992
+ struct cpu_and_wall_time_worker_state *state;
993
+ TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
994
+ reset_stats_not_thread_safe(state);
995
+ return Qnil;
996
+ }
997
+
871
998
  void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
872
999
  struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
873
1000
 
@@ -885,9 +1012,17 @@ void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
885
1012
 
886
1013
  static void grab_gvl_and_sample(void) { rb_thread_call_with_gvl(simulate_sampling_signal_delivery, NULL); }
887
1014
 
888
- static void reset_stats(struct cpu_and_wall_time_worker_state *state) {
889
- state->stats = (struct stats) {}; // Resets all stats back to zero
890
- state->stats.sampling_time_ns_min = UINT64_MAX; // Since we always take the min between existing and latest sample
1015
+ static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state) {
1016
+ // NOTE: This is not really thread safe so ongoing sampling operations that are concurrent with a reset can have their stats:
1017
+ // * Lost (writes after stats retrieval but before reset).
1018
+ // * Included in the previous stats window (writes before stats retrieval and reset).
1019
+ // * Included in the following stats window (writes after stats retrieval and reset).
1020
+ // Given the expected infrequency of resetting (~once per 60s profile) and the auxiliary/non-critical nature of these stats
1021
+ // this momentary loss of accuracy is deemed acceptable to keep overhead to a minimum.
1022
+ state->stats = (struct stats) {
1023
+ .cpu_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
1024
+ .allocation_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
1025
+ };
891
1026
  }
892
1027
 
893
1028
  static void sleep_for(uint64_t time_ns) {
@@ -937,8 +1072,16 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
937
1072
  return;
938
1073
  }
939
1074
 
940
- if (state->dynamic_sampling_rate_enabled && !discrete_dynamic_sampler_should_sample(&state->allocation_sampler)) {
941
- return;
1075
+ if (state->dynamic_sampling_rate_enabled) {
1076
+ long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
1077
+ if (now == 0) {
1078
+ delayed_error(state, ERR_CLOCK_FAIL);
1079
+ return;
1080
+ }
1081
+ if (!discrete_dynamic_sampler_should_sample(&state->allocation_sampler, now)) {
1082
+ state->stats.allocation_skipped++;
1083
+ return;
1084
+ }
942
1085
  }
943
1086
 
944
1087
  // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
@@ -950,14 +1093,32 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
950
1093
  // Rescue against any exceptions that happen during sampling
951
1094
  safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
952
1095
 
953
- discrete_dynamic_sampler_after_sample(&state->allocation_sampler);
1096
+ if (state->dynamic_sampling_rate_enabled) {
1097
+ long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
1098
+ if (now == 0) {
1099
+ delayed_error(state, ERR_CLOCK_FAIL);
1100
+ // NOTE: Not short-circuiting here to make sure cleanup happens
1101
+ }
1102
+ uint64_t sampling_time_ns = discrete_dynamic_sampler_after_sample(&state->allocation_sampler, now);
1103
+ // NOTE: To keep things lean when dynamic sampling rate is disabled we skip clock interactions which is
1104
+ // why we're fine with having this inside this conditional.
1105
+ state->stats.allocation_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_min);
1106
+ state->stats.allocation_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_max);
1107
+ state->stats.allocation_sampling_time_ns_total += sampling_time_ns;
1108
+ }
1109
+
1110
+ state->stats.allocation_sampled++;
954
1111
 
955
1112
  state->during_sample = false;
956
1113
  }
957
1114
 
958
1115
  static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) {
959
- rb_tracepoint_disable(state->gc_tracepoint);
960
- rb_tracepoint_disable(state->object_allocation_tracepoint);
1116
+ if (state->gc_tracepoint != Qnil) {
1117
+ rb_tracepoint_disable(state->gc_tracepoint);
1118
+ }
1119
+ if (state->object_allocation_tracepoint != Qnil) {
1120
+ rb_tracepoint_disable(state->object_allocation_tracepoint);
1121
+ }
961
1122
  }
962
1123
 
963
1124
  static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
@@ -994,3 +1155,19 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
994
1155
  // Return a dummy VALUE because we're called from rb_rescue2 which requires it
995
1156
  return Qnil;
996
1157
  }
1158
+
1159
+ static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error) {
1160
+ // If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop.
1161
+ stop_state(state, rb_exc_new_cstr(rb_eRuntimeError, error));
1162
+ }
1163
+
1164
+ static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) {
1165
+ ENFORCE_TYPE(error_msg, T_STRING);
1166
+
1167
+ struct cpu_and_wall_time_worker_state *state;
1168
+ TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
1169
+
1170
+ delayed_error(state, rb_string_value_cstr(&error_msg));
1171
+
1172
+ return Qnil;
1173
+ }