datadog 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +37 -1
  3. data/ext/datadog_profiling_loader/datadog_profiling_loader.c +9 -1
  4. data/ext/datadog_profiling_loader/extconf.rb +10 -22
  5. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +148 -30
  6. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +4 -2
  7. data/ext/datadog_profiling_native_extension/collectors_stack.c +89 -46
  8. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +580 -29
  9. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +9 -1
  10. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +0 -27
  11. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +0 -4
  12. data/ext/datadog_profiling_native_extension/extconf.rb +38 -21
  13. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +50 -0
  14. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +75 -0
  15. data/ext/datadog_profiling_native_extension/heap_recorder.c +20 -6
  16. data/ext/datadog_profiling_native_extension/http_transport.c +38 -6
  17. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +52 -1
  18. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +3 -0
  19. data/ext/datadog_profiling_native_extension/profiling.c +1 -1
  20. data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
  21. data/ext/libdatadog_api/crashtracker.c +20 -18
  22. data/ext/libdatadog_api/datadog_ruby_common.c +0 -27
  23. data/ext/libdatadog_api/datadog_ruby_common.h +0 -4
  24. data/ext/libdatadog_extconf_helpers.rb +1 -1
  25. data/lib/datadog/appsec/assets/waf_rules/recommended.json +2184 -108
  26. data/lib/datadog/appsec/assets/waf_rules/strict.json +1430 -2
  27. data/lib/datadog/appsec/component.rb +29 -8
  28. data/lib/datadog/appsec/configuration/settings.rb +2 -2
  29. data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +1 -0
  30. data/lib/datadog/appsec/contrib/devise/patcher/rememberable_patch.rb +21 -0
  31. data/lib/datadog/appsec/contrib/devise/patcher.rb +12 -2
  32. data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +0 -14
  33. data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +67 -31
  34. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +18 -15
  35. data/lib/datadog/appsec/contrib/graphql/integration.rb +14 -1
  36. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +2 -5
  37. data/lib/datadog/appsec/event.rb +1 -1
  38. data/lib/datadog/appsec/processor/rule_loader.rb +3 -1
  39. data/lib/datadog/appsec/processor/rule_merger.rb +33 -15
  40. data/lib/datadog/appsec/processor.rb +36 -37
  41. data/lib/datadog/appsec/rate_limiter.rb +25 -40
  42. data/lib/datadog/appsec/remote.rb +7 -3
  43. data/lib/datadog/appsec.rb +2 -2
  44. data/lib/datadog/core/configuration/components.rb +4 -3
  45. data/lib/datadog/core/configuration/settings.rb +84 -5
  46. data/lib/datadog/core/crashtracking/component.rb +1 -1
  47. data/lib/datadog/core/environment/execution.rb +5 -5
  48. data/lib/datadog/core/metrics/client.rb +7 -0
  49. data/lib/datadog/core/rate_limiter.rb +183 -0
  50. data/lib/datadog/core/remote/client/capabilities.rb +4 -3
  51. data/lib/datadog/core/remote/component.rb +4 -2
  52. data/lib/datadog/core/remote/negotiation.rb +4 -4
  53. data/lib/datadog/core/remote/tie.rb +2 -0
  54. data/lib/datadog/core/runtime/metrics.rb +1 -1
  55. data/lib/datadog/core/telemetry/component.rb +2 -0
  56. data/lib/datadog/core/telemetry/event.rb +12 -7
  57. data/lib/datadog/core/telemetry/logger.rb +51 -0
  58. data/lib/datadog/core/telemetry/logging.rb +50 -14
  59. data/lib/datadog/core/telemetry/request.rb +13 -1
  60. data/lib/datadog/core/utils/time.rb +12 -0
  61. data/lib/datadog/di/code_tracker.rb +168 -0
  62. data/lib/datadog/di/configuration/settings.rb +163 -0
  63. data/lib/datadog/di/configuration.rb +11 -0
  64. data/lib/datadog/di/error.rb +31 -0
  65. data/lib/datadog/di/extensions.rb +16 -0
  66. data/lib/datadog/di/probe.rb +133 -0
  67. data/lib/datadog/di/probe_builder.rb +41 -0
  68. data/lib/datadog/di/redactor.rb +188 -0
  69. data/lib/datadog/di/serializer.rb +193 -0
  70. data/lib/datadog/di.rb +14 -0
  71. data/lib/datadog/opentelemetry/sdk/propagator.rb +2 -0
  72. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +12 -10
  73. data/lib/datadog/profiling/collectors/info.rb +12 -3
  74. data/lib/datadog/profiling/collectors/thread_context.rb +26 -0
  75. data/lib/datadog/profiling/component.rb +20 -4
  76. data/lib/datadog/profiling/http_transport.rb +6 -1
  77. data/lib/datadog/profiling/scheduler.rb +2 -0
  78. data/lib/datadog/profiling/stack_recorder.rb +3 -0
  79. data/lib/datadog/single_step_instrument.rb +12 -0
  80. data/lib/datadog/tracing/contrib/action_cable/instrumentation.rb +8 -12
  81. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +5 -0
  82. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +78 -0
  83. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/patcher.rb +33 -0
  84. data/lib/datadog/tracing/contrib/action_pack/patcher.rb +2 -0
  85. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +4 -0
  86. data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +3 -1
  87. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +3 -1
  88. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +5 -1
  89. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +5 -0
  90. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +6 -1
  91. data/lib/datadog/tracing/contrib/faraday/middleware.rb +9 -0
  92. data/lib/datadog/tracing/contrib/grape/endpoint.rb +19 -0
  93. data/lib/datadog/tracing/contrib/graphql/patcher.rb +9 -12
  94. data/lib/datadog/tracing/contrib/graphql/trace_patcher.rb +3 -3
  95. data/lib/datadog/tracing/contrib/graphql/tracing_patcher.rb +3 -3
  96. data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +13 -9
  97. data/lib/datadog/tracing/contrib/graphql/unified_trace_patcher.rb +6 -3
  98. data/lib/datadog/tracing/contrib/http/instrumentation.rb +18 -15
  99. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -5
  100. data/lib/datadog/tracing/contrib/httpclient/patcher.rb +1 -14
  101. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +5 -0
  102. data/lib/datadog/tracing/contrib/httprb/patcher.rb +1 -14
  103. data/lib/datadog/tracing/contrib/lograge/patcher.rb +1 -2
  104. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +2 -0
  105. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +13 -6
  106. data/lib/datadog/tracing/contrib/patcher.rb +2 -1
  107. data/lib/datadog/tracing/contrib/presto/patcher.rb +1 -13
  108. data/lib/datadog/tracing/contrib/rack/middlewares.rb +27 -0
  109. data/lib/datadog/tracing/contrib/redis/tags.rb +4 -0
  110. data/lib/datadog/tracing/contrib/sinatra/tracer.rb +4 -0
  111. data/lib/datadog/tracing/contrib/stripe/request.rb +3 -2
  112. data/lib/datadog/tracing/distributed/propagation.rb +7 -0
  113. data/lib/datadog/tracing/metadata/ext.rb +2 -0
  114. data/lib/datadog/tracing/remote.rb +5 -2
  115. data/lib/datadog/tracing/sampling/matcher.rb +6 -1
  116. data/lib/datadog/tracing/sampling/rate_sampler.rb +1 -1
  117. data/lib/datadog/tracing/sampling/rule.rb +2 -0
  118. data/lib/datadog/tracing/sampling/rule_sampler.rb +9 -5
  119. data/lib/datadog/tracing/sampling/span/ext.rb +1 -1
  120. data/lib/datadog/tracing/sampling/span/rule.rb +2 -2
  121. data/lib/datadog/tracing/trace_operation.rb +26 -2
  122. data/lib/datadog/tracing/tracer.rb +14 -12
  123. data/lib/datadog/tracing/transport/http/client.rb +1 -0
  124. data/lib/datadog/tracing/transport/io/client.rb +1 -0
  125. data/lib/datadog/tracing/workers/trace_writer.rb +1 -1
  126. data/lib/datadog/tracing/workers.rb +1 -1
  127. data/lib/datadog/version.rb +1 -1
  128. metadata +25 -8
  129. data/lib/datadog/tracing/sampling/rate_limiter.rb +0 -185
@@ -76,6 +76,11 @@
  #define MISSING_TRACER_CONTEXT_KEY 0
  #define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)

+ // This is used as a placeholder to mark threads that are allowed to be profiled (enabled)
+ // (e.g. to avoid trying to gvl profile threads that are not from the main Ractor)
+ // and for which there's no data yet
+ #define GVL_WAITING_ENABLED_EMPTY RUBY_FIXNUM_MAX
+
  static ID at_active_span_id; // id of :@active_span in Ruby
  static ID at_active_trace_id; // id of :@active_trace in Ruby
  static ID at_id_id; // id of :@id in Ruby
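The placeholder added above is one of four values the per-thread GVL profiling slot can hold over its lifecycle; the other three states appear further down in this diff (in handle_gvl_waiting and thread_context_collector_on_gvl_running_with_threshold). As a minimal standalone sketch of that encoding (using INTPTR_MAX as a stand-in for RUBY_FIXNUM_MAX; not actual gem code):

    #include <stdint.h>
    #include <stdio.h>

    #define GVL_WAITING_ENABLED_EMPTY INTPTR_MAX // stand-in for RUBY_FIXNUM_MAX

    // Mirrors the states documented in this diff: 0 = thread not profiled,
    // ENABLED_EMPTY = profiled but no wait recorded, > 0 = wall-clock ns at which
    // "Waiting for GVL" began, < 0 = the wait just ended (negated start timestamp).
    static const char *describe(intptr_t gvl_waiting_at) {
      if (gvl_waiting_at == 0) return "not profiled";
      if (gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) return "profiled, not waiting";
      return gvl_waiting_at > 0 ? "waiting since this timestamp" : "wait ended, sample pending";
    }

    int main(void) {
      intptr_t examples[] = {0, GVL_WAITING_ENABLED_EMPTY, 1234567890, -1234567890};
      for (int i = 0; i < 4; i++) printf("%s\n", describe(examples[i]));
      return 0;
    }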
@@ -86,6 +91,26 @@ static ID at_otel_values_id; // id of :@otel_values in Ruby
  static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
  static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby

+ // Used to support reading trace identifiers from the opentelemetry Ruby library when the ddtrace gem tracing
+ // integration is NOT in use.
+ static ID at_span_id_id; // id of :@span_id in Ruby
+ static ID at_trace_id_id; // id of :@trace_id in Ruby
+ static ID at_entries_id; // id of :@entries in Ruby
+ static ID at_context_id; // id of :@context in Ruby
+ static ID at_kind_id; // id of :@kind in Ruby
+ static ID at_name_id; // id of :@name in Ruby
+ static ID server_id; // id of :server in Ruby
+ static ID otel_context_storage_id; // id of :__opentelemetry_context_storage__ in Ruby
+
+ // This is used by `thread_context_collector_on_gvl_running`. Because when that method gets called we're not sure if
+ // it's safe to access the state of the thread context collector, we store this setting as a global value. This does
+ // mean this setting is shared among all thread context collectors, and thus it's "last writer wins".
+ // In production this should not be a problem: there should only be one profiler, which is the last one created,
+ // and that'll be the one that last wrote this setting.
+ static uint32_t global_waiting_for_gvl_threshold_ns = MILLIS_AS_NS(10);
+
+ enum otel_context_enabled {otel_context_enabled_false, otel_context_enabled_only, otel_context_enabled_both};
+
  // Contains state for a single ThreadContext instance
  struct thread_context_collector_state {
    // Note: Places in this file that usually need to be changed when this struct is changed are tagged with
@@ -112,6 +137,8 @@ struct thread_context_collector_state {
    bool endpoint_collection_enabled;
    // Used to omit timestamps / timeline events from collected data
    bool timeline_enabled;
+   // Used to control context collection
+   enum otel_context_enabled otel_context_enabled;
    // Used to omit class information from collected allocation data
    bool allocation_type_enabled;
    // Used when calling monotonic_to_system_epoch_ns
@@ -119,6 +146,8 @@ struct thread_context_collector_state {
    // Used to identify the main thread, to give it a fallback name
    VALUE main_thread;
    // Used when extracting trace identifiers from otel spans. Lazily initialized.
+   // Qtrue serves as a marker we've not yet extracted it; when we try to extract it, we set it to an object if
+   // successful and Qnil if not.
    VALUE otel_current_span_key;

    struct stats {
@@ -164,6 +193,12 @@ struct trace_identifiers {
    VALUE trace_endpoint;
  };

+ struct otel_span {
+   VALUE span;
+   VALUE span_id;
+   VALUE trace_id;
+ };
+
  static void thread_context_collector_typed_data_mark(void *state_ptr);
  static void thread_context_collector_typed_data_free(void *state_ptr);
  static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument);
@@ -177,13 +212,15 @@ static VALUE _native_initialize(
    VALUE tracer_context_key,
    VALUE endpoint_collection_enabled,
    VALUE timeline_enabled,
+   VALUE waiting_for_gvl_threshold_ns,
+   VALUE otel_context_enabled,
    VALUE allocation_type_enabled
  );
  static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE profiler_overhead_stack_thread);
  static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
  static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
- static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
- void update_metrics_and_sample(
+ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state);
+ static void update_metrics_and_sample(
    struct thread_context_collector_state *state,
    VALUE thread_being_sampled,
    VALUE stack_from_thread,
@@ -201,7 +238,8 @@ static void trigger_sample_for_thread(
    sample_values values,
    long current_monotonic_wall_time_ns,
    ddog_CharSlice *ruby_vm_type,
-   ddog_CharSlice *class_name
+   ddog_CharSlice *class_name,
+   bool is_gvl_waiting_state
  );
  static VALUE _native_thread_list(VALUE self);
  static struct per_thread_context *get_or_create_context_for(VALUE thread, struct thread_context_collector_state *state);
@@ -237,6 +275,26 @@ static void ddtrace_otel_trace_identifiers_for(
    VALUE otel_values
  );
  static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples);
+ static bool handle_gvl_waiting(
+   struct thread_context_collector_state *state,
+   VALUE thread_being_sampled,
+   VALUE stack_from_thread,
+   struct per_thread_context *thread_context,
+   sampling_buffer* sampling_buffer,
+   long current_cpu_time_ns
+ );
+ static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread);
+ static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread);
+ static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread);
+ static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread);
+ static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns);
+ static void otel_without_ddtrace_trace_identifiers_for(
+   struct thread_context_collector_state *state,
+   VALUE thread,
+   struct trace_identifiers *trace_identifiers_result
+ );
+ static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key);
+ static uint64_t otel_span_id_to_uint(VALUE otel_span_id);

  void collectors_thread_context_init(VALUE profiling_module) {
    VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -254,20 +312,27 @@ void collectors_thread_context_init(VALUE profiling_module) {
    // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
    rb_define_alloc_func(collectors_thread_context_class, _native_new);

-   rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, 7);
+   rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, 9);
    rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1);
    rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
    rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2);
    rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 3);
    rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
    rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
-   rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 1);
+   rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 2);
    rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
    rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
    rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
    rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
    rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
    rb_define_singleton_method(testing_module, "_native_sample_skipped_allocation_samples", _native_sample_skipped_allocation_samples, 2);
+   #ifndef NO_GVL_INSTRUMENTATION
+     rb_define_singleton_method(testing_module, "_native_on_gvl_waiting", _native_on_gvl_waiting, 1);
+     rb_define_singleton_method(testing_module, "_native_gvl_waiting_at_for", _native_gvl_waiting_at_for, 1);
+     rb_define_singleton_method(testing_module, "_native_on_gvl_running", _native_on_gvl_running, 1);
+     rb_define_singleton_method(testing_module, "_native_sample_after_gvl_running", _native_sample_after_gvl_running, 2);
+     rb_define_singleton_method(testing_module, "_native_apply_delta_to_cpu_time_at_previous_sample_ns", _native_apply_delta_to_cpu_time_at_previous_sample_ns, 3);
+   #endif

    at_active_span_id = rb_intern_const("@active_span");
    at_active_trace_id = rb_intern_const("@active_trace");
@@ -278,6 +343,19 @@ void collectors_thread_context_init(VALUE profiling_module) {
    at_otel_values_id = rb_intern_const("@otel_values");
    at_parent_span_id_id = rb_intern_const("@parent_span_id");
    at_datadog_trace_id = rb_intern_const("@datadog_trace");
+   at_span_id_id = rb_intern_const("@span_id");
+   at_trace_id_id = rb_intern_const("@trace_id");
+   at_entries_id = rb_intern_const("@entries");
+   at_context_id = rb_intern_const("@context");
+   at_kind_id = rb_intern_const("@kind");
+   at_name_id = rb_intern_const("@name");
+   server_id = rb_intern_const("server");
+   otel_context_storage_id = rb_intern_const("__opentelemetry_context_storage__");
+
+   #ifndef NO_GVL_INSTRUMENTATION
+     // This will raise if Ruby already ran out of thread-local keys
+     gvl_profiling_init();
+   #endif

    gc_profiling_init();
  }
@@ -357,11 +435,12 @@ static VALUE _native_new(VALUE klass) {
    state->thread_list_buffer = thread_list_buffer;
    state->endpoint_collection_enabled = true;
    state->timeline_enabled = true;
+   state->otel_context_enabled = otel_context_enabled_false;
    state->allocation_type_enabled = true;
    state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
    VALUE main_thread = rb_thread_main();
    state->main_thread = main_thread;
-   state->otel_current_span_key = Qnil;
+   state->otel_current_span_key = Qtrue;
    state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
    state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;

@@ -377,6 +456,7 @@ static VALUE _native_new(VALUE klass) {
    return instance;
  }

+ // TODO: Convert this to use options like CpuAndWallTimeWorker
  static VALUE _native_initialize(
    DDTRACE_UNUSED VALUE _self,
    VALUE collector_instance,
@@ -385,10 +465,13 @@ static VALUE _native_initialize(
    VALUE tracer_context_key,
    VALUE endpoint_collection_enabled,
    VALUE timeline_enabled,
+   VALUE waiting_for_gvl_threshold_ns,
+   VALUE otel_context_enabled,
    VALUE allocation_type_enabled
  ) {
    ENFORCE_BOOLEAN(endpoint_collection_enabled);
    ENFORCE_BOOLEAN(timeline_enabled);
+   ENFORCE_TYPE(waiting_for_gvl_threshold_ns, T_FIXNUM);
    ENFORCE_BOOLEAN(allocation_type_enabled);

    struct thread_context_collector_state *state;
@@ -401,8 +484,19 @@ static VALUE _native_initialize(
    state->recorder_instance = enforce_recorder_instance(recorder_instance);
    state->endpoint_collection_enabled = (endpoint_collection_enabled == Qtrue);
    state->timeline_enabled = (timeline_enabled == Qtrue);
+   if (otel_context_enabled == Qfalse || otel_context_enabled == Qnil) {
+     state->otel_context_enabled = otel_context_enabled_false;
+   } else if (otel_context_enabled == ID2SYM(rb_intern("only"))) {
+     state->otel_context_enabled = otel_context_enabled_only;
+   } else if (otel_context_enabled == ID2SYM(rb_intern("both"))) {
+     state->otel_context_enabled = otel_context_enabled_both;
+   } else {
+     rb_raise(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled);
+   }
    state->allocation_type_enabled = (allocation_type_enabled == Qtrue);

+   global_waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns);
+
    if (RTEST(tracer_context_key)) {
      ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
      // Note about rb_to_id and dynamic symbols: calling `rb_to_id` prevents symbols from ever being garbage collected.
@@ -433,13 +527,22 @@ static VALUE _native_on_gc_start(DDTRACE_UNUSED VALUE self, VALUE collector_inst
  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
  // It SHOULD NOT be used for other purposes.
  static VALUE _native_on_gc_finish(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
-   thread_context_collector_on_gc_finish(collector_instance);
+   (void) !thread_context_collector_on_gc_finish(collector_instance);
    return Qtrue;
  }

  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
  // It SHOULD NOT be used for other purposes.
- static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
+ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state) {
+   ENFORCE_BOOLEAN(reset_monotonic_to_system_state);
+
+   struct thread_context_collector_state *state;
+   TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+   if (reset_monotonic_to_system_state == Qtrue) {
+     state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
+   }
+
    thread_context_collector_sample_after_gc(collector_instance);
    return Qtrue;
  }
@@ -502,7 +605,7 @@ void thread_context_collector_sample(VALUE self_instance, long current_monotonic
    );
  }

- void update_metrics_and_sample(
+ static void update_metrics_and_sample(
    struct thread_context_collector_state *state,
    VALUE thread_being_sampled,
    VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
@@ -511,12 +614,17 @@ void update_metrics_and_sample(
    long current_cpu_time_ns,
    long current_monotonic_wall_time_ns
  ) {
-   long cpu_time_elapsed_ns = update_time_since_previous_sample(
+   bool is_gvl_waiting_state =
+     handle_gvl_waiting(state, thread_being_sampled, stack_from_thread, thread_context, sampling_buffer, current_cpu_time_ns);
+
+   // Don't assign/update cpu during "Waiting for GVL"
+   long cpu_time_elapsed_ns = is_gvl_waiting_state ? 0 : update_time_since_previous_sample(
      &thread_context->cpu_time_at_previous_sample_ns,
      current_cpu_time_ns,
      thread_context->gc_tracking.cpu_time_at_start_ns,
      IS_NOT_WALL_TIME
    );
+
    long wall_time_elapsed_ns = update_time_since_previous_sample(
      &thread_context->wall_time_at_previous_sample_ns,
      current_monotonic_wall_time_ns,
@@ -528,6 +636,21 @@ void update_metrics_and_sample(
      IS_WALL_TIME
    );

+   // A thread enters "Waiting for GVL", well, as the name implies, without the GVL.
+   //
+   // As a consequence, it's possible that a thread enters "Waiting for GVL" in parallel with the current thread working
+   // on sampling, and thus for the `current_monotonic_wall_time_ns` (which is recorded at the start of sampling)
+   // to be < the time at which we started Waiting for GVL.
+   //
+   // All together, this means that when `handle_gvl_waiting` creates an extra sample (see comments on that function for
+   // what the extra sample is), it's possible that there's no more wall-time to be assigned.
+   // Thus, in this case, we don't want to produce a sample representing Waiting for GVL with a wall-time of 0, and
+   // thus we skip creating such a sample.
+   if (is_gvl_waiting_state && wall_time_elapsed_ns == 0) return;
+   // ...you may also wonder: is there any other situation where it makes sense to produce a sample with
+   // wall_time_elapsed_ns == 0? I believe that yes, because the sample still includes a timestamp and a stack, but we
+   // may revisit/change our minds on this in the future.
+
    trigger_sample_for_thread(
      state,
      thread_being_sampled,
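To make the race described in the hunk above concrete: the helper below is a hypothetical simplification of update_time_since_previous_sample (whose real implementation is not part of this diff), assuming it returns the elapsed wall time, clamps negative deltas to zero, and advances the stored timestamp:

    #include <stdio.h>

    // Hypothetical simplification of update_time_since_previous_sample: returns the
    // elapsed time, clamping negative deltas to zero, and advances the stored timestamp.
    static long elapsed_and_advance(long *previous_ns, long current_ns) {
      long elapsed = current_ns - *previous_ns;
      if (elapsed < 0) elapsed = 0;
      *previous_ns = current_ns;
      return elapsed;
    }

    int main(void) {
      long current_monotonic_wall_time_ns = 1000; // recorded when sampling began
      long gvl_waiting_started_wall_time_ns = 1005; // the wait began after that, in parallel
      // handle_gvl_waiting's extra sample advances the stored timestamp to the wait start,
      // so the regular sample has no wall time left to assign:
      long wall_time_at_previous_sample_ns = gvl_waiting_started_wall_time_ns;
      long wall_time_elapsed_ns = elapsed_and_advance(&wall_time_at_previous_sample_ns, current_monotonic_wall_time_ns);
      if (wall_time_elapsed_ns == 0) printf("skip the zero-length Waiting for GVL sample\n");
      return 0;
    }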
@@ -537,7 +660,8 @@ static void trigger_sample_for_thread(
      (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
      current_monotonic_wall_time_ns,
      NULL,
-     NULL
+     NULL,
+     is_gvl_waiting_state
    );
  }

@@ -583,6 +707,7 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
  //
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
  // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
+ __attribute__((warn_unused_result))
  bool thread_context_collector_on_gc_finish(VALUE self_instance) {
    struct thread_context_collector_state *state;
    if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
@@ -718,7 +843,8 @@ static void trigger_sample_for_thread(
    long current_monotonic_wall_time_ns,
    // These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
    ddog_CharSlice *ruby_vm_type,
-   ddog_CharSlice *class_name
+   ddog_CharSlice *class_name,
+   bool is_gvl_waiting_state
  ) {
    int max_label_count =
      1 + // thread id
@@ -759,6 +885,11 @@
    struct trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil};
    trace_identifiers_for(state, thread, &trace_identifiers_result);

+   if (!trace_identifiers_result.valid && state->otel_context_enabled != otel_context_enabled_false) {
+     // If we couldn't get something with ddtrace, let's see if we can get some trace identifiers from opentelemetry directly
+     otel_without_ddtrace_trace_identifiers_for(state, thread, &trace_identifiers_result);
+   }
+
    if (trace_identifiers_result.valid) {
      labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .num = trace_identifiers_result.local_root_span_id};
      labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("span id"), .num = trace_identifiers_result.span_id};
@@ -837,7 +968,12 @@ static void trigger_sample_for_thread(
      sampling_buffer,
      state->recorder_instance,
      values,
-     (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
+     (sample_labels) {
+       .labels = slice_labels,
+       .state_label = state_label,
+       .end_timestamp_ns = end_timestamp_ns,
+       .is_gvl_waiting_state = is_gvl_waiting_state,
+     }
    );
  }

@@ -887,9 +1023,9 @@ static struct per_thread_context *get_context_for(VALUE thread, struct thread_co
  // to either run Ruby code during sampling (not great), or otherwise use some of the VM private APIs to detect this.
  //
  static bool is_logging_gem_monkey_patch(VALUE invoke_file_location) {
-   int logging_gem_path_len = strlen(LOGGING_GEM_PATH);
+   unsigned long logging_gem_path_len = strlen(LOGGING_GEM_PATH);
    char *invoke_file = StringValueCStr(invoke_file_location);
-   int invoke_file_len = strlen(invoke_file);
+   unsigned long invoke_file_len = strlen(invoke_file);

    if (invoke_file_len < logging_gem_path_len) return false;

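For context on the int to unsigned long change above: strlen returns size_t, so storing its result in an int both narrows the value and makes comparisons mix signed and unsigned operands, which compilers flag with -Wsign-compare. A tiny standalone illustration (hypothetical strings, not gem code):

    #include <stdio.h>
    #include <string.h>

    int main(void) {
      const char *invoke_file = "/gems/logging-2.3.1/lib/logging.rb"; // hypothetical path
      // strlen returns size_t; keeping both lengths unsigned means comparisons like
      // invoke_file_len < logging_gem_path_len never mix signed and unsigned operands.
      unsigned long invoke_file_len = strlen(invoke_file);
      unsigned long logging_gem_path_len = strlen("/gems/logging-");
      printf("%d\n", invoke_file_len < logging_gem_path_len); // prints 0
      return 0;
    }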
@@ -937,6 +1073,20 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_c
    // These will only be used during a GC operation
    thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
    thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+
+   #ifndef NO_GVL_INSTRUMENTATION
+     // We use this special location to store data that can be accessed without any
+     // kind of synchronization (e.g. by threads without the GVL).
+     //
+     // We set this marker here for two purposes:
+     // * To make sure there's no stale data from a previous execution of the profiler.
+     // * To mark threads that are actually being profiled
+     //
+     // (Setting this is potentially a race, but what we want is to avoid _stale_ data, so
+     // if this gets set concurrently with context initialization, then such a value will belong
+     // to the current profiler instance, so that's OK)
+     gvl_profiling_state_thread_object_set(thread, GVL_WAITING_ENABLED_EMPTY);
+   #endif
  }

  static void free_context(struct per_thread_context* thread_context) {
@@ -960,6 +1110,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
    rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state)));
    rb_str_concat(result, rb_sprintf(" endpoint_collection_enabled=%"PRIsVALUE, state->endpoint_collection_enabled ? Qtrue : Qfalse));
    rb_str_concat(result, rb_sprintf(" timeline_enabled=%"PRIsVALUE, state->timeline_enabled ? Qtrue : Qfalse));
+   rb_str_concat(result, rb_sprintf(" otel_context_enabled=%d", state->otel_context_enabled));
    rb_str_concat(result, rb_sprintf(" allocation_type_enabled=%"PRIsVALUE, state->allocation_type_enabled ? Qtrue : Qfalse));
    rb_str_concat(result, rb_sprintf(
      " time_converter_state={.system_epoch_ns_reference=%ld, .delta_to_epoch_ns=%ld}",
@@ -969,6 +1120,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
    rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
    rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
    rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
+   rb_str_concat(result, rb_sprintf(" global_waiting_for_gvl_threshold_ns=%u", global_waiting_for_gvl_threshold_ns));

    return result;
  }
@@ -996,6 +1148,10 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value

      ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
      ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
+
+     #ifndef NO_GVL_INSTRUMENTATION
+       ID2SYM(rb_intern("gvl_waiting_at")), /* => */ LONG2NUM(gvl_profiling_state_thread_object_get(thread)),
+     #endif
    };
    for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);

@@ -1146,6 +1302,7 @@ static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_ins

  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
  static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
+   if (state->otel_context_enabled == otel_context_enabled_only) return;
    if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;

    VALUE current_context = rb_thread_local_aref(thread, state->tracer_context_key);
@@ -1200,7 +1357,7 @@ static bool should_collect_resource(VALUE root_span) {
    if (root_span_type == Qnil) return false;
    ENFORCE_TYPE(root_span_type, T_STRING);

-   int root_span_type_length = RSTRING_LEN(root_span_type);
+   long root_span_type_length = RSTRING_LEN(root_span_type);
    const char *root_span_type_value = StringValuePtr(root_span_type);

    bool is_web_request =
@@ -1223,6 +1380,9 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector
    struct thread_context_collector_state *state;
    TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

+   // Release all context memory before clearing the existing context
+   st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
+
    st_clear(state->hash_map_per_thread_context);

    state->stats = (struct stats) {}; // Resets all stats back to zero
@@ -1326,7 +1486,8 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
      (sample_values) {.alloc_samples = sample_weight, .alloc_samples_unscaled = 1, .heap_sample = true},
      INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
      &ruby_vm_type,
-     optional_class_name
+     optional_class_name,
+     false
    );
  }

@@ -1372,25 +1533,29 @@ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
    }
  }

+ // Used to access OpenTelemetry::Trace.const_get(:CURRENT_SPAN_KEY). Will raise exceptions if it fails.
+ static VALUE read_otel_current_span_key_const(DDTRACE_UNUSED VALUE _unused) {
+   VALUE opentelemetry_module = rb_const_get(rb_cObject, rb_intern("OpenTelemetry"));
+   ENFORCE_TYPE(opentelemetry_module, T_MODULE);
+   VALUE trace_module = rb_const_get(opentelemetry_module, rb_intern("Trace"));
+   ENFORCE_TYPE(trace_module, T_MODULE);
+   return rb_const_get(trace_module, rb_intern("CURRENT_SPAN_KEY"));
+ }
+
  static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
-   if (state->otel_current_span_key == Qnil) {
-     VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog"));
-     VALUE opentelemetry_module = rb_const_get(datadog_module, rb_intern("OpenTelemetry"));
-     VALUE api_module = rb_const_get(opentelemetry_module, rb_intern("API"));
-     VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
-     VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
-
-     if (current_span_key == Qnil) {
-       rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
-     }
+   if (state->otel_current_span_key == Qtrue) { // Qtrue means we haven't tried to extract it yet
+     // If this fails, we want to fail gracefully, rather than raise an exception (e.g. if the opentelemetry gem
+     // gets refactored, we should not fall on our face)
+     VALUE span_key = rb_protect(read_otel_current_span_key_const, Qnil, NULL);

-     state->otel_current_span_key = current_span_key;
+     // Note that this gets set to Qnil if we failed to extract the correct value, and thus we won't try to extract it again
+     state->otel_current_span_key = span_key;
    }

    return state->otel_current_span_key;
  }

- // This method gets used when ddtrace is being used indirectly via the otel APIs. Information gets stored slightly
+ // This method gets used when ddtrace is being used indirectly via the opentelemetry APIs. Information gets stored slightly
  // differently, and this codepath handles it.
  static void ddtrace_otel_trace_identifiers_for(
    struct thread_context_collector_state *state,
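The rb_protect pattern introduced in the hunk above (attempt a constant lookup, swallow any exception, cache the result) can be exercised outside the gem with an embedded Ruby VM. A minimal sketch, assuming it is compiled and linked against the Ruby development headers and library; DefinitelyNotDefined stands in for a refactored-away OpenTelemetry::Trace::CURRENT_SPAN_KEY:

    #include <ruby.h>
    #include <stdio.h>

    // Mirrors read_otel_current_span_key_const: any exception raised inside this
    // function (here, a NameError) unwinds only as far as the enclosing rb_protect.
    static VALUE lookup_missing_const(VALUE _unused) {
      (void) _unused;
      return rb_const_get(rb_cObject, rb_intern("DefinitelyNotDefined"));
    }

    int main(void) {
      ruby_init();
      int state = 0;
      VALUE result = rb_protect(lookup_missing_const, Qnil, &state);
      if (state != 0) {
        rb_set_errinfo(Qnil); // discard the pending exception instead of propagating it
        result = Qnil;        // cache the failure, like get_otel_current_span_key does
      }
      printf("lookup %s\n", result == Qnil ? "failed gracefully" : "succeeded");
      return ruby_cleanup(0);
    }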
@@ -1410,6 +1575,7 @@ static void ddtrace_otel_trace_identifiers_for(
    if (resolved_numeric_span_id == Qnil) return;

    VALUE otel_current_span_key = get_otel_current_span_key(state);
+   if (otel_current_span_key == Qnil) return;
    VALUE current_trace = *active_trace;

    // ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
@@ -1462,3 +1628,388 @@ static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self
    thread_context_collector_sample_skipped_allocation_samples(collector_instance, NUM2UINT(skipped_samples));
    return Qtrue;
  }
+
+ // This method differs from trace_identifiers_for/ddtrace_otel_trace_identifiers_for to support the situation where
+ // the opentelemetry ruby library is being used for tracing AND the ddtrace tracing bits are not involved at all.
+ //
+ // Thus, in this case, we're directly reading from the opentelemetry stuff, which is different to how ddtrace tracing
+ // does it.
+ //
+ // This is somewhat brittle: we're coupling on internal details of the opentelemetry gem to get what we need. In the
+ // future maybe the otel ruby folks would be open to having a nice public way of getting this data that suits the
+ // usecase of profilers.
+ // Until then, the strategy below is to be extremely defensive, and if anything is out of place, we immediately return
+ // and give up on getting trace data from opentelemetry. (Thus, worst case would be -- you upgrade opentelemetry and
+ // profiling features relying on reading this data stop working, but you'll still get profiles and the app will be
+ // otherwise undisturbed).
+ //
+ // Specifically, the way this works is:
+ // 1. The latest entry in the opentelemetry context storage represents the current span (if any). We take the span id
+ //    and trace id from this span.
+ // 2. To find the local root span id, we walk the context storage backwards from the current span, and find the earliest
+ //    entry in the context storage that has the same trace id as the current span; we use the found span as the local
+ //    root span id.
+ //    This matches the semantics of how ddtrace tracing creates a TraceOperation and assigns a local root span to it.
+ static void otel_without_ddtrace_trace_identifiers_for(
+   struct thread_context_collector_state *state,
+   VALUE thread,
+   struct trace_identifiers *trace_identifiers_result
+ ) {
+   VALUE context_storage = rb_thread_local_aref(thread, otel_context_storage_id /* __opentelemetry_context_storage__ */);
+
+   // If it exists, context_storage is expected to be an Array[OpenTelemetry::Context]
+   if (context_storage == Qnil || !RB_TYPE_P(context_storage, T_ARRAY)) return;
+
+   VALUE otel_current_span_key = get_otel_current_span_key(state);
+   if (otel_current_span_key == Qnil) return;
+
+   int active_context_index = RARRAY_LEN(context_storage) - 1;
+   if (active_context_index < 0) return;
+
+   struct otel_span active_span = otel_span_from(rb_ary_entry(context_storage, active_context_index), otel_current_span_key);
+   if (active_span.span == Qnil) return;
+
+   struct otel_span local_root_span = active_span;
+
+   // Now find the oldest span starting from the active span that still has the same trace id as the active span
+   for (int i = active_context_index - 1; i >= 0; i--) {
+     struct otel_span checking_span = otel_span_from(rb_ary_entry(context_storage, i), otel_current_span_key);
+     if (checking_span.span == Qnil) return;
+
+     if (rb_str_equal(active_span.trace_id, checking_span.trace_id) == Qfalse) break;
+
+     local_root_span = checking_span;
+   }
+
+   // Convert the span ids into uint64_t to match what the Datadog tracer does
+   trace_identifiers_result->span_id = otel_span_id_to_uint(active_span.span_id);
+   trace_identifiers_result->local_root_span_id = otel_span_id_to_uint(local_root_span.span_id);
+
+   if (trace_identifiers_result->span_id == 0 || trace_identifiers_result->local_root_span_id == 0) return;
+
+   trace_identifiers_result->valid = true;
+
+   if (!state->endpoint_collection_enabled) return;
+
+   VALUE root_span_type = rb_ivar_get(local_root_span.span, at_kind_id /* @kind */);
+   // We filter out spans that don't have `kind: :server`
+   if (root_span_type == Qnil || !RB_TYPE_P(root_span_type, T_SYMBOL) || SYM2ID(root_span_type) != server_id) return;
+
+   VALUE trace_resource = rb_ivar_get(local_root_span.span, at_name_id /* @name */);
+   if (!RB_TYPE_P(trace_resource, T_STRING)) return;
+
+   trace_identifiers_result->trace_endpoint = trace_resource;
+ }
+
+ static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key) {
+   struct otel_span failed = {.span = Qnil, .span_id = Qnil, .trace_id = Qnil};
+
+   if (otel_context == Qnil) return failed;
+
+   VALUE context_entries = rb_ivar_get(otel_context, at_entries_id /* @entries */);
+   if (context_entries == Qnil || !RB_TYPE_P(context_entries, T_HASH)) return failed;
+
+   // If it exists, context_entries is expected to be a Hash[OpenTelemetry::Context::Key, OpenTelemetry::Trace::Span]
+   VALUE span = rb_hash_lookup(context_entries, otel_current_span_key);
+   if (span == Qnil) return failed;
+
+   // If it exists, span_context is expected to be a OpenTelemetry::Trace::SpanContext (don't confuse it with OpenTelemetry::Context)
+   VALUE span_context = rb_ivar_get(span, at_context_id /* @context */);
+   if (span_context == Qnil) return failed;
+
+   VALUE span_id = rb_ivar_get(span_context, at_span_id_id /* @span_id */);
+   VALUE trace_id = rb_ivar_get(span_context, at_trace_id_id /* @trace_id */);
+   if (span_id == Qnil || trace_id == Qnil || !RB_TYPE_P(span_id, T_STRING) || !RB_TYPE_P(trace_id, T_STRING)) return failed;
+
+   return (struct otel_span) {.span = span, .span_id = span_id, .trace_id = trace_id};
+ }
+
+ // Otel span ids are represented as a big-endian 8-byte string
+ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
+   if (!RB_TYPE_P(otel_span_id, T_STRING) || RSTRING_LEN(otel_span_id) != 8) { return 0; }
+
+   unsigned char *span_bytes = (unsigned char*) StringValuePtr(otel_span_id);
+
+   return \
+     ((uint64_t)span_bytes[0] << 56) |
+     ((uint64_t)span_bytes[1] << 48) |
+     ((uint64_t)span_bytes[2] << 40) |
+     ((uint64_t)span_bytes[3] << 32) |
+     ((uint64_t)span_bytes[4] << 24) |
+     ((uint64_t)span_bytes[5] << 16) |
+     ((uint64_t)span_bytes[6] << 8) |
+     ((uint64_t)span_bytes[7]);
+ }
+
+ #ifndef NO_GVL_INSTRUMENTATION
+   // This function can get called from outside the GVL and even on non-main Ractors
+   void thread_context_collector_on_gvl_waiting(gvl_profiling_thread thread) {
+     // Because this function gets called from a thread that is NOT holding the GVL, we avoid touching the
+     // per-thread context directly.
+     //
+     // Instead, we ask Ruby to hold the data we need in Ruby's own special per-thread context area
+     // that's thread-safe and built for this kind of use
+     //
+     // Also, this function can get called on the non-main Ractor. We deal with this by checking if the value in the context
+     // is non-zero, since only `initialize_context` ever sets the value from 0 to non-zero for threads it sees.
+     intptr_t thread_being_profiled = gvl_profiling_state_get(thread);
+     if (!thread_being_profiled) return;
+
+     long current_monotonic_wall_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+     if (current_monotonic_wall_time_ns <= 0 || current_monotonic_wall_time_ns > GVL_WAITING_ENABLED_EMPTY) return;
+
+     gvl_profiling_state_set(thread, current_monotonic_wall_time_ns);
+   }
+
+   // This function can get called from outside the GVL and even on non-main Ractors
+   __attribute__((warn_unused_result))
+   bool thread_context_collector_on_gvl_running_with_threshold(gvl_profiling_thread thread, uint32_t waiting_for_gvl_threshold_ns) {
+     intptr_t gvl_waiting_at = gvl_profiling_state_get(thread);
+
+     // Thread was not being profiled / not waiting on gvl
+     if (gvl_waiting_at == 0 || gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) return false;
+
+     // @ivoanjo: I'm not sure if this can happen -- It means we should've sampled already but haven't gotten the chance yet?
+     if (gvl_waiting_at < 0) return true;
+
+     long waiting_for_gvl_duration_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - gvl_waiting_at;
+
+     bool should_sample = waiting_for_gvl_duration_ns >= waiting_for_gvl_threshold_ns;
+
+     if (should_sample) {
+       // We flip the gvl_waiting_at to negative to mark that the thread is now running and no longer waiting
+       intptr_t gvl_waiting_at_is_now_running = -gvl_waiting_at;
+
+       gvl_profiling_state_set(thread, gvl_waiting_at_is_now_running);
+     } else {
+       // We decided not to sample. Let's mark the thread back to the initial "enabled but empty" state
+       gvl_profiling_state_set(thread, GVL_WAITING_ENABLED_EMPTY);
+     }
+
+     return should_sample;
+   }
+
+   __attribute__((warn_unused_result))
+   bool thread_context_collector_on_gvl_running(gvl_profiling_thread thread) {
+     return thread_context_collector_on_gvl_running_with_threshold(thread, global_waiting_for_gvl_threshold_ns);
+   }
+
+   // Why does this method need to exist?
+   //
+   // You may be surprised to see that if we never call this function (from cpu_and_wall_time_worker), Waiting for GVL
+   // samples will still show up.
+   // This is because regular cpu/wall-time samples also use `update_metrics_and_sample` which will do the right thing
+   // and push "Waiting for GVL" samples as needed.
+   //
+   // The reason this method needs to exist and be called very shortly after thread_context_collector_on_gvl_running
+   // returning true is to ensure accuracy of both the timing and stack for the Waiting for GVL sample.
+   //
+   // Timing:
+   // Because we currently only record the timestamp when the Waiting for GVL started and not when the Waiting for GVL ended,
+   // we rely on pushing a sample as soon as possible when the Waiting for GVL ends so that the timestamp of the sample
+   // actually matches when we stopped waiting.
+   //
+   // Stack:
+   // If the thread starts working without the end of the Waiting for GVL sample, then by the time the thread is sampled
+   // via the regular cpu/wall-time samples mechanism, the stack can be inaccurate (e.g. does not correctly pinpoint
+   // where the waiting happened).
+   //
+   // Arguably, the last sample after Waiting for GVL ended (when gvl_waiting_at < 0) should always come from this method
+   // and not a regular cpu/wall-time sample BUT since all of these things are happening in parallel/concurrently I suspect
+   // it's possible for a regular sample to kick in just before this one.
+   //
+   // ---
+   //
+   // NOTE: In normal use, current_thread is expected to be == rb_thread_current(); the `current_thread` parameter only
+   // exists to enable testing.
+   VALUE thread_context_collector_sample_after_gvl_running_with_thread(VALUE self_instance, VALUE current_thread) {
+     struct thread_context_collector_state *state;
+     TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+     if (!state->timeline_enabled) rb_raise(rb_eRuntimeError, "GVL profiling requires timeline to be enabled");
+
+     intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread);
+
+     if (gvl_waiting_at >= 0) {
+       // @ivoanjo: I'm not sure if this can ever happen. This means that we're not on the same thread
+       // that ran `thread_context_collector_on_gvl_running` and made the decision to sample OR a regular sample was
+       // triggered ahead of us.
+       // We do nothing in this case.
+       return Qfalse;
+     }
+
+     struct per_thread_context *thread_context = get_or_create_context_for(current_thread, state);
+
+     // We don't actually account for cpu-time during Waiting for GVL. BUT, we may choose to push an
+     // extra sample to represent the period prior to Waiting for GVL. To support that, we retrieve the current
+     // cpu-time of the thread and let `update_metrics_and_sample` decide what to do with it.
+     long cpu_time_for_thread = cpu_time_now_ns(thread_context);
+
+     // TODO: Should we update the dynamic sampling rate overhead tracking with this sample as well?
+
+     update_metrics_and_sample(
+       state,
+       /* thread_being_sampled: */ current_thread,
+       /* stack_from_thread: */ current_thread,
+       thread_context,
+       thread_context->sampling_buffer,
+       cpu_time_for_thread,
+       monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
+     );
+
+     return Qtrue; // To allow this to be called from rb_rescue2
+   }
+
+   VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance) {
+     return thread_context_collector_sample_after_gvl_running_with_thread(self_instance, rb_thread_current());
+   }
+
+   // This method is intended to be called from update_metrics_and_sample. It exists to handle extra sampling steps we
+   // need to take when sampling cpu/wall-time for a thread that's in the "Waiting for GVL" state.
+   __attribute__((warn_unused_result))
+   static bool handle_gvl_waiting(
+     struct thread_context_collector_state *state,
+     VALUE thread_being_sampled,
+     VALUE stack_from_thread,
+     struct per_thread_context *thread_context,
+     sampling_buffer* sampling_buffer,
+     long current_cpu_time_ns
+   ) {
+     intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(thread_being_sampled);
+
+     bool is_gvl_waiting_state = gvl_waiting_at != 0 && gvl_waiting_at != GVL_WAITING_ENABLED_EMPTY;
+
+     if (!is_gvl_waiting_state) return false;
+
+     // We can be in one of 2 situations here:
+     //
+     // 1. The current sample is the first one after we entered the "Waiting for GVL" state
+     //    (wall_time_at_previous_sample_ns < abs(gvl_waiting_at))
+     //
+     //    time ─────►
+     //    ...──────────────┬───────────────────...
+     //    Other state      │ Waiting for GVL
+     //    ...──────────────┴───────────────────...
+     //                     ▲                   ▲
+     //                     └─ Previous sample  └─ Regular sample (caller)
+     //
+     //    In this case, we'll want to push two samples: a) one for the current time (handled by the caller), b) an extra sample
+     //    to represent the remaining cpu/wall time before the "Waiting for GVL" started:
+     //
+     //    time ─────►
+     //    ...──────────────┬───────────────────...
+     //    Other state      │ Waiting for GVL
+     //    ...──────────────┴───────────────────...
+     //     ▲               ▲                   ▲
+     //     └─ Prev...      └─ Extra sample     └─ Regular sample (caller)
+     //
+     // 2. The current sample is the n-th one after we entered the "Waiting for GVL" state
+     //    (wall_time_at_previous_sample_ns > abs(gvl_waiting_at))
+     //
+     //    time ─────►
+     //    ...──────────────┬───────────────────────────────────────────────...
+     //    Other state      │ Waiting for GVL
+     //    ...──────────────┴───────────────────────────────────────────────...
+     //     ▲               ▲                                               ▲
+     //     └─ Previous sample └─ Previous sample                           └─ Regular sample (caller)
+     //
+     //    In this case, we just report back to the caller that the thread is in the "Waiting for GVL" state.
+     //
+     // ---
+     //
+     // Overall, gvl_waiting_at will be > 0 if still in the "Waiting for GVL" state and < 0 if we actually reached the end of
+     // the wait.
+     //
+     // It doesn't really matter if the thread is still waiting or just reached the end of the wait: each sample represents
+     // a snapshot at time ending now, so if the state finished, it just means the next sample will be a regular one.
+
+     if (gvl_waiting_at < 0) {
+       // Negative means the waiting for GVL just ended, so we clear the state, so next samples no longer represent waiting
+       gvl_profiling_state_thread_object_set(thread_being_sampled, GVL_WAITING_ENABLED_EMPTY);
+     }
+
+     long gvl_waiting_started_wall_time_ns = labs(gvl_waiting_at);
+
+     if (thread_context->wall_time_at_previous_sample_ns < gvl_waiting_started_wall_time_ns) { // situation 1 above
+       long cpu_time_elapsed_ns = update_time_since_previous_sample(
+         &thread_context->cpu_time_at_previous_sample_ns,
+         current_cpu_time_ns,
+         thread_context->gc_tracking.cpu_time_at_start_ns,
+         IS_NOT_WALL_TIME
+       );
+
+       long duration_until_start_of_gvl_waiting_ns = update_time_since_previous_sample(
+         &thread_context->wall_time_at_previous_sample_ns,
+         gvl_waiting_started_wall_time_ns,
+         INVALID_TIME,
+         IS_WALL_TIME
+       );
+
+       // Push extra sample
+       trigger_sample_for_thread(
+         state,
+         thread_being_sampled,
+         stack_from_thread,
+         thread_context,
+         sampling_buffer,
+         (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = duration_until_start_of_gvl_waiting_ns},
+         gvl_waiting_started_wall_time_ns,
+         NULL,
+         NULL,
+         false // This is the extra sample before the wait began; only the next sample will be in the gvl waiting state
+       );
+     }
+
+     return true;
+   }
+
+   static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread) {
+     ENFORCE_THREAD(thread);
+
+     thread_context_collector_on_gvl_waiting(thread_from_thread_object(thread));
+     return Qnil;
+   }
+
+   static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread) {
+     ENFORCE_THREAD(thread);
+
+     intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(thread);
+     return LONG2NUM(gvl_waiting_at);
+   }
+
+   static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread) {
+     ENFORCE_THREAD(thread);
+
+     return thread_context_collector_on_gvl_running(thread_from_thread_object(thread)) ? Qtrue : Qfalse;
+   }
+
+   static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread) {
+     ENFORCE_THREAD(thread);
+
+     return thread_context_collector_sample_after_gvl_running_with_thread(collector_instance, thread);
+   }
+
+   static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns) {
+     ENFORCE_THREAD(thread);
+
+     struct thread_context_collector_state *state;
+     TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+     struct per_thread_context *thread_context = get_context_for(thread, state);
+     if (thread_context == NULL) rb_raise(rb_eArgError, "Unexpected: This method cannot be used unless the per-thread context for the thread already exists");
+
+     thread_context->cpu_time_at_previous_sample_ns += NUM2LONG(delta_ns);
+
+     return Qtrue;
+   }
+
+ #else
+   static bool handle_gvl_waiting(
+     DDTRACE_UNUSED struct thread_context_collector_state *state,
+     DDTRACE_UNUSED VALUE thread_being_sampled,
+     DDTRACE_UNUSED VALUE stack_from_thread,
+     DDTRACE_UNUSED struct per_thread_context *thread_context,
+     DDTRACE_UNUSED sampling_buffer* sampling_buffer,
+     DDTRACE_UNUSED long current_cpu_time_ns
+   ) { return false; }
+ #endif // NO_GVL_INSTRUMENTATION
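For reference, the shift-and-or chain in otel_span_id_to_uint above is plain big-endian decoding of the 8 span id bytes. A standalone equivalent (hypothetical bytes, not gem code):

    #include <stdint.h>
    #include <stdio.h>

    // Big-endian decode of an 8-byte OpenTelemetry span id, equivalent to the
    // shift-and-or chain in otel_span_id_to_uint.
    static uint64_t span_id_bytes_to_uint(const unsigned char bytes[8]) {
      uint64_t result = 0;
      for (int i = 0; i < 8; i++) result = (result << 8) | bytes[i];
      return result;
    }

    int main(void) {
      const unsigned char example[8] = {0, 0, 0, 0, 0, 0, 0x01, 0x02};
      printf("%llu\n", (unsigned long long) span_id_bytes_to_uint(example)); // prints 258
      return 0;
    }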