datadog 2.2.0 → 2.4.0

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (196)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +87 -2
  3. data/ext/datadog_profiling_loader/datadog_profiling_loader.c +9 -1
  4. data/ext/datadog_profiling_loader/extconf.rb +14 -26
  5. data/ext/datadog_profiling_native_extension/clock_id.h +1 -0
  6. data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +1 -2
  7. data/ext/datadog_profiling_native_extension/clock_id_noop.c +1 -2
  8. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +257 -69
  9. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +53 -28
  10. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +34 -4
  11. data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +4 -0
  12. data/ext/datadog_profiling_native_extension/collectors_stack.c +136 -81
  13. data/ext/datadog_profiling_native_extension/collectors_stack.h +2 -2
  14. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +661 -48
  15. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +10 -1
  16. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +83 -0
  17. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +53 -0
  18. data/ext/datadog_profiling_native_extension/extconf.rb +91 -69
  19. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +50 -0
  20. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +75 -0
  21. data/ext/datadog_profiling_native_extension/heap_recorder.c +54 -12
  22. data/ext/datadog_profiling_native_extension/heap_recorder.h +3 -1
  23. data/ext/datadog_profiling_native_extension/helpers.h +6 -17
  24. data/ext/datadog_profiling_native_extension/http_transport.c +41 -9
  25. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +0 -86
  26. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +2 -23
  27. data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +61 -172
  28. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +116 -139
  29. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +20 -11
  30. data/ext/datadog_profiling_native_extension/profiling.c +1 -3
  31. data/ext/datadog_profiling_native_extension/ruby_helpers.c +0 -33
  32. data/ext/datadog_profiling_native_extension/ruby_helpers.h +1 -26
  33. data/ext/datadog_profiling_native_extension/setup_signal_handler.h +1 -0
  34. data/ext/datadog_profiling_native_extension/stack_recorder.c +14 -2
  35. data/ext/datadog_profiling_native_extension/stack_recorder.h +2 -0
  36. data/ext/datadog_profiling_native_extension/time_helpers.c +0 -15
  37. data/ext/datadog_profiling_native_extension/time_helpers.h +36 -6
  38. data/ext/{datadog_profiling_native_extension → libdatadog_api}/crashtracker.c +37 -22
  39. data/ext/libdatadog_api/datadog_ruby_common.c +83 -0
  40. data/ext/libdatadog_api/datadog_ruby_common.h +53 -0
  41. data/ext/libdatadog_api/extconf.rb +108 -0
  42. data/ext/libdatadog_api/macos_development.md +26 -0
  43. data/ext/libdatadog_extconf_helpers.rb +130 -0
  44. data/lib/datadog/appsec/assets/waf_rules/recommended.json +2184 -108
  45. data/lib/datadog/appsec/assets/waf_rules/strict.json +1430 -2
  46. data/lib/datadog/appsec/component.rb +29 -8
  47. data/lib/datadog/appsec/configuration/settings.rb +2 -2
  48. data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +1 -0
  49. data/lib/datadog/appsec/contrib/devise/patcher/rememberable_patch.rb +21 -0
  50. data/lib/datadog/appsec/contrib/devise/patcher.rb +12 -2
  51. data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +35 -0
  52. data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +109 -0
  53. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +71 -0
  54. data/lib/datadog/appsec/contrib/graphql/integration.rb +54 -0
  55. data/lib/datadog/appsec/contrib/graphql/patcher.rb +37 -0
  56. data/lib/datadog/appsec/contrib/graphql/reactive/multiplex.rb +59 -0
  57. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +3 -6
  58. data/lib/datadog/appsec/event.rb +1 -1
  59. data/lib/datadog/appsec/processor/actions.rb +1 -1
  60. data/lib/datadog/appsec/processor/rule_loader.rb +3 -1
  61. data/lib/datadog/appsec/processor/rule_merger.rb +33 -15
  62. data/lib/datadog/appsec/processor.rb +36 -37
  63. data/lib/datadog/appsec/rate_limiter.rb +25 -40
  64. data/lib/datadog/appsec/remote.rb +7 -3
  65. data/lib/datadog/appsec/response.rb +15 -1
  66. data/lib/datadog/appsec.rb +3 -2
  67. data/lib/datadog/core/configuration/components.rb +18 -15
  68. data/lib/datadog/core/configuration/settings.rb +135 -9
  69. data/lib/datadog/core/crashtracking/agent_base_url.rb +21 -0
  70. data/lib/datadog/core/crashtracking/component.rb +111 -0
  71. data/lib/datadog/core/crashtracking/tag_builder.rb +39 -0
  72. data/lib/datadog/core/diagnostics/environment_logger.rb +8 -11
  73. data/lib/datadog/core/environment/execution.rb +5 -5
  74. data/lib/datadog/core/metrics/client.rb +7 -0
  75. data/lib/datadog/core/rate_limiter.rb +183 -0
  76. data/lib/datadog/core/remote/client/capabilities.rb +4 -3
  77. data/lib/datadog/core/remote/component.rb +4 -2
  78. data/lib/datadog/core/remote/negotiation.rb +4 -4
  79. data/lib/datadog/core/remote/tie.rb +2 -0
  80. data/lib/datadog/core/runtime/metrics.rb +1 -1
  81. data/lib/datadog/core/telemetry/component.rb +51 -2
  82. data/lib/datadog/core/telemetry/emitter.rb +9 -11
  83. data/lib/datadog/core/telemetry/event.rb +37 -1
  84. data/lib/datadog/core/telemetry/ext.rb +1 -0
  85. data/lib/datadog/core/telemetry/http/adapters/net.rb +10 -12
  86. data/lib/datadog/core/telemetry/http/ext.rb +3 -0
  87. data/lib/datadog/core/telemetry/http/transport.rb +38 -9
  88. data/lib/datadog/core/telemetry/logger.rb +51 -0
  89. data/lib/datadog/core/telemetry/logging.rb +71 -0
  90. data/lib/datadog/core/telemetry/request.rb +13 -1
  91. data/lib/datadog/core/utils/at_fork_monkey_patch.rb +102 -0
  92. data/lib/datadog/core/utils/time.rb +12 -0
  93. data/lib/datadog/di/code_tracker.rb +168 -0
  94. data/lib/datadog/di/configuration/settings.rb +163 -0
  95. data/lib/datadog/di/configuration.rb +11 -0
  96. data/lib/datadog/di/error.rb +31 -0
  97. data/lib/datadog/di/extensions.rb +16 -0
  98. data/lib/datadog/di/probe.rb +133 -0
  99. data/lib/datadog/di/probe_builder.rb +41 -0
  100. data/lib/datadog/di/redactor.rb +188 -0
  101. data/lib/datadog/di/serializer.rb +193 -0
  102. data/lib/datadog/di.rb +14 -0
  103. data/lib/datadog/kit/appsec/events.rb +2 -4
  104. data/lib/datadog/opentelemetry/sdk/propagator.rb +2 -0
  105. data/lib/datadog/opentelemetry/sdk/span_processor.rb +10 -0
  106. data/lib/datadog/opentelemetry/sdk/trace/span.rb +23 -0
  107. data/lib/datadog/profiling/collectors/code_provenance.rb +7 -7
  108. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +28 -26
  109. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +11 -13
  110. data/lib/datadog/profiling/collectors/info.rb +15 -6
  111. data/lib/datadog/profiling/collectors/thread_context.rb +30 -2
  112. data/lib/datadog/profiling/component.rb +89 -95
  113. data/lib/datadog/profiling/exporter.rb +3 -3
  114. data/lib/datadog/profiling/ext/dir_monkey_patches.rb +3 -3
  115. data/lib/datadog/profiling/ext.rb +21 -21
  116. data/lib/datadog/profiling/flush.rb +1 -1
  117. data/lib/datadog/profiling/http_transport.rb +14 -7
  118. data/lib/datadog/profiling/load_native_extension.rb +5 -5
  119. data/lib/datadog/profiling/preload.rb +1 -1
  120. data/lib/datadog/profiling/profiler.rb +5 -8
  121. data/lib/datadog/profiling/scheduler.rb +33 -25
  122. data/lib/datadog/profiling/stack_recorder.rb +3 -0
  123. data/lib/datadog/profiling/tag_builder.rb +2 -2
  124. data/lib/datadog/profiling/tasks/exec.rb +5 -5
  125. data/lib/datadog/profiling/tasks/setup.rb +16 -35
  126. data/lib/datadog/profiling.rb +4 -5
  127. data/lib/datadog/single_step_instrument.rb +12 -0
  128. data/lib/datadog/tracing/contrib/action_cable/instrumentation.rb +8 -12
  129. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +5 -0
  130. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +78 -0
  131. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/patcher.rb +33 -0
  132. data/lib/datadog/tracing/contrib/action_pack/patcher.rb +2 -0
  133. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +4 -0
  134. data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +3 -1
  135. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +4 -1
  136. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +5 -1
  137. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +5 -0
  138. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +6 -1
  139. data/lib/datadog/tracing/contrib/ext.rb +14 -0
  140. data/lib/datadog/tracing/contrib/faraday/middleware.rb +9 -0
  141. data/lib/datadog/tracing/contrib/grape/endpoint.rb +19 -0
  142. data/lib/datadog/tracing/contrib/graphql/patcher.rb +9 -12
  143. data/lib/datadog/tracing/contrib/graphql/trace_patcher.rb +3 -3
  144. data/lib/datadog/tracing/contrib/graphql/tracing_patcher.rb +3 -3
  145. data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +14 -10
  146. data/lib/datadog/tracing/contrib/graphql/unified_trace_patcher.rb +10 -4
  147. data/lib/datadog/tracing/contrib/http/instrumentation.rb +18 -15
  148. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -5
  149. data/lib/datadog/tracing/contrib/httpclient/patcher.rb +1 -14
  150. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +5 -0
  151. data/lib/datadog/tracing/contrib/httprb/patcher.rb +1 -14
  152. data/lib/datadog/tracing/contrib/lograge/patcher.rb +15 -0
  153. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +2 -0
  154. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
  155. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +17 -13
  156. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +13 -6
  157. data/lib/datadog/tracing/contrib/patcher.rb +2 -1
  158. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +5 -0
  159. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +4 -1
  160. data/lib/datadog/tracing/contrib/presto/patcher.rb +1 -13
  161. data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +28 -0
  162. data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +5 -1
  163. data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +22 -10
  164. data/lib/datadog/tracing/contrib/rack/middlewares.rb +27 -0
  165. data/lib/datadog/tracing/contrib/redis/tags.rb +4 -0
  166. data/lib/datadog/tracing/contrib/sinatra/tracer.rb +4 -0
  167. data/lib/datadog/tracing/contrib/stripe/request.rb +3 -2
  168. data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +5 -0
  169. data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +4 -1
  170. data/lib/datadog/tracing/diagnostics/environment_logger.rb +14 -16
  171. data/lib/datadog/tracing/distributed/propagation.rb +7 -0
  172. data/lib/datadog/tracing/metadata/errors.rb +9 -1
  173. data/lib/datadog/tracing/metadata/ext.rb +6 -0
  174. data/lib/datadog/tracing/pipeline/span_filter.rb +2 -2
  175. data/lib/datadog/tracing/remote.rb +5 -2
  176. data/lib/datadog/tracing/sampling/matcher.rb +6 -1
  177. data/lib/datadog/tracing/sampling/rate_sampler.rb +1 -1
  178. data/lib/datadog/tracing/sampling/rule.rb +2 -0
  179. data/lib/datadog/tracing/sampling/rule_sampler.rb +9 -5
  180. data/lib/datadog/tracing/sampling/span/ext.rb +1 -1
  181. data/lib/datadog/tracing/sampling/span/rule.rb +2 -2
  182. data/lib/datadog/tracing/span.rb +9 -2
  183. data/lib/datadog/tracing/span_event.rb +41 -0
  184. data/lib/datadog/tracing/span_operation.rb +6 -2
  185. data/lib/datadog/tracing/trace_operation.rb +26 -2
  186. data/lib/datadog/tracing/tracer.rb +14 -12
  187. data/lib/datadog/tracing/transport/http/client.rb +1 -0
  188. data/lib/datadog/tracing/transport/io/client.rb +1 -0
  189. data/lib/datadog/tracing/transport/serializable_trace.rb +3 -0
  190. data/lib/datadog/tracing/workers/trace_writer.rb +1 -1
  191. data/lib/datadog/tracing/workers.rb +1 -1
  192. data/lib/datadog/version.rb +1 -1
  193. metadata +46 -11
  194. data/lib/datadog/profiling/crashtracker.rb +0 -91
  195. data/lib/datadog/profiling/ext/forking.rb +0 -98
  196. data/lib/datadog/tracing/sampling/rate_limiter.rb +0 -185
@@ -76,6 +76,11 @@
  #define MISSING_TRACER_CONTEXT_KEY 0
  #define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)

+ // This is used as a placeholder to mark threads that are allowed to be profiled (enabled)
+ // (e.g. to avoid trying to gvl profile threads that are not from the main Ractor)
+ // and for which there's no data yet
+ #define GVL_WAITING_ENABLED_EMPTY RUBY_FIXNUM_MAX
+
  static ID at_active_span_id; // id of :@active_span in Ruby
  static ID at_active_trace_id; // id of :@active_trace in Ruby
  static ID at_id_id; // id of :@id in Ruby
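Since GVL_WAITING_ENABLED_EMPTY is referenced throughout the rest of this diff, the following minimal sketch (illustrative only, not part of the diff) shows how a reader of the per-thread GVL slot could distinguish the three cases. It assumes the accessor gvl_profiling_state_thread_object_get introduced elsewhere in this diff returns the raw integer stored in the slot, and that a slot that was never initialized reads as zero:

// Illustrative sketch only: interpreting the per-thread GVL profiling slot.
// Assumes an untouched slot reads as 0 (an assumption, not stated by the diff).
static const char *describe_gvl_slot(VALUE thread) {
  long gvl_waiting_at = gvl_profiling_state_thread_object_get(thread);

  if (gvl_waiting_at == 0) {
    return "not being profiled (e.g. thread not from the main Ractor)";
  } else if (gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) {
    return "profiled, but no waiting-for-GVL data yet";
  } else {
    return "started waiting for the GVL at this monotonic timestamp (ns)";
  }
}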
@@ -86,13 +91,34 @@ static ID at_otel_values_id; // id of :@otel_values in Ruby
  static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
  static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby

+ // Used to support reading trace identifiers from the opentelemetry Ruby library when the ddtrace gem tracing
+ // integration is NOT in use.
+ static ID at_span_id_id; // id of :@span_id in Ruby
+ static ID at_trace_id_id; // id of :@trace_id in Ruby
+ static ID at_entries_id; // id of :@entries in Ruby
+ static ID at_context_id; // id of :@context in Ruby
+ static ID at_kind_id; // id of :@kind in Ruby
+ static ID at_name_id; // id of :@name in Ruby
+ static ID server_id; // id of :server in Ruby
+ static ID otel_context_storage_id; // id of :__opentelemetry_context_storage__ in Ruby
+
+ // This is used by `thread_context_collector_on_gvl_running`. Because when that method gets called we're not sure if
+ // it's safe to access the state of the thread context collector, we store this setting as a global value. This does
+ // mean this setting is shared among all thread context collectors, and thus it's "last writer wins".
+ // In production this should not be a problem: there should only be one profiler, which is the last one created,
+ // and that'll be the one that last wrote this setting.
+ static uint32_t global_waiting_for_gvl_threshold_ns = MILLIS_AS_NS(10);
+
+ enum otel_context_enabled {otel_context_enabled_false, otel_context_enabled_only, otel_context_enabled_both};
+
  // Contains state for a single ThreadContext instance
  struct thread_context_collector_state {
  // Note: Places in this file that usually need to be changed when this struct is changed are tagged with
  // "Update this when modifying state struct"

  // Required by Datadog::Profiling::Collectors::Stack as a scratch buffer during sampling
- sampling_buffer *sampling_buffer;
+ ddog_prof_Location *locations;
+ uint16_t max_frames;
  // Hashmap <Thread Object, struct per_thread_context>
  st_table *hash_map_per_thread_context;
  // Datadog::Profiling::StackRecorder instance
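Because global_waiting_for_gvl_threshold_ns is a plain global, the "last writer wins" behavior described in the comment above is easy to picture. A hypothetical sketch (not from the diff; the helper names are illustrative stand-ins for what _native_initialize and the GVL-running check do):

#include <stdbool.h>
#include <stdint.h>

#define MILLIS_AS_NS(value) ((uint64_t)(value) * 1000 * 1000)

// Whichever collector was initialized last decides the threshold all readers see.
static uint32_t global_waiting_for_gvl_threshold_ns = MILLIS_AS_NS(10);

// Stand-in for the write performed during collector initialization.
static void initialize_collector(uint32_t waiting_for_gvl_threshold_ns) {
  global_waiting_for_gvl_threshold_ns = waiting_for_gvl_threshold_ns; // overwrites any earlier collector's value
}

// Stand-in for the check done when a thread regains the GVL: only waits at or
// above the (latest) threshold are worth flagging for an extra sample.
static bool waited_long_enough(uint64_t waiting_time_ns) {
  return waiting_time_ns >= global_waiting_for_gvl_threshold_ns;
}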
@@ -111,6 +137,8 @@ struct thread_context_collector_state {
  bool endpoint_collection_enabled;
  // Used to omit timestamps / timeline events from collected data
  bool timeline_enabled;
+ // Used to control context collection
+ enum otel_context_enabled otel_context_enabled;
  // Used to omit class information from collected allocation data
  bool allocation_type_enabled;
  // Used when calling monotonic_to_system_epoch_ns
@@ -118,6 +146,8 @@ struct thread_context_collector_state {
  // Used to identify the main thread, to give it a fallback name
  VALUE main_thread;
  // Used when extracting trace identifiers from otel spans. Lazily initialized.
+ // Qtrue serves as a marker we've not yet extracted it; when we try to extract it, we set it to an object if
+ // successful and Qnil if not.
  VALUE otel_current_span_key;

  struct stats {
@@ -138,6 +168,7 @@ struct thread_context_collector_state {

  // Tracks per-thread state
  struct per_thread_context {
+ sampling_buffer *sampling_buffer;
  char thread_id[THREAD_ID_LIMIT_CHARS];
  ddog_CharSlice thread_id_char_slice;
  char thread_invoke_location[THREAD_INVOKE_LOCATION_LIMIT_CHARS];
@@ -162,6 +193,12 @@ struct trace_identifiers {
  VALUE trace_endpoint;
  };

+ struct otel_span {
+ VALUE span;
+ VALUE span_id;
+ VALUE trace_id;
+ };
+
  static void thread_context_collector_typed_data_mark(void *state_ptr);
  static void thread_context_collector_typed_data_free(void *state_ptr);
  static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument);
@@ -175,17 +212,20 @@ static VALUE _native_initialize(
  VALUE tracer_context_key,
  VALUE endpoint_collection_enabled,
  VALUE timeline_enabled,
+ VALUE waiting_for_gvl_threshold_ns,
+ VALUE otel_context_enabled,
  VALUE allocation_type_enabled
  );
  static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE profiler_overhead_stack_thread);
  static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
  static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
- static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
- void update_metrics_and_sample(
+ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state);
+ static void update_metrics_and_sample(
  struct thread_context_collector_state *state,
  VALUE thread_being_sampled,
- VALUE profiler_overhead_stack_thread,
+ VALUE stack_from_thread,
  struct per_thread_context *thread_context,
+ sampling_buffer* sampling_buffer,
  long current_cpu_time_ns,
  long current_monotonic_wall_time_ns
  );
@@ -194,15 +234,18 @@ static void trigger_sample_for_thread(
  VALUE thread,
  VALUE stack_from_thread,
  struct per_thread_context *thread_context,
+ sampling_buffer* sampling_buffer,
  sample_values values,
  long current_monotonic_wall_time_ns,
  ddog_CharSlice *ruby_vm_type,
- ddog_CharSlice *class_name
+ ddog_CharSlice *class_name,
+ bool is_gvl_waiting_state
  );
  static VALUE _native_thread_list(VALUE self);
  static struct per_thread_context *get_or_create_context_for(VALUE thread, struct thread_context_collector_state *state);
  static struct per_thread_context *get_context_for(VALUE thread, struct thread_context_collector_state *state);
  static void initialize_context(VALUE thread, struct per_thread_context *thread_context, struct thread_context_collector_state *state);
+ static void free_context(struct per_thread_context* thread_context);
  static VALUE _native_inspect(VALUE self, VALUE collector_instance);
  static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
  static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
@@ -231,6 +274,27 @@ static void ddtrace_otel_trace_identifiers_for(
  VALUE active_span,
  VALUE otel_values
  );
+ static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples);
+ static bool handle_gvl_waiting(
+ struct thread_context_collector_state *state,
+ VALUE thread_being_sampled,
+ VALUE stack_from_thread,
+ struct per_thread_context *thread_context,
+ sampling_buffer* sampling_buffer,
+ long current_cpu_time_ns
+ );
+ static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread);
+ static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread);
+ static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread);
+ static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread);
+ static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns);
+ static void otel_without_ddtrace_trace_identifiers_for(
+ struct thread_context_collector_state *state,
+ VALUE thread,
+ struct trace_identifiers *trace_identifiers_result
+ );
+ static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key);
+ static uint64_t otel_span_id_to_uint(VALUE otel_span_id);

  void collectors_thread_context_init(VALUE profiling_module) {
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -248,19 +312,27 @@ void collectors_thread_context_init(VALUE profiling_module) {
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
  rb_define_alloc_func(collectors_thread_context_class, _native_new);

- rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, 7);
+ rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, 9);
  rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1);
  rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
  rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2);
  rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 3);
  rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
  rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
- rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 1);
+ rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 2);
  rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
  rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
  rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
  rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
  rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
+ rb_define_singleton_method(testing_module, "_native_sample_skipped_allocation_samples", _native_sample_skipped_allocation_samples, 2);
+ #ifndef NO_GVL_INSTRUMENTATION
+ rb_define_singleton_method(testing_module, "_native_on_gvl_waiting", _native_on_gvl_waiting, 1);
+ rb_define_singleton_method(testing_module, "_native_gvl_waiting_at_for", _native_gvl_waiting_at_for, 1);
+ rb_define_singleton_method(testing_module, "_native_on_gvl_running", _native_on_gvl_running, 1);
+ rb_define_singleton_method(testing_module, "_native_sample_after_gvl_running", _native_sample_after_gvl_running, 2);
+ rb_define_singleton_method(testing_module, "_native_apply_delta_to_cpu_time_at_previous_sample_ns", _native_apply_delta_to_cpu_time_at_previous_sample_ns, 3);
+ #endif

  at_active_span_id = rb_intern_const("@active_span");
  at_active_trace_id = rb_intern_const("@active_trace");
@@ -271,6 +343,19 @@ void collectors_thread_context_init(VALUE profiling_module) {
  at_otel_values_id = rb_intern_const("@otel_values");
  at_parent_span_id_id = rb_intern_const("@parent_span_id");
  at_datadog_trace_id = rb_intern_const("@datadog_trace");
+ at_span_id_id = rb_intern_const("@span_id");
+ at_trace_id_id = rb_intern_const("@trace_id");
+ at_entries_id = rb_intern_const("@entries");
+ at_context_id = rb_intern_const("@context");
+ at_kind_id = rb_intern_const("@kind");
+ at_name_id = rb_intern_const("@name");
+ server_id = rb_intern_const("server");
+ otel_context_storage_id = rb_intern_const("__opentelemetry_context_storage__");
+
+ #ifndef NO_GVL_INSTRUMENTATION
+ // This will raise if Ruby already ran out of thread-local keys
+ gvl_profiling_init();
+ #endif

  gc_profiling_init();
  }
@@ -308,7 +393,7 @@ static void thread_context_collector_typed_data_free(void *state_ptr) {

  // Important: Remember that we're only guaranteed to see here what's been set in _native_new, aka
  // pointers that have been set NULL there may still be NULL here.
- if (state->sampling_buffer != NULL) sampling_buffer_free(state->sampling_buffer);
+ if (state->locations != NULL) ruby_xfree(state->locations);

  // Free each entry in the map
  st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
@@ -327,8 +412,8 @@ static int hash_map_per_thread_context_mark(st_data_t key_thread, DDTRACE_UNUSED

  // Used to clear each of the per_thread_contexts inside the hash_map_per_thread_context
  static int hash_map_per_thread_context_free_values(DDTRACE_UNUSED st_data_t _thread, st_data_t value_per_thread_context, DDTRACE_UNUSED st_data_t _argument) {
- struct per_thread_context *per_thread_context = (struct per_thread_context*) value_per_thread_context;
- ruby_xfree(per_thread_context);
+ struct per_thread_context *thread_context = (struct per_thread_context*) value_per_thread_context;
+ free_context(thread_context);
  return ST_CONTINUE;
  }

@@ -339,25 +424,39 @@ static VALUE _native_new(VALUE klass) {
  // being leaked.

  // Update this when modifying state struct
- state->sampling_buffer = NULL;
+ state->locations = NULL;
+ state->max_frames = 0;
  state->hash_map_per_thread_context =
  // "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s.
  st_init_numtable();
  state->recorder_instance = Qnil;
  state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;
- state->thread_list_buffer = rb_ary_new();
+ VALUE thread_list_buffer = rb_ary_new();
+ state->thread_list_buffer = thread_list_buffer;
  state->endpoint_collection_enabled = true;
  state->timeline_enabled = true;
+ state->otel_context_enabled = otel_context_enabled_false;
  state->allocation_type_enabled = true;
  state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
- state->main_thread = rb_thread_main();
- state->otel_current_span_key = Qnil;
+ VALUE main_thread = rb_thread_main();
+ state->main_thread = main_thread;
+ state->otel_current_span_key = Qtrue;
  state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
  state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;

- return TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
+ // Note: Remember to keep any new allocated objects that get stored in the state also on the stack + mark them with
+ // RB_GC_GUARD -- otherwise it's possible for a GC to run and
+ // since the instance representing the state does not yet exist, such objects will not get marked.
+
+ VALUE instance = TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
+
+ RB_GC_GUARD(thread_list_buffer);
+ RB_GC_GUARD(main_thread); // Arguably not needed, but perhaps can be removed in some future Ruby release?
+
+ return instance;
  }

+ // TODO: Convert this to use options like CpuAndWallTimeWorker
  static VALUE _native_initialize(
  DDTRACE_UNUSED VALUE _self,
  VALUE collector_instance,
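The RB_GC_GUARD dance in _native_new above reflects a general pattern in Ruby C extensions. Below is a hypothetical, simplified sketch of why the locals and guards are needed; my_state and my_type are illustrative stand-ins, not names from this diff:

#include <ruby.h>

// Simplified stand-ins for the extension's state struct and TypedData type.
struct my_state { VALUE buffer; };
extern const rb_data_type_t my_type;

static VALUE my_new(VALUE klass) {
  struct my_state *state = ruby_xcalloc(1, sizeof(struct my_state));

  // Keep the new object in a local too: until the wrapper instance exists,
  // the GC cannot reach anything stored only inside native memory.
  VALUE buffer = rb_ary_new();
  state->buffer = buffer;

  // This allocation can itself trigger GC; without the guard below, the
  // compiler may have already discarded `buffer` from the stack, and the
  // array could be collected out from under us.
  VALUE instance = TypedData_Wrap_Struct(klass, &my_type, state);

  RB_GC_GUARD(buffer); // forces `buffer` to stay live on the stack until here
  return instance;
}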
@@ -366,26 +465,38 @@ static VALUE _native_initialize(
  VALUE tracer_context_key,
  VALUE endpoint_collection_enabled,
  VALUE timeline_enabled,
+ VALUE waiting_for_gvl_threshold_ns,
+ VALUE otel_context_enabled,
  VALUE allocation_type_enabled
  ) {
  ENFORCE_BOOLEAN(endpoint_collection_enabled);
  ENFORCE_BOOLEAN(timeline_enabled);
+ ENFORCE_TYPE(waiting_for_gvl_threshold_ns, T_FIXNUM);
  ENFORCE_BOOLEAN(allocation_type_enabled);

  struct thread_context_collector_state *state;
  TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

- int max_frames_requested = NUM2INT(max_frames);
- if (max_frames_requested < 0) rb_raise(rb_eArgError, "Invalid max_frames: value must not be negative");
-
  // Update this when modifying state struct
- state->sampling_buffer = sampling_buffer_new(max_frames_requested);
+ state->max_frames = sampling_buffer_check_max_frames(NUM2INT(max_frames));
+ state->locations = ruby_xcalloc(state->max_frames, sizeof(ddog_prof_Location));
  // hash_map_per_thread_context is already initialized, nothing to do here
  state->recorder_instance = enforce_recorder_instance(recorder_instance);
  state->endpoint_collection_enabled = (endpoint_collection_enabled == Qtrue);
  state->timeline_enabled = (timeline_enabled == Qtrue);
+ if (otel_context_enabled == Qfalse || otel_context_enabled == Qnil) {
+ state->otel_context_enabled = otel_context_enabled_false;
+ } else if (otel_context_enabled == ID2SYM(rb_intern("only"))) {
+ state->otel_context_enabled = otel_context_enabled_only;
+ } else if (otel_context_enabled == ID2SYM(rb_intern("both"))) {
+ state->otel_context_enabled = otel_context_enabled_both;
+ } else {
+ rb_raise(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled);
+ }
  state->allocation_type_enabled = (allocation_type_enabled == Qtrue);

+ global_waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns);
+
  if (RTEST(tracer_context_key)) {
  ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
  // Note about rb_to_id and dynamic symbols: calling `rb_to_id` prevents symbols from ever being garbage collected.
@@ -416,13 +527,22 @@ static VALUE _native_on_gc_start(DDTRACE_UNUSED VALUE self, VALUE collector_inst
  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
  // It SHOULD NOT be used for other purposes.
  static VALUE _native_on_gc_finish(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
- thread_context_collector_on_gc_finish(collector_instance);
+ (void) !thread_context_collector_on_gc_finish(collector_instance);
  return Qtrue;
  }

  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
  // It SHOULD NOT be used for other purposes.
- static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
+ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state) {
+ ENFORCE_BOOLEAN(reset_monotonic_to_system_state);
+
+ struct thread_context_collector_state *state;
+ TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+ if (reset_monotonic_to_system_state == Qtrue) {
+ state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
+ }
+
  thread_context_collector_sample_after_gc(collector_instance);
  return Qtrue;
  }
@@ -461,6 +581,7 @@ void thread_context_collector_sample(VALUE self_instance, long current_monotonic
  /* thread_being_sampled: */ thread,
  /* stack_from_thread: */ thread,
  thread_context,
+ thread_context->sampling_buffer,
  current_cpu_time_ns,
  current_monotonic_wall_time_ns
  );
@@ -477,25 +598,33 @@
  /* thread_being_sampled: */ current_thread,
  /* stack_from_thread: */ profiler_overhead_stack_thread,
  current_thread_context,
+ // Here we use the overhead thread's sampling buffer so as to not invalidate the cache in the buffer of the thread being sampled
+ get_or_create_context_for(profiler_overhead_stack_thread, state)->sampling_buffer,
  cpu_time_now_ns(current_thread_context),
  monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
  );
  }

- void update_metrics_and_sample(
+ static void update_metrics_and_sample(
  struct thread_context_collector_state *state,
  VALUE thread_being_sampled,
  VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
  struct per_thread_context *thread_context,
+ sampling_buffer* sampling_buffer,
  long current_cpu_time_ns,
  long current_monotonic_wall_time_ns
  ) {
- long cpu_time_elapsed_ns = update_time_since_previous_sample(
+ bool is_gvl_waiting_state =
+ handle_gvl_waiting(state, thread_being_sampled, stack_from_thread, thread_context, sampling_buffer, current_cpu_time_ns);
+
+ // Don't assign/update cpu during "Waiting for GVL"
+ long cpu_time_elapsed_ns = is_gvl_waiting_state ? 0 : update_time_since_previous_sample(
  &thread_context->cpu_time_at_previous_sample_ns,
  current_cpu_time_ns,
  thread_context->gc_tracking.cpu_time_at_start_ns,
  IS_NOT_WALL_TIME
  );
+
  long wall_time_elapsed_ns = update_time_since_previous_sample(
  &thread_context->wall_time_at_previous_sample_ns,
  current_monotonic_wall_time_ns,
@@ -507,15 +636,32 @@ void update_metrics_and_sample(
  IS_WALL_TIME
  );

+ // A thread enters "Waiting for GVL", well, as the name implies, without the GVL.
+ //
+ // As a consequence, it's possible that a thread enters "Waiting for GVL" in parallel with the current thread working
+ // on sampling, and thus for the `current_monotonic_wall_time_ns` (which is recorded at the start of sampling)
+ // to be < the time at which we started Waiting for GVL.
+ //
+ // All together, this means that when `handle_gvl_waiting` creates an extra sample (see comments on that function for
+ // what the extra sample is), it's possible that there's no more wall-time to be assigned.
+ // Thus, in this case, we don't want to produce a sample representing Waiting for GVL with a wall-time of 0, and
+ // thus we skip creating such a sample.
+ if (is_gvl_waiting_state && wall_time_elapsed_ns == 0) return;
+ // ...you may also wonder: is there any other situation where it makes sense to produce a sample with
+ // wall_time_elapsed_ns == 0? I believe that yes, because the sample still includes a timestamp and a stack, but we
+ // may revisit/change our minds on this in the future.
+
  trigger_sample_for_thread(
  state,
  thread_being_sampled,
  stack_from_thread,
  thread_context,
+ sampling_buffer,
  (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
  current_monotonic_wall_time_ns,
  NULL,
- NULL
+ NULL,
+ is_gvl_waiting_state
  );
  }

@@ -561,6 +707,7 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
  //
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
  // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
+ __attribute__((warn_unused_result))
  bool thread_context_collector_on_gc_finish(VALUE self_instance) {
  struct thread_context_collector_state *state;
  if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
@@ -661,7 +808,6 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
  }

  record_placeholder_stack(
- state->sampling_buffer,
  state->recorder_instance,
  (sample_values) {
  // This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
@@ -692,11 +838,13 @@ static void trigger_sample_for_thread(
  VALUE thread,
  VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
  struct per_thread_context *thread_context,
+ sampling_buffer* sampling_buffer,
  sample_values values,
  long current_monotonic_wall_time_ns,
  // These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
  ddog_CharSlice *ruby_vm_type,
- ddog_CharSlice *class_name
+ ddog_CharSlice *class_name,
+ bool is_gvl_waiting_state
  ) {
  int max_label_count =
  1 + // thread id
@@ -737,6 +885,11 @@
  struct trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil};
  trace_identifiers_for(state, thread, &trace_identifiers_result);

+ if (!trace_identifiers_result.valid && state->otel_context_enabled != otel_context_enabled_false) {
+ // If we couldn't get something with ddtrace, let's see if we can get some trace identifiers from opentelemetry directly
+ otel_without_ddtrace_trace_identifiers_for(state, thread, &trace_identifiers_result);
+ }
+
  if (trace_identifiers_result.valid) {
  labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .num = trace_identifiers_result.local_root_span_id};
  labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("span id"), .num = trace_identifiers_result.span_id};
@@ -812,10 +965,15 @@

  sample_thread(
  stack_from_thread,
- state->sampling_buffer,
+ sampling_buffer,
  state->recorder_instance,
  values,
- (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
+ (sample_labels) {
+ .labels = slice_labels,
+ .state_label = state_label,
+ .end_timestamp_ns = end_timestamp_ns,
+ .is_gvl_waiting_state = is_gvl_waiting_state,
+ }
  );
  }

@@ -865,9 +1023,9 @@ static struct per_thread_context *get_context_for(VALUE thread, struct thread_co
  // to either run Ruby code during sampling (not great), or otherwise use some of the VM private APIs to detect this.
  //
  static bool is_logging_gem_monkey_patch(VALUE invoke_file_location) {
- int logging_gem_path_len = strlen(LOGGING_GEM_PATH);
+ unsigned long logging_gem_path_len = strlen(LOGGING_GEM_PATH);
  char *invoke_file = StringValueCStr(invoke_file_location);
- int invoke_file_len = strlen(invoke_file);
+ unsigned long invoke_file_len = strlen(invoke_file);

  if (invoke_file_len < logging_gem_path_len) return false;

@@ -875,6 +1033,8 @@
  }

  static void initialize_context(VALUE thread, struct per_thread_context *thread_context, struct thread_context_collector_state *state) {
+ thread_context->sampling_buffer = sampling_buffer_new(state->max_frames, state->locations);
+
  snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%"PRIu64" (%lu)", native_thread_id_for(thread), (unsigned long) thread_id_for(thread));
  thread_context->thread_id_char_slice = (ddog_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};

@@ -913,6 +1073,25 @@
  // These will only be used during a GC operation
  thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
  thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+
+ #ifndef NO_GVL_INSTRUMENTATION
+ // We use this special location to store data that can be accessed without any
+ // kind of synchronization (e.g. by threads without the GVL).
+ //
+ // We set this marker here for two purposes:
+ // * To make sure there's no stale data from a previous execution of the profiler.
+ // * To mark threads that are actually being profiled
+ //
+ // (Setting this is potentially a race, but what we want is to avoid _stale_ data, so
+ // if this gets set concurrently with context initialization, then such a value will belong
+ // to the current profiler instance, so that's OK)
+ gvl_profiling_state_thread_object_set(thread, GVL_WAITING_ENABLED_EMPTY);
+ #endif
+ }
+
+ static void free_context(struct per_thread_context* thread_context) {
+ sampling_buffer_free(thread_context->sampling_buffer);
+ ruby_xfree(thread_context);
  }

  static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -922,6 +1101,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
  VALUE result = rb_str_new2(" (native state)");

  // Update this when modifying state struct
+ rb_str_concat(result, rb_sprintf(" max_frames=%d", state->max_frames));
  rb_str_concat(result, rb_sprintf(" hash_map_per_thread_context=%"PRIsVALUE, per_thread_context_st_table_as_ruby_hash(state)));
  rb_str_concat(result, rb_sprintf(" recorder_instance=%"PRIsVALUE, state->recorder_instance));
  VALUE tracer_context_key = state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY ? Qnil : ID2SYM(state->tracer_context_key);
@@ -930,6 +1110,7 @@
  rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state)));
  rb_str_concat(result, rb_sprintf(" endpoint_collection_enabled=%"PRIsVALUE, state->endpoint_collection_enabled ? Qtrue : Qfalse));
  rb_str_concat(result, rb_sprintf(" timeline_enabled=%"PRIsVALUE, state->timeline_enabled ? Qtrue : Qfalse));
+ rb_str_concat(result, rb_sprintf(" otel_context_enabled=%d", state->otel_context_enabled));
  rb_str_concat(result, rb_sprintf(" allocation_type_enabled=%"PRIsVALUE, state->allocation_type_enabled ? Qtrue : Qfalse));
  rb_str_concat(result, rb_sprintf(
  " time_converter_state={.system_epoch_ns_reference=%ld, .delta_to_epoch_ns=%ld}",
@@ -939,6 +1120,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
  rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
  rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
  rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
+ rb_str_concat(result, rb_sprintf(" global_waiting_for_gvl_threshold_ns=%u", global_waiting_for_gvl_threshold_ns));

  return result;
  }
@@ -966,6 +1148,10 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value

  ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
  ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
+
+ #ifndef NO_GVL_INSTRUMENTATION
+ ID2SYM(rb_intern("gvl_waiting_at")), /* => */ LONG2NUM(gvl_profiling_state_thread_object_get(thread)),
+ #endif
  };
  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);

@@ -1006,7 +1192,7 @@ static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context,

  if (is_thread_alive(thread)) return ST_CONTINUE;

- ruby_xfree(thread_context);
+ free_context(thread_context);
  return ST_DELETE;
  }

@@ -1116,6 +1302,7 @@ static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_ins

  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
  static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
+ if (state->otel_context_enabled == otel_context_enabled_only) return;
  if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;

  VALUE current_context = rb_thread_local_aref(thread, state->tracer_context_key);
@@ -1170,7 +1357,7 @@ static bool should_collect_resource(VALUE root_span) {
  if (root_span_type == Qnil) return false;
  ENFORCE_TYPE(root_span_type, T_STRING);

- int root_span_type_length = RSTRING_LEN(root_span_type);
+ long root_span_type_length = RSTRING_LEN(root_span_type);
  const char *root_span_type_value = StringValuePtr(root_span_type);

  bool is_web_request =
@@ -1193,6 +1380,9 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector
  struct thread_context_collector_state *state;
  TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

+ // Release all context memory before clearing the existing context
+ st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
+
  st_clear(state->hash_map_per_thread_context);

  state->stats = (struct stats) {}; // Resets all stats back to zero
@@ -1257,7 +1447,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
  // Thus, we need to make sure there's actually a class before getting its name.

  if (klass != 0) {
- const char *name = rb_obj_classname(new_object);
+ const char *name = rb_class2name(klass);
  size_t name_length = name != NULL ? strlen(name) : 0;

  if (name_length > 0) {
@@ -1285,15 +1475,19 @@

  track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);

+ struct per_thread_context *thread_context = get_or_create_context_for(current_thread, state);
+
  trigger_sample_for_thread(
  state,
  /* thread: */ current_thread,
  /* stack_from_thread: */ current_thread,
- get_or_create_context_for(current_thread, state),
- (sample_values) {.alloc_samples = sample_weight, .alloc_samples_unscaled = 1},
+ thread_context,
+ thread_context->sampling_buffer,
+ (sample_values) {.alloc_samples = sample_weight, .alloc_samples_unscaled = 1, .heap_sample = true},
  INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
  &ruby_vm_type,
- optional_class_name
+ optional_class_name,
+ false
  );
  }

@@ -1339,25 +1533,29 @@ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
  }
  }

+ // Used to access OpenTelemetry::Trace.const_get(:CURRENT_SPAN_KEY). Will raise exceptions if it fails.
+ static VALUE read_otel_current_span_key_const(DDTRACE_UNUSED VALUE _unused) {
+ VALUE opentelemetry_module = rb_const_get(rb_cObject, rb_intern("OpenTelemetry"));
+ ENFORCE_TYPE(opentelemetry_module, T_MODULE);
+ VALUE trace_module = rb_const_get(opentelemetry_module, rb_intern("Trace"));
+ ENFORCE_TYPE(trace_module, T_MODULE);
+ return rb_const_get(trace_module, rb_intern("CURRENT_SPAN_KEY"));
+ }
+
  static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
- if (state->otel_current_span_key == Qnil) {
- VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog"));
- VALUE opentelemetry_module = rb_const_get(datadog_module, rb_intern("OpenTelemetry"));
- VALUE api_module = rb_const_get(opentelemetry_module, rb_intern("API"));
- VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
- VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
-
- if (current_span_key == Qnil) {
- rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
- }
+ if (state->otel_current_span_key == Qtrue) { // Qtrue means we haven't tried to extract it yet
+ // If this fails, we want to fail gracefully, rather than raise an exception (e.g. if the opentelemetry gem
+ // gets refactored, we should not fall on our face)
+ VALUE span_key = rb_protect(read_otel_current_span_key_const, Qnil, NULL);

- state->otel_current_span_key = current_span_key;
+ // Note that this gets set to Qnil if we failed to extract the correct value, and thus we won't try to extract it again
+ state->otel_current_span_key = span_key;
  }

  return state->otel_current_span_key;
  }

- // This method gets used when ddtrace is being used indirectly via the otel APIs. Information gets stored slightly
+ // This method gets used when ddtrace is being used indirectly via the opentelemetry APIs. Information gets stored slightly
  // differently, and this codepath handles it.
  static void ddtrace_otel_trace_identifiers_for(
  struct thread_context_collector_state *state,
@@ -1377,6 +1575,7 @@ static void ddtrace_otel_trace_identifiers_for(
  if (resolved_numeric_span_id == Qnil) return;

  VALUE otel_current_span_key = get_otel_current_span_key(state);
+ if (otel_current_span_key == Qnil) return;
  VALUE current_trace = *active_trace;

  // ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
@@ -1400,3 +1599,417 @@ static void ddtrace_otel_trace_identifiers_for(
1400
1599
  *active_trace = current_trace;
1401
1600
  *numeric_span_id = resolved_numeric_span_id;
1402
1601
  }
1602
+
1603
+ void thread_context_collector_sample_skipped_allocation_samples(VALUE self_instance, unsigned int skipped_samples) {
1604
+ struct thread_context_collector_state *state;
1605
+ TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
1606
+
1607
+ ddog_prof_Label labels[] = {
1608
+ // Providing .num = 0 should not be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
1609
+ {.key = DDOG_CHARSLICE_C("thread id"), .str = DDOG_CHARSLICE_C("SS"), .num = 0},
1610
+ {.key = DDOG_CHARSLICE_C("thread name"), .str = DDOG_CHARSLICE_C("Skipped Samples"), .num = 0},
1611
+       {.key = DDOG_CHARSLICE_C("allocation class"), .str = DDOG_CHARSLICE_C("(Skipped Samples)"), .num = 0},
+     };
+     ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = sizeof(labels) / sizeof(labels[0])};
+
+     record_placeholder_stack(
+       state->recorder_instance,
+       (sample_values) {.alloc_samples = skipped_samples},
+       (sample_labels) {
+         .labels = slice_labels,
+         .state_label = NULL,
+         .end_timestamp_ns = 0, // For now we're not collecting timestamps for allocation events
+       },
+       DDOG_CHARSLICE_C("Skipped Samples")
+     );
+   }
+
+   static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples) {
+     thread_context_collector_sample_skipped_allocation_samples(collector_instance, NUM2UINT(skipped_samples));
+     return Qtrue;
+   }
+
+   // This method differs from trace_identifiers_for/ddtrace_otel_trace_identifiers_for to support the situation where
+   // the opentelemetry ruby library is being used for tracing AND the ddtrace tracing bits are not involved at all.
+   //
+   // Thus, in this case, we're reading directly from the opentelemetry internals, which is different from how ddtrace
+   // tracing does it.
+   //
+   // This is somewhat brittle: we're coupling to internal details of the opentelemetry gem to get what we need. In the
+   // future maybe the otel ruby folks would be open to having a nice public way of getting this data that suits the
+   // use case of profilers.
+   // Until then, the strategy below is to be extremely defensive, and if anything is out of place, we immediately return
+   // and give up on getting trace data from opentelemetry. (Thus, worst case would be -- you upgrade opentelemetry and
+   // profiling features relying on reading this data stop working, but you'll still get profiles and the app will be
+   // otherwise undisturbed).
+   //
+   // Specifically, the way this works is:
+   // 1. The latest entry in the opentelemetry context storage represents the current span (if any). We take the span id
+   //    and trace id from this span.
+   // 2. To find the local root span id, we walk the context storage backwards from the current span, and find the
+   //    earliest entry in the context storage that has the same trace id as the current span; we use that span's id as
+   //    the local root span id.
+   // This matches the semantics of how ddtrace tracing creates a TraceOperation and assigns a local root span to it.
+   static void otel_without_ddtrace_trace_identifiers_for(
+     struct thread_context_collector_state *state,
+     VALUE thread,
+     struct trace_identifiers *trace_identifiers_result
+   ) {
+     VALUE context_storage = rb_thread_local_aref(thread, otel_context_storage_id /* __opentelemetry_context_storage__ */);
+
+     // If it exists, context_storage is expected to be an Array[OpenTelemetry::Context]
+     if (context_storage == Qnil || !RB_TYPE_P(context_storage, T_ARRAY)) return;
+
+     VALUE otel_current_span_key = get_otel_current_span_key(state);
+     if (otel_current_span_key == Qnil) return;
+
+     int active_context_index = RARRAY_LEN(context_storage) - 1;
+     if (active_context_index < 0) return;
+
+     struct otel_span active_span = otel_span_from(rb_ary_entry(context_storage, active_context_index), otel_current_span_key);
+     if (active_span.span == Qnil) return;
+
+     struct otel_span local_root_span = active_span;
+
+     // Now find the oldest span, starting from the active span, that still has the same trace id as the active span
+     for (int i = active_context_index - 1; i >= 0; i--) {
+       struct otel_span checking_span = otel_span_from(rb_ary_entry(context_storage, i), otel_current_span_key);
+       if (checking_span.span == Qnil) return;
+
+       if (rb_str_equal(active_span.trace_id, checking_span.trace_id) == Qfalse) break;
+
+       local_root_span = checking_span;
+     }
+
+     // Convert the span ids into uint64_t to match what the Datadog tracer does
+     trace_identifiers_result->span_id = otel_span_id_to_uint(active_span.span_id);
+     trace_identifiers_result->local_root_span_id = otel_span_id_to_uint(local_root_span.span_id);
+
+     if (trace_identifiers_result->span_id == 0 || trace_identifiers_result->local_root_span_id == 0) return;
+
+     trace_identifiers_result->valid = true;
+
+     if (!state->endpoint_collection_enabled) return;
+
+     VALUE root_span_type = rb_ivar_get(local_root_span.span, at_kind_id /* @kind */);
+     // We filter out spans that don't have `kind: :server`
+     if (root_span_type == Qnil || !RB_TYPE_P(root_span_type, T_SYMBOL) || SYM2ID(root_span_type) != server_id) return;
+
+     VALUE trace_resource = rb_ivar_get(local_root_span.span, at_name_id /* @name */);
+     if (!RB_TYPE_P(trace_resource, T_STRING)) return;
+
+     trace_identifiers_result->trace_endpoint = trace_resource;
+   }
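
Editor's note: the backwards walk above is easier to follow without the Ruby C API in the way. Below is a minimal standalone sketch (not part of this diff) that models context storage entries as plain C structs; the struct, field names, and data are illustrative only.

```c
#include <stdio.h>
#include <string.h>

// Illustrative stand-in for an OpenTelemetry context entry (hypothetical names).
typedef struct { const char *trace_id; const char *span_id; } example_span;

int main(void) {
  // Context storage, oldest first; the last entry holds the active span.
  example_span storage[] = {
    {.trace_id = "trace-a", .span_id = "a-root"},
    {.trace_id = "trace-b", .span_id = "b-root"},       // earliest entry for trace-b
    {.trace_id = "trace-b", .span_id = "b-child"},
    {.trace_id = "trace-b", .span_id = "b-grandchild"}, // active span
  };
  int active = (int) (sizeof(storage) / sizeof(storage[0])) - 1;

  // Walk backwards while the trace id still matches the active span's trace id;
  // the earliest matching entry becomes the local root span.
  example_span local_root = storage[active];
  for (int i = active - 1; i >= 0; i--) {
    if (strcmp(storage[i].trace_id, storage[active].trace_id) != 0) break;
    local_root = storage[i];
  }

  printf("span_id=%s local_root_span_id=%s\n", storage[active].span_id, local_root.span_id);
  // => span_id=b-grandchild local_root_span_id=b-root
  return 0;
}
```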
+
+   static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key) {
+     struct otel_span failed = {.span = Qnil, .span_id = Qnil, .trace_id = Qnil};
+
+     if (otel_context == Qnil) return failed;
+
+     VALUE context_entries = rb_ivar_get(otel_context, at_entries_id /* @entries */);
+     if (context_entries == Qnil || !RB_TYPE_P(context_entries, T_HASH)) return failed;
+
+     // If it exists, context_entries is expected to be a Hash[OpenTelemetry::Context::Key, OpenTelemetry::Trace::Span]
+     VALUE span = rb_hash_lookup(context_entries, otel_current_span_key);
+     if (span == Qnil) return failed;
+
+     // If it exists, span_context is expected to be an OpenTelemetry::Trace::SpanContext (don't confuse it with OpenTelemetry::Context)
+     VALUE span_context = rb_ivar_get(span, at_context_id /* @context */);
+     if (span_context == Qnil) return failed;
+
+     VALUE span_id = rb_ivar_get(span_context, at_span_id_id /* @span_id */);
+     VALUE trace_id = rb_ivar_get(span_context, at_trace_id_id /* @trace_id */);
+     if (span_id == Qnil || trace_id == Qnil || !RB_TYPE_P(span_id, T_STRING) || !RB_TYPE_P(trace_id, T_STRING)) return failed;
+
+     return (struct otel_span) {.span = span, .span_id = span_id, .trace_id = trace_id};
+   }
+
+   // Otel span ids are represented as a big-endian 8-byte string
+   static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
+     if (!RB_TYPE_P(otel_span_id, T_STRING) || RSTRING_LEN(otel_span_id) != 8) { return 0; }
+
+     unsigned char *span_bytes = (unsigned char*) StringValuePtr(otel_span_id);
+
+     return \
+       ((uint64_t)span_bytes[0] << 56) |
+       ((uint64_t)span_bytes[1] << 48) |
+       ((uint64_t)span_bytes[2] << 40) |
+       ((uint64_t)span_bytes[3] << 32) |
+       ((uint64_t)span_bytes[4] << 24) |
+       ((uint64_t)span_bytes[5] << 16) |
+       ((uint64_t)span_bytes[6] << 8) |
+       ((uint64_t)span_bytes[7]);
+   }
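
Editor's note: the shift-and-or chain above is a plain big-endian decode of 8 bytes into a uint64_t. A standalone check (not part of this diff; the helper name and sample bytes are made up) of what it computes:

```c
#include <stdint.h>
#include <stdio.h>

// Same big-endian 8-byte decode as otel_span_id_to_uint, minus the Ruby type checks.
static uint64_t be_bytes_to_uint64(const unsigned char bytes[8]) {
  uint64_t result = 0;
  for (int i = 0; i < 8; i++) result = (result << 8) | bytes[i];
  return result;
}

int main(void) {
  // A span id of "\x00\x00\x00\x00\x00\x00\x04\xD2" should decode to 1234 (0x04D2).
  const unsigned char example_span_id[8] = {0, 0, 0, 0, 0, 0, 0x04, 0xD2};
  printf("%llu\n", (unsigned long long) be_bytes_to_uint64(example_span_id)); // => 1234
  return 0;
}
```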
+
+   #ifndef NO_GVL_INSTRUMENTATION
+   // This function can get called from outside the GVL and even on non-main Ractors
+   void thread_context_collector_on_gvl_waiting(gvl_profiling_thread thread) {
+     // Because this function gets called from a thread that is NOT holding the GVL, we avoid touching the
+     // per-thread context directly.
+     //
+     // Instead, we ask Ruby to hold the data we need in Ruby's own special per-thread context area
+     // that's thread-safe and built for this kind of use.
+     //
+     // Also, this function can get called on the non-main Ractor. We deal with this by checking if the value in the context
+     // is non-zero, since only `initialize_context` ever sets the value from 0 to non-zero for threads it sees.
+     intptr_t thread_being_profiled = gvl_profiling_state_get(thread);
+     if (!thread_being_profiled) return;
+
+     long current_monotonic_wall_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+     if (current_monotonic_wall_time_ns <= 0 || current_monotonic_wall_time_ns > GVL_WAITING_ENABLED_EMPTY) return;
+
+     gvl_profiling_state_set(thread, current_monotonic_wall_time_ns);
+   }
+
+   // This function can get called from outside the GVL and even on non-main Ractors
+   __attribute__((warn_unused_result))
+   bool thread_context_collector_on_gvl_running_with_threshold(gvl_profiling_thread thread, uint32_t waiting_for_gvl_threshold_ns) {
+     intptr_t gvl_waiting_at = gvl_profiling_state_get(thread);
+
+     // Thread was not being profiled / not waiting on gvl
+     if (gvl_waiting_at == 0 || gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) return false;
+
+     // @ivoanjo: I'm not sure if this can happen -- it means we should've sampled already but haven't gotten the chance yet?
+     if (gvl_waiting_at < 0) return true;
+
+     long waiting_for_gvl_duration_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - gvl_waiting_at;
+
+     bool should_sample = waiting_for_gvl_duration_ns >= waiting_for_gvl_threshold_ns;
+
+     if (should_sample) {
+       // We flip gvl_waiting_at to negative to mark that the thread is now running and no longer waiting
+       intptr_t gvl_waiting_at_is_now_running = -gvl_waiting_at;
+
+       gvl_profiling_state_set(thread, gvl_waiting_at_is_now_running);
+     } else {
+       // We decided not to sample. Let's mark the thread back to the initial "enabled but empty" state
+       gvl_profiling_state_set(thread, GVL_WAITING_ENABLED_EMPTY);
+     }
+
+     return should_sample;
+   }
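
Editor's note: the per-thread gvl profiling state used above is a small state machine packed into a single intptr_t: 0 means "not profiled", the GVL_WAITING_ENABLED_EMPTY sentinel means "profiled but not waiting", a positive value is the timestamp the wait started, and a negative value marks "wait ended, sample pending". The standalone sketch below (not part of this diff; the sentinel value and names are illustrative) mirrors the transition the function above performs on a plain intptr_t:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Illustrative stand-in for the real sentinel (the actual value lives in
// gvl_profiling_helper.h, which is not shown in this part of the diff).
#define EXAMPLE_WAITING_ENABLED_EMPTY INTPTR_MAX

// Mirrors thread_context_collector_on_gvl_running_with_threshold, operating on a
// plain intptr_t instead of the per-thread Ruby storage.
static bool on_gvl_running(intptr_t *state, long now_ns, uint32_t threshold_ns) {
  intptr_t gvl_waiting_at = *state;
  if (gvl_waiting_at == 0 || gvl_waiting_at == EXAMPLE_WAITING_ENABLED_EMPTY) return false;
  if (gvl_waiting_at < 0) return true; // Wait already ended; sample still pending

  bool should_sample = (now_ns - gvl_waiting_at) >= threshold_ns;
  // Negative timestamp == "wait ended, sample me"; sentinel == "profiled, not waiting"
  *state = should_sample ? -gvl_waiting_at : EXAMPLE_WAITING_ENABLED_EMPTY;
  return should_sample;
}

int main(void) {
  intptr_t state = 1000; // waiting since t=1000ns
  printf("%d\n", on_gvl_running(&state, 1000 + 20000, 10000)); // 1: waited 20us >= 10us
  printf("%ld\n", (long) state);                               // -1000: wait just ended
  return 0;
}
```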
+
+   __attribute__((warn_unused_result))
+   bool thread_context_collector_on_gvl_running(gvl_profiling_thread thread) {
+     return thread_context_collector_on_gvl_running_with_threshold(thread, global_waiting_for_gvl_threshold_ns);
+   }
+
+   // Why does this method need to exist?
+   //
+   // You may be surprised to see that if we never call this function (from cpu_and_wall_time_worker), Waiting for GVL
+   // samples will still show up.
+   // This is because regular cpu/wall-time samples also use `update_metrics_and_sample` which will do the right thing
+   // and push "Waiting for GVL" samples as needed.
+   //
+   // The reason this method needs to exist and be called very shortly after thread_context_collector_on_gvl_running
+   // returns true is to ensure accuracy of both the timing and stack for the Waiting for GVL sample.
+   //
+   // Timing:
+   // Because we currently only record the timestamp when the Waiting for GVL started and not when the Waiting for GVL ended,
+   // we rely on pushing a sample as soon as possible when the Waiting for GVL ends so that the timestamp of the sample
+   // actually matches when we stopped waiting.
+   //
+   // Stack:
+   // If the thread starts working without the end of the Waiting for GVL sample, then by the time the thread is sampled
+   // via the regular cpu/wall-time samples mechanism, the stack can be inaccurate (e.g. it does not correctly pinpoint
+   // where the waiting happened).
+   //
+   // Arguably, the last sample after Waiting for GVL ended (when gvl_waiting_at < 0) should always come from this method
+   // and not a regular cpu/wall-time sample, BUT since all of these things are happening in parallel/concurrently I suspect
+   // it's possible for a regular sample to kick in just before this one.
+   //
+   // ---
+   //
+   // NOTE: In normal use, current_thread is expected to be == rb_thread_current(); the `current_thread` parameter only
+   // exists to enable testing.
+   VALUE thread_context_collector_sample_after_gvl_running_with_thread(VALUE self_instance, VALUE current_thread) {
+     struct thread_context_collector_state *state;
+     TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+     if (!state->timeline_enabled) rb_raise(rb_eRuntimeError, "GVL profiling requires timeline to be enabled");
+
+     intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread);
+
+     if (gvl_waiting_at >= 0) {
+       // @ivoanjo: I'm not sure if this can ever happen. This means that we're not on the same thread
+       // that ran `thread_context_collector_on_gvl_running` and made the decision to sample, OR a regular sample was
+       // triggered ahead of us.
+       // We do nothing in this case.
+       return Qfalse;
+     }
+
+     struct per_thread_context *thread_context = get_or_create_context_for(current_thread, state);
+
+     // We don't actually account for cpu-time during Waiting for GVL. BUT, we may choose to push an
+     // extra sample to represent the period prior to Waiting for GVL. To support that, we retrieve the current
+     // cpu-time of the thread and let `update_metrics_and_sample` decide what to do with it.
+     long cpu_time_for_thread = cpu_time_now_ns(thread_context);
+
+     // TODO: Should we update the dynamic sampling rate overhead tracking with this sample as well?
+
+     update_metrics_and_sample(
+       state,
+       /* thread_being_sampled: */ current_thread,
+       /* stack_from_thread: */ current_thread,
+       thread_context,
+       thread_context->sampling_buffer,
+       cpu_time_for_thread,
+       monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
+     );
+
+     return Qtrue; // To allow this to be called from rb_rescue2
+   }
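
Editor's note: the Qtrue/Qfalse returns make this function usable as the protected body of rb_rescue2 (a real Ruby C API call; the function pointer types below match Ruby 2.7+). The wrapper and handler below are a hypothetical sketch of what a caller could look like, not code from this diff:

```c
#include <ruby.h>

// Declared in collectors_thread_context.h (assumed visible to the caller).
VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance);

// Hypothetical rescue handler: swallow any exception raised while sampling so it
// doesn't propagate into the calling thread.
static VALUE swallow_exception(VALUE data, VALUE exception) {
  (void) data; (void) exception;
  return Qfalse;
}

// Hypothetical wrapper: sample, but never let a Ruby exception escape.
static VALUE sample_after_gvl_running_safely(VALUE collector_instance) {
  return rb_rescue2(
    thread_context_collector_sample_after_gvl_running, collector_instance,
    swallow_exception, Qnil,
    rb_eException, (VALUE) 0 // Vararg list of exception classes, terminated by 0
  );
}
```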
+
+   VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance) {
+     return thread_context_collector_sample_after_gvl_running_with_thread(self_instance, rb_thread_current());
+   }
+
+   // This method is intended to be called from update_metrics_and_sample. It exists to handle extra sampling steps we
+   // need to take when sampling cpu/wall-time for a thread that's in the "Waiting for GVL" state.
+   __attribute__((warn_unused_result))
+   static bool handle_gvl_waiting(
+     struct thread_context_collector_state *state,
+     VALUE thread_being_sampled,
+     VALUE stack_from_thread,
+     struct per_thread_context *thread_context,
+     sampling_buffer* sampling_buffer,
+     long current_cpu_time_ns
+   ) {
+     intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(thread_being_sampled);
+
+     bool is_gvl_waiting_state = gvl_waiting_at != 0 && gvl_waiting_at != GVL_WAITING_ENABLED_EMPTY;
+
+     if (!is_gvl_waiting_state) return false;
+
+     // We can be in one of 2 situations here:
+     //
+     // 1. The current sample is the first one after we entered the "Waiting for GVL" state
+     //    (wall_time_at_previous_sample_ns < abs(gvl_waiting_at))
+     //
+     //                            time ─────►
+     //   ...──────────────┬───────────────────...
+     //     Other state    │  Waiting for GVL
+     //   ...──────────────┴───────────────────...
+     //            ▲                           ▲
+     //            └─ Previous sample          └─ Regular sample (caller)
+     //
+     // In this case, we'll want to push two samples: a) one for the current time (handled by the caller), b) an extra
+     // sample to represent the remaining cpu/wall time before the "Waiting for GVL" started:
+     //
+     //                            time ─────►
+     //   ...──────────────┬───────────────────...
+     //     Other state    │  Waiting for GVL
+     //   ...──────────────┴───────────────────...
+     //         ▲          ▲                   ▲
+     //         └─ Prev... └─ Extra sample     └─ Regular sample (caller)
+     //
+     // 2. The current sample is the n-th one after we entered the "Waiting for GVL" state
+     //    (wall_time_at_previous_sample_ns > abs(gvl_waiting_at))
+     //
+     //                            time ─────►
+     //   ...──────────────┬───────────────────────────────────────────────...
+     //     Other state    │                Waiting for GVL
+     //   ...──────────────┴───────────────────────────────────────────────...
+     //            ▲                       ▲                               ▲
+     //            └─ Previous sample      └─ Previous sample              └─ Regular sample (caller)
+     //
+     // In this case, we just report back to the caller that the thread is in the "Waiting for GVL" state.
+     //
+     // ---
+     //
+     // Overall, gvl_waiting_at will be > 0 if still in the "Waiting for GVL" state and < 0 if we actually reached the
+     // end of the wait.
+     //
+     // It doesn't really matter if the thread is still waiting or just reached the end of the wait: each sample represents
+     // a snapshot at time ending now, so if the state finished, it just means the next sample will be a regular one.
+
+     if (gvl_waiting_at < 0) {
+       // Negative means the waiting for GVL just ended, so we clear the state so that subsequent samples no longer
+       // represent waiting
+       gvl_profiling_state_thread_object_set(thread_being_sampled, GVL_WAITING_ENABLED_EMPTY);
+     }
+
+     long gvl_waiting_started_wall_time_ns = labs(gvl_waiting_at);
+
+     if (thread_context->wall_time_at_previous_sample_ns < gvl_waiting_started_wall_time_ns) { // situation 1 above
+       long cpu_time_elapsed_ns = update_time_since_previous_sample(
+         &thread_context->cpu_time_at_previous_sample_ns,
+         current_cpu_time_ns,
+         thread_context->gc_tracking.cpu_time_at_start_ns,
+         IS_NOT_WALL_TIME
+       );
+
+       long duration_until_start_of_gvl_waiting_ns = update_time_since_previous_sample(
+         &thread_context->wall_time_at_previous_sample_ns,
+         gvl_waiting_started_wall_time_ns,
+         INVALID_TIME,
+         IS_WALL_TIME
+       );
+
+       // Push extra sample
+       trigger_sample_for_thread(
+         state,
+         thread_being_sampled,
+         stack_from_thread,
+         thread_context,
+         sampling_buffer,
+         (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = duration_until_start_of_gvl_waiting_ns},
+         gvl_waiting_started_wall_time_ns,
+         NULL,
+         NULL,
+         false // This is the extra sample before the wait began; only the next sample will be in the gvl waiting state
+       );
+     }
+
+     return true;
+   }
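
Editor's note: a worked numeric instance of situation 1 above (not part of this diff; all timestamps are made up). The arithmetic mirrors how the wall time gets split between the extra sample and the caller's regular sample:

```c
#include <stdio.h>

int main(void) {
  // Hypothetical timeline, in ns: previous sample at t=100, the thread started
  // waiting for the GVL at t=150, and the regular sample fires at t=200.
  long wall_time_at_previous_sample_ns = 100;
  long gvl_waiting_started_wall_time_ns = 150;
  long now_ns = 200;

  // Situation 1 applies: the previous sample predates the start of the wait.
  if (wall_time_at_previous_sample_ns < gvl_waiting_started_wall_time_ns) {
    // Extra sample covers [previous sample, start of wait) => 50ns of "Other state"
    long extra_sample_wall_time_ns = gvl_waiting_started_wall_time_ns - wall_time_at_previous_sample_ns;
    // The caller's regular sample covers [start of wait, now) => 50ns of Waiting for GVL
    long regular_sample_wall_time_ns = now_ns - gvl_waiting_started_wall_time_ns;
    printf("extra=%ldns regular=%ldns\n", extra_sample_wall_time_ns, regular_sample_wall_time_ns);
  }
  return 0;
}
```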
+
+   static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread) {
+     ENFORCE_THREAD(thread);
+
+     thread_context_collector_on_gvl_waiting(thread_from_thread_object(thread));
+     return Qnil;
+   }
+
+   static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread) {
+     ENFORCE_THREAD(thread);
+
+     intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(thread);
+     return LONG2NUM(gvl_waiting_at);
+   }
+
+   static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread) {
+     ENFORCE_THREAD(thread);
+
+     return thread_context_collector_on_gvl_running(thread_from_thread_object(thread)) ? Qtrue : Qfalse;
+   }
+
+   static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread) {
+     ENFORCE_THREAD(thread);
+
+     return thread_context_collector_sample_after_gvl_running_with_thread(collector_instance, thread);
+   }
+
+   static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns) {
+     ENFORCE_THREAD(thread);
+
+     struct thread_context_collector_state *state;
+     TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+     struct per_thread_context *thread_context = get_context_for(thread, state);
+     if (thread_context == NULL) rb_raise(rb_eArgError, "Unexpected: This method cannot be used unless the per-thread context for the thread already exists");
+
+     thread_context->cpu_time_at_previous_sample_ns += NUM2LONG(delta_ns);
+
+     return Qtrue;
+   }
+
+   #else
+   static bool handle_gvl_waiting(
+     DDTRACE_UNUSED struct thread_context_collector_state *state,
+     DDTRACE_UNUSED VALUE thread_being_sampled,
+     DDTRACE_UNUSED VALUE stack_from_thread,
+     DDTRACE_UNUSED struct per_thread_context *thread_context,
+     DDTRACE_UNUSED sampling_buffer* sampling_buffer,
+     DDTRACE_UNUSED long current_cpu_time_ns
+   ) { return false; }
+   #endif // NO_GVL_INSTRUMENTATION