datadog 2.4.0 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +28 -2
  3. data/ext/datadog_profiling_native_extension/NativeExtensionDesign.md +3 -3
  4. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +57 -18
  5. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +93 -106
  6. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +8 -2
  7. data/ext/datadog_profiling_native_extension/extconf.rb +8 -8
  8. data/ext/datadog_profiling_native_extension/heap_recorder.c +174 -28
  9. data/ext/datadog_profiling_native_extension/heap_recorder.h +11 -0
  10. data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +1 -1
  11. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +1 -1
  12. data/ext/datadog_profiling_native_extension/ruby_helpers.c +14 -11
  13. data/ext/datadog_profiling_native_extension/stack_recorder.c +58 -22
  14. data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
  15. data/ext/libdatadog_extconf_helpers.rb +1 -1
  16. data/lib/datadog/appsec/configuration/settings.rb +8 -0
  17. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +1 -5
  18. data/lib/datadog/appsec/contrib/graphql/reactive/multiplex.rb +7 -20
  19. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +9 -15
  20. data/lib/datadog/appsec/contrib/rack/reactive/request.rb +6 -18
  21. data/lib/datadog/appsec/contrib/rack/reactive/request_body.rb +7 -20
  22. data/lib/datadog/appsec/contrib/rack/reactive/response.rb +5 -18
  23. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +3 -1
  24. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +3 -5
  25. data/lib/datadog/appsec/contrib/rails/reactive/action.rb +5 -18
  26. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +6 -10
  27. data/lib/datadog/appsec/contrib/sinatra/reactive/routed.rb +7 -20
  28. data/lib/datadog/appsec/event.rb +24 -0
  29. data/lib/datadog/appsec/ext.rb +4 -0
  30. data/lib/datadog/appsec/monitor/gateway/watcher.rb +3 -5
  31. data/lib/datadog/appsec/monitor/reactive/set_user.rb +7 -20
  32. data/lib/datadog/appsec/processor/context.rb +109 -0
  33. data/lib/datadog/appsec/processor.rb +7 -71
  34. data/lib/datadog/appsec/scope.rb +1 -4
  35. data/lib/datadog/appsec/utils/trace_operation.rb +15 -0
  36. data/lib/datadog/appsec/utils.rb +2 -0
  37. data/lib/datadog/appsec.rb +1 -0
  38. data/lib/datadog/core/configuration/agent_settings_resolver.rb +26 -25
  39. data/lib/datadog/core/configuration/settings.rb +12 -0
  40. data/lib/datadog/core/configuration.rb +1 -3
  41. data/lib/datadog/core/crashtracking/component.rb +8 -5
  42. data/lib/datadog/core/environment/yjit.rb +5 -0
  43. data/lib/datadog/core/remote/transport/http.rb +5 -0
  44. data/lib/datadog/core/remote/worker.rb +1 -1
  45. data/lib/datadog/core/runtime/ext.rb +1 -0
  46. data/lib/datadog/core/runtime/metrics.rb +4 -0
  47. data/lib/datadog/core/semaphore.rb +35 -0
  48. data/lib/datadog/core/telemetry/logging.rb +10 -10
  49. data/lib/datadog/core/transport/ext.rb +1 -0
  50. data/lib/datadog/core/workers/async.rb +1 -1
  51. data/lib/datadog/di/code_tracker.rb +11 -13
  52. data/lib/datadog/di/instrumenter.rb +301 -0
  53. data/lib/datadog/di/probe.rb +29 -0
  54. data/lib/datadog/di/probe_builder.rb +7 -1
  55. data/lib/datadog/di/probe_notification_builder.rb +207 -0
  56. data/lib/datadog/di/probe_notifier_worker.rb +244 -0
  57. data/lib/datadog/di/serializer.rb +23 -1
  58. data/lib/datadog/di/transport.rb +67 -0
  59. data/lib/datadog/di/utils.rb +39 -0
  60. data/lib/datadog/di.rb +43 -0
  61. data/lib/datadog/profiling/collectors/thread_context.rb +9 -11
  62. data/lib/datadog/profiling/component.rb +1 -0
  63. data/lib/datadog/profiling/stack_recorder.rb +37 -9
  64. data/lib/datadog/tracing/component.rb +13 -0
  65. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +4 -0
  66. data/lib/datadog/tracing/contrib/excon/middleware.rb +3 -0
  67. data/lib/datadog/tracing/contrib/faraday/middleware.rb +3 -0
  68. data/lib/datadog/tracing/contrib/grape/endpoint.rb +5 -2
  69. data/lib/datadog/tracing/contrib/http/circuit_breaker.rb +9 -0
  70. data/lib/datadog/tracing/contrib/http/instrumentation.rb +4 -0
  71. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +4 -0
  72. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +4 -0
  73. data/lib/datadog/tracing/contrib/rails/runner.rb +1 -1
  74. data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +3 -0
  75. data/lib/datadog/tracing/sampling/rule_sampler.rb +6 -4
  76. data/lib/datadog/tracing/tracer.rb +15 -10
  77. data/lib/datadog/tracing/transport/http.rb +4 -0
  78. data/lib/datadog/tracing/workers.rb +1 -1
  79. data/lib/datadog/tracing/writer.rb +26 -28
  80. data/lib/datadog/version.rb +1 -1
  81. metadata +22 -14
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba1dc00d9afe70c54ea0918673e832793bb7c7a629618dc3d36e066fd121ea56
4
- data.tar.gz: 9818bf0ba8dbd989451b7b0492efe848a9cdf97239acac3637aaa9d17937b541
3
+ metadata.gz: c1aa5227f68791f80fe6deb7f5a8b53e5adff15a7e443f78788f827888945c83
4
+ data.tar.gz: 7931209b3685744b1aa17f709766d9a5da91b5b34987a8ac975032ebee0a7caa
5
5
  SHA512:
6
- metadata.gz: 419ac80497f45159d199d2b9d3b16c70c46903d65d828c6e5ab52029758c9b38e11b243e112d52e3e9d45f05d5e1acf814bcb3f7582496f1e5be120c13f0f685
7
- data.tar.gz: ce8f36e974b194fbdedd40fdf634e49115c0bd6befb350092dfc4cb94185a173a7d3325b6adc9cf50e060b05a4968ca7858a52082748a4979090a7eb32aa96ba
6
+ metadata.gz: '081740cc882d48f62d18f8a823696339346ad574f4ac5c67cd64430ee37f347ec33d6c536e3790f3697ea3a6cb72f2275a6b5b14808e48a4246cc20786fcc20b'
7
+ data.tar.gz: c51018d1655343c234e45d72ab5dc9a27ebc465e253462eec483faf862e3c6fde0bbba6d4f3c1ca6656a6d7c33525a5942481a69beee7a7d889cbbe75c900ff7
data/CHANGELOG.md CHANGED
@@ -2,6 +2,24 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [2.5.0] - 2024-11-05
6
+
7
+ ### Added
8
+
9
+ * Performance: Profiling: Add setting to lower heap profiling memory use/latency by cleaning up young objects after Ruby GC ([#4020][])
10
+
11
+ ### Changed
12
+
13
+ * Core: Replace the `debase-ruby_core_source` gem with the `datadog-ruby_core_source` ([#4014][])
14
+ * Core: Upgrade to `libdatadog` 13.1 ([#3997][])
15
+
16
+ ### Fixed
17
+
18
+ * Fix `undefined method` error for Rails runner ([#3996][])
19
+ * Apply version tag only to spans that use the global/default service name ([#4027][])
20
+ * Ensure UDS takes precedence over HTTP when both Agent configurations defined ([#4024][])
21
+ * Remove duplicate leading slash in resource name for Grape routes ([#4033][])
22
+
5
23
  ## [2.4.0] - 2024-10-11
6
24
 
7
25
  ### Added
@@ -2985,7 +3003,8 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
2985
3003
  Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
2986
3004
 
2987
3005
 
2988
- [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.4.0...master
3006
+ [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.5.0...master
3007
+ [2.5.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.4.0...v2.5.0
2989
3008
  [2.4.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.3.0...v2.4.0
2990
3009
  [2.3.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.2.0...v2.3.0
2991
3010
  [2.2.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.1.0...v2.2.0
@@ -4419,6 +4438,13 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4419
4438
  [#3969]: https://github.com/DataDog/dd-trace-rb/issues/3969
4420
4439
  [#3970]: https://github.com/DataDog/dd-trace-rb/issues/3970
4421
4440
  [#3984]: https://github.com/DataDog/dd-trace-rb/issues/3984
4441
+ [#3996]: https://github.com/DataDog/dd-trace-rb/issues/3996
4442
+ [#3997]: https://github.com/DataDog/dd-trace-rb/issues/3997
4443
+ [#4014]: https://github.com/DataDog/dd-trace-rb/issues/4014
4444
+ [#4020]: https://github.com/DataDog/dd-trace-rb/issues/4020
4445
+ [#4024]: https://github.com/DataDog/dd-trace-rb/issues/4024
4446
+ [#4027]: https://github.com/DataDog/dd-trace-rb/issues/4027
4447
+ [#4033]: https://github.com/DataDog/dd-trace-rb/issues/4033
4422
4448
  [@AdrianLC]: https://github.com/AdrianLC
4423
4449
  [@Azure7111]: https://github.com/Azure7111
4424
4450
  [@BabyGroot]: https://github.com/BabyGroot
@@ -4570,4 +4596,4 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4570
4596
  [@y-yagi]: https://github.com/y-yagi
4571
4597
  [@yujideveloper]: https://github.com/yujideveloper
4572
4598
  [@yukimurasawa]: https://github.com/yukimurasawa
4573
- [@zachmccormick]: https://github.com/zachmccormick
4599
+ [@zachmccormick]: https://github.com/zachmccormick
@@ -68,7 +68,7 @@ internal types, structures and functions).
68
68
  Because these private header files are not included in regular Ruby installations, we have two different workarounds:
69
69
 
70
70
  1. for Ruby versions 2.6 to 3.2 we make use of the Ruby private MJIT header
71
- 2. for Ruby versions < 2.6 and > 3.2 we make use of the `debase-ruby_core_source` gem
71
+ 2. for Ruby versions < 2.6 and > 3.2 we make use of the `datadog-ruby_core_source` gem
72
72
 
73
73
  Functions which make use of these headers are defined in the <private_vm_api_access.c> file.
74
74
 
@@ -91,9 +91,9 @@ version. e.g. `rb_mjit_min_header-2.7.4.h`.
91
91
 
92
92
  This header was removed in Ruby 3.3.
93
93
 
94
- ### Approach 2: Using the `debase-ruby_core_source` gem
94
+ ### Approach 2: Using the `datadog-ruby_core_source` gem
95
95
 
96
- The [`debase-ruby_core_source`](https://github.com/ruby-debug/debase-ruby_core_source) contains almost no code;
96
+ The [`datadog-ruby_core_source`](https://github.com/DataDog/datadog-ruby_core_source) contains almost no code;
97
97
  instead, it just contains per-Ruby-version folders with the private VM headers (`.h`) files for that version.
98
98
 
99
99
  Thus, even though a regular Ruby installation does not include these files, we can access the copy inside this gem.
@@ -17,13 +17,6 @@
17
17
  #include "setup_signal_handler.h"
18
18
  #include "time_helpers.h"
19
19
 
20
- #define ERR_CLOCK_FAIL "failed to get clock time"
21
-
22
- // Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
23
- // See https://docs.google.com/document/d/1lWLB714wlLBBq6T4xZyAc4a5wtWhSmr4-hgiPKeErlA/edit#heading=h.ugp0zxcj5iqh
24
- // (Datadog-only link) for research backing the choice of this value.
25
- unsigned int MAX_ALLOC_WEIGHT = 10000;
26
-
27
20
  // Used to trigger the execution of Collectors::ThreadState, which implements all of the sampling logic
28
21
  // itself; this class only implements the "when to do it" part.
29
22
  //
@@ -83,6 +76,13 @@ unsigned int MAX_ALLOC_WEIGHT = 10000;
83
76
  //
84
77
  // ---
85
78
 
79
+ #define ERR_CLOCK_FAIL "failed to get clock time"
80
+
81
+ // Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
82
+ // See https://docs.google.com/document/d/1lWLB714wlLBBq6T4xZyAc4a5wtWhSmr4-hgiPKeErlA/edit#heading=h.ugp0zxcj5iqh
83
+ // (Datadog-only link) for research backing the choice of this value.
84
+ unsigned int MAX_ALLOC_WEIGHT = 10000;
85
+
86
86
  #ifndef NO_POSTPONED_TRIGGER
87
87
  // Used to call the rb_postponed_job_trigger from Ruby 3.3+. These get initialized in
88
88
  // `collectors_cpu_and_wall_time_worker_init` below and always get reused after that.
@@ -180,6 +180,12 @@ struct cpu_and_wall_time_worker_state {
180
180
  // # GVL profiling stats
181
181
  // How many times we triggered the after_gvl_running sampling
182
182
  unsigned int after_gvl_running;
183
+ // How many times we skipped the after_gvl_running sampling
184
+ unsigned int gvl_dont_sample;
185
+ // Min/max/total wall-time spent on gvl sampling
186
+ uint64_t gvl_sampling_time_ns_min;
187
+ uint64_t gvl_sampling_time_ns_max;
188
+ uint64_t gvl_sampling_time_ns_total;
183
189
  } stats;
184
190
  };
185
191
 
@@ -230,6 +236,7 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self);
230
236
  static void on_gvl_event(rb_event_flag_t event_id, const rb_internal_thread_event_data_t *event_data, DDTRACE_UNUSED void *_unused);
231
237
  static void after_gvl_running_from_postponed_job(DDTRACE_UNUSED void *_unused);
232
238
  #endif
239
+ static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance);
233
240
  static VALUE _native_gvl_profiling_hook_active(DDTRACE_UNUSED VALUE self, VALUE instance);
234
241
 
235
242
  // We're using `on_newobj_event` function with `rb_add_event_hook2`, which requires in its public signature a function
@@ -315,8 +322,6 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
315
322
  rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
316
323
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_hold_signals", _native_hold_signals, 0);
317
324
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_resume_signals", _native_resume_signals, 0);
318
- // TODO: Remove `_native_is_running` from `testing_module` (should be in class) once `prof-correctness` has been updated to not need it
319
- rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
320
325
  rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
321
326
  rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
322
327
  rb_define_singleton_method(testing_module, "_native_trigger_sample", _native_trigger_sample, 0);
@@ -1042,7 +1047,12 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
1042
1047
  ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
1043
1048
 
1044
1049
  // GVL profiling stats
1045
- ID2SYM(rb_intern("after_gvl_running")), /* => */ UINT2NUM(state->stats.after_gvl_running),
1050
+ ID2SYM(rb_intern("after_gvl_running")), /* => */ UINT2NUM(state->stats.after_gvl_running),
1051
+ ID2SYM(rb_intern("gvl_dont_sample")), /* => */ UINT2NUM(state->stats.gvl_dont_sample),
1052
+ ID2SYM(rb_intern("gvl_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
1053
+ ID2SYM(rb_intern("gvl_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_max, > 0, ULL2NUM),
1054
+ ID2SYM(rb_intern("gvl_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_total, > 0, ULL2NUM),
1055
+ ID2SYM(rb_intern("gvl_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.gvl_sampling_time_ns_total, state->stats.after_gvl_running),
1046
1056
  };
1047
1057
  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
1048
1058
  return stats_as_hash;
@@ -1080,8 +1090,10 @@ static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *s
1080
1090
  // Given the expected infrequency of resetting (~once per 60s profile) and the auxiliary/non-critical nature of these stats
1081
1091
  // this momentary loss of accuracy is deemed acceptable to keep overhead to a minimum.
1082
1092
  state->stats = (struct stats) {
1083
- .cpu_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
1084
- .allocation_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
1093
+ // All these values are initialized to their highest value possible since we always take the min between existing and latest sample
1094
+ .cpu_sampling_time_ns_min = UINT64_MAX,
1095
+ .allocation_sampling_time_ns_min = UINT64_MAX,
1096
+ .gvl_sampling_time_ns_min = UINT64_MAX,
1085
1097
  };
1086
1098
  }
1087
1099
 
@@ -1315,23 +1327,29 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {
1315
1327
  } else if (event_id == RUBY_INTERNAL_THREAD_EVENT_RESUMED) { /* running/runnable */
1316
1328
  // Interesting note: A RUBY_INTERNAL_THREAD_EVENT_RESUMED is guaranteed to be called with the GVL being acquired.
1317
1329
  // (And... I think target_thread will be == rb_thread_current()?)
1330
+ //
1318
1331
  But we're not sure if we're on the main Ractor yet. The thread context collector can actually help here:
1319
1332
  // it tags threads it's tracking, so if a thread is tagged then by definition we know that thread belongs to the main
1320
- // Ractor. Thus, if we really really wanted to access the state, we could do it after making sure we're on the correct Ractor.
1333
+ // Ractor. Thus, if we get a ON_GVL_RUNNING_UNKNOWN result we shouldn't touch any state, but otherwise we're good to go.
1321
1334
 
1322
1335
  #ifdef USE_GVL_PROFILING_3_2_WORKAROUNDS
1323
1336
  target_thread = gvl_profiling_state_maybe_initialize();
1324
1337
  #endif
1325
1338
 
1326
- bool should_sample = thread_context_collector_on_gvl_running(target_thread);
1339
+ on_gvl_running_result result = thread_context_collector_on_gvl_running(target_thread);
1327
1340
 
1328
- if (should_sample) {
1329
- // should_sample is only true if a thread belongs to the main Ractor, so we're good to go
1341
+ if (result == ON_GVL_RUNNING_SAMPLE) {
1330
1342
  #ifndef NO_POSTPONED_TRIGGER
1331
1343
  rb_postponed_job_trigger(after_gvl_running_from_postponed_job_handle);
1332
1344
  #else
1333
1345
  rb_postponed_job_register_one(0, after_gvl_running_from_postponed_job, NULL);
1334
1346
  #endif
1347
+ } else if (result == ON_GVL_RUNNING_DONT_SAMPLE) {
1348
+ struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
1349
+
1350
+ if (state == NULL) return; // This should not happen, but just in case...
1351
+
1352
+ state->stats.gvl_dont_sample++;
1335
1353
  }
1336
1354
  } else {
1337
1355
  // This is a very delicate time and it's hard for us to raise an exception so let's at least complain to stderr
@@ -1347,11 +1365,32 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {
1347
1365
 
1348
1366
  state->during_sample = true;
1349
1367
 
1350
- safely_call(thread_context_collector_sample_after_gvl_running, state->thread_context_collector_instance, state->self_instance);
1368
+ // Rescue against any exceptions that happen during sampling
1369
+ safely_call(rescued_after_gvl_running_from_postponed_job, state->self_instance, state->self_instance);
1370
+
1371
+ state->during_sample = false;
1372
+ }
1373
+
1374
+ static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance) {
1375
+ struct cpu_and_wall_time_worker_state *state;
1376
+ TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
1377
+
1378
+ long wall_time_ns_before_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
1379
+ thread_context_collector_sample_after_gvl_running(state->thread_context_collector_instance, rb_thread_current(), wall_time_ns_before_sample);
1380
+ long wall_time_ns_after_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
1381
+
1382
+ long delta_ns = wall_time_ns_after_sample - wall_time_ns_before_sample;
1383
+
1384
+ // Guard against wall-time going backwards, see https://github.com/DataDog/dd-trace-rb/pull/2336 for discussion.
1385
+ uint64_t sampling_time_ns = delta_ns < 0 ? 0 : delta_ns;
1386
+
1387
+ state->stats.gvl_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.gvl_sampling_time_ns_min);
1388
+ state->stats.gvl_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.gvl_sampling_time_ns_max);
1389
+ state->stats.gvl_sampling_time_ns_total += sampling_time_ns;
1351
1390
 
1352
1391
  state->stats.after_gvl_running++;
1353
1392
 
1354
- state->during_sample = false;
1393
+ return Qnil;
1355
1394
  }
1356
1395
 
1357
1396
  static VALUE _native_gvl_profiling_hook_active(DDTRACE_UNUSED VALUE self, VALUE instance) {
@@ -109,7 +109,7 @@ static ID otel_context_storage_id; // id of :__opentelemetry_context_storage__ i
109
109
  // and that'll be the one that last wrote this setting.
110
110
  static uint32_t global_waiting_for_gvl_threshold_ns = MILLIS_AS_NS(10);
111
111
 
112
- enum otel_context_enabled {otel_context_enabled_false, otel_context_enabled_only, otel_context_enabled_both};
112
+ typedef enum { OTEL_CONTEXT_ENABLED_FALSE, OTEL_CONTEXT_ENABLED_ONLY, OTEL_CONTEXT_ENABLED_BOTH } otel_context_enabled;
113
113
 
114
114
  // Contains state for a single ThreadContext instance
115
115
  struct thread_context_collector_state {
@@ -138,9 +138,7 @@ struct thread_context_collector_state {
138
138
  // Used to omit timestamps / timeline events from collected data
139
139
  bool timeline_enabled;
140
140
  // Used to control context collection
141
- enum otel_context_enabled otel_context_enabled;
142
- // Used to omit class information from collected allocation data
143
- bool allocation_type_enabled;
141
+ otel_context_enabled otel_context_enabled;
144
142
  // Used when calling monotonic_to_system_epoch_ns
145
143
  monotonic_to_system_epoch_state time_converter_state;
146
144
  // Used to identify the main thread, to give it a fallback name
@@ -204,18 +202,7 @@ static void thread_context_collector_typed_data_free(void *state_ptr);
204
202
  static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument);
205
203
  static int hash_map_per_thread_context_free_values(st_data_t _thread, st_data_t value_per_thread_context, st_data_t _argument);
206
204
  static VALUE _native_new(VALUE klass);
207
- static VALUE _native_initialize(
208
- VALUE self,
209
- VALUE collector_instance,
210
- VALUE recorder_instance,
211
- VALUE max_frames,
212
- VALUE tracer_context_key,
213
- VALUE endpoint_collection_enabled,
214
- VALUE timeline_enabled,
215
- VALUE waiting_for_gvl_threshold_ns,
216
- VALUE otel_context_enabled,
217
- VALUE allocation_type_enabled
218
- );
205
+ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
219
206
  static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE profiler_overhead_stack_thread);
220
207
  static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
221
208
  static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
@@ -312,7 +299,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
312
299
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
313
300
  rb_define_alloc_func(collectors_thread_context_class, _native_new);
314
301
 
315
- rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, 9);
302
+ rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, -1);
316
303
  rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1);
317
304
  rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
318
305
  rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2);
@@ -435,8 +422,7 @@ static VALUE _native_new(VALUE klass) {
435
422
  state->thread_list_buffer = thread_list_buffer;
436
423
  state->endpoint_collection_enabled = true;
437
424
  state->timeline_enabled = true;
438
- state->otel_context_enabled = otel_context_enabled_false;
439
- state->allocation_type_enabled = true;
425
+ state->otel_context_enabled = OTEL_CONTEXT_ENABLED_FALSE;
440
426
  state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
441
427
  VALUE main_thread = rb_thread_main();
442
428
  state->main_thread = main_thread;
@@ -456,26 +442,27 @@ static VALUE _native_new(VALUE klass) {
456
442
  return instance;
457
443
  }
458
444
 
459
- // TODO: Convert this to use options like CpuAndWallTimeWorker
460
- static VALUE _native_initialize(
461
- DDTRACE_UNUSED VALUE _self,
462
- VALUE collector_instance,
463
- VALUE recorder_instance,
464
- VALUE max_frames,
465
- VALUE tracer_context_key,
466
- VALUE endpoint_collection_enabled,
467
- VALUE timeline_enabled,
468
- VALUE waiting_for_gvl_threshold_ns,
469
- VALUE otel_context_enabled,
470
- VALUE allocation_type_enabled
471
- ) {
445
+ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) {
446
+ VALUE options;
447
+ rb_scan_args(argc, argv, "0:", &options);
448
+ if (options == Qnil) options = rb_hash_new();
449
+
450
+ VALUE self_instance = rb_hash_fetch(options, ID2SYM(rb_intern("self_instance")));
451
+ VALUE recorder_instance = rb_hash_fetch(options, ID2SYM(rb_intern("recorder")));
452
+ VALUE max_frames = rb_hash_fetch(options, ID2SYM(rb_intern("max_frames")));
453
+ VALUE tracer_context_key = rb_hash_fetch(options, ID2SYM(rb_intern("tracer_context_key")));
454
+ VALUE endpoint_collection_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("endpoint_collection_enabled")));
455
+ VALUE timeline_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("timeline_enabled")));
456
+ VALUE waiting_for_gvl_threshold_ns = rb_hash_fetch(options, ID2SYM(rb_intern("waiting_for_gvl_threshold_ns")));
457
+ VALUE otel_context_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("otel_context_enabled")));
458
+
459
+ ENFORCE_TYPE(max_frames, T_FIXNUM);
472
460
  ENFORCE_BOOLEAN(endpoint_collection_enabled);
473
461
  ENFORCE_BOOLEAN(timeline_enabled);
474
462
  ENFORCE_TYPE(waiting_for_gvl_threshold_ns, T_FIXNUM);
475
- ENFORCE_BOOLEAN(allocation_type_enabled);
476
463
 
477
464
  struct thread_context_collector_state *state;
478
- TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
465
+ TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
479
466
 
480
467
  // Update this when modifying state struct
481
468
  state->max_frames = sampling_buffer_check_max_frames(NUM2INT(max_frames));
@@ -485,15 +472,14 @@ static VALUE _native_initialize(
485
472
  state->endpoint_collection_enabled = (endpoint_collection_enabled == Qtrue);
486
473
  state->timeline_enabled = (timeline_enabled == Qtrue);
487
474
  if (otel_context_enabled == Qfalse || otel_context_enabled == Qnil) {
488
- state->otel_context_enabled = otel_context_enabled_false;
475
+ state->otel_context_enabled = OTEL_CONTEXT_ENABLED_FALSE;
489
476
  } else if (otel_context_enabled == ID2SYM(rb_intern("only"))) {
490
- state->otel_context_enabled = otel_context_enabled_only;
477
+ state->otel_context_enabled = OTEL_CONTEXT_ENABLED_ONLY;
491
478
  } else if (otel_context_enabled == ID2SYM(rb_intern("both"))) {
492
- state->otel_context_enabled = otel_context_enabled_both;
479
+ state->otel_context_enabled = OTEL_CONTEXT_ENABLED_BOTH;
493
480
  } else {
494
481
  rb_raise(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled);
495
482
  }
496
- state->allocation_type_enabled = (allocation_type_enabled == Qtrue);
497
483
 
498
484
  global_waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns);
499
485
 
@@ -829,6 +815,9 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
829
815
 
830
816
  state->stats.gc_samples++;
831
817
 
818
+ // Let recorder do any cleanup/updates it requires after a GC step.
819
+ recorder_after_gc_step(state->recorder_instance);
820
+
832
821
  // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
833
822
  return Qnil;
834
823
  }
@@ -885,7 +874,7 @@ static void trigger_sample_for_thread(
885
874
  struct trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil};
886
875
  trace_identifiers_for(state, thread, &trace_identifiers_result);
887
876
 
888
- if (!trace_identifiers_result.valid && state->otel_context_enabled != otel_context_enabled_false) {
877
+ if (!trace_identifiers_result.valid && state->otel_context_enabled != OTEL_CONTEXT_ENABLED_FALSE) {
889
878
  // If we couldn't get something with ddtrace, let's see if we can get some trace identifiers from opentelemetry directly
890
879
  otel_without_ddtrace_trace_identifiers_for(state, thread, &trace_identifiers_result);
891
880
  }
@@ -1111,7 +1100,6 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
1111
1100
  rb_str_concat(result, rb_sprintf(" endpoint_collection_enabled=%"PRIsVALUE, state->endpoint_collection_enabled ? Qtrue : Qfalse));
1112
1101
  rb_str_concat(result, rb_sprintf(" timeline_enabled=%"PRIsVALUE, state->timeline_enabled ? Qtrue : Qfalse));
1113
1102
  rb_str_concat(result, rb_sprintf(" otel_context_enabled=%d", state->otel_context_enabled));
1114
- rb_str_concat(result, rb_sprintf(" allocation_type_enabled=%"PRIsVALUE, state->allocation_type_enabled ? Qtrue : Qfalse));
1115
1103
  rb_str_concat(result, rb_sprintf(
1116
1104
  " time_converter_state={.system_epoch_ns_reference=%ld, .delta_to_epoch_ns=%ld}",
1117
1105
  state->time_converter_state.system_epoch_ns_reference,
@@ -1302,7 +1290,7 @@ static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_ins
1302
1290
 
1303
1291
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
1304
1292
  static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
1305
- if (state->otel_context_enabled == otel_context_enabled_only) return;
1293
+ if (state->otel_context_enabled == OTEL_CONTEXT_ENABLED_ONLY) return;
1306
1294
  if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
1307
1295
 
1308
1296
  VALUE current_context = rb_thread_local_aref(thread, state->tracer_context_key);
@@ -1415,62 +1403,61 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
1415
1403
  ddog_CharSlice *optional_class_name = NULL;
1416
1404
  char imemo_type[100];
1417
1405
 
1418
- if (state->allocation_type_enabled) {
1419
- optional_class_name = &class_name;
1420
-
1421
- if (
1422
- type == RUBY_T_OBJECT ||
1423
- type == RUBY_T_CLASS ||
1424
- type == RUBY_T_MODULE ||
1425
- type == RUBY_T_FLOAT ||
1426
- type == RUBY_T_STRING ||
1427
- type == RUBY_T_REGEXP ||
1428
- type == RUBY_T_ARRAY ||
1429
- type == RUBY_T_HASH ||
1430
- type == RUBY_T_STRUCT ||
1431
- type == RUBY_T_BIGNUM ||
1432
- type == RUBY_T_FILE ||
1433
- type == RUBY_T_DATA ||
1434
- type == RUBY_T_MATCH ||
1435
- type == RUBY_T_COMPLEX ||
1436
- type == RUBY_T_RATIONAL ||
1437
- type == RUBY_T_NIL ||
1438
- type == RUBY_T_TRUE ||
1439
- type == RUBY_T_FALSE ||
1440
- type == RUBY_T_SYMBOL ||
1441
- type == RUBY_T_FIXNUM
1442
- ) {
1443
- VALUE klass = rb_class_of(new_object);
1444
-
1445
- // Ruby sometimes plays a bit fast and loose with some of its internal objects, e.g.
1446
- // `rb_str_tmp_frozen_acquire` allocates a string with no class (klass=0).
1447
- // Thus, we need to make sure there's actually a class before getting its name.
1448
-
1449
- if (klass != 0) {
1450
- const char *name = rb_class2name(klass);
1451
- size_t name_length = name != NULL ? strlen(name) : 0;
1452
-
1453
- if (name_length > 0) {
1454
- class_name = (ddog_CharSlice) {.ptr = name, .len = name_length};
1455
- } else {
1456
- // @ivoanjo: I'm not sure this can ever happen, but just-in-case
1457
- class_name = ruby_value_type_to_class_name(type);
1458
- }
1406
+ optional_class_name = &class_name;
1407
+
1408
+ if (
1409
+ type == RUBY_T_OBJECT ||
1410
+ type == RUBY_T_CLASS ||
1411
+ type == RUBY_T_MODULE ||
1412
+ type == RUBY_T_FLOAT ||
1413
+ type == RUBY_T_STRING ||
1414
+ type == RUBY_T_REGEXP ||
1415
+ type == RUBY_T_ARRAY ||
1416
+ type == RUBY_T_HASH ||
1417
+ type == RUBY_T_STRUCT ||
1418
+ type == RUBY_T_BIGNUM ||
1419
+ type == RUBY_T_FILE ||
1420
+ type == RUBY_T_DATA ||
1421
+ type == RUBY_T_MATCH ||
1422
+ type == RUBY_T_COMPLEX ||
1423
+ type == RUBY_T_RATIONAL ||
1424
+ type == RUBY_T_NIL ||
1425
+ type == RUBY_T_TRUE ||
1426
+ type == RUBY_T_FALSE ||
1427
+ type == RUBY_T_SYMBOL ||
1428
+ type == RUBY_T_FIXNUM
1429
+ ) {
1430
+ VALUE klass = rb_class_of(new_object);
1431
+
1432
+ // Ruby sometimes plays a bit fast and loose with some of its internal objects, e.g.
1433
+ // `rb_str_tmp_frozen_acquire` allocates a string with no class (klass=0).
1434
+ // Thus, we need to make sure there's actually a class before getting its name.
1435
+
1436
+ if (klass != 0) {
1437
+ const char *name = rb_class2name(klass);
1438
+ size_t name_length = name != NULL ? strlen(name) : 0;
1439
+
1440
+ if (name_length > 0) {
1441
+ class_name = (ddog_CharSlice) {.ptr = name, .len = name_length};
1459
1442
  } else {
1460
- // Fallback for objects with no class
1443
+ // @ivoanjo: I'm not sure this can ever happen, but just-in-case
1461
1444
  class_name = ruby_value_type_to_class_name(type);
1462
1445
  }
1463
- } else if (type == RUBY_T_IMEMO) {
1464
- const char *imemo_string = imemo_kind(new_object);
1465
- if (imemo_string != NULL) {
1466
- snprintf(imemo_type, 100, "(VM Internal, T_IMEMO, %s)", imemo_string);
1467
- class_name = (ddog_CharSlice) {.ptr = imemo_type, .len = strlen(imemo_type)};
1468
- } else { // Ruby < 3
1469
- class_name = DDOG_CHARSLICE_C("(VM Internal, T_IMEMO)");
1470
- }
1471
1446
  } else {
1472
- class_name = ruby_vm_type; // For other weird internal things we just use the VM type
1447
+ // Fallback for objects with no class. Objects with no class are a way for the Ruby VM to mark them
1448
+ // as internal objects; see rb_objspace_internal_object_p for details.
1449
+ class_name = ruby_value_type_to_class_name(type);
1473
1450
  }
1451
+ } else if (type == RUBY_T_IMEMO) {
1452
+ const char *imemo_string = imemo_kind(new_object);
1453
+ if (imemo_string != NULL) {
1454
+ snprintf(imemo_type, 100, "(VM Internal, T_IMEMO, %s)", imemo_string);
1455
+ class_name = (ddog_CharSlice) {.ptr = imemo_type, .len = strlen(imemo_type)};
1456
+ } else { // Ruby < 3
1457
+ class_name = DDOG_CHARSLICE_C("(VM Internal, T_IMEMO)");
1458
+ }
1459
+ } else {
1460
+ class_name = ruby_vm_type; // For other weird internal things we just use the VM type
1474
1461
  }
1475
1462
 
1476
1463
  track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);
@@ -1763,14 +1750,14 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1763
1750
 
1764
1751
  // This function can get called from outside the GVL and even on non-main Ractors
1765
1752
  __attribute__((warn_unused_result))
1766
- bool thread_context_collector_on_gvl_running_with_threshold(gvl_profiling_thread thread, uint32_t waiting_for_gvl_threshold_ns) {
1753
+ on_gvl_running_result thread_context_collector_on_gvl_running_with_threshold(gvl_profiling_thread thread, uint32_t waiting_for_gvl_threshold_ns) {
1767
1754
  intptr_t gvl_waiting_at = gvl_profiling_state_get(thread);
1768
1755
 
1769
1756
  // Thread was not being profiled / not waiting on gvl
1770
- if (gvl_waiting_at == 0 || gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) return false;
1757
+ if (gvl_waiting_at == 0 || gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) return ON_GVL_RUNNING_UNKNOWN;
1771
1758
 
1772
1759
  // @ivoanjo: I'm not sure if this can happen -- It means we should've sampled already but haven't gotten the chance yet?
1773
- if (gvl_waiting_at < 0) return true;
1760
+ if (gvl_waiting_at < 0) return ON_GVL_RUNNING_SAMPLE;
1774
1761
 
1775
1762
  long waiting_for_gvl_duration_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - gvl_waiting_at;
1776
1763
 
@@ -1786,11 +1773,11 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1786
1773
  gvl_profiling_state_set(thread, GVL_WAITING_ENABLED_EMPTY);
1787
1774
  }
1788
1775
 
1789
- return should_sample;
1776
+ return should_sample ? ON_GVL_RUNNING_SAMPLE : ON_GVL_RUNNING_DONT_SAMPLE;
1790
1777
  }
1791
1778
 
1792
1779
  __attribute__((warn_unused_result))
1793
- bool thread_context_collector_on_gvl_running(gvl_profiling_thread thread) {
1780
+ on_gvl_running_result thread_context_collector_on_gvl_running(gvl_profiling_thread thread) {
1794
1781
  return thread_context_collector_on_gvl_running_with_threshold(thread, global_waiting_for_gvl_threshold_ns);
1795
1782
  }
1796
1783
 
@@ -1822,7 +1809,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1822
1809
  //
1823
1810
  // NOTE: In normal use, current_thread is expected to be == rb_thread_current(); the `current_thread` parameter only
1824
1811
  // exists to enable testing.
1825
- VALUE thread_context_collector_sample_after_gvl_running_with_thread(VALUE self_instance, VALUE current_thread) {
1812
+ VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance, VALUE current_thread, long current_monotonic_wall_time_ns) {
1826
1813
  struct thread_context_collector_state *state;
1827
1814
  TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
1828
1815
 
@@ -1854,14 +1841,10 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1854
1841
  thread_context,
1855
1842
  thread_context->sampling_buffer,
1856
1843
  cpu_time_for_thread,
1857
- monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
1844
+ current_monotonic_wall_time_ns
1858
1845
  );
1859
1846
 
1860
- return Qtrue; // To allow this to be called from rb_rescue2
1861
- }
1862
-
1863
- VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance) {
1864
- return thread_context_collector_sample_after_gvl_running_with_thread(self_instance, rb_thread_current());
1847
+ return Qtrue;
1865
1848
  }
1866
1849
 
1867
1850
  // This method is intended to be called from update_metrics_and_sample. It exists to handle extra sampling steps we
@@ -1980,13 +1963,17 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1980
1963
  static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread) {
1981
1964
  ENFORCE_THREAD(thread);
1982
1965
 
1983
- return thread_context_collector_on_gvl_running(thread_from_thread_object(thread)) ? Qtrue : Qfalse;
1966
+ return thread_context_collector_on_gvl_running(thread_from_thread_object(thread)) == ON_GVL_RUNNING_SAMPLE ? Qtrue : Qfalse;
1984
1967
  }
1985
1968
 
1986
1969
  static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread) {
1987
1970
  ENFORCE_THREAD(thread);
1988
1971
 
1989
- return thread_context_collector_sample_after_gvl_running_with_thread(collector_instance, thread);
1972
+ return thread_context_collector_sample_after_gvl_running(
1973
+ collector_instance,
1974
+ thread,
1975
+ monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
1976
+ );
1990
1977
  }
1991
1978
 
1992
1979
  static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns) {
@@ -18,7 +18,13 @@ __attribute__((warn_unused_result)) bool thread_context_collector_on_gc_finish(V
18
18
  VALUE enforce_thread_context_collector_instance(VALUE object);
19
19
 
20
20
  #ifndef NO_GVL_INSTRUMENTATION
21
+ typedef enum {
22
+ ON_GVL_RUNNING_UNKNOWN, // Thread is not known, it may not even be from the current Ractor
23
+ ON_GVL_RUNNING_DONT_SAMPLE, // Thread is known, but "Waiting for GVL" period was too small to be sampled
24
+ ON_GVL_RUNNING_SAMPLE, // Thread is known, and "Waiting for GVL" period should be sampled
25
+ } on_gvl_running_result;
26
+
21
27
  void thread_context_collector_on_gvl_waiting(gvl_profiling_thread thread);
22
- __attribute__((warn_unused_result)) bool thread_context_collector_on_gvl_running(gvl_profiling_thread thread);
23
- VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance);
28
+ __attribute__((warn_unused_result)) on_gvl_running_result thread_context_collector_on_gvl_running(gvl_profiling_thread thread);
29
+ VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance, VALUE current_thread, long current_monotonic_wall_time_ns);
24
30
  #endif