ddtrace 1.18.0 → 1.19.0

Files changed (38)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -1
  3. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
  4. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
  5. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
  6. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  7. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
  9. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
  10. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +159 -124
  11. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
  12. data/ext/ddtrace_profiling_native_extension/extconf.rb +16 -0
  13. data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
  14. data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
  15. data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
  16. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
  17. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
  18. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +5 -0
  19. data/ext/ddtrace_profiling_native_extension/profiling.c +1 -0
  20. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
  21. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
  22. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +329 -10
  23. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
  24. data/lib/datadog/core/configuration/settings.rb +139 -22
  25. data/lib/datadog/core/telemetry/collector.rb +10 -0
  26. data/lib/datadog/core/telemetry/event.rb +2 -1
  27. data/lib/datadog/core/telemetry/ext.rb +3 -0
  28. data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
  29. data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
  30. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -11
  31. data/lib/datadog/profiling/component.rb +197 -13
  32. data/lib/datadog/profiling/scheduler.rb +4 -6
  33. data/lib/datadog/profiling/stack_recorder.rb +13 -2
  34. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
  35. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  36. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  37. data/lib/ddtrace/version.rb +1 -1
  38. metadata +12 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
---
SHA256:
- metadata.gz: 69e775ab06a83ce14114a7287056e3d3fb575191b7ff6ccdc5c7b33f7fd58172
- data.tar.gz: 13b607a4e29e516be4988dca7827eca09b79e968cf98e0641a155039d2ec3273
+ metadata.gz: 37ea5c2fe193569e17d13e026b4477dd8806c00df50250fb5f69854c23e6e6a5
+ data.tar.gz: 858b756d1ef6baddb66f85fb44f3301b317e151d58e4e299390f819621d4ecb8
SHA512:
- metadata.gz: d345e07c8b0a654974c51a7457b3fc6d3d7eb99226cfc5555d6bc8ee3e65b17b3782b1e582591be925297c09dd104108007b2081e28ee43c103f8f2fec3ffe5b
- data.tar.gz: '085ea801f5fae16ed58cd79bab86839cd1aa23fa09261b39219264f603455633e19dbb8f60d647e407c7218d7d00052ef7b593405ec5af828af56ffecd58227d'
+ metadata.gz: 3d86acc37f0bcb7d3b680c4ee8698b45eeb10c026d7dbcdf65ca5c7991eeb561ab35b40b02e98aa24dd984fd7871da8c4906ccc1aa64f9b8c2d8ad86aded199b
+ data.tar.gz: 4463963285c39c09e2d1d090fe0f80518107b7b60fdb58a0bd993d11dfbaf64e0131cd774b16483d6bd84b5a7faf4256eb8304c3bf4b08b4b90dadfaa513adb9
data/CHANGELOG.md CHANGED
@@ -2,6 +2,33 @@

## [Unreleased]

+ ## [1.19.0] - 2024-01-10
+
+ ### Highlights
+ Alpha support for memory profiling has been added. For more details, check the [release notes](https://github.com/DataDog/dd-trace-rb/releases/tag/v1.19.0)
+
+ ### Added
+ * Tracing: Add `on_error` settings for `mysql2` ([#3316][])
+ * Core: Add install_signature to app-started telemetry event ([#3349][])
+ * Profiling: Heap Profiling ([#3281][]) ([#3287][]) ([#3328][]) ([#3329][]) ([#3333][]) ([#3360][])
+ * Profiling: Redesign GC profiling to add timeline support and reduce overhead ([#3313][])
+ * Core: Use Ruby 3.3 stable for CI testing ([#3354][])
+
+ ### Changed
+ * Core: Bump `datadog-ci` dependency to 0.6.0 ([#3361][])
+ * Core: Bump debase-ruby_core_source dependency to 3.3.1 ([#3373][])
+ * Docs: Backport "List Ruby 3.3 as supported in the docs" to master branch ([#3374][])
+ * Profiling: Import upstream `rb_profile_frames` fix ([#3352][])
+ * Profiling: Allow the dynamic sampling rate overhead target to be set ([#3310][])
+ * Profiling: Split profiling tests into ractor and non-ractor suites. ([#3320][])
+
+ ### Fixed
+ * Docs: Fix `pg` doc markdown format ([#3317][])
+ * Tracing: Fix recursive `require` in Railtie ([#3365][])
+ * Profiling: Fix issues stemming from rb_gc_force_recycle ([#3366][])
+ * Profiling: Fix Ruby 3.3 CI being broken in master due to profiler ([#3356][])
+ * Profiling: Fix "no signals" workaround detection when mariadb is in use ([#3362][])
+
## [1.18.0] - 2023-12-07

### Added
@@ -2680,7 +2707,8 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1


- [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.18.0...master
+ [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.19.0...master
+ [1.19.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.18.0...v1.19.0
[1.18.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.17.0...v1.18.0
[1.17.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.16.2...v1.17.0
[1.16.2]: https://github.com/DataDog/dd-trace-rb/compare/v1.16.1...v1.16.2
@@ -3910,12 +3938,33 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
[#3273]: https://github.com/DataDog/dd-trace-rb/issues/3273
[#3279]: https://github.com/DataDog/dd-trace-rb/issues/3279
[#3280]: https://github.com/DataDog/dd-trace-rb/issues/3280
+ [#3281]: https://github.com/DataDog/dd-trace-rb/issues/3281
[#3284]: https://github.com/DataDog/dd-trace-rb/issues/3284
[#3286]: https://github.com/DataDog/dd-trace-rb/issues/3286
+ [#3287]: https://github.com/DataDog/dd-trace-rb/issues/3287
[#3289]: https://github.com/DataDog/dd-trace-rb/issues/3289
[#3303]: https://github.com/DataDog/dd-trace-rb/issues/3303
[#3307]: https://github.com/DataDog/dd-trace-rb/issues/3307
[#3308]: https://github.com/DataDog/dd-trace-rb/issues/3308
+ [#3310]: https://github.com/DataDog/dd-trace-rb/issues/3310
+ [#3313]: https://github.com/DataDog/dd-trace-rb/issues/3313
+ [#3316]: https://github.com/DataDog/dd-trace-rb/issues/3316
+ [#3317]: https://github.com/DataDog/dd-trace-rb/issues/3317
+ [#3320]: https://github.com/DataDog/dd-trace-rb/issues/3320
+ [#3328]: https://github.com/DataDog/dd-trace-rb/issues/3328
+ [#3329]: https://github.com/DataDog/dd-trace-rb/issues/3329
+ [#3333]: https://github.com/DataDog/dd-trace-rb/issues/3333
+ [#3349]: https://github.com/DataDog/dd-trace-rb/issues/3349
+ [#3352]: https://github.com/DataDog/dd-trace-rb/issues/3352
+ [#3354]: https://github.com/DataDog/dd-trace-rb/issues/3354
+ [#3356]: https://github.com/DataDog/dd-trace-rb/issues/3356
+ [#3360]: https://github.com/DataDog/dd-trace-rb/issues/3360
+ [#3361]: https://github.com/DataDog/dd-trace-rb/issues/3361
+ [#3362]: https://github.com/DataDog/dd-trace-rb/issues/3362
+ [#3365]: https://github.com/DataDog/dd-trace-rb/issues/3365
+ [#3366]: https://github.com/DataDog/dd-trace-rb/issues/3366
+ [#3373]: https://github.com/DataDog/dd-trace-rb/issues/3373
+ [#3374]: https://github.com/DataDog/dd-trace-rb/issues/3374
[@AdrianLC]: https://github.com/AdrianLC
[@Azure7111]: https://github.com/Azure7111
[@BabyGroot]: https://github.com/BabyGroot
data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c CHANGED
@@ -75,15 +75,22 @@
//
// ---

+ #ifndef NO_POSTPONED_TRIGGER
+ // Used to call the rb_postponed_job_trigger from Ruby 3.3+. These get initialized in
+ // `collectors_cpu_and_wall_time_worker_init` below and always get reused after that.
+ static rb_postponed_job_handle_t sample_from_postponed_job_handle;
+ static rb_postponed_job_handle_t after_gc_from_postponed_job_handle;
+ #endif
+
// Contains state for a single CpuAndWallTimeWorker instance
struct cpu_and_wall_time_worker_state {
  // These are immutable after initialization

  bool gc_profiling_enabled;
- bool allocation_counting_enabled;
  bool no_signals_workaround_enabled;
  bool dynamic_sampling_rate_enabled;
- int allocation_sample_every; // Temporarily used for development/testing of allocation profiling
+ int allocation_sample_every;
+ bool allocation_profiling_enabled;
  VALUE self_instance;
  VALUE thread_context_collector_instance;
  VALUE idle_sampling_helper_instance;
@@ -149,10 +156,11 @@ static VALUE _native_initialize(
  VALUE thread_context_collector_instance,
  VALUE gc_profiling_enabled,
  VALUE idle_sampling_helper_instance,
- VALUE allocation_counting_enabled,
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
- VALUE allocation_sample_every
+ VALUE dynamic_sampling_rate_overhead_target_percentage,
+ VALUE allocation_sample_every,
+ VALUE allocation_profiling_enabled
);
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
static VALUE _native_sampling_loop(VALUE self, VALUE instance);
@@ -211,6 +219,16 @@ __thread uint64_t allocation_count = 0;
void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
  rb_global_variable(&active_sampler_instance);

+ #ifndef NO_POSTPONED_TRIGGER
+   int unused_flags = 0;
+   sample_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, sample_from_postponed_job, NULL);
+   after_gc_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, after_gc_from_postponed_job, NULL);
+
+   if (sample_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
+     rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
+   }
+ #endif
+
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
  VALUE collectors_cpu_and_wall_time_worker_class = rb_define_class_under(collectors_module, "CpuAndWallTimeWorker", rb_cObject);
  // Hosts methods used for testing the native code using RSpec
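The `#ifndef NO_POSTPONED_TRIGGER` branches above capture the profiler's switch to Ruby 3.3's postponed-job API: handles are pre-registered once at init time and later triggered, instead of calling `rb_postponed_job_register_one` on every request. A minimal standalone sketch of that version-gated pattern, assuming (as this diff suggests) that `NO_POSTPONED_TRIGGER` is defined by the extension's build configuration on Rubies that lack the newer API; `example_job` and the surrounding function names are illustrative only:

```c
#include <ruby.h>
#include <ruby/debug.h>

// Runs later at a VM-chosen safe point, where allocating and raising are allowed again.
static void example_job(void *data) { (void) data; /* take a sample here */ }

#ifndef NO_POSTPONED_TRIGGER
  // Ruby 3.3+: pre-register once, then trigger as many times as needed; triggering is cheap
  // enough to be used directly from the SIGPROF handler, as the later hunk in this file does.
  static rb_postponed_job_handle_t example_job_handle;

  static void example_init(void) {
    example_job_handle = rb_postponed_job_preregister(0, example_job, NULL);
    if (example_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
      rb_raise(rb_eRuntimeError, "Failed to pre-register postponed job");
    }
  }

  static void example_request_run(void) { rb_postponed_job_trigger(example_job_handle); }
#else
  // Ruby <= 3.2: register on every request; registration can fail if the postponed-job queue is full.
  static void example_init(void) { }

  static void example_request_run(void) { (void) rb_postponed_job_register_one(0, example_job, NULL); }
#endif
```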
@@ -226,7 +244,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
  rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);

- rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8);
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
@@ -264,10 +282,10 @@ static VALUE _native_new(VALUE klass) {
  // being leaked.

  state->gc_profiling_enabled = false;
- state->allocation_counting_enabled = false;
  state->no_signals_workaround_enabled = false;
  state->dynamic_sampling_rate_enabled = true;
  state->allocation_sample_every = 0;
+ state->allocation_profiling_enabled = false;
  state->thread_context_collector_instance = Qnil;
  state->idle_sampling_helper_instance = Qnil;
  state->owner_thread = Qnil;
@@ -292,28 +310,31 @@ static VALUE _native_initialize(
  VALUE thread_context_collector_instance,
  VALUE gc_profiling_enabled,
  VALUE idle_sampling_helper_instance,
- VALUE allocation_counting_enabled,
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
- VALUE allocation_sample_every
+ VALUE dynamic_sampling_rate_overhead_target_percentage,
+ VALUE allocation_sample_every,
+ VALUE allocation_profiling_enabled
) {
  ENFORCE_BOOLEAN(gc_profiling_enabled);
- ENFORCE_BOOLEAN(allocation_counting_enabled);
  ENFORCE_BOOLEAN(no_signals_workaround_enabled);
  ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
  ENFORCE_TYPE(allocation_sample_every, T_FIXNUM);
+ ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
+ ENFORCE_BOOLEAN(allocation_profiling_enabled);

  struct cpu_and_wall_time_worker_state *state;
  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

  state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue);
- state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
  state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
  state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
+ dynamic_sampling_rate_set_overhead_target_percentage(&state->dynamic_sampling_rate, NUM2DBL(dynamic_sampling_rate_overhead_target_percentage));
  state->allocation_sample_every = NUM2INT(allocation_sample_every);
+ state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);

- if (state->allocation_sample_every < 0) {
-   rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every);
+ if (state->allocation_sample_every <= 0) {
+   rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be > 0.", state->allocation_sample_every);
  }

  state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
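The `ENFORCE_BOOLEAN`/`ENFORCE_TYPE` calls in the hunk above validate the arguments handed over from Ruby before they are converted with `NUM2DBL`/`NUM2INT`. The macros themselves are defined in the extension's helper headers, which are not part of this excerpt; a rough stand-in for what such checks amount to (names prefixed `EXAMPLE_` to make clear these are not the real definitions):

```c
#include <ruby.h>

// Approximate stand-ins; the real ENFORCE_* macros in this extension also report
// which argument and which call site failed.
#define EXAMPLE_ENFORCE_TYPE(value, type) Check_Type((value), (type))
#define EXAMPLE_ENFORCE_BOOLEAN(value) \
  do { \
    if ((value) != Qtrue && (value) != Qfalse) rb_raise(rb_eTypeError, "wrong argument (expected true or false)"); \
  } while (0)
```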
@@ -472,20 +493,25 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si

  // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
  // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
- int result = rb_postponed_job_register_one(0, sample_from_postponed_job, NULL);
-
- // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
- // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
- switch (result) {
-   case 0:
-     state->stats.postponed_job_full++; break;
-   case 1:
-     state->stats.postponed_job_success++; break;
-   case 2:
-     state->stats.postponed_job_skipped_already_existed++; break;
-   default:
-     state->stats.postponed_job_unknown_result++;
- }
+ #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
+   rb_postponed_job_trigger(sample_from_postponed_job_handle);
+   state->stats.postponed_job_success++; // Always succeeds
+ #else
+   int result = rb_postponed_job_register_one(0, sample_from_postponed_job, NULL);
+
+   // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
+   // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
+   switch (result) {
+     case 0:
+       state->stats.postponed_job_full++; break;
+     case 1:
+       state->stats.postponed_job_success++; break;
+     case 2:
+       state->stats.postponed_job_skipped_already_existed++; break;
+     default:
+       state->stats.postponed_job_unknown_result++;
+   }
+ #endif
}

// The actual sampling trigger loop always runs **without** the global vm lock.
@@ -632,7 +658,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
  // because they may raise exceptions.
  install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal");
  if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
- if (state->allocation_counting_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
+ if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);

  rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);

@@ -714,28 +740,17 @@ static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
  if (event == RUBY_INTERNAL_EVENT_GC_ENTER) {
    thread_context_collector_on_gc_start(state->thread_context_collector_instance);
  } else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) {
-   // Design: In an earlier iteration of this feature (see https://github.com/DataDog/dd-trace-rb/pull/2308) we
-   // actually had a single method to implement the behavior of both thread_context_collector_on_gc_finish
-   // and thread_context_collector_sample_after_gc (the latter is called via after_gc_from_postponed_job).
-   //
-   // Unfortunately, then we discovered the safety issue around no allocations, and thus decided to separate them -- so that
-   // the sampling could run outside the tight safety constraints of the garbage collection process.
-   //
-   // There is a downside: The sample is now taken very very shortly afterwards the GC finishes, and not immediately
-   // as the GC finishes, which means the stack captured may by affected by "skid", e.g. point slightly after where
-   // it should be pointing at.
-   // Alternatives to solve this would be to capture no stack for garbage collection (as we do for Java and .net);
-   // making the sampling process allocation-safe (very hard); or separate stack sampling from sample recording,
-   // e.g. enabling us to capture the stack in thread_context_collector_on_gc_finish and do the rest later
-   // (medium hard).
-
-   thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
-   // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc after if
-   // fully finishes the garbage collection, so that one is allowed to do allocations and throw exceptions as usual.
-   //
-   // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
-   // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
-   rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
+   bool should_flush = thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
+
+   // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc when the
+   // thread collector flags it's time to flush.
+   if (should_flush) {
+     #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
+       rb_postponed_job_trigger(after_gc_from_postponed_job_handle);
+     #else
+       rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
+     #endif
+   }
  }
}

@@ -888,9 +903,9 @@ static void sleep_for(uint64_t time_ns) {
}

static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
- bool is_profiler_running = active_sampler_instance_state != NULL;
+ bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;

- return is_profiler_running ? ULL2NUM(allocation_count) : Qnil;
+ return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
}

// Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
@@ -924,7 +939,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)

  // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before
  // beta), and having something here allows us to test the rest of feature, sampling decision aside.
- if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) {
+ if (allocation_count % state->allocation_sample_every == 0) {
    // Rescue against any exceptions that happen during sampling
    safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
  }
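This hunk drops the `allocation_sample_every > 0` guard because `_native_initialize` now rejects non-positive values up front, leaving only the every-Nth-allocation decision. A small sketch of that placeholder strategy in isolation; `EXAMPLE_SAMPLE_EVERY` and the hook names are illustrative (the real counter is the per-thread `allocation_count` and the tracepoint is created elsewhere in this file):

```c
#include <stdint.h>
#include <ruby.h>
#include <ruby/debug.h>

#define EXAMPLE_SAMPLE_EVERY 50 // stand-in for state->allocation_sample_every

static uint64_t example_allocation_count = 0;

// Invoked by the VM for every object allocation while the tracepoint is enabled.
static void example_on_newobj(VALUE tracepoint_data, void *unused) {
  (void) tracepoint_data; (void) unused;
  example_allocation_count++;

  // Placeholder decision strategy from the hunk above: keep 1 out of every N allocations.
  if (example_allocation_count % EXAMPLE_SAMPLE_EVERY == 0) {
    /* record an allocation sample for the newly-created object */
  }
}

static VALUE example_create_allocation_tracepoint(void) {
  return rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_NEWOBJ, example_on_newobj, NULL);
}
```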
data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c CHANGED
@@ -19,7 +19,7 @@
//
// Instead of sampling at a fixed sample rate, the actual sampling rate should be decided by also observing the impact
// that running the profiler is having. This protects against issues such as the profiler being deployed in very busy
- //machines or containers with unrealistic CPU restrictions.
+ // machines or containers with unrealistic CPU restrictions.
//
// ### Implementation
//
@@ -35,13 +35,13 @@
// sample. If it's not, it will skip sampling.
//
// Finally, as an additional optimization, there's a `dynamic_sampling_rate_get_sleep()` which, given the current
- // wall-time, will return the time remaining (*there's an exception, check below) until the next sample.
+ // wall-time, will return the time remaining (*there's an exception, check function) until the next sample.
//
// ---

// This is the wall-time overhead we're targeting. E.g. we target to spend no more than 2%, or 1.2 seconds per minute,
- // taking profiling samples.
- #define WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // %
+ // taking profiling samples by default.
+ #define DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // %
// See `dynamic_sampling_rate_get_sleep()` for details
#define MAX_SLEEP_TIME_NS MILLIS_AS_NS(100)
// See `dynamic_sampling_rate_after_sample()` for details
@@ -49,6 +49,11 @@

void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state) {
  atomic_init(&state->next_sample_after_monotonic_wall_time_ns, 0);
+ dynamic_sampling_rate_set_overhead_target_percentage(state, DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE);
+ }
+
+ void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage) {
+   state->overhead_target_percentage = overhead_target_percentage;
}

void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state) {
@@ -76,7 +81,7 @@ bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, lon
}

void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long wall_time_ns_after_sample, uint64_t sampling_time_ns) {
- double overhead_target = (double) WALL_TIME_OVERHEAD_TARGET_PERCENTAGE;
+ double overhead_target = state->overhead_target_percentage;

  // The idea here is that we're targeting a maximum % of wall-time spent sampling.
  // So for instance, if sampling_time_ns is 2% of the time we spend working, how much is the 98% we should spend
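To make the arithmetic in that comment concrete: with an overhead target of p%, a sample that cost `sampling_time_ns` should be followed by roughly `sampling_time_ns * (100 - p) / p` of non-sampling time before the next sample becomes due. A small sketch of that relationship (it mirrors the idea described here, not the exact code of `dynamic_sampling_rate_after_sample`, whose body is outside this hunk):

```c
#include <stdint.h>

// With the default 2% target, a 1ms sample is followed by roughly 49ms of "quiet" time,
// keeping sampling at about 2 parts in every 100 of wall time.
static uint64_t example_time_until_next_sample_ns(uint64_t sampling_time_ns, double overhead_target_percentage) {
  double non_sampling_multiplier = (100.0 - overhead_target_percentage) / overhead_target_percentage; // 98 / 2 = 49
  return (uint64_t) (sampling_time_ns * non_sampling_multiplier);
}
```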
@@ -93,48 +98,51 @@ void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long
// ---
// Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.

- VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
- VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
- VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);
+ VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
+ VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
+ VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);

void collectors_dynamic_sampling_rate_init(VALUE profiling_module) {
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
  VALUE dynamic_sampling_rate_module = rb_define_module_under(collectors_module, "DynamicSamplingRate");
  VALUE testing_module = rb_define_module_under(dynamic_sampling_rate_module, "Testing");

- rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 2);
- rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 2);
- rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 2);
+ rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 3);
+ rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 3);
+ rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 3);
}

- VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
+ VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
  ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
  ENFORCE_TYPE(current_monotonic_wall_time_ns, T_FIXNUM);

  dynamic_sampling_rate_state state;
  dynamic_sampling_rate_init(&state);
+ dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
  atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));

  return ULL2NUM(dynamic_sampling_rate_get_sleep(&state, NUM2LONG(current_monotonic_wall_time_ns)));
}

- VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
+ VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
  ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
  ENFORCE_TYPE(wall_time_ns_before_sample, T_FIXNUM);

  dynamic_sampling_rate_state state;
  dynamic_sampling_rate_init(&state);
+ dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
  atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));

  return dynamic_sampling_rate_should_sample(&state, NUM2LONG(wall_time_ns_before_sample)) ? Qtrue : Qfalse;
}

- VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
+ VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
  ENFORCE_TYPE(wall_time_ns_after_sample, T_FIXNUM);
  ENFORCE_TYPE(sampling_time_ns, T_FIXNUM);

  dynamic_sampling_rate_state state;
  dynamic_sampling_rate_init(&state);
+ dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));

  dynamic_sampling_rate_after_sample(&state, NUM2LONG(wall_time_ns_after_sample), NUM2ULL(sampling_time_ns));

data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h CHANGED
@@ -4,10 +4,14 @@
#include <stdbool.h>

typedef struct {
+ // This is the wall-time overhead we're targeting. E.g. by default, we target to spend no more than 2%, or 1.2 seconds
+ // per minute, taking profiling samples.
+ double overhead_target_percentage;
  atomic_long next_sample_after_monotonic_wall_time_ns;
} dynamic_sampling_rate_state;

void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state);
+ void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage);
void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state);
uint64_t dynamic_sampling_rate_get_sleep(dynamic_sampling_rate_state *state, long current_monotonic_wall_time_ns);
bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, long wall_time_ns_before_sample);
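Putting the declarations above together, a caller would drive the API roughly as follows. This is a sketch based only on this header: `example_monotonic_wall_time_now_ns` is a placeholder for whatever clock source the caller uses, and the real driver lives in the CpuAndWallTimeWorker code, which is not part of this excerpt:

```c
#include <stdint.h>

#include "collectors_dynamic_sampling_rate.h"

// Placeholder clock source; the real code has its own monotonic wall-time helper.
extern long example_monotonic_wall_time_now_ns(void);

static dynamic_sampling_rate_state example_rate;

static void example_setup(double overhead_target_percentage) {
  dynamic_sampling_rate_init(&example_rate); // starts at the built-in default target
  dynamic_sampling_rate_set_overhead_target_percentage(&example_rate, overhead_target_percentage);
}

static void example_maybe_sample(void) {
  long before_ns = example_monotonic_wall_time_now_ns();
  if (!dynamic_sampling_rate_should_sample(&example_rate, before_ns)) return;

  /* ... take and record a sample ... */

  long after_ns = example_monotonic_wall_time_now_ns();
  // Feed back how long sampling took, so the next allowed sample time gets pushed out
  // far enough to keep overall overhead near overhead_target_percentage.
  dynamic_sampling_rate_after_sample(&example_rate, after_ns, (uint64_t) (after_ns - before_ns));
}
```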
data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c ADDED
@@ -0,0 +1,156 @@
+ #include <ruby.h>
+ #include <datadog/profiling.h>
+
+ #include "collectors_gc_profiling_helper.h"
+
+ // This helper is used by the Datadog::Profiling::Collectors::ThreadContext to profile garbage collection.
+ // It's tested through that class' interfaces.
+ // ---
+
+ // Used when retrieving GC information from the VM.
+ // All these are symbols, but we don't need to mark them since we ask for them to be interned (and thus live forever)
+ static VALUE state_sym;
+ static VALUE marking_sym;
+ static VALUE sweeping_sym;
+ static VALUE none_sym;
+ static VALUE gc_by_sym;
+ static VALUE newobj_sym;
+ static VALUE malloc_sym;
+ static VALUE method_sym;
+ static VALUE capi_sym;
+ static VALUE stress_sym;
+ static VALUE major_by_sym;
+ static VALUE nofree_sym;
+ static VALUE oldgen_sym;
+ static VALUE shady_sym;
+ static VALUE force_sym;
+ static VALUE oldmalloc_sym;
+
+ static ddog_CharSlice major_gc_reason_pretty(VALUE major_gc_reason);
+ static ddog_CharSlice gc_cause_pretty(VALUE gc_cause);
+ static ddog_CharSlice gc_type_pretty(VALUE major_gc_reason, VALUE gc_state);
+
+ void gc_profiling_init(void) {
+   // This function lazy-interns a few constants, which may trigger allocations. Since we want to call it during GC as
+   // well, when allocations are not allowed, we call it once here so that the constants get defined ahead of time.
+   rb_gc_latest_gc_info(rb_hash_new());
+
+   // Used to query and look up the results of GC information
+   state_sym = ID2SYM(rb_intern_const("state"));
+   marking_sym = ID2SYM(rb_intern_const("marking"));
+   sweeping_sym = ID2SYM(rb_intern_const("sweeping"));
+   none_sym = ID2SYM(rb_intern_const("none"));
+   gc_by_sym = ID2SYM(rb_intern_const("gc_by"));
+   newobj_sym = ID2SYM(rb_intern_const("newobj"));
+   malloc_sym = ID2SYM(rb_intern_const("malloc"));
+   method_sym = ID2SYM(rb_intern_const("method"));
+   capi_sym = ID2SYM(rb_intern_const("capi"));
+   stress_sym = ID2SYM(rb_intern_const("stress"));
+   major_by_sym = ID2SYM(rb_intern_const("major_by"));
+   nofree_sym = ID2SYM(rb_intern_const("nofree"));
+   oldgen_sym = ID2SYM(rb_intern_const("oldgen"));
+   shady_sym = ID2SYM(rb_intern_const("shady"));
+   force_sym = ID2SYM(rb_intern_const("force"));
+   oldmalloc_sym = ID2SYM(rb_intern_const("oldmalloc"));
+   state_sym = ID2SYM(rb_intern_const("state"));
+   none_sym = ID2SYM(rb_intern_const("none"));
+ }
+
+ bool gc_profiling_has_major_gc_finished(void) {
+   return rb_gc_latest_gc_info(state_sym) == none_sym && rb_gc_latest_gc_info(major_by_sym) != Qnil;
+ }
+
+ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) {
+   uint8_t max_label_count =
+     1 + // thread id
+     1 + // thread name
+     1 + // state
+     1 + // event
+     1 + // gc reason
+     1 + // gc cause
+     1;  // gc type
+
+   if (max_label_count > labels_length) {
+     rb_raise(rb_eArgError, "BUG: gc_profiling_set_metadata invalid labels_length (%d) < max_label_count (%d)", labels_length, max_label_count);
+   }
+
+   uint8_t label_pos = 0;
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("thread id"),
+     .str = DDOG_CHARSLICE_C("GC"),
+     .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
+   };
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("thread name"),
+     .str = DDOG_CHARSLICE_C("Garbage Collection"),
+     .num = 0, // Workaround, same as above
+   };
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("state"),
+     .str = DDOG_CHARSLICE_C("had cpu"),
+     .num = 0, // Workaround, same as above
+   };
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("event"),
+     .str = DDOG_CHARSLICE_C("gc"),
+     .num = 0, // Workaround, same as above
+   };
+
+   VALUE major_by = rb_gc_latest_gc_info(major_by_sym);
+   if (major_by != Qnil) {
+     labels[label_pos++] = (ddog_prof_Label) {
+       .key = DDOG_CHARSLICE_C("gc reason"),
+       .str = major_gc_reason_pretty(major_by),
+     };
+   }
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("gc cause"),
+     .str = gc_cause_pretty(rb_gc_latest_gc_info(gc_by_sym)),
+   };
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("gc type"),
+     .str = gc_type_pretty(major_by, rb_gc_latest_gc_info(state_sym)),
+   };
+
+   if (label_pos > max_label_count) {
+     rb_raise(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
+   }
+
+   return label_pos;
+ }
+
+ static ddog_CharSlice major_gc_reason_pretty(VALUE major_gc_reason) {
+   if (major_gc_reason == nofree_sym   ) return DDOG_CHARSLICE_C("not enough free slots (NOFREE)");
+   if (major_gc_reason == oldgen_sym   ) return DDOG_CHARSLICE_C("old generation full (OLDGEN)");
+   if (major_gc_reason == shady_sym    ) return DDOG_CHARSLICE_C("too many objects without write barriers (SHADY)");
+   if (major_gc_reason == force_sym    ) return DDOG_CHARSLICE_C("requested (FORCE)");
+   if (major_gc_reason == oldmalloc_sym) return DDOG_CHARSLICE_C("heap bytes allocated threshold (OLDMALLOC)");
+   return DDOG_CHARSLICE_C("unknown");
+ }
+
+ static ddog_CharSlice gc_cause_pretty(VALUE gc_cause) {
+   if (gc_cause == newobj_sym) return DDOG_CHARSLICE_C("object allocation");
+   if (gc_cause == malloc_sym) return DDOG_CHARSLICE_C("malloc()");
+   if (gc_cause == method_sym) return DDOG_CHARSLICE_C("GC.start()");
+   if (gc_cause == capi_sym  ) return DDOG_CHARSLICE_C("rb_gc()");
+   if (gc_cause == stress_sym) return DDOG_CHARSLICE_C("stress");
+   return DDOG_CHARSLICE_C("unknown");
+ }
+
+ static ddog_CharSlice gc_type_pretty(VALUE major_gc_reason, VALUE gc_state) {
+   if (major_gc_reason != Qnil) {
+     if (gc_state == marking_sym ) return DDOG_CHARSLICE_C("major (ongoing, marking)");
+     if (gc_state == sweeping_sym) return DDOG_CHARSLICE_C("major (ongoing, sweeping)");
+     return DDOG_CHARSLICE_C("major");
+   } else {
+     // As we delay flushing events when a minor GC finishes, it's not relevant to include the observed state of the
+     // minor GC, as we often won't record a marking -> sweeping -> done cycle, as it happens too quickly.
+     return DDOG_CHARSLICE_C("minor");
+   }
+ }
data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h ADDED
@@ -0,0 +1,5 @@
+ #pragma once
+
+ void gc_profiling_init(void);
+ bool gc_profiling_has_major_gc_finished(void);
+ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length);
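Based on these three declarations, the intended call pattern looks roughly like the sketch below. In the real code the caller is the thread context collector, whose changes are listed in the file summary above but not shown in this excerpt; `EXAMPLE_MAX_GC_LABELS` and the function name are illustrative:

```c
#include <stdint.h>
#include <ruby.h>
#include <datadog/profiling.h>

#include "collectors_gc_profiling_helper.h"

#define EXAMPLE_MAX_GC_LABELS 8 // needs to fit the up-to-7 labels gc_profiling_set_metadata writes

static void example_record_gc_sample(void) {
  // Precondition: gc_profiling_init() was called once at startup, so the symbols it
  // looks up are already interned and no allocation happens here.
  ddog_prof_Label labels[EXAMPLE_MAX_GC_LABELS];
  uint8_t label_count = gc_profiling_set_metadata(labels, EXAMPLE_MAX_GC_LABELS);

  // labels[0..label_count) now carries "thread id", "gc cause", "gc type", etc.,
  // ready to be attached to the GC sample that gets pushed to the stack recorder.
  (void) label_count;
}
```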