ddtrace 1.18.0 → 1.20.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (151)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +82 -1
  3. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +96 -66
  4. data/ext/ddtrace_profiling_native_extension/collectors_discrete_dynamic_sampler.c +349 -0
  5. data/ext/ddtrace_profiling_native_extension/collectors_discrete_dynamic_sampler.h +89 -0
  6. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
  7. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  9. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  10. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
  11. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
  12. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +159 -124
  13. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
  14. data/ext/ddtrace_profiling_native_extension/extconf.rb +19 -0
  15. data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
  16. data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
  17. data/ext/ddtrace_profiling_native_extension/helpers.h +6 -0
  18. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
  19. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
  20. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +5 -0
  21. data/ext/ddtrace_profiling_native_extension/profiling.c +17 -0
  22. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
  23. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
  24. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +329 -10
  25. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
  26. data/ext/ddtrace_profiling_native_extension/time_helpers.h +2 -0
  27. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +2 -1
  28. data/lib/datadog/core/configuration/settings.rb +153 -21
  29. data/lib/datadog/core/environment/class_count.rb +6 -6
  30. data/lib/datadog/core/remote/component.rb +25 -12
  31. data/lib/datadog/core/remote/ext.rb +1 -0
  32. data/lib/datadog/core/remote/tie/tracing.rb +39 -0
  33. data/lib/datadog/core/remote/tie.rb +27 -0
  34. data/lib/datadog/core/telemetry/collector.rb +10 -0
  35. data/lib/datadog/core/telemetry/event.rb +2 -1
  36. data/lib/datadog/core/telemetry/ext.rb +3 -0
  37. data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
  38. data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
  39. data/lib/datadog/opentelemetry/sdk/propagator.rb +3 -2
  40. data/lib/datadog/opentelemetry.rb +3 -0
  41. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +5 -12
  42. data/lib/datadog/profiling/component.rb +183 -13
  43. data/lib/datadog/profiling/scheduler.rb +4 -6
  44. data/lib/datadog/profiling/stack_recorder.rb +13 -2
  45. data/lib/datadog/tracing/configuration/ext.rb +0 -1
  46. data/lib/datadog/tracing/configuration/settings.rb +2 -1
  47. data/lib/datadog/tracing/contrib/action_cable/configuration/settings.rb +1 -0
  48. data/lib/datadog/tracing/contrib/action_cable/ext.rb +1 -0
  49. data/lib/datadog/tracing/contrib/action_mailer/configuration/settings.rb +1 -0
  50. data/lib/datadog/tracing/contrib/action_mailer/ext.rb +1 -0
  51. data/lib/datadog/tracing/contrib/action_pack/configuration/settings.rb +1 -0
  52. data/lib/datadog/tracing/contrib/action_pack/ext.rb +1 -0
  53. data/lib/datadog/tracing/contrib/action_view/configuration/settings.rb +1 -0
  54. data/lib/datadog/tracing/contrib/action_view/ext.rb +1 -0
  55. data/lib/datadog/tracing/contrib/active_job/configuration/settings.rb +1 -0
  56. data/lib/datadog/tracing/contrib/active_job/ext.rb +1 -0
  57. data/lib/datadog/tracing/contrib/active_model_serializers/configuration/settings.rb +1 -0
  58. data/lib/datadog/tracing/contrib/active_model_serializers/ext.rb +1 -0
  59. data/lib/datadog/tracing/contrib/active_record/configuration/settings.rb +1 -0
  60. data/lib/datadog/tracing/contrib/active_record/ext.rb +1 -0
  61. data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +1 -0
  62. data/lib/datadog/tracing/contrib/active_support/ext.rb +1 -0
  63. data/lib/datadog/tracing/contrib/analytics.rb +0 -1
  64. data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +1 -0
  65. data/lib/datadog/tracing/contrib/aws/ext.rb +1 -0
  66. data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +1 -0
  67. data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
  68. data/lib/datadog/tracing/contrib/delayed_job/configuration/settings.rb +1 -0
  69. data/lib/datadog/tracing/contrib/delayed_job/ext.rb +1 -0
  70. data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +1 -0
  71. data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +1 -0
  72. data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +1 -0
  73. data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
  74. data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +1 -0
  75. data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
  76. data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +7 -0
  77. data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
  78. data/lib/datadog/tracing/contrib/faraday/middleware.rb +1 -1
  79. data/lib/datadog/tracing/contrib/grape/configuration/settings.rb +1 -0
  80. data/lib/datadog/tracing/contrib/grape/ext.rb +1 -0
  81. data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +1 -0
  82. data/lib/datadog/tracing/contrib/graphql/ext.rb +1 -0
  83. data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +1 -0
  84. data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
  85. data/lib/datadog/tracing/contrib/http/configuration/settings.rb +1 -0
  86. data/lib/datadog/tracing/contrib/http/distributed/fetcher.rb +2 -2
  87. data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
  88. data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +1 -0
  89. data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
  90. data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +1 -0
  91. data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
  92. data/lib/datadog/tracing/contrib/kafka/configuration/settings.rb +1 -0
  93. data/lib/datadog/tracing/contrib/kafka/ext.rb +1 -0
  94. data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +1 -0
  95. data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
  96. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
  97. data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
  98. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  99. data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +1 -0
  100. data/lib/datadog/tracing/contrib/opensearch/ext.rb +1 -0
  101. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +1 -0
  102. data/lib/datadog/tracing/contrib/pg/ext.rb +1 -0
  103. data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +1 -0
  104. data/lib/datadog/tracing/contrib/presto/ext.rb +1 -0
  105. data/lib/datadog/tracing/contrib/qless/configuration/settings.rb +1 -0
  106. data/lib/datadog/tracing/contrib/qless/ext.rb +1 -0
  107. data/lib/datadog/tracing/contrib/que/configuration/settings.rb +1 -0
  108. data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
  109. data/lib/datadog/tracing/contrib/racecar/configuration/settings.rb +1 -0
  110. data/lib/datadog/tracing/contrib/racecar/ext.rb +1 -0
  111. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +1 -0
  112. data/lib/datadog/tracing/contrib/rack/ext.rb +1 -0
  113. data/lib/datadog/tracing/contrib/rack/middlewares.rb +9 -2
  114. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  115. data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +1 -0
  116. data/lib/datadog/tracing/contrib/rails/ext.rb +1 -0
  117. data/lib/datadog/tracing/contrib/rake/configuration/settings.rb +1 -0
  118. data/lib/datadog/tracing/contrib/rake/ext.rb +1 -0
  119. data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +1 -0
  120. data/lib/datadog/tracing/contrib/redis/ext.rb +1 -0
  121. data/lib/datadog/tracing/contrib/redis/instrumentation.rb +2 -2
  122. data/lib/datadog/tracing/contrib/redis/patcher.rb +34 -21
  123. data/lib/datadog/tracing/contrib/resque/configuration/settings.rb +1 -0
  124. data/lib/datadog/tracing/contrib/resque/ext.rb +1 -0
  125. data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +1 -0
  126. data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
  127. data/lib/datadog/tracing/contrib/roda/configuration/settings.rb +1 -0
  128. data/lib/datadog/tracing/contrib/roda/ext.rb +1 -0
  129. data/lib/datadog/tracing/contrib/sequel/configuration/settings.rb +1 -0
  130. data/lib/datadog/tracing/contrib/sequel/ext.rb +1 -0
  131. data/lib/datadog/tracing/contrib/shoryuken/configuration/settings.rb +1 -0
  132. data/lib/datadog/tracing/contrib/shoryuken/ext.rb +1 -0
  133. data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +1 -0
  134. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  135. data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +1 -0
  136. data/lib/datadog/tracing/contrib/sinatra/ext.rb +1 -0
  137. data/lib/datadog/tracing/contrib/sneakers/configuration/settings.rb +1 -0
  138. data/lib/datadog/tracing/contrib/sneakers/ext.rb +1 -0
  139. data/lib/datadog/tracing/contrib/stripe/configuration/settings.rb +1 -0
  140. data/lib/datadog/tracing/contrib/stripe/ext.rb +1 -0
  141. data/lib/datadog/tracing/contrib/sucker_punch/configuration/settings.rb +1 -0
  142. data/lib/datadog/tracing/contrib/sucker_punch/ext.rb +1 -0
  143. data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +58 -0
  144. data/lib/datadog/tracing/contrib/trilogy/ext.rb +27 -0
  145. data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +94 -0
  146. data/lib/datadog/tracing/contrib/trilogy/integration.rb +43 -0
  147. data/lib/datadog/tracing/contrib/trilogy/patcher.rb +31 -0
  148. data/lib/datadog/tracing/contrib.rb +1 -0
  149. data/lib/datadog/tracing.rb +8 -2
  150. data/lib/ddtrace/version.rb +1 -1
  151. metadata +20 -6
@@ -0,0 +1,349 @@
1
+ #include "collectors_discrete_dynamic_sampler.h"
2
+
3
+ #include <ruby.h>
4
+ #include "helpers.h"
5
+ #include "time_helpers.h"
6
+ #include "ruby_helpers.h"
7
+
8
+ #define BASE_OVERHEAD_PCT 1.0
9
+ #define BASE_SAMPLING_INTERVAL 50
10
+
11
+ #define ADJUSTMENT_WINDOW_NS SECONDS_AS_NS(1)
12
+
13
+ #define EMA_SMOOTHING_FACTOR 0.6
14
+ #define EXP_MOVING_AVERAGE(last, avg, first) first ? last : (1-EMA_SMOOTHING_FACTOR) * avg + EMA_SMOOTHING_FACTOR * last
15
+
16
// Initializes a sampler with the given debug name and the default overhead target
// (BASE_OVERHEAD_PCT). The setter below also performs the initial reset, so after
// this call the sampler is fully ready to make sampling decisions.
void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name) {
  sampler->debug_name = debug_name;
  discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT);
}
20
+
21
// Resets the sampler back to a blank state, preserving only its configuration
// (debug_name and target_overhead). `now_ns` is injected as a parameter so the
// Ruby testing shim below can drive time deterministically.
static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
  const char *debug_name = sampler->debug_name;
  double target_overhead = sampler->target_overhead;
  // Designated initializer zeroes every field not listed (C99 semantics).
  (*sampler) = (discrete_dynamic_sampler) {
    .debug_name = debug_name,
    .target_overhead = target_overhead,
    // Act as if a reset is a readjustment (it kinda is!) and wait for a full adjustment window
    // to compute stats. Otherwise, we'd readjust on the next event that comes and thus be operating
    // with very incomplete information
    .last_readjust_time_ns = now_ns,
    // This fake readjustment will use a hardcoded sampling interval
    .sampling_interval = BASE_SAMPLING_INTERVAL,
    .sampling_probability = 1.0 / BASE_SAMPLING_INTERVAL,
    // But we want to make sure we sample at least once in the next window so that our first
    // real readjustment has some notion of how heavy sampling is. Therefore, we'll make it so that
    // the next event is automatically sampled by artificially locating it in the interval threshold.
    .events_since_last_sample = BASE_SAMPLING_INTERVAL - 1,
  };
}
40
+
41
+ void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler) {
42
+ long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
43
+ _discrete_dynamic_sampler_reset(sampler, now);
44
+ }
45
+
46
// Validates and applies a new overhead target, then resets the sampler so the next
// adjustment window is computed against the new target.
// Raises ArgumentError (via rb_raise, i.e. a non-local exit) for values outside ]0, 100].
static void _discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
  if (target_overhead <= 0 || target_overhead > 100) {
    rb_raise(rb_eArgError, "Target overhead must be a double between ]0,100] was %f", target_overhead);
  }
  sampler->target_overhead = target_overhead;
  _discrete_dynamic_sampler_reset(sampler, now_ns);
}
53
+
54
+ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead) {
55
+ long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
56
+ _discrete_dynamic_sampler_set_overhead_target_percentage(sampler, target_overhead, now);
57
+ }
58
+
59
+ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now);
60
+
61
+ static bool _discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
62
+ // For efficiency reasons we don't do true random sampling but rather systematic
63
+ // sampling following a sample interval/skip. This can be biased and hide patterns
64
+ // but the dynamic interval and rather indeterministic pattern of allocations in
65
+ // most real applications should help reduce the bias impact.
66
+ sampler->events_since_last_sample++;
67
+ sampler->events_since_last_readjustment++;
68
+ bool should_sample = sampler->sampling_interval > 0 && sampler->events_since_last_sample >= sampler->sampling_interval;
69
+
70
+ if (should_sample) {
71
+ sampler->sample_start_time_ns = now_ns;
72
+ } else {
73
+ // check if we should readjust our sampler after this event, even if we didn't sample it
74
+ maybe_readjust(sampler, now_ns);
75
+ }
76
+
77
+ return should_sample;
78
+ }
79
+
80
// Public sampling decision, stamped with the current monotonic wall time.
bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler) {
  return _discrete_dynamic_sampler_should_sample(sampler, monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE));
}
84
+
85
+ static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
86
+ long last_sampling_time_ns = sampler->sample_start_time_ns == 0 ? 0 : long_max_of(0, now_ns - sampler->sample_start_time_ns);
87
+ sampler->samples_since_last_readjustment++;
88
+ sampler->sampling_time_since_last_readjustment_ns += last_sampling_time_ns;
89
+ sampler->events_since_last_sample = 0;
90
+
91
+ // check if we should readjust our sampler after this sample
92
+ maybe_readjust(sampler, now_ns);
93
+
94
+ return last_sampling_time_ns;
95
+ }
96
+
97
+ long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler) {
98
+ long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
99
+ return _discrete_dynamic_sampler_after_sample(sampler, now);
100
+ }
101
+
102
+ double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler) {
103
+ return sampler->sampling_probability * 100.;
104
+ }
105
+
106
+ size_t discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler) {
107
+ return sampler->events_since_last_sample;
108
+ }
109
+
110
+ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
111
+ long window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
112
+
113
+ if (window_time_ns < ADJUSTMENT_WINDOW_NS) {
114
+ // not enough time has passed to perform a readjustment
115
+ return;
116
+ }
117
+
118
+ // If we got this far, lets recalculate our sampling params based on new observations
119
+ bool first_readjustment = !sampler->has_completed_full_adjustment_window;
120
+
121
+ // Update our running average of events/sec with latest observation
122
+ sampler->events_per_ns = EXP_MOVING_AVERAGE(
123
+ (double) sampler->events_since_last_readjustment / window_time_ns,
124
+ sampler->events_per_ns,
125
+ first_readjustment
126
+ );
127
+
128
+ // Update our running average of sampling time for a specific event
129
+ long sampling_window_time_ns = sampler->sampling_time_since_last_readjustment_ns;
130
+ long sampling_overshoot_time_ns = -1;
131
+ if (sampler->samples_since_last_readjustment > 0) {
132
+ // We can only update sampling-related stats if we actually sampled on the last window...
133
+
134
+ // Lets update our average sampling time per event
135
+ long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampling_window_time_ns / sampler->samples_since_last_readjustment;
136
+ sampler->sampling_time_ns = EXP_MOVING_AVERAGE(
137
+ avg_sampling_time_in_window_ns,
138
+ sampler->sampling_time_ns,
139
+ first_readjustment
140
+ );
141
+ }
142
+
143
+ // Are we meeting our target in practice? If we're consistently overshooting our estimate due to non-uniform allocation patterns lets
144
+ // adjust our overhead target.
145
+ // NOTE: Updating this even when no samples occur is a conscious choice which enables us to cooldown extreme adjustments over time.
146
+ // If we didn't do this, whenever a big spike caused target_overhead_adjustment to equal target_overhead, we'd get stuck
147
+ // in a "probability = 0" state.
148
+ long reference_target_sampling_time_ns = window_time_ns * (sampler->target_overhead / 100.);
149
+ // Overshoot by definition is always >= 0. < 0 would be undershooting!
150
+ sampling_overshoot_time_ns = long_max_of(0, sampler->sampling_time_since_last_readjustment_ns - reference_target_sampling_time_ns);
151
+ // Our overhead adjustment should always be between [-target_overhead, 0]. Higher adjustments would lead to negative overhead targets
152
+ // which don't make much sense.
153
+ double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead, sampling_overshoot_time_ns * 100. / window_time_ns);
154
+ sampler->target_overhead_adjustment = EXP_MOVING_AVERAGE(
155
+ last_target_overhead_adjustment,
156
+ sampler->target_overhead_adjustment,
157
+ first_readjustment
158
+ );
159
+
160
+ // Apply our overhead adjustment to figure out our real targets for this readjustment.
161
+ double target_overhead = double_max_of(0, sampler->target_overhead + sampler->target_overhead_adjustment);
162
+ long target_sampling_time_ns = window_time_ns * (target_overhead / 100.);
163
+
164
+ // Recalculate target sampling probability so that the following 2 hold:
165
+ // * window_time_ns = working_window_time_ns + sampling_window_time_ns
166
+ // │ │ │
167
+ // │ │ └ how much time is spent sampling
168
+ // │ └── how much time is spent doing actual app stuff
169
+ // └── total (wall) time in this adjustment window
170
+ // * sampling_window_time_ns <= window_time_ns * target_overhead / 100
171
+ //
172
+ // Note that
173
+ //
174
+ // sampling_window_time_ns = samples_in_window * sampling_time_ns =
175
+ // ┌─ assuming no events will be emitted during sampling
176
+ // │
177
+ // = events_per_ns * working_window_time_ns * sampling_probability * sampling_time_ns
178
+ //
179
+ // Re-ordering for sampling_probability and solving for the upper-bound of sampling_window_time_ns:
180
+ //
181
+ // sampling_window_time_ns = window_time_ns * target_overhead / 100
182
+ // sampling_probability = window_time_ns * target_overhead / 100 / (events_per_ns * working_window_time_ns * sampling_time_ns) =
183
+ //
184
+ // Which you can intuitively understand as:
185
+ //
186
+ // sampling_probability = max_allowed_time_for_sampling_ns / time_to_sample_all_events_ns
187
+ //
188
+ // As a quick sanity check:
189
+ // * If app is eventing very little or we're sampling very fast, so that time_to_sample_all_events_ns < max_allowed_time_for_sampling_ns
190
+ // then probability will be > 1 (but we should clamp to 1 since probabilities higher than 1 don't make sense).
191
+ // * If app is eventing a lot or our sampling overhead is big, then as time_to_sample_all_events_ns grows, sampling_probability will
192
+ // tend to 0.
193
+ long working_window_time_ns = long_max_of(0, window_time_ns - sampling_window_time_ns);
194
+ double max_allowed_time_for_sampling_ns = target_sampling_time_ns;
195
+ long time_to_sample_all_events_ns = sampler->events_per_ns * working_window_time_ns * sampler->sampling_time_ns;
196
+ if (max_allowed_time_for_sampling_ns == 0) {
197
+ // if we aren't allowed any sampling time at all, probability has to be 0
198
+ sampler->sampling_probability = 0;
199
+ } else {
200
+ // otherwise apply the formula described above (protecting against div by 0)
201
+ sampler->sampling_probability = time_to_sample_all_events_ns == 0 ? 1. :
202
+ double_min_of(1., max_allowed_time_for_sampling_ns / time_to_sample_all_events_ns);
203
+ }
204
+
205
+ // Doing true random selection would involve "tossing a coin" on every allocation. Lets do systematic sampling instead so that our
206
+ // sampling decision can rely solely on a sampling skip/interval (i.e. more efficient).
207
+ //
208
+ // sampling_interval = events / samples =
209
+ // = event_rate * working_window_time_ns / (event_rate * working_window_time_ns * sampling_probability)
210
+ // = 1 / sampling_probability
211
+ //
212
+ // NOTE: The sampling interval has to be an integer since we're dealing with discrete events here. This means that there'll be
213
+ // a loss of precision (and thus control) when adjusting between probabilities that lead to non-integer granularity
214
+ // changes (e.g. probabilities in the range of ]50%, 100%[ which map to intervals in the range of ]1, 2[). Our approach
215
+ // when the sampling interval is a non-integer is to ceil it (i.e. we'll always choose to sample less often).
216
+ // NOTE: Overhead target adjustments or very big sampling times can in theory bring probability so close to 0 as to effectively
217
+ // round down to full 0. This means we have to be careful to handle div-by-0 as well as resulting double intervals that
218
+ // are so big they don't fit into the sampling_interval. In both cases lets just disable sampling until next readjustment
219
+ // by setting interval to 0.
220
+ double sampling_interval = sampler->sampling_probability == 0 ? 0 : ceil(1.0 / sampler->sampling_probability);
221
+ sampler->sampling_interval = sampling_interval > ULONG_MAX ? 0 : sampling_interval;
222
+
223
+ #ifdef DD_DEBUG
224
+ double allocs_in_60s = sampler->events_per_ns * 1e9 * 60;
225
+ double samples_in_60s = allocs_in_60s * sampler->sampling_probability;
226
+ double expected_total_sampling_time_in_60s =
227
+ samples_in_60s * sampler->sampling_time_ns / 1e9;
228
+ double real_total_sampling_time_in_60s = sampling_window_time_ns / 1e9 * 60 / (window_time_ns / 1e9);
229
+
230
+ fprintf(stderr, "[dds.%s] readjusting...\n", sampler->debug_name);
231
+ fprintf(stderr, "samples_since_last_readjustment=%ld\n", sampler->samples_since_last_readjustment);
232
+ fprintf(stderr, "window_time=%ld\n", window_time_ns);
233
+ fprintf(stderr, "events_per_sec=%f\n", sampler->events_per_ns * 1e9);
234
+ fprintf(stderr, "sampling_time=%ld\n", sampler->sampling_time_ns);
235
+ fprintf(stderr, "sampling_window_time=%ld\n", sampling_window_time_ns);
236
+ fprintf(stderr, "sampling_target_time=%ld\n", reference_target_sampling_time_ns);
237
+ fprintf(stderr, "sampling_overshoot_time=%ld\n", sampling_overshoot_time_ns);
238
+ fprintf(stderr, "working_window_time=%ld\n", working_window_time_ns);
239
+ fprintf(stderr, "sampling_interval=%zu\n", sampler->sampling_interval);
240
+ fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability);
241
+ fprintf(stderr, "expected allocs in 60s=%f\n", allocs_in_60s);
242
+ fprintf(stderr, "expected samples in 60s=%f\n", samples_in_60s);
243
+ fprintf(stderr, "expected sampling time in 60s=%f (previous real=%f)\n", expected_total_sampling_time_in_60s, real_total_sampling_time_in_60s);
244
+ fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
245
+ fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
246
+ fprintf(stderr, "target_sampling_time=%ld\n", target_sampling_time_ns);
247
+ fprintf(stderr, "expected max overhead in 60s=%f\n", target_overhead / 100.0 * 60);
248
+ fprintf(stderr, "-------\n");
249
+ #endif
250
+
251
+ sampler->events_since_last_readjustment = 0;
252
+ sampler->samples_since_last_readjustment = 0;
253
+ sampler->sampling_time_since_last_readjustment_ns = 0;
254
+ sampler->last_readjust_time_ns = now;
255
+ sampler->has_completed_full_adjustment_window = true;
256
+ }
257
+
258
// ---
// Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.

static VALUE _native_new(VALUE klass);
static VALUE _native_reset(VALUE self, VALUE now);
static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now);
static VALUE _native_should_sample(VALUE self, VALUE now);
static VALUE _native_after_sample(VALUE self, VALUE now);
static VALUE _native_probability(VALUE self);

// Wrapper struct held by each Testing::Sampler Ruby object.
typedef struct sampler_state {
  discrete_dynamic_sampler sampler;
} sampler_state;

// Registers Datadog::Profiling::Collectors::DiscreteDynamicSampler::Testing::Sampler
// and its _native_* methods. Called from the native extension's init with the
// Datadog::Profiling module as argument.
void collectors_discrete_dynamic_sampler_init(VALUE profiling_module) {
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
  VALUE discrete_sampler_module = rb_define_module_under(collectors_module, "DiscreteDynamicSampler");
  VALUE testing_module = rb_define_module_under(discrete_sampler_module, "Testing");
  VALUE sampler_class = rb_define_class_under(testing_module, "Sampler", rb_cObject);

  rb_define_alloc_func(sampler_class, _native_new);

  rb_define_method(sampler_class, "_native_reset", _native_reset, 1);
  rb_define_method(sampler_class, "_native_set_overhead_target_percentage", _native_set_overhead_target_percentage, 2);
  rb_define_method(sampler_class, "_native_should_sample", _native_should_sample, 1);
  rb_define_method(sampler_class, "_native_after_sample", _native_after_sample, 1);
  rb_define_method(sampler_class, "_native_probability", _native_probability, 0);
}
286
+
287
// TypedData configuration for the testing Sampler wrapper.
static const rb_data_type_t sampler_typed_data = {
  .wrap_struct_name = "Datadog::Profiling::DiscreteDynamicSampler::Testing::Sampler",
  .function = {
    // RUBY_DEFAULT_FREE is enough: sampler_state is a single allocation with no owned pointers.
    .dfree = RUBY_DEFAULT_FREE,
    // NOTE(review): dsize == NULL means memory use is not reported to ObjectSpace;
    // acceptable for this test-only class.
    .dsize = NULL,
  },
  .flags = RUBY_TYPED_FREE_IMMEDIATELY
};
295
+
296
+ static VALUE _native_new(VALUE klass) {
297
+ sampler_state *state = ruby_xcalloc(sizeof(sampler_state), 1);
298
+
299
+ discrete_dynamic_sampler_init(&state->sampler, "test sampler");
300
+
301
+ return TypedData_Wrap_Struct(klass, &sampler_typed_data, state);
302
+ }
303
+
304
// Exposed for testing: resets the wrapped sampler as-if at the given monotonic
// timestamp (nanoseconds, Integer). Always returns true.
static VALUE _native_reset(VALUE self, VALUE now_ns) {
  ENFORCE_TYPE(now_ns, T_FIXNUM);

  sampler_state *state;
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);

  _discrete_dynamic_sampler_reset(&state->sampler, NUM2LONG(now_ns));
  return Qtrue;
}
313
+
314
// Exposed for testing: sets a new overhead target (Float, ]0, 100]) as-if at the
// given monotonic timestamp (nanoseconds, Integer). Resets the sampler as a side effect.
static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now_ns) {
  ENFORCE_TYPE(target_overhead, T_FLOAT);
  ENFORCE_TYPE(now_ns, T_FIXNUM);

  sampler_state *state;
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);

  _discrete_dynamic_sampler_set_overhead_target_percentage(&state->sampler, NUM2DBL(target_overhead), NUM2LONG(now_ns));

  return Qnil;
}
325
+
326
+ VALUE _native_should_sample(VALUE self, VALUE now_ns) {
327
+ ENFORCE_TYPE(now_ns, T_FIXNUM);
328
+
329
+ sampler_state *state;
330
+ TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
331
+
332
+ return _discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
333
+ }
334
+
335
+ VALUE _native_after_sample(VALUE self, VALUE now_ns) {
336
+ ENFORCE_TYPE(now_ns, T_FIXNUM);
337
+
338
+ sampler_state *state;
339
+ TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
340
+
341
+ return LONG2NUM(_discrete_dynamic_sampler_after_sample(&state->sampler, NUM2LONG(now_ns)));
342
+ }
343
+
344
+ VALUE _native_probability(VALUE self) {
345
+ sampler_state *state;
346
+ TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
347
+
348
+ return DBL2NUM(discrete_dynamic_sampler_probability(&state->sampler));
349
+ }
@@ -0,0 +1,89 @@
1
#pragma once

#include <stdbool.h>
#include <stddef.h>

// A sampler that will sample discrete events based on the overhead of their
// sampling.
//
// NOTE: For performance reasons, this sampler does systematic sampling via
//       sampling intervals/skips that are dynamically adjusted over time.
//       It will not perform truly random sampling by "throwing a coin" at
//       every event and is thus, in theory, susceptible to some pattern
//       biases. In practice, the dynamic readjustment of sampling interval
//       and randomized starting point should help with avoiding heavy biases.
typedef struct discrete_dynamic_sampler {
  // --- Config ---
  // Name of this sampler for debug logs.
  const char *debug_name;
  // Value in the range ]0, 100] representing the % of time we're willing to dedicate
  // to sampling.
  double target_overhead;

  // -- Reference State ---
  // Moving average of how many events per ns we saw over the recent past.
  double events_per_ns;
  // Moving average of the sampling time of each individual event.
  long sampling_time_ns;
  // Sampling probability being applied by this sampler.
  double sampling_probability;
  // Sampling interval/skip that drives the systematic sampling done by this sampler.
  // NOTE: This is an inverted view of the probability.
  // NOTE: A value of 0 works as +inf, effectively disabling sampling (to align with probability=0)
  unsigned long sampling_interval;

  // -- Sampling State --
  // How many events have we seen since we last decided to sample.
  unsigned long events_since_last_sample;
  // Captures the time at which the last true-returning call to should_sample happened.
  // This is used in after_sample to understand the total sample time.
  long sample_start_time_ns;

  // -- Adjustment State --
  // Has this sampler already run for at least one complete adjustment window?
  bool has_completed_full_adjustment_window;
  // Time at which we last readjusted our sampling parameters.
  long last_readjust_time_ns;
  // How many events have we seen since the last readjustment.
  unsigned long events_since_last_readjustment;
  // How many samples have we seen since the last readjustment.
  unsigned long samples_since_last_readjustment;
  // How much time have we spent sampling since the last readjustment.
  unsigned long sampling_time_since_last_readjustment_ns;
  // A negative number that we add to target_overhead to serve as extra padding to
  // try and mitigate observed overshooting of max sampling time.
  double target_overhead_adjustment;
} discrete_dynamic_sampler;


// Init a new sampler with sane defaults.
void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name);

// Reset a sampler, clearing all stored state.
void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler);

// Sets a new target_overhead for the provided sampler, resetting it in the process.
// @param target_overhead A double representing the percentage of total time we are
//        willing to use as overhead for the resulting sampling. Values are expected
//        to be in the range ]0.0, 100.0].
void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead);

// Make a sampling decision.
//
// @return True if the event associated with this decision should be sampled, false
//         otherwise.
//
// NOTE: If true is returned we implicitly assume the start of a sampling operation
//       and it is expected that a follow-up after_sample call is issued.
bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler);

// Signal the end of a sampling operation.
//
// @return Sampling time in nanoseconds for the sample operation we just finished.
long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler);

// Retrieve the current sampling probability ([0.0, 100.0]) being applied by this sampler.
double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler);

// Retrieve the current number of events seen since last sample.
unsigned long discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler);
@@ -19,7 +19,7 @@
19
19
  //
20
20
  // Instead of sampling at a fixed sample rate, the actual sampling rate should be decided by also observing the impact
21
21
  // that running the profiler is having. This protects against issues such as the profiler being deployed in very busy
22
- //machines or containers with unrealistic CPU restrictions.
22
+ // machines or containers with unrealistic CPU restrictions.
23
23
  //
24
24
  // ### Implementation
25
25
  //
@@ -35,13 +35,13 @@
35
35
  // sample. If it's not, it will skip sampling.
36
36
  //
37
37
  // Finally, as an additional optimization, there's a `dynamic_sampling_rate_get_sleep()` which, given the current
38
- // wall-time, will return the time remaining (*there's an exception, check below) until the next sample.
38
+ // wall-time, will return the time remaining (*there's an exception, check function) until the next sample.
39
39
  //
40
40
  // ---
41
41
 
42
42
  // This is the wall-time overhead we're targeting. E.g. we target to spend no more than 2%, or 1.2 seconds per minute,
43
- // taking profiling samples.
44
- #define WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // %
43
+ // taking profiling samples by default.
44
+ #define DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // %
45
45
  // See `dynamic_sampling_rate_get_sleep()` for details
46
46
  #define MAX_SLEEP_TIME_NS MILLIS_AS_NS(100)
47
47
  // See `dynamic_sampling_rate_after_sample()` for details
@@ -49,6 +49,11 @@
49
49
 
50
50
  void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state) {
51
51
  atomic_init(&state->next_sample_after_monotonic_wall_time_ns, 0);
52
+ dynamic_sampling_rate_set_overhead_target_percentage(state, DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE);
53
+ }
54
+
55
+ void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage) {
56
+ state->overhead_target_percentage = overhead_target_percentage;
52
57
  }
53
58
 
54
59
  void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state) {
@@ -76,7 +81,7 @@ bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, lon
76
81
  }
77
82
 
78
83
  void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long wall_time_ns_after_sample, uint64_t sampling_time_ns) {
79
- double overhead_target = (double) WALL_TIME_OVERHEAD_TARGET_PERCENTAGE;
84
+ double overhead_target = state->overhead_target_percentage;
80
85
 
81
86
  // The idea here is that we're targeting a maximum % of wall-time spent sampling.
82
87
  // So for instance, if sampling_time_ns is 2% of the time we spend working, how much is the 98% we should spend
@@ -93,48 +98,51 @@ void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long
93
98
  // ---
94
99
  // Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.
95
100
 
96
- VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
97
- VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
98
- VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);
101
+ VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
102
+ VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
103
+ VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);
99
104
 
100
105
  void collectors_dynamic_sampling_rate_init(VALUE profiling_module) {
101
106
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
102
107
  VALUE dynamic_sampling_rate_module = rb_define_module_under(collectors_module, "DynamicSamplingRate");
103
108
  VALUE testing_module = rb_define_module_under(dynamic_sampling_rate_module, "Testing");
104
109
 
105
- rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 2);
106
- rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 2);
107
- rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 2);
110
+ rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 3);
111
+ rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 3);
112
+ rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 3);
108
113
  }
109
114
 
110
- VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
115
+ VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
111
116
  ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
112
117
  ENFORCE_TYPE(current_monotonic_wall_time_ns, T_FIXNUM);
113
118
 
114
119
  dynamic_sampling_rate_state state;
115
120
  dynamic_sampling_rate_init(&state);
121
+ dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
116
122
  atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));
117
123
 
118
124
  return ULL2NUM(dynamic_sampling_rate_get_sleep(&state, NUM2LONG(current_monotonic_wall_time_ns)));
119
125
  }
120
126
 
121
- VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
127
+ VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
122
128
  ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
123
129
  ENFORCE_TYPE(wall_time_ns_before_sample, T_FIXNUM);
124
130
 
125
131
  dynamic_sampling_rate_state state;
126
132
  dynamic_sampling_rate_init(&state);
133
+ dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
127
134
  atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));
128
135
 
129
136
  return dynamic_sampling_rate_should_sample(&state, NUM2LONG(wall_time_ns_before_sample)) ? Qtrue : Qfalse;
130
137
  }
131
138
 
132
- VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
139
+ VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
133
140
  ENFORCE_TYPE(wall_time_ns_after_sample, T_FIXNUM);
134
141
  ENFORCE_TYPE(sampling_time_ns, T_FIXNUM);
135
142
 
136
143
  dynamic_sampling_rate_state state;
137
144
  dynamic_sampling_rate_init(&state);
145
+ dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
138
146
 
139
147
  dynamic_sampling_rate_after_sample(&state, NUM2LONG(wall_time_ns_after_sample), NUM2ULL(sampling_time_ns));
140
148
 
@@ -4,10 +4,14 @@
4
4
  #include <stdbool.h>
5
5
 
6
6
  typedef struct {
7
+ // This is the wall-time overhead we're targeting. E.g. by default, we target to spend no more than 2%, or 1.2 seconds
8
+ // per minute, taking profiling samples.
9
+ double overhead_target_percentage;
7
10
  atomic_long next_sample_after_monotonic_wall_time_ns;
8
11
  } dynamic_sampling_rate_state;
9
12
 
10
13
  void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state);
14
+ void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage);
11
15
  void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state);
12
16
  uint64_t dynamic_sampling_rate_get_sleep(dynamic_sampling_rate_state *state, long current_monotonic_wall_time_ns);
13
17
  bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, long wall_time_ns_before_sample);