RubyGems - ddtrace - Versions diffs - 1.7.0 → 1.9.0 - Mend

ddtrace 1.7.0 → 1.9.0

Files changed (182) hide show

data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c ADDED Viewed

@@ -0,0 +1,142 @@
+#include <ruby.h>
+#include "collectors_dynamic_sampling_rate.h"
+#include "helpers.h"
+#include "ruby_helpers.h"
+#include "time_helpers.h"
+// Used to pace the rate of profiling samples based on the last observed time for a sample.
+//
+// This file implements the native bits of the Datadog::Profiling::Collectors::DynamicSamplingRate module, and is
+// only exposed to Ruby for testing (it's always and only invoked by other C code in production).
+// ---
+// ## Dynamic Sampling Rate
+//
+// Our profilers get deployed in quite unpredictable situations in terms of system resources. While they can provide key
+// information to help customers solve their performance problems, the profilers must always be careful not to make
+// performance problems worse. This is where the idea of a dynamic sampling rate comes in.
+//
+// Instead of sampling at a fixed sample rate, the actual sampling rate should be decided by also observing the impact
+// that running the profiler is having. This protects against issues such as the profiler being deployed in very busy
+// machines or containers with unrealistic CPU restrictions.
+//
+// ### Implementation
+//
+// The APIs exposed by this file are used by the `CpuAndWallTimeWorker`.
+//
+// The main idea of the implementation below is the following: whenever the profiler takes a sample, the time we spent
+// sampling and the current wall-time are recorded by calling `dynamic_sampling_rate_after_sample()`.
+//
+// Inside `dynamic_sampling_rate_after_sample()`, both values are combined to decide a future wall-time before which
+// we should not sample. That is, we may decide that the next sample should happen no less than 200ms from now.
+//
+// Before taking a sample, the profiler calls `dynamic_sampling_rate_should_sample()` to check whether it's time to
+// sample. If it's not, it will skip sampling.
+//
+// Finally, as an additional optimization, there's a `dynamic_sampling_rate_get_sleep()` which, given the current
+// wall-time, will return the time remaining (*there's an exception, check below) until the next sample.
+//
+// ---
+// This is the wall-time overhead we're targeting. E.g. we target to spend no more than 2%, or 1.2 seconds per minute,
+// taking profiling samples.
+#define WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // %
+// See `dynamic_sampling_rate_get_sleep()` for details
+#define MAX_SLEEP_TIME_NS MILLIS_AS_NS(100)
+// See `dynamic_sampling_rate_after_sample()` for details
+#define MAX_TIME_UNTIL_NEXT_SAMPLE_NS SECONDS_AS_NS(10)
+void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state) { // Prepares `state` for first use
+  atomic_init(&state->next_sample_after_monotonic_wall_time_ns, 0); // With 0 stored, dynamic_sampling_rate_get_sleep() returns 0 (no sleep requested)
+}
+void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state) { // Puts an already-initialized `state` back to its initial value
+  atomic_store(&state->next_sample_after_monotonic_wall_time_ns, 0); // Same value that dynamic_sampling_rate_init() sets
+}
+uint64_t dynamic_sampling_rate_get_sleep(dynamic_sampling_rate_state *state, long current_monotonic_wall_time_ns) { // Returns how many ns the caller may sleep before checking again (0 = do not sleep)
+  long next_sample_after_ns = atomic_load(&state->next_sample_after_monotonic_wall_time_ns);
+  long delta_ns = next_sample_after_ns - current_monotonic_wall_time_ns; // Time left until sampling is allowed again; <= 0 once that time was reached
+  if (delta_ns > 0 && next_sample_after_ns > 0) { // Only sleep when a next-sample time was recorded and it is still in the future
+    // We don't want to sleep for too long as the profiler may be trying to stop.
+    //
+    // Instead, here we sleep for at most this time. Worst case, the profiler will still try to sample before
+    // `next_sample_after_monotonic_wall_time_ns`, BUT `dynamic_sampling_rate_should_sample()` will still be false
+    // so we still get the intended behavior.
+    return uint64_min_of(delta_ns, MAX_SLEEP_TIME_NS); // Remaining time, capped at MAX_SLEEP_TIME_NS
+  } else {
+    return 0;
+  }
+}
+bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, long wall_time_ns_before_sample) { // Tells the caller whether a sample may be taken at the given wall-time
+  return wall_time_ns_before_sample >= atomic_load(&state->next_sample_after_monotonic_wall_time_ns); // True once the recorded "next sample after" time was reached (inclusive)
+}
+void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long wall_time_ns_after_sample, uint64_t sampling_time_ns) { // Records, from the cost of the last sample, the wall-time before which no new sample should be taken
+  double overhead_target = (double) WALL_TIME_OVERHEAD_TARGET_PERCENTAGE;
+  // The idea here is that we're targeting a maximum % of wall-time spent sampling.
+  // So for instance, if sampling_time_ns is 2% of the time we spend working, how much is the 98% we should spend
+  // sleeping? As an example, if the last sample took 1ms and the target overhead is 2%, we should sleep for 49ms.
+  uint64_t time_to_sleep_ns = sampling_time_ns * ((100.0 - overhead_target)/overhead_target); // The product is computed as a double and truncated when stored into the uint64_t
+  // In case a sample took an unexpected long time (e.g. maybe a VM was paused, or a laptop was suspended), we clamp the
+  // value so it doesn't get too crazy
+  time_to_sleep_ns = uint64_min_of(time_to_sleep_ns, MAX_TIME_UNTIL_NEXT_SAMPLE_NS);
+  atomic_store(&state->next_sample_after_monotonic_wall_time_ns, wall_time_ns_after_sample + time_to_sleep_ns); // Value later read by dynamic_sampling_rate_should_sample() and dynamic_sampling_rate_get_sleep()
+}
+// ---
+// Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.
+VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
+VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
+VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);
+void collectors_dynamic_sampling_rate_init(VALUE profiling_module) { // Registers the Collectors::DynamicSamplingRate::Testing module and its methods under `profiling_module`
+  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
+  VALUE dynamic_sampling_rate_module = rb_define_module_under(collectors_module, "DynamicSamplingRate");
+  VALUE testing_module = rb_define_module_under(dynamic_sampling_rate_module, "Testing");
+  rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 2); // Each testing method takes 2 Ruby arguments
+  rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 2);
+  rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 2);
+}
+VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) { // Testing wrapper around dynamic_sampling_rate_get_sleep()
+  ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM); // NOTE(review): ENFORCE_TYPE is defined elsewhere; presumably rejects non-Fixnum arguments
+  ENFORCE_TYPE(current_monotonic_wall_time_ns, T_FIXNUM);
+  dynamic_sampling_rate_state state; // Throwaway state, local to this call
+  dynamic_sampling_rate_init(&state);
+  atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns)); // Inject the simulated "next sample after" time directly
+  return ULL2NUM(dynamic_sampling_rate_get_sleep(&state, NUM2LONG(current_monotonic_wall_time_ns))); // Sleep time in ns, converted to a Ruby number
+}
+VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) { // Testing wrapper around dynamic_sampling_rate_should_sample()
+  ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM); // NOTE(review): ENFORCE_TYPE is defined elsewhere; presumably rejects non-Fixnum arguments
+  ENFORCE_TYPE(wall_time_ns_before_sample, T_FIXNUM);
+  dynamic_sampling_rate_state state; // Throwaway state, local to this call
+  dynamic_sampling_rate_init(&state);
+  atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns)); // Inject the simulated "next sample after" time directly
+  return dynamic_sampling_rate_should_sample(&state, NUM2LONG(wall_time_ns_before_sample)) ? Qtrue : Qfalse; // C bool mapped to Ruby true/false
+}
+VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) { // Testing wrapper around dynamic_sampling_rate_after_sample()
+  ENFORCE_TYPE(wall_time_ns_after_sample, T_FIXNUM); // NOTE(review): ENFORCE_TYPE is defined elsewhere; presumably rejects non-Fixnum arguments
+  ENFORCE_TYPE(sampling_time_ns, T_FIXNUM);
+  dynamic_sampling_rate_state state; // Throwaway state, local to this call
+  dynamic_sampling_rate_init(&state);
+  dynamic_sampling_rate_after_sample(&state, NUM2LONG(wall_time_ns_after_sample), NUM2ULL(sampling_time_ns));
+  return ULL2NUM(atomic_load(&state.next_sample_after_monotonic_wall_time_ns)); // Returns the computed "next sample after" time so the caller can inspect it
+}

data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h ADDED Viewed

@@ -0,0 +1,14 @@
+#pragma once
+#include <stdatomic.h>
+#include <stdbool.h>
+typedef struct { // State shared by the dynamic_sampling_rate_* functions declared below
+  atomic_long next_sample_after_monotonic_wall_time_ns; // Monotonic wall-time (ns) before which no new sample should be taken; accessed with atomic operations
+} dynamic_sampling_rate_state;
+void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state);
+void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state);
+uint64_t dynamic_sampling_rate_get_sleep(dynamic_sampling_rate_state *state, long current_monotonic_wall_time_ns);
+bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, long wall_time_ns_before_sample);
+void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long wall_time_ns_after_sample, uint64_t sampling_time_ns);

data/ext/ddtrace_profiling_native_extension/collectors_idle_sampling_helper.c ADDED Viewed

@@ -0,0 +1,241 @@
+#include <ruby.h>
+#include <ruby/thread.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include "helpers.h"
+#include "ruby_helpers.h"
+#include "collectors_idle_sampling_helper.h"
+// Used by the Collectors::CpuAndWallTimeWorker to gather samples when the Ruby process is idle.
+//
+// Specifically, the IdleSamplingHelper is expected to be triggered by the CpuAndWallTimeWorker whenever it needs to
+// trigger a sample, but the VM is otherwise idle. See implementation of CpuAndWallTimeWorker for details.
+//
+// The IdleSamplingHelper keeps a background thread that waits for functions to run on a single-element "queue".
+// Other threads communicate with it by asking it to ACTION_RUN a `requested_action` or ACTION_STOP to terminate.
+//
+// The state is protected by the `wakeup_mutex`, and the background thread is woken up after changes using the
+// `wakeup` condition variable.
+typedef enum { ACTION_WAIT, ACTION_RUN, ACTION_STOP } action; // ACTION_WAIT: nothing requested; ACTION_RUN: call run_action_function; ACTION_STOP: leave the loop
+// Contains state for a single IdleSamplingHelper instance
+struct idle_sampling_loop_state {
+  pthread_mutex_t wakeup_mutex; // Protects requested_action and run_action_function
+  pthread_cond_t wakeup; // Signalled (broadcast) after requested_action changes
+  action requested_action; // Single-element "queue": the pending request, or ACTION_WAIT when there is none
+  void (*run_action_function)(void); // Function to call for a pending ACTION_RUN
+};
+static VALUE _native_new(VALUE klass);
+static void reset_state(struct idle_sampling_loop_state *state);
+static VALUE _native_idle_sampling_loop(DDTRACE_UNUSED VALUE self, VALUE self_instance);
+static VALUE _native_stop(DDTRACE_UNUSED VALUE self, VALUE self_instance);
+static void *run_idle_sampling_loop(void *state_ptr);
+static void interrupt_idle_sampling_loop(void *state_ptr);
+static VALUE _native_reset(DDTRACE_UNUSED VALUE self, VALUE self_instance);
+static VALUE _native_idle_sampling_helper_request_action(DDTRACE_UNUSED VALUE self, VALUE self_instance);
+static void *request_testing_action(void *self_instance_ptr);
+static void grab_gvl_and_run_testing_action(void);
+static void *run_testing_action(DDTRACE_UNUSED void *unused);
+void collectors_idle_sampling_helper_init(VALUE profiling_module) { // Registers the Collectors::IdleSamplingHelper class and its native methods under `profiling_module`
+  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
+  VALUE collectors_idle_sampling_helper_class = rb_define_class_under(collectors_module, "IdleSamplingHelper", rb_cObject);
+  // Hosts methods used for testing the native code using RSpec
+  VALUE testing_module = rb_define_module_under(collectors_idle_sampling_helper_class, "Testing");
+  // Instances of the IdleSamplingHelper class are "TypedData" objects.
+  // "TypedData" objects are special objects in the Ruby VM that can wrap C structs.
+  // In this case, it wraps the idle_sampling_loop_state.
+  //
+  // Because Ruby doesn't know how to initialize native-level structs, we MUST override the allocation function for objects
+  // of this class so that we can manage this part. Not overriding or disabling the allocation function is a common
+  // gotcha for "TypedData" objects that can very easily lead to VM crashes, see for instance
+  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
+  rb_define_alloc_func(collectors_idle_sampling_helper_class, _native_new);
+  rb_define_singleton_method(collectors_idle_sampling_helper_class, "_native_idle_sampling_loop", _native_idle_sampling_loop, 1); // Every native method takes the instance as its single explicit argument
+  rb_define_singleton_method(collectors_idle_sampling_helper_class, "_native_stop", _native_stop, 1);
+  rb_define_singleton_method(collectors_idle_sampling_helper_class, "_native_reset", _native_reset, 1);
+  rb_define_singleton_method(testing_module, "_native_idle_sampling_helper_request_action", _native_idle_sampling_helper_request_action, 1);
+}
+// This structure is used to define a Ruby object that stores a pointer to a struct idle_sampling_loop_state
+// See also https://github.com/ruby/ruby/blob/master/doc/extension.rdoc for how this works
+static const rb_data_type_t idle_sampling_helper_typed_data = {
+  .wrap_struct_name = "Datadog::Profiling::Collectors::IdleSamplingHelper",
+  .function = {
+    .dmark = NULL, // We don't store references to Ruby objects so we don't need to mark any of them
+    .dfree = RUBY_DEFAULT_FREE, // The wrapped struct is allocated with ruby_xcalloc in _native_new, so Ruby's default free is used to release it
+    .dsize = NULL, // We don't track memory usage (although it'd be cool if we did!)
+    //.dcompact = NULL, // Not needed -- we don't store references to Ruby objects
+  },
+  .flags = RUBY_TYPED_FREE_IMMEDIATELY
+};
+static VALUE _native_new(VALUE klass) { // Allocation function for IdleSamplingHelper: creates the native state and wraps it in a new Ruby object
+  struct idle_sampling_loop_state *state = ruby_xcalloc(1, sizeof(struct idle_sampling_loop_state));
+  reset_state(state); // Set up the mutex, the condition variable and an empty request
+  return TypedData_Wrap_Struct(klass, &idle_sampling_helper_typed_data, state);
+}
+static void reset_state(struct idle_sampling_loop_state *state) { // Overwrites every field of `state` with its initial value
+  state->wakeup_mutex = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER; // Overwritten with the static initializer rather than destroyed and re-created
+  state->wakeup = (pthread_cond_t) PTHREAD_COND_INITIALIZER;
+  state->requested_action = ACTION_WAIT; // No pending request
+  state->run_action_function = NULL;
+}
+// The same instance of the IdleSamplingHelper can be reused multiple times, and this resets it back to
+// a pristine state before recreating the worker thread (this includes resetting the mutex in case it was left
+// locked halfway through the VM forking)
+static VALUE _native_reset(DDTRACE_UNUSED VALUE self, VALUE self_instance) { // Always returns Qtrue
+  struct idle_sampling_loop_state *state;
+  TypedData_Get_Struct(self_instance, struct idle_sampling_loop_state, &idle_sampling_helper_typed_data, state);
+  reset_state(state); // Any pending request is discarded
+  return Qtrue;
+}
+static VALUE _native_idle_sampling_loop(DDTRACE_UNUSED VALUE self, VALUE self_instance) { // Runs the request-processing loop on the calling thread; returns Qtrue after the loop ends
+  struct idle_sampling_loop_state *state;
+  TypedData_Get_Struct(self_instance, struct idle_sampling_loop_state, &idle_sampling_helper_typed_data, state);
+  // Release GVL and run the loop waiting for requests
+  rb_thread_call_without_gvl(run_idle_sampling_loop, state, interrupt_idle_sampling_loop, state); // interrupt_idle_sampling_loop is registered as the unblock function
+  return Qtrue;
+}
+static void *run_idle_sampling_loop(void *state_ptr) { // Loop body run without the GVL: waits for requests and runs them until asked to stop; always returns NULL
+  struct idle_sampling_loop_state *state = (struct idle_sampling_loop_state *) state_ptr;
+  int error = 0;
+  while (true) {
+    ENFORCE_SUCCESS_NO_GVL(pthread_mutex_lock(&state->wakeup_mutex));
+    action next_action;
+    void (*run_action_function)(void);
+    // Wait for an action
+    while ((next_action = state->requested_action) == ACTION_WAIT) { // Re-check after every wakeup; the wait only ends once a request is pending
+      error = pthread_cond_wait(&state->wakeup, &state->wakeup_mutex);
+      if (error) {
+        // If something went wrong, try to leave the mutex unlocked at least
+        pthread_mutex_unlock(&state->wakeup_mutex);
+        ENFORCE_SUCCESS_NO_GVL(error); // NOTE(review): macro is defined elsewhere; presumably raises here since `error` is non-zero
+      }
+    }
+    // There's an action to be taken!
+    // Record function, if any
+    run_action_function = state->run_action_function; // Copied while the mutex is held, so it can be called after unlocking
+    // Reset buffer for next request
+    state->requested_action = ACTION_WAIT;
+    // Unlock the mutex immediately so other threads can continue to request actions without blocking
+    ENFORCE_SUCCESS_NO_GVL(pthread_mutex_unlock(&state->wakeup_mutex));
+    // Process pending action
+    if (next_action == ACTION_RUN) {
+      if (run_action_function == NULL) {
+        grab_gvl_and_raise(rb_eRuntimeError, "Unexpected NULL run_action_function in run_idle_sampling_loop");
+      }
+      run_action_function(); // Called without the mutex held and without the GVL
+    } else { // ACTION_STOP
+      return NULL;
+    }
+  }
+}
+static void interrupt_idle_sampling_loop(void *state_ptr) { // Unblock function passed to rb_thread_call_without_gvl: asks run_idle_sampling_loop to stop
+  struct idle_sampling_loop_state *state = (struct idle_sampling_loop_state *) state_ptr;
+  int error = 0;
+  // Note about the error handling in this situation: Something bad happening at this stage is really really awkward to
+  // handle because we get called by the VM in a situation where we can't really raise exceptions, and the VM really really
+  // just wants us to stop what we're doing and return control of the thread to it.
+  //
+  // So if we return immediately on error, we may leave the VM hanging because we didn't actually interrupt the thread.
+  // We're also not at a great location to flag errors.
+  // That's why: a) I chose to log to stderr, as a last-ditch effort; b) even if something goes wrong we still try to
+  // ask the thread to stop, instead of exiting early.
+  error = pthread_mutex_lock(&state->wakeup_mutex);
+  if (error) { fprintf(stderr, "[DDTRACE] Error during pthread_mutex_lock in interrupt_idle_sampling_loop (%s)\n", strerror(error)); }
+  state->requested_action = ACTION_STOP; // Written even if the lock above failed (see the note on error handling)
+  error = pthread_mutex_unlock(&state->wakeup_mutex);
+  if (error) { fprintf(stderr, "[DDTRACE] Error during pthread_mutex_unlock in interrupt_idle_sampling_loop (%s)\n", strerror(error)); }
+  error = pthread_cond_broadcast(&state->wakeup); // Wake the loop so it observes ACTION_STOP
+  if (error) { fprintf(stderr, "[DDTRACE] Error during pthread_cond_broadcast in interrupt_idle_sampling_loop (%s)\n", strerror(error)); }
+}
+static VALUE _native_stop(DDTRACE_UNUSED VALUE self, VALUE self_instance) { // Asks the loop of `self_instance` to stop; returns Qtrue
+  struct idle_sampling_loop_state *state;
+  TypedData_Get_Struct(self_instance, struct idle_sampling_loop_state, &idle_sampling_helper_typed_data, state);
+  ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&state->wakeup_mutex));
+  state->requested_action = ACTION_STOP; // Overwrites any pending request, including a pending ACTION_RUN
+  ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(&state->wakeup_mutex));
+  // Wake up worker thread, if needed; It's OK to call broadcast after releasing the mutex
+  ENFORCE_SUCCESS_GVL(pthread_cond_broadcast(&state->wakeup));
+  return Qtrue;
+}
+// Assumption: Function gets called without the global VM lock
+void idle_sampling_helper_request_action(VALUE self_instance, void (*run_action_function)(void)) { // Asks the loop of `self_instance` to call `run_action_function`
+  struct idle_sampling_loop_state *state;
+  if (!rb_typeddata_is_kind_of(self_instance, &idle_sampling_helper_typed_data)) {
+    grab_gvl_and_raise(rb_eTypeError, "Wrong argument for idle_sampling_helper_request_action");
+  }
+  // This should never fail if the above check passes
+  TypedData_Get_Struct(self_instance, struct idle_sampling_loop_state, &idle_sampling_helper_typed_data, state);
+  ENFORCE_SUCCESS_NO_GVL(pthread_mutex_lock(&state->wakeup_mutex));
+  if (state->requested_action == ACTION_WAIT) { // Only enqueue when nothing is pending; a pending ACTION_RUN or ACTION_STOP is left untouched and this request is dropped
+    state->requested_action = ACTION_RUN;
+    state->run_action_function = run_action_function;
+  }
+  ENFORCE_SUCCESS_NO_GVL(pthread_mutex_unlock(&state->wakeup_mutex));
+  // Wake up worker thread, if needed; It's OK to call broadcast after releasing the mutex
+  ENFORCE_SUCCESS_NO_GVL(pthread_cond_broadcast(&state->wakeup));
+}
+// Because the idle_sampling_helper_request_action is built to be called without the global VM lock, here we release it
+// to be able to call that API.
+static VALUE _native_idle_sampling_helper_request_action(DDTRACE_UNUSED VALUE self, VALUE self_instance) { // Testing entry point; returns Qtrue
+  rb_thread_call_without_gvl(request_testing_action, (void *) self_instance, NULL, NULL); // The VALUE is passed through the void * argument; no unblock function is provided
+  return Qtrue;
+}
+static void *request_testing_action(void *self_instance_ptr) { // Runs without the GVL: requests the testing action on the given instance; returns NULL
+  VALUE self_instance = (VALUE) self_instance_ptr; // Undo the cast done by _native_idle_sampling_helper_request_action
+  idle_sampling_helper_request_action(self_instance, grab_gvl_and_run_testing_action);
+  return NULL;
+}
+// This gets called by the worker thread, which is not holding the global VM lock. To be able to actually run the action,
+// we need to acquire it
+static void grab_gvl_and_run_testing_action(void) { // Function handed to idle_sampling_helper_request_action by request_testing_action
+  rb_thread_call_with_gvl(run_testing_action, NULL);
+}
+static void *run_testing_action(DDTRACE_UNUSED void *unused) { // Invoked via rb_thread_call_with_gvl: runs the action stored in a Ruby global; returns NULL
+  VALUE idle_sampling_helper_testing_action = rb_gv_get("$idle_sampling_helper_testing_action"); // The Ruby global is expected to hold an object responding to `call`
+  rb_funcall(idle_sampling_helper_testing_action, rb_intern("call"), 0); // Invoke it with no arguments
+  return NULL;
+}

data/ext/ddtrace_profiling_native_extension/collectors_idle_sampling_helper.h ADDED Viewed

@@ -0,0 +1,3 @@
+#pragma once
+void idle_sampling_helper_request_action(VALUE self_instance, void (*run_action_function)(void));

data/ext/ddtrace_profiling_native_extension/collectors_stack.c CHANGED Viewed

@@ -22,8 +22,8 @@ struct sampling_buffer {
   VALUE *stack_buffer;
   int *lines_buffer;
   bool *is_ruby_frame;
-  ddog_Location *locations;
-  ddog_Line *lines;
+  ddog_prof_Location *locations;
+  ddog_prof_Line *lines;
 }; // Note: typedef'd in the header to sampling_buffer
 static VALUE _native_sample(
@@ -39,8 +39,8 @@ static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer*
 static void record_placeholder_stack_in_native_code(
   sampling_buffer* buffer,
   VALUE recorder_instance,
-  ddog_Slice_i64 metric_values,
-  ddog_Slice_label labels,
+  ddog_Slice_I64 metric_values,
+  ddog_prof_Slice_Label labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 );
@@ -48,8 +48,8 @@ static void sample_thread_internal(
   VALUE thread,
   sampling_buffer* buffer,
   VALUE recorder_instance,
-  ddog_Slice_i64 metric_values,
-  ddog_Slice_label labels,
+  ddog_Slice_I64 metric_values,
+  ddog_prof_Slice_Label labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 );
@@ -96,12 +96,12 @@ static VALUE _native_sample(
   }
   long labels_count = RARRAY_LEN(labels_array);
-  ddog_Label labels[labels_count];
+  ddog_prof_Label labels[labels_count];
   for (int i = 0; i < labels_count; i++) {
     VALUE key_str_pair = rb_ary_entry(labels_array, i);
-    labels[i] = (ddog_Label) {
+    labels[i] = (ddog_prof_Label) {
       .key = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 0)),
       .str = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 1))
     };
@@ -116,8 +116,8 @@ static VALUE _native_sample(
     thread,
     buffer,
     recorder_instance,
-    (ddog_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
-    (ddog_Slice_label) {.ptr = labels, .len = labels_count},
+    (ddog_Slice_I64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
+    (ddog_prof_Slice_Label) {.ptr = labels, .len = labels_count},
     RTEST(in_gc) ? SAMPLE_IN_GC : SAMPLE_REGULAR
   );
@@ -130,8 +130,8 @@ void sample_thread(
   VALUE thread,
   sampling_buffer* buffer,
   VALUE recorder_instance,
-  ddog_Slice_i64 metric_values,
-  ddog_Slice_label labels,
+  ddog_Slice_I64 metric_values,
+  ddog_prof_Slice_Label labels,
   sample_type type
 ) {
   // Samples thread into recorder
@@ -146,8 +146,8 @@ void sample_thread(
   if (type == SAMPLE_IN_GC) {
     ddog_CharSlice function_name = DDOG_CHARSLICE_C("");
     ddog_CharSlice function_filename = DDOG_CHARSLICE_C("Garbage Collection");
-    buffer->lines[0] = (ddog_Line) {
-      .function = (ddog_Function) {.name = function_name, .filename = function_filename},
+    buffer->lines[0] = (ddog_prof_Line) {
+      .function = (ddog_prof_Function) {.name = function_name, .filename = function_filename},
       .line = 0
     };
     // To avoid changing sample_thread_internal, we just prepare a new buffer struct that uses the same underlying storage as the
@@ -192,8 +192,8 @@ static void sample_thread_internal(
   VALUE thread,
   sampling_buffer* buffer,
   VALUE recorder_instance,
-  ddog_Slice_i64 metric_values,
-  ddog_Slice_label labels,
+  ddog_Slice_I64 metric_values,
+  ddog_prof_Slice_Label labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 ) {
@@ -257,8 +257,8 @@ static void sample_thread_internal(
     name = NIL_P(name) ? missing_string : name;
     filename = NIL_P(filename) ? missing_string : filename;
-    buffer->lines[i] = (ddog_Line) {
-      .function = (ddog_Function) {
+    buffer->lines[i] = (ddog_prof_Line) {
+      .function = (ddog_prof_Function) {
         .name = char_slice_from_ruby_string(name),
         .filename = char_slice_from_ruby_string(filename)
       },
@@ -278,8 +278,8 @@ static void sample_thread_internal(
   record_sample(
     recorder_instance,
-    (ddog_Sample) {
-      .locations = (ddog_Slice_location) {.ptr = record_buffer->locations, .len = captured_frames + extra_frames_in_record_buffer},
+    (ddog_prof_Sample) {
+      .locations = (ddog_prof_Slice_Location) {.ptr = record_buffer->locations, .len = captured_frames + extra_frames_in_record_buffer},
       .values = metric_values,
       .labels = labels,
     }
@@ -301,8 +301,8 @@ static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer*
   // `record_sample`. So be careful where it gets allocated. (We do have tests for this, at least!)
   ddog_CharSlice function_name = DDOG_CHARSLICE_C("");
   ddog_CharSlice function_filename = {.ptr = frames_omitted_message, .len = strlen(frames_omitted_message)};
-  buffer->lines[buffer->max_frames - 1] = (ddog_Line) {
-    .function = (ddog_Function) {.name = function_name, .filename = function_filename},
+  buffer->lines[buffer->max_frames - 1] = (ddog_prof_Line) {
+    .function = (ddog_prof_Function) {.name = function_name, .filename = function_filename},
     .line = 0,
   };
 }
@@ -330,22 +330,22 @@ static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer*
 static void record_placeholder_stack_in_native_code(
   sampling_buffer* buffer,
   VALUE recorder_instance,
-  ddog_Slice_i64 metric_values,
-  ddog_Slice_label labels,
+  ddog_Slice_I64 metric_values,
+  ddog_prof_Slice_Label labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 ) {
   ddog_CharSlice function_name = DDOG_CHARSLICE_C("");
   ddog_CharSlice function_filename = DDOG_CHARSLICE_C("In native code");
-  buffer->lines[0] = (ddog_Line) {
-    .function = (ddog_Function) {.name = function_name, .filename = function_filename},
+  buffer->lines[0] = (ddog_prof_Line) {
+    .function = (ddog_prof_Function) {.name = function_name, .filename = function_filename},
     .line = 0
   };
   record_sample(
     recorder_instance,
-    (ddog_Sample) {
-      .locations = (ddog_Slice_location) {.ptr = record_buffer->locations, .len = 1 + extra_frames_in_record_buffer},
+    (ddog_prof_Sample) {
+      .locations = (ddog_prof_Slice_Location) {.ptr = record_buffer->locations, .len = 1 + extra_frames_in_record_buffer},
       .values = metric_values,
       .labels = labels,
     }
@@ -364,14 +364,14 @@ sampling_buffer *sampling_buffer_new(unsigned int max_frames) {
   buffer->stack_buffer  = ruby_xcalloc(max_frames, sizeof(VALUE));
   buffer->lines_buffer  = ruby_xcalloc(max_frames, sizeof(int));
   buffer->is_ruby_frame = ruby_xcalloc(max_frames, sizeof(bool));
-  buffer->locations     = ruby_xcalloc(max_frames, sizeof(ddog_Location));
-  buffer->lines         = ruby_xcalloc(max_frames, sizeof(ddog_Line));
+  buffer->locations     = ruby_xcalloc(max_frames, sizeof(ddog_prof_Location));
+  buffer->lines         = ruby_xcalloc(max_frames, sizeof(ddog_prof_Line));
   // Currently we have a 1-to-1 correspondence between lines and locations, so we just initialize the locations once
   // here and then only mutate the contents of the lines.
   for (unsigned int i = 0; i < max_frames; i++) {
-    ddog_Slice_line lines = (ddog_Slice_line) {.ptr = &buffer->lines[i], .len = 1};
-    buffer->locations[i] = (ddog_Location) {.lines = lines};
+    ddog_prof_Slice_Line lines = (ddog_prof_Slice_Line) {.ptr = &buffer->lines[i], .len = 1};
+    buffer->locations[i] = (ddog_prof_Location) {.lines = lines};
   }
   return buffer;

data/ext/ddtrace_profiling_native_extension/collectors_stack.h CHANGED Viewed

@@ -10,8 +10,8 @@ void sample_thread(
   VALUE thread,
   sampling_buffer* buffer,
   VALUE recorder_instance,
-  ddog_Slice_i64 metric_values,
-  ddog_Slice_label labels,
+  ddog_Slice_I64 metric_values,
+  ddog_prof_Slice_Label labels,
   sample_type type
 );
 sampling_buffer *sampling_buffer_new(unsigned int max_frames);

data/ext/ddtrace_profiling_native_extension/extconf.rb CHANGED Viewed

@@ -67,6 +67,10 @@ $stderr.puts(
 # that may fail on an environment not properly setup for building Ruby extensions.
 require 'mkmf'
+Logging.message(" [ddtrace] Using compiler:\n")
+xsystem("#{CONFIG['CC']} -v")
+Logging.message(" [ddtrace] End of compiler information\n")
 # mkmf on modern Rubies actually has an append_cflags that does something similar
 # (see https://github.com/ruby/ruby/pull/5760), but as usual we need a bit more boilerplate to deal with legacy Rubies
 def add_compiler_flag(flag)
@@ -87,7 +91,7 @@ add_compiler_flag '-Werror' if ENV['DDTRACE_CI'] == 'true'
 #   (https://github.com/msgpack/msgpack-ruby/blob/18ce08f6d612fe973843c366ac9a0b74c4e50599/ext/msgpack/extconf.rb#L8)
 add_compiler_flag '-std=gnu99'
-# Gets really noisy when we include the MJIT header, let's omit it
+# Gets really noisy when we include the MJIT header, let's omit it (TODO: Use #pragma GCC diagnostic instead?)
 add_compiler_flag '-Wno-unused-function'
 # Allow defining variables at any point in a function
@@ -108,6 +112,9 @@ add_compiler_flag '-Wunused-parameter'
 # For more details see https://gcc.gnu.org/wiki/Visibility
 add_compiler_flag '-fvisibility=hidden'
+# Avoid legacy C definitions
+add_compiler_flag '-Wold-style-definition'
 # Enable all other compiler warnings
 add_compiler_flag '-Wall'
 add_compiler_flag '-Wextra'
@@ -126,6 +133,9 @@ end
 # On older Rubies, there was no struct rb_native_thread. See private_vm_api_access.c for details.
 $defs << '-DNO_RB_NATIVE_THREAD' if RUBY_VERSION < '3.2'
+# On older Rubies, there was no struct rb_thread_sched (it was struct rb_global_vm_lock_struct)
+$defs << '-DNO_RB_THREAD_SCHED' if RUBY_VERSION < '3.2'
 # On older Rubies, there was no tid member in the internal thread structure
 $defs << '-DNO_THREAD_TID' if RUBY_VERSION < '3.1'
@@ -135,9 +145,15 @@ $defs << '-DUSE_BACKPORTED_RB_PROFILE_FRAME_METHOD_NAME' if RUBY_VERSION < '3'
 # On older Rubies, there are no Ractors
 $defs << '-DNO_RACTORS' if RUBY_VERSION < '3'
+# On older Rubies, rb_global_vm_lock_struct did not include the owner field
+$defs << '-DNO_GVL_OWNER' if RUBY_VERSION < '2.6'
 # On older Rubies, we need to use rb_thread_t instead of rb_execution_context_t
 $defs << '-DUSE_THREAD_INSTEAD_OF_EXECUTION_CONTEXT' if RUBY_VERSION < '2.5'
+# On older Rubies, extensions can't use GET_VM()
+$defs << '-DNO_GET_VM' if RUBY_VERSION < '2.5'
 # On older Rubies...
 if RUBY_VERSION < '2.4'
   # ...we need to use RUBY_VM_NORMAL_ISEQ_P instead of VM_FRAME_RUBYFRAME_P
@@ -154,8 +170,6 @@ if RUBY_VERSION < '2.3'
   $defs << '-DUSE_LEGACY_RB_PROFILE_FRAMES'
   # ... you couldn't name threads
   $defs << '-DNO_THREAD_NAMES'
-  # ...the ruby_thread_has_gvl_p function was not exposed to users outside of the VM
-  $defs << '-DNO_THREAD_HAS_GVL'
 end
 # If we got here, libdatadog is available and loaded
@@ -173,6 +187,10 @@ unless pkg_config('datadog_profiling_with_rpath')
   )
 end
+unless have_type('atomic_int', ['stdatomic.h'])
+  skip_building_extension!(Datadog::Profiling::NativeExtensionHelpers::Supported::COMPILER_ATOMIC_MISSING)
+end
 # See comments on the helper method being used for why we need to additionally set this.
 # The extremely excessive escaping around ORIGIN below seems to be correct and was determined after a lot of
 # experimentation. We need to get these special characters across a lot of tools untouched...
@@ -181,10 +199,6 @@ $LDFLAGS += \
   "#{Datadog::Profiling::NativeExtensionHelpers.libdatadog_folder_relative_to_native_lib_folder}"
 Logging.message(" [ddtrace] After pkg-config $LDFLAGS were set to: #{$LDFLAGS.inspect}\n")
-Logging.message(" [ddtrace] Using compiler:\n")
-xsystem("#{CONFIG['CC']} --version")
-Logging.message(" [ddtrace] End of compiler information\n")
 # Tag the native extension library with the Ruby version and Ruby platform.
 # This makes it easier for development (avoids "oops I forgot to rebuild when I switched my Ruby") and ensures that
 # the wrong library is never loaded.