ddtrace 1.21.1 → 1.23.0

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +54 -1
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +40 -32
  4. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +23 -12
  5. data/ext/datadog_profiling_native_extension/heap_recorder.c +81 -4
  6. data/ext/datadog_profiling_native_extension/heap_recorder.h +12 -1
  7. data/ext/datadog_profiling_native_extension/http_transport.c +5 -5
  8. data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +1 -1
  9. data/ext/datadog_profiling_native_extension/ruby_helpers.h +3 -0
  10. data/ext/datadog_profiling_native_extension/stack_recorder.c +161 -62
  11. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +43 -13
  12. data/lib/datadog/appsec/event.rb +1 -1
  13. data/lib/datadog/core/configuration/components.rb +2 -1
  14. data/lib/datadog/core/configuration/option.rb +7 -5
  15. data/lib/datadog/core/configuration/settings.rb +38 -17
  16. data/lib/datadog/core/configuration.rb +20 -4
  17. data/lib/datadog/core/environment/platform.rb +7 -1
  18. data/lib/datadog/core/remote/client/capabilities.rb +1 -1
  19. data/lib/datadog/core/remote/transport/http/config.rb +1 -1
  20. data/lib/datadog/core/telemetry/client.rb +18 -10
  21. data/lib/datadog/core/telemetry/emitter.rb +9 -13
  22. data/lib/datadog/core/telemetry/event.rb +247 -57
  23. data/lib/datadog/core/telemetry/ext.rb +1 -0
  24. data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
  25. data/lib/datadog/core/telemetry/http/ext.rb +4 -1
  26. data/lib/datadog/core/telemetry/http/response.rb +4 -0
  27. data/lib/datadog/core/telemetry/http/transport.rb +9 -4
  28. data/lib/datadog/core/telemetry/request.rb +59 -0
  29. data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
  30. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +25 -0
  31. data/lib/datadog/profiling/component.rb +23 -15
  32. data/lib/datadog/profiling/exporter.rb +6 -3
  33. data/lib/datadog/profiling/load_native_extension.rb +14 -1
  34. data/lib/datadog/profiling/stack_recorder.rb +6 -2
  35. data/lib/datadog/profiling.rb +11 -0
  36. data/lib/datadog/tracing/sampling/matcher.rb +23 -3
  37. data/lib/datadog/tracing/sampling/rule.rb +7 -2
  38. data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
  39. data/lib/ddtrace/version.rb +2 -2
  40. metadata +9 -20
  41. data/lib/datadog/core/telemetry/collector.rb +0 -250
  42. data/lib/datadog/core/telemetry/v1/app_event.rb +0 -59
  43. data/lib/datadog/core/telemetry/v1/application.rb +0 -92
  44. data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
  45. data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
  46. data/lib/datadog/core/telemetry/v1/host.rb +0 -59
  47. data/lib/datadog/core/telemetry/v1/install_signature.rb +0 -38
  48. data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
  49. data/lib/datadog/core/telemetry/v1/product.rb +0 -36
  50. data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
  51. data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
  52. data/lib/datadog/core/telemetry/v2/request.rb +0 -29
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 88e2b5d32d76e7c167d43a9868aed0903f8b82936bd0faf8eb10e02ae83d5928
- data.tar.gz: 65927b5d7037a853debb4da6cd71dda3dec0dd6b765bd90230ac3fa2904655f4
+ metadata.gz: b5e8a3ccb932af75df7d30d6fe59edc8277e21ee34a9562f7c2c8bfba1ea7521
+ data.tar.gz: 1508bf7e56a24af598aad666665be6cd0a7b97c9cbf3bbcc95a3fda36b67c07a
  SHA512:
- metadata.gz: c467f5c384aeae1fe813df7a2b081e36c665f7a5f550322b9fbfafc402ad70d1d81ba578094bf4064a9d1c8a97f94be8b998ae2d2218c18e0bfaba97b754d5ea
- data.tar.gz: 8510f7f8afa6a89591bb128389ab210e2d6f3049c23a2d2ef3f58c4f0c042abe5cc865b6b5efe5198f116a6c98cea34cbf6e257078477744aa66db66aa3e6f1f
+ metadata.gz: afccca8d070d1dfd472af2390da1b343dff3199c42a71e742f10e29e91ddb27a6fa709490adcd639e6914f792b8e8565f667ff5d916f9f0f6690d69b8d8b0b54
+ data.tar.gz: 581022bd8d146fe01e9dac70a309e58d503d48ed8420b845a6b91200796ce9a0b1c2bc29fb58382ddc546abd00785d2f6ee42863e1bc6df517052a566352604e
data/CHANGELOG.md CHANGED
@@ -2,6 +2,44 @@

  ## [Unreleased]

+ ## [1.23.0] - 2024-05-09
+
+ ### Added
+
+ * Profiling: Enable endpoint profiling for Sidekiq and similar background job processors ([#3619][])
+
+ ### Fixed
+
+ * Fix no such file or directory issue when using single step instrumentation ([#3623][])
+ * Fix error during telemetry debug logging attempt ([#3618][])
+
+ ## [1.22.0] - 2024-04-16
+
+ ### Added
+
+ * Tracing: Add sampling rules by trace resouce and tags ([#3587][], [#3585][])
+ * Appsec: Add WAF vendor header support ([#3528][])
+
+ ### Changed
+
+ * Upgrade `Telemetry` to V2 ([#3551][])
+ * Upgrade to libdatadog 7 ([#3536][])
+ * Profiling: Enable Garbage Collection profiling by default ([#3558][])
+ * Profiling: Skip heap samples with age 0 ([#3573][])
+ * Profiling: Support falling back into extension directory when loading profiler ([#3582][])
+
+ ### Fixed
+
+ * Appsec: Fix MIME-style newlines with strict base64 encoding ([#3565][])
+
+ ## [2.0.0.beta1] - 2024-03-22
+
+ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v2.0.0.beta1
+
+ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v1.21.1...v2.0.0.beta1
+
+ See https://github.com/DataDog/dd-trace-rb/blob/v2.0.0.beta1/docs/UpgradeGuide2.md.
+
  ## [1.21.1] - 2024-03-20

  ### Added
@@ -2776,7 +2814,10 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
  Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1


- [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.21.1...master
+ [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.22.0...master
+ [1.23.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.22.0...v1.23.0
+ [1.22.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.21.1...v1.22.0
+ [2.0.0.beta1]: https://github.com/DataDog/dd-trace-rb/compare/v1.21.1...v2.0.0.beta1
  [1.21.1]: https://github.com/DataDog/dd-trace-rb/compare/v1.21.0...v1.21.1
  [1.21.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.20.0...v1.21.0
  [1.20.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.19.0...v1.20.0
@@ -4070,9 +4111,21 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
  [#3519]: https://github.com/DataDog/dd-trace-rb/issues/3519
  [#3520]: https://github.com/DataDog/dd-trace-rb/issues/3520
  [#3523]: https://github.com/DataDog/dd-trace-rb/issues/3523
+ [#3528]: https://github.com/DataDog/dd-trace-rb/issues/3528
  [#3531]: https://github.com/DataDog/dd-trace-rb/issues/3531
  [#3535]: https://github.com/DataDog/dd-trace-rb/issues/3535
+ [#3536]: https://github.com/DataDog/dd-trace-rb/issues/3536
  [#3539]: https://github.com/DataDog/dd-trace-rb/issues/3539
+ [#3551]: https://github.com/DataDog/dd-trace-rb/issues/3551
+ [#3558]: https://github.com/DataDog/dd-trace-rb/issues/3558
+ [#3565]: https://github.com/DataDog/dd-trace-rb/issues/3565
+ [#3573]: https://github.com/DataDog/dd-trace-rb/issues/3573
+ [#3582]: https://github.com/DataDog/dd-trace-rb/issues/3582
+ [#3585]: https://github.com/DataDog/dd-trace-rb/issues/3585
+ [#3587]: https://github.com/DataDog/dd-trace-rb/issues/3587
+ [#3618]: https://github.com/DataDog/dd-trace-rb/issues/3618
+ [#3619]: https://github.com/DataDog/dd-trace-rb/issues/3619
+ [#3623]: https://github.com/DataDog/dd-trace-rb/issues/3623
  [@AdrianLC]: https://github.com/AdrianLC
  [@Azure7111]: https://github.com/Azure7111
  [@BabyGroot]: https://github.com/BabyGroot
data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c CHANGED
@@ -96,6 +96,7 @@ struct cpu_and_wall_time_worker_state {
  bool no_signals_workaround_enabled;
  bool dynamic_sampling_rate_enabled;
  bool allocation_profiling_enabled;
+ bool skip_idle_samples_for_testing;
  VALUE self_instance;
  VALUE thread_context_collector_instance;
  VALUE idle_sampling_helper_instance;
@@ -132,6 +133,8 @@ struct cpu_and_wall_time_worker_state {
  unsigned int signal_handler_enqueued_sample;
  // How many times the signal handler was called from the wrong thread
  unsigned int signal_handler_wrong_thread;
+ // How many times we actually tried to interrupt a thread for sampling
+ unsigned int interrupt_thread_attempts;

  // # Stats for the results of calling rb_postponed_job_register_one
  // The same function was already waiting to be executed
@@ -177,7 +180,8 @@ static VALUE _native_initialize(
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
  VALUE dynamic_sampling_rate_overhead_target_percentage,
- VALUE allocation_profiling_enabled
+ VALUE allocation_profiling_enabled,
+ VALUE skip_idle_samples_for_testing
  );
  static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
  static VALUE _native_sampling_loop(VALUE self, VALUE instance);
@@ -272,14 +276,16 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
  rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);

- rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8);
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats", _native_stats, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats_reset_not_thread_safe", _native_stats_reset_not_thread_safe, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0);
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_is_running?", _native_is_running, 1);
  rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
+ // TODO: Remove `_native_is_running` from `testing_module` once `prof-correctness` has been updated to not need it
  rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
  rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
  rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
@@ -315,6 +321,7 @@ static VALUE _native_new(VALUE klass) {
  state->no_signals_workaround_enabled = false;
  state->dynamic_sampling_rate_enabled = true;
  state->allocation_profiling_enabled = false;
+ state->skip_idle_samples_for_testing = false;
  state->thread_context_collector_instance = Qnil;
  state->idle_sampling_helper_instance = Qnil;
  state->owner_thread = Qnil;
@@ -350,13 +357,15 @@ static VALUE _native_initialize(
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
  VALUE dynamic_sampling_rate_overhead_target_percentage,
- VALUE allocation_profiling_enabled
+ VALUE allocation_profiling_enabled,
+ VALUE skip_idle_samples_for_testing
  ) {
  ENFORCE_BOOLEAN(gc_profiling_enabled);
  ENFORCE_BOOLEAN(no_signals_workaround_enabled);
  ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
  ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
  ENFORCE_BOOLEAN(allocation_profiling_enabled);
+ ENFORCE_BOOLEAN(skip_idle_samples_for_testing)

  struct cpu_and_wall_time_worker_state *state;
  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
@@ -365,6 +374,7 @@ static VALUE _native_initialize(
  state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
  state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
  state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
+ state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);

  double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
  if (!state->allocation_profiling_enabled) {
@@ -616,17 +626,23 @@ static void *run_sampling_trigger_loop(void *state_ptr) {
  // Note that reading the GVL owner and sending them a signal is a race -- the Ruby VM keeps on executing while
  // we're doing this, so we may still not signal the correct thread from time to time, but our signal handler
  // includes a check to see if it got called in the right thread
+ state->stats.interrupt_thread_attempts++;
  pthread_kill(owner.owner, SIGPROF);
  } else {
- // If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
- // so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
- //
- // In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
- // Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
- // for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
- // CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
- state->stats.trigger_simulated_signal_delivery_attempts++;
- idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
+ if (state->skip_idle_samples_for_testing) {
+ // This was added to make sure our tests don't accidentally pass due to idle samples. Specifically, if we
+ // comment out the thread interruption code inside `if (owner.valid)` above, our tests should not pass!
+ } else {
+ // If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
+ // so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
+ //
+ // In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
+ // Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
+ // for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
+ // CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
+ state->stats.trigger_simulated_signal_delivery_attempts++;
+ idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
+ }
  }
  }

@@ -737,6 +753,9 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
  if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
  if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);

+ // Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
+ rb_funcall(instance, rb_intern("signal_running"), 0);
+
  rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);

  // If we stopped sampling due to an exception, re-raise it (now in the worker thread)
@@ -910,18 +929,6 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
  struct cpu_and_wall_time_worker_state *state;
  TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

- VALUE pretty_cpu_sampling_time_ns_min = state->stats.cpu_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_min);
- VALUE pretty_cpu_sampling_time_ns_max = state->stats.cpu_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_max);
- VALUE pretty_cpu_sampling_time_ns_total = state->stats.cpu_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_total);
- VALUE pretty_cpu_sampling_time_ns_avg =
- state->stats.cpu_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampling_time_ns_total) / state->stats.cpu_sampled);
-
- VALUE pretty_allocation_sampling_time_ns_min = state->stats.allocation_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_min);
- VALUE pretty_allocation_sampling_time_ns_max = state->stats.allocation_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_max);
- VALUE pretty_allocation_sampling_time_ns_total = state->stats.allocation_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_total);
- VALUE pretty_allocation_sampling_time_ns_avg =
- state->stats.allocation_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampling_time_ns_total) / state->stats.allocation_sampled);
-
  unsigned long total_cpu_samples_attempted = state->stats.cpu_sampled + state->stats.cpu_skipped;
  VALUE effective_cpu_sample_rate =
  total_cpu_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampled) / total_cpu_samples_attempted);
@@ -943,24 +950,25 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
  ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
  ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
  ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
+ ID2SYM(rb_intern("interrupt_thread_attempts")), /* => */ UINT2NUM(state->stats.interrupt_thread_attempts),

  // CPU Stats
  ID2SYM(rb_intern("cpu_sampled")), /* => */ UINT2NUM(state->stats.cpu_sampled),
  ID2SYM(rb_intern("cpu_skipped")), /* => */ UINT2NUM(state->stats.cpu_skipped),
  ID2SYM(rb_intern("cpu_effective_sample_rate")), /* => */ effective_cpu_sample_rate,
- ID2SYM(rb_intern("cpu_sampling_time_ns_min")), /* => */ pretty_cpu_sampling_time_ns_min,
- ID2SYM(rb_intern("cpu_sampling_time_ns_max")), /* => */ pretty_cpu_sampling_time_ns_max,
- ID2SYM(rb_intern("cpu_sampling_time_ns_total")), /* => */ pretty_cpu_sampling_time_ns_total,
- ID2SYM(rb_intern("cpu_sampling_time_ns_avg")), /* => */ pretty_cpu_sampling_time_ns_avg,
+ ID2SYM(rb_intern("cpu_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
+ ID2SYM(rb_intern("cpu_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_max, > 0, ULL2NUM),
+ ID2SYM(rb_intern("cpu_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_total, > 0, ULL2NUM),
+ ID2SYM(rb_intern("cpu_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.cpu_sampling_time_ns_total, state->stats.cpu_sampled),

  // Allocation stats
  ID2SYM(rb_intern("allocation_sampled")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_sampled) : Qnil,
  ID2SYM(rb_intern("allocation_skipped")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_skipped) : Qnil,
  ID2SYM(rb_intern("allocation_effective_sample_rate")), /* => */ effective_allocation_sample_rate,
- ID2SYM(rb_intern("allocation_sampling_time_ns_min")), /* => */ pretty_allocation_sampling_time_ns_min,
- ID2SYM(rb_intern("allocation_sampling_time_ns_max")), /* => */ pretty_allocation_sampling_time_ns_max,
- ID2SYM(rb_intern("allocation_sampling_time_ns_total")), /* => */ pretty_allocation_sampling_time_ns_total,
- ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ pretty_allocation_sampling_time_ns_avg,
+ ID2SYM(rb_intern("allocation_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
+ ID2SYM(rb_intern("allocation_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_max, > 0, ULL2NUM),
+ ID2SYM(rb_intern("allocation_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_total, > 0, ULL2NUM),
+ ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.allocation_sampling_time_ns_total, state->stats.allocation_sampled),
  ID2SYM(rb_intern("allocation_sampler_snapshot")), /* => */ allocation_sampler_snapshot,
  ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
  };
data/ext/datadog_profiling_native_extension/collectors_thread_context.c CHANGED
@@ -217,7 +217,7 @@ static long thread_id_for(VALUE thread);
  static VALUE _native_stats(VALUE self, VALUE collector_instance);
  static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
  static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
- static bool should_collect_resource(VALUE root_span_type);
+ static bool should_collect_resource(VALUE root_span);
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
  static VALUE thread_list(struct thread_context_collector_state *state);
  static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
@@ -621,11 +621,14 @@ bool thread_context_collector_on_gc_finish(VALUE self_instance) {
  // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
  // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
  // samples first.
- bool finished_major_gc = gc_profiling_has_major_gc_finished();
  bool over_flush_time_treshold =
  (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;

- return finished_major_gc || over_flush_time_treshold;
+ if (over_flush_time_treshold) {
+ return true;
+ } else {
+ return gc_profiling_has_major_gc_finished();
+ }
  }

  // This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
@@ -1143,10 +1146,7 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,

  trace_identifiers_result->valid = true;

- if (!state->endpoint_collection_enabled) return;
-
- VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
- if (root_span_type == Qnil || !should_collect_resource(root_span_type)) return;
+ if (!state->endpoint_collection_enabled || !should_collect_resource(root_span)) return;

  VALUE trace_resource = rb_ivar_get(active_trace, at_resource_id /* @resource */);
  if (RB_TYPE_P(trace_resource, T_STRING)) {
@@ -1157,21 +1157,32 @@
  }
  }

- // We only collect the resource for spans of types:
+ // We opt-in to collecting the resource for spans of types:
  // * 'web', for web requests
- // * proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request)
+ // * 'proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request)
+ // * 'worker', used for sidekiq and similar background job processors
  //
- // NOTE: Currently we're only interested in HTTP service endpoints. Over time, this list may be expanded.
+ // Over time, this list may be expanded.
  // Resources MUST NOT include personal identifiable information (PII); this should not be the case with
  // ddtrace integrations, but worth mentioning just in case :)
- static bool should_collect_resource(VALUE root_span_type) {
+ static bool should_collect_resource(VALUE root_span) {
+ VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
+ if (root_span_type == Qnil) return false;
  ENFORCE_TYPE(root_span_type, T_STRING);

  int root_span_type_length = RSTRING_LEN(root_span_type);
  const char *root_span_type_value = StringValuePtr(root_span_type);

- return (root_span_type_length == strlen("web") && (memcmp("web", root_span_type_value, strlen("web")) == 0)) ||
+ bool is_web_request =
+ (root_span_type_length == strlen("web") && (memcmp("web", root_span_type_value, strlen("web")) == 0)) ||
  (root_span_type_length == strlen("proxy") && (memcmp("proxy", root_span_type_value, strlen("proxy")) == 0));
+
+ if (is_web_request) return true;
+
+ bool is_worker_request =
+ (root_span_type_length == strlen("worker") && (memcmp("worker", root_span_type_value, strlen("worker")) == 0));
+
+ return is_worker_request;
  }

  // After the Ruby VM forks, this method gets called in the child process to clean up any leftover state from the parent.
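
Editorial note on the change above: should_collect_resource matches the root span's type against 'web', 'proxy', and now 'worker' using an exact length check followed by memcmp, so a type such as 'web2' never matches and embedded NUL bytes cannot confuse the comparison. Below is a small standalone C sketch of that matching pattern; the helper name and sample inputs are illustrative only and are not code from the gem.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

// An exact match requires both the length and the bytes to match, mirroring the
// strlen/memcmp pairs used by should_collect_resource.
static bool type_equals(const char *value, long value_length, const char *expected) {
  return (size_t) value_length == strlen(expected) &&
         memcmp(expected, value, strlen(expected)) == 0;
}

int main(void) {
  const char *span_type = "worker"; // e.g. the @type of a Sidekiq-style root span
  long span_type_length = 6;        // what RSTRING_LEN would report for that string

  bool collect =
    type_equals(span_type, span_type_length, "web") ||
    type_equals(span_type, span_type_length, "proxy") ||
    type_equals(span_type, span_type_length, "worker");

  printf("collect resource: %d\n", collect); // prints 1
  return 0;
}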
data/ext/datadog_profiling_native_extension/heap_recorder.c CHANGED
@@ -10,6 +10,13 @@
  #define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
  #endif

+ // Minimum age (in GC generations) of heap objects we want to include in heap
+ // recorder iterations. Object with age 0 represent objects that have yet to undergo
+ // a GC and, thus, may just be noise/trash at instant of iteration and are usually not
+ // relevant for heap profiles as the great majority should be trivially reclaimed
+ // during the next GC.
+ #define ITERATION_MIN_AGE 1
+
  // A compact representation of a stacktrace frame for a heap allocation.
  typedef struct {
  char *name;
@@ -137,6 +144,11 @@ struct heap_recorder {
  // mutation of the data so iteration can occur without acquiring a lock.
  // NOTE: Contrary to object_records, this table has no ownership of its data.
  st_table *object_records_snapshot;
+ // The GC gen/epoch/count in which we prepared the current iteration.
+ //
+ // This enables us to calculate the age of iterated objects in the above snapshot by
+ // comparing it against an object's alloc_gen.
+ size_t iteration_gen;

  // Data for a heap recording that was started but not yet ended
  recording active_recording;
@@ -146,6 +158,13 @@

  // Sampling state
  uint num_recordings_skipped;
+
+ struct stats_last_update {
+ size_t objects_alive;
+ size_t objects_dead;
+ size_t objects_skipped;
+ size_t objects_frozen;
+ } stats_last_update;
  };
  static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Location);
  static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
@@ -353,11 +372,16 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
  return;
  }

+ heap_recorder->iteration_gen = rb_gc_count();
+
  if (heap_recorder->object_records_snapshot != NULL) {
  // we could trivially handle this but we raise to highlight and catch unexpected usages.
  rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished.");
  }

+ // Reset last update stats, we'll be building them from scratch during the st_foreach call below
+ heap_recorder->stats_last_update = (struct stats_last_update) {};
+
  st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder);

  heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records);
@@ -413,6 +437,22 @@ bool heap_recorder_for_each_live_object(
  return true;
  }

+ VALUE heap_recorder_state_snapshot(heap_recorder *heap_recorder) {
+ VALUE arguments[] = {
+ ID2SYM(rb_intern("num_object_records")), /* => */ LONG2NUM(heap_recorder->object_records->num_entries),
+ ID2SYM(rb_intern("num_heap_records")), /* => */ LONG2NUM(heap_recorder->heap_records->num_entries),
+
+ // Stats as of last update
+ ID2SYM(rb_intern("last_update_objects_alive")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_alive),
+ ID2SYM(rb_intern("last_update_objects_dead")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_dead),
+ ID2SYM(rb_intern("last_update_objects_skipped")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_skipped),
+ ID2SYM(rb_intern("last_update_objects_frozen")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_frozen),
+ };
+ VALUE hash = rb_hash_new();
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(hash, arguments[i], arguments[i+1]);
+ return hash;
+ }
+
  void heap_recorder_testonly_assert_hash_matches(ddog_prof_Slice_Location locations) {
  heap_stack *stack = heap_stack_new(locations);
  heap_record_key stack_based_key = (heap_record_key) {
@@ -459,6 +499,13 @@ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t v
  return ST_DELETE;
  }

+ // Check to see if an object should not be included in a heap recorder iteration.
+ // This centralizes the checking logic to ensure it's equally applied between
+ // preparation and iteration codepaths.
+ static inline bool should_exclude_from_iteration(object_record *obj_record) {
+ return obj_record->object_data.gen_age < ITERATION_MIN_AGE;
+ }
+
  static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
  long obj_id = (long) key;
  object_record *record = (object_record*) value;
@@ -466,9 +513,24 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext

  VALUE ref;

+ size_t iteration_gen = recorder->iteration_gen;
+ size_t alloc_gen = record->object_data.alloc_gen;
+ // Guard against potential overflows given unsigned types here.
+ record->object_data.gen_age = alloc_gen < iteration_gen ? iteration_gen - alloc_gen : 0;
+
+ if (should_exclude_from_iteration(record)) {
+ // If an object won't be included in the current iteration, there's
+ // no point checking for liveness or updating its size, so exit early.
+ // NOTE: This means that there should be an equivalent check during actual
+ // iteration otherwise we'd iterate/expose stale object data.
+ recorder->stats_last_update.objects_skipped++;
+ return ST_CONTINUE;
+ }
+
  if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
  // Id no longer associated with a valid ref. Need to delete this object record!
  on_committed_object_record_cleanup(recorder, record);
+ recorder->stats_last_update.objects_dead++;
  return ST_DELETE;
  }

@@ -503,6 +565,7 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
  RB_FL_SET(ref, RUBY_FL_SEEN_OBJ_ID);

  on_committed_object_record_cleanup(recorder, record);
+ recorder->stats_last_update.objects_dead++;
  return ST_DELETE;
  }

@@ -516,6 +579,11 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
  record->object_data.is_frozen = RB_OBJ_FROZEN(ref);
  }

+ recorder->stats_last_update.objects_alive++;
+ if (record->object_data.is_frozen) {
+ recorder->stats_last_update.objects_frozen++;
+ }
+
  return ST_CONTINUE;
  }

@@ -525,8 +593,16 @@ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t val
  const heap_stack *stack = record->heap_record->stack;
  iteration_context *context = (iteration_context*) extra;

- ddog_prof_Location *locations = context->heap_recorder->reusable_locations;
+ const heap_recorder *recorder = context->heap_recorder;
+
+ if (should_exclude_from_iteration(record)) {
+ // Skip objects that should not be included in iteration
+ // NOTE: This matches the short-circuiting condition in st_object_record_update
+ // and prevents iteration over stale objects.
+ return ST_CONTINUE;
+ }

+ ddog_prof_Location *locations = recorder->reusable_locations;
  for (uint16_t i = 0; i < stack->frames_len; i++) {
  const heap_frame *frame = &stack->frames[i];
  ddog_prof_Location *location = &locations[i];
@@ -725,9 +801,10 @@ void object_record_free(object_record *record) {

  VALUE object_record_inspect(object_record *record) {
  heap_frame top_frame = record->heap_record->stack->frames[0];
- VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu ",
- record->obj_id, record->object_data.weight, record->object_data.size, top_frame.filename,
- (int) top_frame.line, record->object_data.alloc_gen);
+ live_object_data object_data = record->object_data;
+ VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu gen_age=%zu frozen=%d ",
+ record->obj_id, object_data.weight, object_data.size, top_frame.filename,
+ (int) top_frame.line, object_data.alloc_gen, object_data.gen_age, object_data.is_frozen);

  const char *class = record->object_data.class;
  if (class != NULL) {
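
Editorial note on the age gating added above: heap_recorder_prepare_iteration captures rb_gc_count() as iteration_gen, each record's gen_age is then derived from its alloc_gen with an underflow guard, and records younger than ITERATION_MIN_AGE are skipped both when updating and when iterating. Below is a small standalone C sketch of that logic; the struct, the sample generations, and main are illustrative stand-ins, not the gem's types.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define ITERATION_MIN_AGE 1

// Stand-in for the subset of live_object_data involved in the age check.
typedef struct { size_t alloc_gen; size_t gen_age; } object_data_t;

// Mirrors st_object_record_update: guard against underflow, then derive the age.
static void update_gen_age(object_data_t *data, size_t iteration_gen) {
  data->gen_age = data->alloc_gen < iteration_gen ? iteration_gen - data->alloc_gen : 0;
}

// Mirrors should_exclude_from_iteration: objects that are too young are skipped.
static bool should_exclude(const object_data_t *data) {
  return data->gen_age < ITERATION_MIN_AGE;
}

int main(void) {
  size_t iteration_gen = 10;                   // e.g. rb_gc_count() at prepare time
  object_data_t fresh   = { .alloc_gen = 10 }; // allocated during the current GC gen
  object_data_t settled = { .alloc_gen = 8 };  // survived at least one GC already

  update_gen_age(&fresh, iteration_gen);
  update_gen_age(&settled, iteration_gen);

  printf("fresh:   age=%zu excluded=%d\n", fresh.gen_age, should_exclude(&fresh));     // age=0 excluded=1
  printf("settled: age=%zu excluded=%d\n", settled.gen_age, should_exclude(&settled)); // age=2 excluded=0
  return 0;
}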
data/ext/datadog_profiling_native_extension/heap_recorder.h CHANGED
@@ -27,7 +27,9 @@ typedef struct live_object_data {
  // could be seen as being representative of 50 objects.
  unsigned int weight;

- // Size of this object on last flush/update.
+ // Size of this object in memory.
+ // NOTE: This only gets updated during heap_recorder_prepare_iteration and only
+ // for those objects that meet the minimum iteration age requirements.
  size_t size;

  // The class of the object that we're tracking.
@@ -39,6 +41,10 @@ typedef struct live_object_data {
  // This enables us to calculate the age of this object in terms of GC executions.
  size_t alloc_gen;

+ // The age of this object in terms of GC generations.
+ // NOTE: This only gets updated during heap_recorder_prepare_iteration
+ size_t gen_age;
+
  // Whether this object was previously seen as being frozen. If this is the case,
  // we'll skip any further size updates since frozen objects are supposed to be
  // immutable.
@@ -144,6 +150,11 @@ bool heap_recorder_for_each_live_object(
  bool (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg),
  void *for_each_callback_extra_arg);

+ // Return a Ruby hash containing a snapshot of this recorder's interesting state at calling time.
+ // WARN: This allocates in the Ruby VM and therefore should not be called without the
+ // VM lock or during GC.
+ VALUE heap_recorder_state_snapshot(heap_recorder *heap_recorder);
+
  // v--- TEST-ONLY APIs ---v

  // Assert internal hashing logic is valid for the provided locations and its
data/ext/datadog_profiling_native_extension/http_transport.c CHANGED
@@ -30,7 +30,7 @@ inline static ddog_ByteSlice byte_slice_from_ruby_string(VALUE string);
  static VALUE _native_validate_exporter(VALUE self, VALUE exporter_configuration);
  static ddog_prof_Exporter_NewResult create_exporter(VALUE exporter_configuration, VALUE tags_as_array);
  static VALUE handle_exporter_failure(ddog_prof_Exporter_NewResult exporter_result);
- static ddog_Endpoint endpoint_from(VALUE exporter_configuration);
+ static ddog_prof_Endpoint endpoint_from(VALUE exporter_configuration);
  static ddog_Vec_Tag convert_tags(VALUE tags_as_array);
  static void safely_log_failure_to_process_tag(ddog_Vec_Tag tags, VALUE err_details);
  static VALUE _native_do_export(
@@ -94,7 +94,7 @@ static ddog_prof_Exporter_NewResult create_exporter(VALUE exporter_configuration

  // This needs to be called BEFORE convert_tags since it can raise an exception and thus cause the ddog_Vec_Tag
  // to be leaked.
- ddog_Endpoint endpoint = endpoint_from(exporter_configuration);
+ ddog_prof_Endpoint endpoint = endpoint_from(exporter_configuration);

  ddog_Vec_Tag tags = convert_tags(tags_as_array);

@@ -116,7 +116,7 @@ static VALUE handle_exporter_failure(ddog_prof_Exporter_NewResult exporter_resul
  rb_ary_new_from_args(2, error_symbol, get_error_details_and_drop(&exporter_result.err));
  }

- static ddog_Endpoint endpoint_from(VALUE exporter_configuration) {
+ static ddog_prof_Endpoint endpoint_from(VALUE exporter_configuration) {
  ENFORCE_TYPE(exporter_configuration, T_ARRAY);

  ID working_mode = SYM2ID(rb_ary_entry(exporter_configuration, 0)); // SYM2ID verifies its input so we can do this safely
@@ -131,12 +131,12 @@ static ddog_Endpoint endpoint_from(VALUE exporter_configuration) {
  ENFORCE_TYPE(site, T_STRING);
  ENFORCE_TYPE(api_key, T_STRING);

- return ddog_Endpoint_agentless(char_slice_from_ruby_string(site), char_slice_from_ruby_string(api_key));
+ return ddog_prof_Endpoint_agentless(char_slice_from_ruby_string(site), char_slice_from_ruby_string(api_key));
  } else { // agent_id
  VALUE base_url = rb_ary_entry(exporter_configuration, 1);
  ENFORCE_TYPE(base_url, T_STRING);

- return ddog_Endpoint_agent(char_slice_from_ruby_string(base_url));
+ return ddog_prof_Endpoint_agent(char_slice_from_ruby_string(base_url));
  }
  }

data/ext/datadog_profiling_native_extension/native_extension_helpers.rb CHANGED
@@ -15,7 +15,7 @@ module Datadog
  # The MJIT header was introduced on 2.6 and removed on 3.3; for other Rubies we rely on debase-ruby_core_source
  CAN_USE_MJIT_HEADER = RUBY_VERSION.start_with?('2.6', '2.7', '3.0.', '3.1.', '3.2.')

- LIBDATADOG_VERSION = '~> 6.0.0.2.0'
+ LIBDATADOG_VERSION = '~> 7.0.0.1.0'

  def self.fail_install_if_missing_extension?
  ENV[ENV_FAIL_INSTALL_IF_MISSING_EXTENSION].to_s.strip.downcase == 'true'
data/ext/datadog_profiling_native_extension/ruby_helpers.h CHANGED
@@ -82,6 +82,9 @@ NORETURN(
  #define ENFORCE_SUCCESS_HELPER(expression, have_gvl) \
  { int result_syserr_errno = expression; if (RB_UNLIKELY(result_syserr_errno)) raise_syserr(result_syserr_errno, have_gvl, ADD_QUOTES(expression), __FILE__, __LINE__, __func__); }

+ #define RUBY_NUM_OR_NIL(val, condition, conv) ((val condition) ? conv(val) : Qnil)
+ #define RUBY_AVG_OR_NIL(total, count) ((count == 0) ? Qnil : DBL2NUM(((double) total) / count))
+
  // Called by ENFORCE_SUCCESS_HELPER; should not be used directly
  NORETURN(void raise_syserr(
  int syserr_errno,
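
Editorial note on the two new macros: they replace the hand-rolled pretty_* conversions that _native_stats used to build by hand. A sentinel value (UINT64_MAX for a min that was never updated, 0 for a max/total) maps to Qnil, anything else goes through the supplied conversion macro, and an average collapses to Qnil when the sample count is zero. The sketch below shows how the macros expand; the VALUE typedef and the Qnil/ULL2NUM/DBL2NUM stand-ins exist only to keep this example compilable outside a Ruby extension, where those names come from the Ruby C API.

#include <stdint.h>
#include <stdio.h>

// Stand-ins for Ruby C API names, for illustration only.
typedef long VALUE;
#define Qnil ((VALUE) 0)
#define ULL2NUM(x) ((VALUE)(x))
#define DBL2NUM(x) ((VALUE)(x))

// Copied from the ruby_helpers.h hunk above.
#define RUBY_NUM_OR_NIL(val, condition, conv) ((val condition) ? conv(val) : Qnil)
#define RUBY_AVG_OR_NIL(total, count) ((count == 0) ? Qnil : DBL2NUM(((double) total) / count))

int main(void) {
  uint64_t min_ns = UINT64_MAX; // "never sampled" sentinel, as for cpu_sampling_time_ns_min
  uint64_t total_ns = 1200, sampled = 0;

  // RUBY_NUM_OR_NIL(min_ns, != UINT64_MAX, ULL2NUM) expands to
  //   ((min_ns != UINT64_MAX) ? ULL2NUM(min_ns) : Qnil)
  VALUE pretty_min = RUBY_NUM_OR_NIL(min_ns, != UINT64_MAX, ULL2NUM);
  VALUE pretty_avg = RUBY_AVG_OR_NIL(total_ns, sampled); // zero samples -> Qnil

  printf("min is nil: %d, avg is nil: %d\n", pretty_min == Qnil, pretty_avg == Qnil);
  return 0;
}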