ddtrace 1.21.1 → 1.23.0

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +54 -1
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +40 -32
  4. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +23 -12
  5. data/ext/datadog_profiling_native_extension/heap_recorder.c +81 -4
  6. data/ext/datadog_profiling_native_extension/heap_recorder.h +12 -1
  7. data/ext/datadog_profiling_native_extension/http_transport.c +5 -5
  8. data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +1 -1
  9. data/ext/datadog_profiling_native_extension/ruby_helpers.h +3 -0
  10. data/ext/datadog_profiling_native_extension/stack_recorder.c +161 -62
  11. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +43 -13
  12. data/lib/datadog/appsec/event.rb +1 -1
  13. data/lib/datadog/core/configuration/components.rb +2 -1
  14. data/lib/datadog/core/configuration/option.rb +7 -5
  15. data/lib/datadog/core/configuration/settings.rb +38 -17
  16. data/lib/datadog/core/configuration.rb +20 -4
  17. data/lib/datadog/core/environment/platform.rb +7 -1
  18. data/lib/datadog/core/remote/client/capabilities.rb +1 -1
  19. data/lib/datadog/core/remote/transport/http/config.rb +1 -1
  20. data/lib/datadog/core/telemetry/client.rb +18 -10
  21. data/lib/datadog/core/telemetry/emitter.rb +9 -13
  22. data/lib/datadog/core/telemetry/event.rb +247 -57
  23. data/lib/datadog/core/telemetry/ext.rb +1 -0
  24. data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
  25. data/lib/datadog/core/telemetry/http/ext.rb +4 -1
  26. data/lib/datadog/core/telemetry/http/response.rb +4 -0
  27. data/lib/datadog/core/telemetry/http/transport.rb +9 -4
  28. data/lib/datadog/core/telemetry/request.rb +59 -0
  29. data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
  30. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +25 -0
  31. data/lib/datadog/profiling/component.rb +23 -15
  32. data/lib/datadog/profiling/exporter.rb +6 -3
  33. data/lib/datadog/profiling/load_native_extension.rb +14 -1
  34. data/lib/datadog/profiling/stack_recorder.rb +6 -2
  35. data/lib/datadog/profiling.rb +11 -0
  36. data/lib/datadog/tracing/sampling/matcher.rb +23 -3
  37. data/lib/datadog/tracing/sampling/rule.rb +7 -2
  38. data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
  39. data/lib/ddtrace/version.rb +2 -2
  40. metadata +9 -20
  41. data/lib/datadog/core/telemetry/collector.rb +0 -250
  42. data/lib/datadog/core/telemetry/v1/app_event.rb +0 -59
  43. data/lib/datadog/core/telemetry/v1/application.rb +0 -92
  44. data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
  45. data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
  46. data/lib/datadog/core/telemetry/v1/host.rb +0 -59
  47. data/lib/datadog/core/telemetry/v1/install_signature.rb +0 -38
  48. data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
  49. data/lib/datadog/core/telemetry/v1/product.rb +0 -36
  50. data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
  51. data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
  52. data/lib/datadog/core/telemetry/v2/request.rb +0 -29
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 88e2b5d32d76e7c167d43a9868aed0903f8b82936bd0faf8eb10e02ae83d5928
- data.tar.gz: 65927b5d7037a853debb4da6cd71dda3dec0dd6b765bd90230ac3fa2904655f4
+ metadata.gz: b5e8a3ccb932af75df7d30d6fe59edc8277e21ee34a9562f7c2c8bfba1ea7521
+ data.tar.gz: 1508bf7e56a24af598aad666665be6cd0a7b97c9cbf3bbcc95a3fda36b67c07a
  SHA512:
- metadata.gz: c467f5c384aeae1fe813df7a2b081e36c665f7a5f550322b9fbfafc402ad70d1d81ba578094bf4064a9d1c8a97f94be8b998ae2d2218c18e0bfaba97b754d5ea
- data.tar.gz: 8510f7f8afa6a89591bb128389ab210e2d6f3049c23a2d2ef3f58c4f0c042abe5cc865b6b5efe5198f116a6c98cea34cbf6e257078477744aa66db66aa3e6f1f
+ metadata.gz: afccca8d070d1dfd472af2390da1b343dff3199c42a71e742f10e29e91ddb27a6fa709490adcd639e6914f792b8e8565f667ff5d916f9f0f6690d69b8d8b0b54
+ data.tar.gz: 581022bd8d146fe01e9dac70a309e58d503d48ed8420b845a6b91200796ce9a0b1c2bc29fb58382ddc546abd00785d2f6ee42863e1bc6df517052a566352604e
data/CHANGELOG.md CHANGED
@@ -2,6 +2,44 @@

  ## [Unreleased]

+ ## [1.23.0] - 2024-05-09
+
+ ### Added
+
+ * Profiling: Enable endpoint profiling for Sidekiq and similar background job processors ([#3619][])
+
+ ### Fixed
+
+ * Fix no such file or directory issue when using single step instrumentation ([#3623][])
+ * Fix error during telemetry debug logging attempt ([#3618][])
+
+ ## [1.22.0] - 2024-04-16
+
+ ### Added
+
+ * Tracing: Add sampling rules by trace resouce and tags ([#3587][], [#3585][])
+ * Appsec: Add WAF vendor header support ([#3528][])
+
+ ### Changed
+
+ * Upgrade `Telemetry` to V2 ([#3551][])
+ * Upgrade to libdatadog 7 ([#3536][])
+ * Profiling: Enable Garbage Collection profiling by default ([#3558][])
+ * Profiling: Skip heap samples with age 0 ([#3573][])
+ * Profiling: Support falling back into extension directory when loading profiler ([#3582][])
+
+ ### Fixed
+
+ * Appsec: Fix MIME-style newlines with strict base64 encoding ([#3565][])
+
+ ## [2.0.0.beta1] - 2024-03-22
+
+ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v2.0.0.beta1
+
+ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v1.21.1...v2.0.0.beta1
+
+ See https://github.com/DataDog/dd-trace-rb/blob/v2.0.0.beta1/docs/UpgradeGuide2.md.
+
  ## [1.21.1] - 2024-03-20

  ### Added
@@ -2776,7 +2814,10 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
  Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1


- [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.21.1...master
+ [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.22.0...master
+ [1.23.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.22.0...v1.23.0
+ [1.22.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.21.1...v1.22.0
+ [2.0.0.beta1]: https://github.com/DataDog/dd-trace-rb/compare/v1.21.1...v2.0.0.beta1
  [1.21.1]: https://github.com/DataDog/dd-trace-rb/compare/v1.21.0...v1.21.1
  [1.21.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.20.0...v1.21.0
  [1.20.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.19.0...v1.20.0
@@ -4070,9 +4111,21 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
  [#3519]: https://github.com/DataDog/dd-trace-rb/issues/3519
  [#3520]: https://github.com/DataDog/dd-trace-rb/issues/3520
  [#3523]: https://github.com/DataDog/dd-trace-rb/issues/3523
+ [#3528]: https://github.com/DataDog/dd-trace-rb/issues/3528
  [#3531]: https://github.com/DataDog/dd-trace-rb/issues/3531
  [#3535]: https://github.com/DataDog/dd-trace-rb/issues/3535
+ [#3536]: https://github.com/DataDog/dd-trace-rb/issues/3536
  [#3539]: https://github.com/DataDog/dd-trace-rb/issues/3539
+ [#3551]: https://github.com/DataDog/dd-trace-rb/issues/3551
+ [#3558]: https://github.com/DataDog/dd-trace-rb/issues/3558
+ [#3565]: https://github.com/DataDog/dd-trace-rb/issues/3565
+ [#3573]: https://github.com/DataDog/dd-trace-rb/issues/3573
+ [#3582]: https://github.com/DataDog/dd-trace-rb/issues/3582
+ [#3585]: https://github.com/DataDog/dd-trace-rb/issues/3585
+ [#3587]: https://github.com/DataDog/dd-trace-rb/issues/3587
+ [#3618]: https://github.com/DataDog/dd-trace-rb/issues/3618
+ [#3619]: https://github.com/DataDog/dd-trace-rb/issues/3619
+ [#3623]: https://github.com/DataDog/dd-trace-rb/issues/3623
  [@AdrianLC]: https://github.com/AdrianLC
  [@Azure7111]: https://github.com/Azure7111
  [@BabyGroot]: https://github.com/BabyGroot
data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c CHANGED
@@ -96,6 +96,7 @@ struct cpu_and_wall_time_worker_state {
  bool no_signals_workaround_enabled;
  bool dynamic_sampling_rate_enabled;
  bool allocation_profiling_enabled;
+ bool skip_idle_samples_for_testing;
  VALUE self_instance;
  VALUE thread_context_collector_instance;
  VALUE idle_sampling_helper_instance;
@@ -132,6 +133,8 @@ struct cpu_and_wall_time_worker_state {
  unsigned int signal_handler_enqueued_sample;
  // How many times the signal handler was called from the wrong thread
  unsigned int signal_handler_wrong_thread;
+ // How many times we actually tried to interrupt a thread for sampling
+ unsigned int interrupt_thread_attempts;

  // # Stats for the results of calling rb_postponed_job_register_one
  // The same function was already waiting to be executed
@@ -177,7 +180,8 @@ static VALUE _native_initialize(
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
  VALUE dynamic_sampling_rate_overhead_target_percentage,
- VALUE allocation_profiling_enabled
+ VALUE allocation_profiling_enabled,
+ VALUE skip_idle_samples_for_testing
  );
  static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
  static VALUE _native_sampling_loop(VALUE self, VALUE instance);
@@ -272,14 +276,16 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
  rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);

- rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8);
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats", _native_stats, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats_reset_not_thread_safe", _native_stats_reset_not_thread_safe, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0);
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_is_running?", _native_is_running, 1);
  rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
+ // TODO: Remove `_native_is_running` from `testing_module` once `prof-correctness` has been updated to not need it
  rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
  rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
  rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
@@ -315,6 +321,7 @@ static VALUE _native_new(VALUE klass) {
  state->no_signals_workaround_enabled = false;
  state->dynamic_sampling_rate_enabled = true;
  state->allocation_profiling_enabled = false;
+ state->skip_idle_samples_for_testing = false;
  state->thread_context_collector_instance = Qnil;
  state->idle_sampling_helper_instance = Qnil;
  state->owner_thread = Qnil;
@@ -350,13 +357,15 @@ static VALUE _native_initialize(
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
  VALUE dynamic_sampling_rate_overhead_target_percentage,
- VALUE allocation_profiling_enabled
+ VALUE allocation_profiling_enabled,
+ VALUE skip_idle_samples_for_testing
  ) {
  ENFORCE_BOOLEAN(gc_profiling_enabled);
  ENFORCE_BOOLEAN(no_signals_workaround_enabled);
  ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
  ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
  ENFORCE_BOOLEAN(allocation_profiling_enabled);
+ ENFORCE_BOOLEAN(skip_idle_samples_for_testing)

  struct cpu_and_wall_time_worker_state *state;
  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
@@ -365,6 +374,7 @@ static VALUE _native_initialize(
  state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
  state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
  state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
+ state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);

  double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
  if (!state->allocation_profiling_enabled) {
@@ -616,17 +626,23 @@ static void *run_sampling_trigger_loop(void *state_ptr) {
  // Note that reading the GVL owner and sending them a signal is a race -- the Ruby VM keeps on executing while
  // we're doing this, so we may still not signal the correct thread from time to time, but our signal handler
  // includes a check to see if it got called in the right thread
+ state->stats.interrupt_thread_attempts++;
  pthread_kill(owner.owner, SIGPROF);
  } else {
- // If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
- // so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
- //
- // In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
- // Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
- // for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
- // CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
- state->stats.trigger_simulated_signal_delivery_attempts++;
- idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
+ if (state->skip_idle_samples_for_testing) {
+ // This was added to make sure our tests don't accidentally pass due to idle samples. Specifically, if we
+ // comment out the thread interruption code inside `if (owner.valid)` above, our tests should not pass!
+ } else {
+ // If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
+ // so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
+ //
+ // In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
+ // Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
+ // for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
+ // CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
+ state->stats.trigger_simulated_signal_delivery_attempts++;
+ idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
+ }
  }
  }

@@ -737,6 +753,9 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
  if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
  if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);

+ // Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
+ rb_funcall(instance, rb_intern("signal_running"), 0);
+
  rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);

  // If we stopped sampling due to an exception, re-raise it (now in the worker thread)
@@ -910,18 +929,6 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
  struct cpu_and_wall_time_worker_state *state;
  TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

- VALUE pretty_cpu_sampling_time_ns_min = state->stats.cpu_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_min);
- VALUE pretty_cpu_sampling_time_ns_max = state->stats.cpu_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_max);
- VALUE pretty_cpu_sampling_time_ns_total = state->stats.cpu_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_total);
- VALUE pretty_cpu_sampling_time_ns_avg =
- state->stats.cpu_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampling_time_ns_total) / state->stats.cpu_sampled);
-
- VALUE pretty_allocation_sampling_time_ns_min = state->stats.allocation_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_min);
- VALUE pretty_allocation_sampling_time_ns_max = state->stats.allocation_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_max);
- VALUE pretty_allocation_sampling_time_ns_total = state->stats.allocation_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_total);
- VALUE pretty_allocation_sampling_time_ns_avg =
- state->stats.allocation_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampling_time_ns_total) / state->stats.allocation_sampled);
-
  unsigned long total_cpu_samples_attempted = state->stats.cpu_sampled + state->stats.cpu_skipped;
  VALUE effective_cpu_sample_rate =
  total_cpu_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampled) / total_cpu_samples_attempted);
@@ -943,24 +950,25 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
  ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
  ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
  ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
+ ID2SYM(rb_intern("interrupt_thread_attempts")), /* => */ UINT2NUM(state->stats.interrupt_thread_attempts),

  // CPU Stats
  ID2SYM(rb_intern("cpu_sampled")), /* => */ UINT2NUM(state->stats.cpu_sampled),
  ID2SYM(rb_intern("cpu_skipped")), /* => */ UINT2NUM(state->stats.cpu_skipped),
  ID2SYM(rb_intern("cpu_effective_sample_rate")), /* => */ effective_cpu_sample_rate,
- ID2SYM(rb_intern("cpu_sampling_time_ns_min")), /* => */ pretty_cpu_sampling_time_ns_min,
- ID2SYM(rb_intern("cpu_sampling_time_ns_max")), /* => */ pretty_cpu_sampling_time_ns_max,
- ID2SYM(rb_intern("cpu_sampling_time_ns_total")), /* => */ pretty_cpu_sampling_time_ns_total,
- ID2SYM(rb_intern("cpu_sampling_time_ns_avg")), /* => */ pretty_cpu_sampling_time_ns_avg,
+ ID2SYM(rb_intern("cpu_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
+ ID2SYM(rb_intern("cpu_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_max, > 0, ULL2NUM),
+ ID2SYM(rb_intern("cpu_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_total, > 0, ULL2NUM),
+ ID2SYM(rb_intern("cpu_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.cpu_sampling_time_ns_total, state->stats.cpu_sampled),

  // Allocation stats
  ID2SYM(rb_intern("allocation_sampled")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_sampled) : Qnil,
  ID2SYM(rb_intern("allocation_skipped")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_skipped) : Qnil,
  ID2SYM(rb_intern("allocation_effective_sample_rate")), /* => */ effective_allocation_sample_rate,
- ID2SYM(rb_intern("allocation_sampling_time_ns_min")), /* => */ pretty_allocation_sampling_time_ns_min,
- ID2SYM(rb_intern("allocation_sampling_time_ns_max")), /* => */ pretty_allocation_sampling_time_ns_max,
- ID2SYM(rb_intern("allocation_sampling_time_ns_total")), /* => */ pretty_allocation_sampling_time_ns_total,
- ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ pretty_allocation_sampling_time_ns_avg,
+ ID2SYM(rb_intern("allocation_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
+ ID2SYM(rb_intern("allocation_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_max, > 0, ULL2NUM),
+ ID2SYM(rb_intern("allocation_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_total, > 0, ULL2NUM),
+ ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.allocation_sampling_time_ns_total, state->stats.allocation_sampled),
  ID2SYM(rb_intern("allocation_sampler_snapshot")), /* => */ allocation_sampler_snapshot,
  ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
  };
data/ext/datadog_profiling_native_extension/collectors_thread_context.c CHANGED
@@ -217,7 +217,7 @@ static long thread_id_for(VALUE thread);
  static VALUE _native_stats(VALUE self, VALUE collector_instance);
  static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
  static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
- static bool should_collect_resource(VALUE root_span_type);
+ static bool should_collect_resource(VALUE root_span);
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
  static VALUE thread_list(struct thread_context_collector_state *state);
  static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
@@ -621,11 +621,14 @@ bool thread_context_collector_on_gc_finish(VALUE self_instance) {
  // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
  // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
  // samples first.
- bool finished_major_gc = gc_profiling_has_major_gc_finished();
  bool over_flush_time_treshold =
  (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;

- return finished_major_gc || over_flush_time_treshold;
+ if (over_flush_time_treshold) {
+ return true;
+ } else {
+ return gc_profiling_has_major_gc_finished();
+ }
  }

  // This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
@@ -1143,10 +1146,7 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,

  trace_identifiers_result->valid = true;

- if (!state->endpoint_collection_enabled) return;
-
- VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
- if (root_span_type == Qnil || !should_collect_resource(root_span_type)) return;
+ if (!state->endpoint_collection_enabled || !should_collect_resource(root_span)) return;

  VALUE trace_resource = rb_ivar_get(active_trace, at_resource_id /* @resource */);
  if (RB_TYPE_P(trace_resource, T_STRING)) {
@@ -1157,21 +1157,32 @@
  }
  }

- // We only collect the resource for spans of types:
+ // We opt-in to collecting the resource for spans of types:
  // * 'web', for web requests
- // * proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request)
+ // * 'proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request)
+ // * 'worker', used for sidekiq and similar background job processors
  //
- // NOTE: Currently we're only interested in HTTP service endpoints. Over time, this list may be expanded.
+ // Over time, this list may be expanded.
  // Resources MUST NOT include personal identifiable information (PII); this should not be the case with
  // ddtrace integrations, but worth mentioning just in case :)
- static bool should_collect_resource(VALUE root_span_type) {
+ static bool should_collect_resource(VALUE root_span) {
+ VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
+ if (root_span_type == Qnil) return false;
  ENFORCE_TYPE(root_span_type, T_STRING);

  int root_span_type_length = RSTRING_LEN(root_span_type);
  const char *root_span_type_value = StringValuePtr(root_span_type);

- return (root_span_type_length == strlen("web") && (memcmp("web", root_span_type_value, strlen("web")) == 0)) ||
+ bool is_web_request =
+ (root_span_type_length == strlen("web") && (memcmp("web", root_span_type_value, strlen("web")) == 0)) ||
  (root_span_type_length == strlen("proxy") && (memcmp("proxy", root_span_type_value, strlen("proxy")) == 0));
+
+ if (is_web_request) return true;
+
+ bool is_worker_request =
+ (root_span_type_length == strlen("worker") && (memcmp("worker", root_span_type_value, strlen("worker")) == 0));
+
+ return is_worker_request;
  }

  // After the Ruby VM forks, this method gets called in the child process to clean up any leftover state from the parent.
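
Editorial note on the change above: should_collect_resource matches the root span's type against 'web', 'proxy', and now 'worker' using an exact length check followed by memcmp, so a type such as 'web2' never matches and embedded NUL bytes cannot confuse the comparison. Below is a small standalone C sketch of that matching pattern; the helper name and sample inputs are illustrative only and are not code from the gem.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

// An exact match requires both the length and the bytes to match, mirroring the
// strlen/memcmp pairs used by should_collect_resource.
static bool type_equals(const char *value, long value_length, const char *expected) {
  return (size_t) value_length == strlen(expected) &&
         memcmp(expected, value, strlen(expected)) == 0;
}

int main(void) {
  const char *span_type = "worker"; // e.g. the @type of a Sidekiq-style root span
  long span_type_length = 6;        // what RSTRING_LEN would report for that string

  bool collect =
    type_equals(span_type, span_type_length, "web") ||
    type_equals(span_type, span_type_length, "proxy") ||
    type_equals(span_type, span_type_length, "worker");

  printf("collect resource: %d\n", collect); // prints 1
  return 0;
}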
data/ext/datadog_profiling_native_extension/heap_recorder.c CHANGED
@@ -10,6 +10,13 @@
  #define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
  #endif

+ // Minimum age (in GC generations) of heap objects we want to include in heap
+ // recorder iterations. Object with age 0 represent objects that have yet to undergo
+ // a GC and, thus, may just be noise/trash at instant of iteration and are usually not
+ // relevant for heap profiles as the great majority should be trivially reclaimed
+ // during the next GC.
+ #define ITERATION_MIN_AGE 1
+
  // A compact representation of a stacktrace frame for a heap allocation.
  typedef struct {
  char *name;
@@ -137,6 +144,11 @@ struct heap_recorder {
  // mutation of the data so iteration can occur without acquiring a lock.
  // NOTE: Contrary to object_records, this table has no ownership of its data.
  st_table *object_records_snapshot;
+ // The GC gen/epoch/count in which we prepared the current iteration.
+ //
+ // This enables us to calculate the age of iterated objects in the above snapshot by
+ // comparing it against an object's alloc_gen.
+ size_t iteration_gen;

  // Data for a heap recording that was started but not yet ended
  recording active_recording;
@@ -146,6 +158,13 @@

  // Sampling state
  uint num_recordings_skipped;
+
+ struct stats_last_update {
+ size_t objects_alive;
+ size_t objects_dead;
+ size_t objects_skipped;
+ size_t objects_frozen;
+ } stats_last_update;
  };
  static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Location);
  static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
@@ -353,11 +372,16 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
  return;
  }

+ heap_recorder->iteration_gen = rb_gc_count();
+
  if (heap_recorder->object_records_snapshot != NULL) {
  // we could trivially handle this but we raise to highlight and catch unexpected usages.
  rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished.");
  }

+ // Reset last update stats, we'll be building them from scratch during the st_foreach call below
+ heap_recorder->stats_last_update = (struct stats_last_update) {};
+
  st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder);

  heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records);
@@ -413,6 +437,22 @@ bool heap_recorder_for_each_live_object(
  return true;
  }

+ VALUE heap_recorder_state_snapshot(heap_recorder *heap_recorder) {
+ VALUE arguments[] = {
+ ID2SYM(rb_intern("num_object_records")), /* => */ LONG2NUM(heap_recorder->object_records->num_entries),
+ ID2SYM(rb_intern("num_heap_records")), /* => */ LONG2NUM(heap_recorder->heap_records->num_entries),
+
+ // Stats as of last update
+ ID2SYM(rb_intern("last_update_objects_alive")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_alive),
+ ID2SYM(rb_intern("last_update_objects_dead")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_dead),
+ ID2SYM(rb_intern("last_update_objects_skipped")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_skipped),
+ ID2SYM(rb_intern("last_update_objects_frozen")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_frozen),
+ };
+ VALUE hash = rb_hash_new();
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(hash, arguments[i], arguments[i+1]);
+ return hash;
+ }
+
  void heap_recorder_testonly_assert_hash_matches(ddog_prof_Slice_Location locations) {
  heap_stack *stack = heap_stack_new(locations);
  heap_record_key stack_based_key = (heap_record_key) {
@@ -459,6 +499,13 @@ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t v
  return ST_DELETE;
  }

+ // Check to see if an object should not be included in a heap recorder iteration.
+ // This centralizes the checking logic to ensure it's equally applied between
+ // preparation and iteration codepaths.
+ static inline bool should_exclude_from_iteration(object_record *obj_record) {
+ return obj_record->object_data.gen_age < ITERATION_MIN_AGE;
+ }
+
  static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
  long obj_id = (long) key;
  object_record *record = (object_record*) value;
@@ -466,9 +513,24 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext

  VALUE ref;

+ size_t iteration_gen = recorder->iteration_gen;
+ size_t alloc_gen = record->object_data.alloc_gen;
+ // Guard against potential overflows given unsigned types here.
+ record->object_data.gen_age = alloc_gen < iteration_gen ? iteration_gen - alloc_gen : 0;
+
+ if (should_exclude_from_iteration(record)) {
+ // If an object won't be included in the current iteration, there's
+ // no point checking for liveness or updating its size, so exit early.
+ // NOTE: This means that there should be an equivalent check during actual
+ // iteration otherwise we'd iterate/expose stale object data.
+ recorder->stats_last_update.objects_skipped++;
+ return ST_CONTINUE;
+ }
+
  if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
  // Id no longer associated with a valid ref. Need to delete this object record!
  on_committed_object_record_cleanup(recorder, record);
+ recorder->stats_last_update.objects_dead++;
  return ST_DELETE;
  }

@@ -503,6 +565,7 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
  RB_FL_SET(ref, RUBY_FL_SEEN_OBJ_ID);

  on_committed_object_record_cleanup(recorder, record);
+ recorder->stats_last_update.objects_dead++;
  return ST_DELETE;
  }

@@ -516,6 +579,11 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
  record->object_data.is_frozen = RB_OBJ_FROZEN(ref);
  }

+ recorder->stats_last_update.objects_alive++;
+ if (record->object_data.is_frozen) {
+ recorder->stats_last_update.objects_frozen++;
+ }
+
  return ST_CONTINUE;
  }

@@ -525,8 +593,16 @@ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t val
  const heap_stack *stack = record->heap_record->stack;
  iteration_context *context = (iteration_context*) extra;

- ddog_prof_Location *locations = context->heap_recorder->reusable_locations;
+ const heap_recorder *recorder = context->heap_recorder;
+
+ if (should_exclude_from_iteration(record)) {
+ // Skip objects that should not be included in iteration
+ // NOTE: This matches the short-circuiting condition in st_object_record_update
+ // and prevents iteration over stale objects.
+ return ST_CONTINUE;
+ }

+ ddog_prof_Location *locations = recorder->reusable_locations;
  for (uint16_t i = 0; i < stack->frames_len; i++) {
  const heap_frame *frame = &stack->frames[i];
  ddog_prof_Location *location = &locations[i];
@@ -725,9 +801,10 @@ void object_record_free(object_record *record) {

  VALUE object_record_inspect(object_record *record) {
  heap_frame top_frame = record->heap_record->stack->frames[0];
- VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu ",
- record->obj_id, record->object_data.weight, record->object_data.size, top_frame.filename,
- (int) top_frame.line, record->object_data.alloc_gen);
+ live_object_data object_data = record->object_data;
+ VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu gen_age=%zu frozen=%d ",
+ record->obj_id, object_data.weight, object_data.size, top_frame.filename,
+ (int) top_frame.line, object_data.alloc_gen, object_data.gen_age, object_data.is_frozen);

  const char *class = record->object_data.class;
  if (class != NULL) {
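
Editorial note on the age gating added above: heap_recorder_prepare_iteration captures rb_gc_count() as iteration_gen, each record's gen_age is then derived from its alloc_gen with an underflow guard, and records younger than ITERATION_MIN_AGE are skipped both when updating and when iterating. Below is a small standalone C sketch of that logic; the struct, the sample generations, and main are illustrative stand-ins, not the gem's types.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define ITERATION_MIN_AGE 1

// Stand-in for the subset of live_object_data involved in the age check.
typedef struct { size_t alloc_gen; size_t gen_age; } object_data_t;

// Mirrors st_object_record_update: guard against underflow, then derive the age.
static void update_gen_age(object_data_t *data, size_t iteration_gen) {
  data->gen_age = data->alloc_gen < iteration_gen ? iteration_gen - data->alloc_gen : 0;
}

// Mirrors should_exclude_from_iteration: objects that are too young are skipped.
static bool should_exclude(const object_data_t *data) {
  return data->gen_age < ITERATION_MIN_AGE;
}

int main(void) {
  size_t iteration_gen = 10;                   // e.g. rb_gc_count() at prepare time
  object_data_t fresh   = { .alloc_gen = 10 }; // allocated during the current GC gen
  object_data_t settled = { .alloc_gen = 8 };  // survived at least one GC already

  update_gen_age(&fresh, iteration_gen);
  update_gen_age(&settled, iteration_gen);

  printf("fresh:   age=%zu excluded=%d\n", fresh.gen_age, should_exclude(&fresh));     // age=0 excluded=1
  printf("settled: age=%zu excluded=%d\n", settled.gen_age, should_exclude(&settled)); // age=2 excluded=0
  return 0;
}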
data/ext/datadog_profiling_native_extension/heap_recorder.h CHANGED
@@ -27,7 +27,9 @@ typedef struct live_object_data {
  // could be seen as being representative of 50 objects.
  unsigned int weight;

- // Size of this object on last flush/update.
+ // Size of this object in memory.
+ // NOTE: This only gets updated during heap_recorder_prepare_iteration and only
+ // for those objects that meet the minimum iteration age requirements.
  size_t size;

  // The class of the object that we're tracking.
@@ -39,6 +41,10 @@ typedef struct live_object_data {
  // This enables us to calculate the age of this object in terms of GC executions.
  size_t alloc_gen;

+ // The age of this object in terms of GC generations.
+ // NOTE: This only gets updated during heap_recorder_prepare_iteration
+ size_t gen_age;
+
  // Whether this object was previously seen as being frozen. If this is the case,
  // we'll skip any further size updates since frozen objects are supposed to be
  // immutable.
@@ -144,6 +150,11 @@ bool heap_recorder_for_each_live_object(
  bool (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg),
  void *for_each_callback_extra_arg);

+ // Return a Ruby hash containing a snapshot of this recorder's interesting state at calling time.
+ // WARN: This allocates in the Ruby VM and therefore should not be called without the
+ // VM lock or during GC.
+ VALUE heap_recorder_state_snapshot(heap_recorder *heap_recorder);
+
  // v--- TEST-ONLY APIs ---v

  // Assert internal hashing logic is valid for the provided locations and its
data/ext/datadog_profiling_native_extension/http_transport.c CHANGED
@@ -30,7 +30,7 @@ inline static ddog_ByteSlice byte_slice_from_ruby_string(VALUE string);
  static VALUE _native_validate_exporter(VALUE self, VALUE exporter_configuration);
  static ddog_prof_Exporter_NewResult create_exporter(VALUE exporter_configuration, VALUE tags_as_array);
  static VALUE handle_exporter_failure(ddog_prof_Exporter_NewResult exporter_result);
- static ddog_Endpoint endpoint_from(VALUE exporter_configuration);
+ static ddog_prof_Endpoint endpoint_from(VALUE exporter_configuration);
  static ddog_Vec_Tag convert_tags(VALUE tags_as_array);
  static void safely_log_failure_to_process_tag(ddog_Vec_Tag tags, VALUE err_details);
  static VALUE _native_do_export(
@@ -94,7 +94,7 @@ static ddog_prof_Exporter_NewResult create_exporter(VALUE exporter_configuration

  // This needs to be called BEFORE convert_tags since it can raise an exception and thus cause the ddog_Vec_Tag
  // to be leaked.
- ddog_Endpoint endpoint = endpoint_from(exporter_configuration);
+ ddog_prof_Endpoint endpoint = endpoint_from(exporter_configuration);

  ddog_Vec_Tag tags = convert_tags(tags_as_array);

@@ -116,7 +116,7 @@ static VALUE handle_exporter_failure(ddog_prof_Exporter_NewResult exporter_resul
  rb_ary_new_from_args(2, error_symbol, get_error_details_and_drop(&exporter_result.err));
  }

- static ddog_Endpoint endpoint_from(VALUE exporter_configuration) {
+ static ddog_prof_Endpoint endpoint_from(VALUE exporter_configuration) {
  ENFORCE_TYPE(exporter_configuration, T_ARRAY);

  ID working_mode = SYM2ID(rb_ary_entry(exporter_configuration, 0)); // SYM2ID verifies its input so we can do this safely
@@ -131,12 +131,12 @@ static ddog_Endpoint endpoint_from(VALUE exporter_configuration) {
  ENFORCE_TYPE(site, T_STRING);
  ENFORCE_TYPE(api_key, T_STRING);

- return ddog_Endpoint_agentless(char_slice_from_ruby_string(site), char_slice_from_ruby_string(api_key));
+ return ddog_prof_Endpoint_agentless(char_slice_from_ruby_string(site), char_slice_from_ruby_string(api_key));
  } else { // agent_id
  VALUE base_url = rb_ary_entry(exporter_configuration, 1);
  ENFORCE_TYPE(base_url, T_STRING);

- return ddog_Endpoint_agent(char_slice_from_ruby_string(base_url));
+ return ddog_prof_Endpoint_agent(char_slice_from_ruby_string(base_url));
  }
  }

data/ext/datadog_profiling_native_extension/native_extension_helpers.rb CHANGED
@@ -15,7 +15,7 @@ module Datadog
  # The MJIT header was introduced on 2.6 and removed on 3.3; for other Rubies we rely on debase-ruby_core_source
  CAN_USE_MJIT_HEADER = RUBY_VERSION.start_with?('2.6', '2.7', '3.0.', '3.1.', '3.2.')

- LIBDATADOG_VERSION = '~> 6.0.0.2.0'
+ LIBDATADOG_VERSION = '~> 7.0.0.1.0'

  def self.fail_install_if_missing_extension?
  ENV[ENV_FAIL_INSTALL_IF_MISSING_EXTENSION].to_s.strip.downcase == 'true'
data/ext/datadog_profiling_native_extension/ruby_helpers.h CHANGED
@@ -82,6 +82,9 @@ NORETURN(
  #define ENFORCE_SUCCESS_HELPER(expression, have_gvl) \
  { int result_syserr_errno = expression; if (RB_UNLIKELY(result_syserr_errno)) raise_syserr(result_syserr_errno, have_gvl, ADD_QUOTES(expression), __FILE__, __LINE__, __func__); }

+ #define RUBY_NUM_OR_NIL(val, condition, conv) ((val condition) ? conv(val) : Qnil)
+ #define RUBY_AVG_OR_NIL(total, count) ((count == 0) ? Qnil : DBL2NUM(((double) total) / count))
+
  // Called by ENFORCE_SUCCESS_HELPER; should not be used directly
  NORETURN(void raise_syserr(
  int syserr_errno,
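
Editorial note on the two new macros: they replace the hand-rolled pretty_* conversions that _native_stats used to build by hand. A sentinel value (UINT64_MAX for a min that was never updated, 0 for a max/total) maps to Qnil, anything else goes through the supplied conversion macro, and an average collapses to Qnil when the sample count is zero. The sketch below shows how the macros expand; the VALUE typedef and the Qnil/ULL2NUM/DBL2NUM stand-ins exist only to keep this example compilable outside a Ruby extension, where those names come from the Ruby C API.

#include <stdint.h>
#include <stdio.h>

// Stand-ins for Ruby C API names, for illustration only.
typedef long VALUE;
#define Qnil ((VALUE) 0)
#define ULL2NUM(x) ((VALUE)(x))
#define DBL2NUM(x) ((VALUE)(x))

// Copied from the ruby_helpers.h hunk above.
#define RUBY_NUM_OR_NIL(val, condition, conv) ((val condition) ? conv(val) : Qnil)
#define RUBY_AVG_OR_NIL(total, count) ((count == 0) ? Qnil : DBL2NUM(((double) total) / count))

int main(void) {
  uint64_t min_ns = UINT64_MAX; // "never sampled" sentinel, as for cpu_sampling_time_ns_min
  uint64_t total_ns = 1200, sampled = 0;

  // RUBY_NUM_OR_NIL(min_ns, != UINT64_MAX, ULL2NUM) expands to
  //   ((min_ns != UINT64_MAX) ? ULL2NUM(min_ns) : Qnil)
  VALUE pretty_min = RUBY_NUM_OR_NIL(min_ns, != UINT64_MAX, ULL2NUM);
  VALUE pretty_avg = RUBY_AVG_OR_NIL(total_ns, sampled); // zero samples -> Qnil

  printf("min is nil: %d, avg is nil: %d\n", pretty_min == Qnil, pretty_avg == Qnil);
  return 0;
}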