datadog 2.17.0 → 2.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +44 -1
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +12 -46
  4. data/ext/datadog_profiling_native_extension/collectors_stack.c +227 -49
  5. data/ext/datadog_profiling_native_extension/collectors_stack.h +19 -3
  6. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +63 -12
  7. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
  8. data/ext/datadog_profiling_native_extension/extconf.rb +7 -0
  9. data/ext/datadog_profiling_native_extension/heap_recorder.c +239 -363
  10. data/ext/datadog_profiling_native_extension/heap_recorder.h +4 -6
  11. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +22 -0
  12. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +8 -5
  13. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +1 -0
  14. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +6 -3
  15. data/ext/datadog_profiling_native_extension/ruby_helpers.c +1 -13
  16. data/ext/datadog_profiling_native_extension/ruby_helpers.h +2 -10
  17. data/ext/datadog_profiling_native_extension/stack_recorder.c +154 -57
  18. data/ext/libdatadog_api/extconf.rb +2 -2
  19. data/ext/libdatadog_api/library_config.c +54 -12
  20. data/ext/libdatadog_api/library_config.h +6 -0
  21. data/ext/libdatadog_api/process_discovery.c +2 -7
  22. data/ext/libdatadog_extconf_helpers.rb +1 -1
  23. data/lib/datadog/appsec/api_security/lru_cache.rb +9 -2
  24. data/lib/datadog/appsec/api_security/route_extractor.rb +65 -0
  25. data/lib/datadog/appsec/api_security/sampler.rb +59 -0
  26. data/lib/datadog/appsec/api_security.rb +14 -0
  27. data/lib/datadog/appsec/assets/waf_rules/recommended.json +257 -85
  28. data/lib/datadog/appsec/assets/waf_rules/strict.json +10 -78
  29. data/lib/datadog/appsec/component.rb +30 -54
  30. data/lib/datadog/appsec/configuration/settings.rb +60 -2
  31. data/lib/datadog/appsec/context.rb +6 -6
  32. data/lib/datadog/appsec/contrib/devise/tracking_middleware.rb +1 -1
  33. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +27 -16
  34. data/lib/datadog/appsec/processor/rule_loader.rb +5 -6
  35. data/lib/datadog/appsec/remote.rb +15 -55
  36. data/lib/datadog/appsec/security_engine/engine.rb +194 -0
  37. data/lib/datadog/appsec/security_engine/runner.rb +10 -11
  38. data/lib/datadog/appsec.rb +4 -7
  39. data/lib/datadog/core/configuration/agent_settings.rb +52 -0
  40. data/lib/datadog/core/configuration/agent_settings_resolver.rb +1 -43
  41. data/lib/datadog/core/configuration/components.rb +2 -4
  42. data/lib/datadog/core/configuration/option.rb +9 -9
  43. data/lib/datadog/core/configuration/settings.rb +22 -10
  44. data/lib/datadog/core/configuration/stable_config.rb +1 -2
  45. data/lib/datadog/core/crashtracking/tag_builder.rb +4 -22
  46. data/lib/datadog/core/process_discovery/tracer_memfd.rb +15 -0
  47. data/lib/datadog/core/process_discovery.rb +5 -1
  48. data/lib/datadog/core/remote/configuration/repository.rb +12 -0
  49. data/lib/datadog/core/tag_builder.rb +56 -0
  50. data/lib/datadog/core/telemetry/event/app_client_configuration_change.rb +1 -0
  51. data/lib/datadog/core/telemetry/event/app_started.rb +129 -39
  52. data/lib/datadog/core/telemetry/logger.rb +5 -4
  53. data/lib/datadog/core/telemetry/logging.rb +11 -5
  54. data/lib/datadog/core/transport/http/adapters/net.rb +17 -2
  55. data/lib/datadog/core/transport/http/builder.rb +2 -2
  56. data/lib/datadog/core/transport/http/env.rb +8 -0
  57. data/lib/datadog/core/utils.rb +7 -0
  58. data/lib/datadog/di/instrumenter.rb +52 -2
  59. data/lib/datadog/di/probe_notification_builder.rb +31 -41
  60. data/lib/datadog/di/probe_notifier_worker.rb +9 -1
  61. data/lib/datadog/di/serializer.rb +6 -2
  62. data/lib/datadog/di/transport/http/input.rb +10 -0
  63. data/lib/datadog/di/transport/input.rb +10 -2
  64. data/lib/datadog/profiling/collectors/code_provenance.rb +17 -8
  65. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +4 -0
  66. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
  67. data/lib/datadog/profiling/collectors/thread_context.rb +16 -1
  68. data/lib/datadog/profiling/component.rb +7 -9
  69. data/lib/datadog/profiling/ext.rb +0 -12
  70. data/lib/datadog/profiling/http_transport.rb +2 -2
  71. data/lib/datadog/profiling/profiler.rb +2 -0
  72. data/lib/datadog/profiling/scheduler.rb +2 -1
  73. data/lib/datadog/profiling/stack_recorder.rb +5 -5
  74. data/lib/datadog/profiling/tag_builder.rb +5 -37
  75. data/lib/datadog/profiling/tasks/setup.rb +2 -0
  76. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +15 -0
  77. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +19 -12
  78. data/lib/datadog/tracing/contrib/action_pack/ext.rb +2 -0
  79. data/lib/datadog/tracing/contrib/lograge/patcher.rb +4 -2
  80. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  81. data/lib/datadog/tracing/contrib/sidekiq/server_tracer.rb +5 -2
  82. data/lib/datadog/tracing/sync_writer.rb +1 -1
  83. data/lib/datadog/tracing/trace_operation.rb +12 -4
  84. data/lib/datadog/tracing/tracer.rb +6 -2
  85. data/lib/datadog/version.rb +1 -1
  86. metadata +12 -10
  87. data/lib/datadog/appsec/assets/waf_rules/processors.json +0 -321
  88. data/lib/datadog/appsec/assets/waf_rules/scanners.json +0 -1023
  89. data/lib/datadog/appsec/processor/rule_merger.rb +0 -171
  90. data/lib/datadog/appsec/processor.rb +0 -107
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 28053ce1bca4cce2032cb068c38e99a56524e6ccecec3068c4d26d1e750f9790
4
- data.tar.gz: 29b6b57640638184370c5176e4da94e74c9770bae8afd7884d1c2baea782350d
3
+ metadata.gz: a05fd61c1cac65f510b1e6d180ce2bc0552d63218f4fb837284ab9e509b071cb
4
+ data.tar.gz: 29b4e928bb96d57d3fa4820e91ec030ea21973868cfdb906ebce5304c0955f22
5
5
  SHA512:
6
- metadata.gz: caf719b7c9a2e5ee3f47ad0c3e02670b90b1f76bfa5de56c10f69169c97b827f4b9e81df822e548d1390f152f15e756c54c200e9f35904138e7c672af99ef30b
7
- data.tar.gz: eda75252942b8c7d278bab14da78c7752b17b136a3f0bd9ed145512a64b3fb8acb110a8cbbfb280b2bcf7c558409ce83b5d311fce5f019ff34516ceb3883255c
6
+ metadata.gz: 9590ee74f91ecf5cf304627790069b6347f63ae050acfc6d79ad29d9155b5e8dde3243a47049eed5c023ad2b1dd6dc78ff4a0813d16d942d83df2db981b5511c
7
+ data.tar.gz: 22892adf3a7520629a4907738aa31125f00a97cdf263fb2edc85568f86096031de8038e78b6933e3a859d0f52aff3b67955e00109bbdc18a2e7e2d3f66602497
data/CHANGELOG.md CHANGED
@@ -2,6 +2,33 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [2.18.0] - 2025-07-03
6
+
7
+ ### Added
8
+
9
+ * AppSec: Add API Security with new sampling algorithm to the Rack, Rails, Sinatra and Grape frameworks ([#4699][])
10
+ * Core: Add support for tracer configuration through Fleet Automation ([#4651][])
11
+ * Tracing: Add Rails view render times and database query times to the spans, when available ([#4697][])
12
+ * Core: Add Service Discovery capability, enabling the tracer to reliably determine which processes are instrumented
13
+
14
+ ### Changed
15
+
16
+ * AppSec: Enable API Security feature by default ([#4756][])
17
+ * AppSec: Improve overall threat detection ([#4757][])
18
+ * AppSec: Deprecate `config.appsec.ip_passlist`, `config.appsec.ip_denylist`, and `config.appsec.user_id_denylist` ([#4757][])
19
+ * Profiling: Graduate Heap Profiling from alpha to preview ([#4401][], [#4460][])
20
+ * Dynamic Instrumentation: Add instance variable capture to method and line probes ([#4668][])
21
+ * Profiling: Add capturing of native filenames in backtraces ([#4745][])
22
+ * Profiling: Clean up profiler logging ([#4728][])
23
+ * Tracing: Add support for `DD_RUNTIME_METRICS_RUNTIME_ID_ENABLED` to tag runtime metrics with the current runtime ID, allowing correlation across processes ([#4718][])
24
+ * Tracing: Improve performance of `http.route` tag computation for Rails ([#4688][])
25
+
26
+ ### Fixed
27
+
28
+ * AppSec: Fix authenticated users tracking in anonymization mode for Devise ([#4731][])
29
+ * Dynamic Instrumentation: Fix source code display in dynamic instrumentation/live debugger ([#4743][])
30
+ * Profiling: Fix profiler stopping due to bug in heap profiling serialization ([#4721][])
31
+
5
32
  ## [2.17.0] - 2025-06-02
6
33
 
7
34
  ### Added
@@ -3241,7 +3268,8 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
3241
3268
  Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
3242
3269
 
3243
3270
 
3244
- [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.17.0...master
3271
+ [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.18.0...master
3272
+ [2.18.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.17.0...v2.18.0
3245
3273
  [2.17.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.16.0...v2.17.0
3246
3274
  [2.16.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.15.0...v2.16.0
3247
3275
  [2.15.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.14.0...v2.15.0
@@ -4745,6 +4773,7 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4745
4773
  [#4391]: https://github.com/DataDog/dd-trace-rb/issues/4391
4746
4774
  [#4398]: https://github.com/DataDog/dd-trace-rb/issues/4398
4747
4775
  [#4399]: https://github.com/DataDog/dd-trace-rb/issues/4399
4776
+ [#4401]: https://github.com/DataDog/dd-trace-rb/issues/4401
4748
4777
  [#4403]: https://github.com/DataDog/dd-trace-rb/issues/4403
4749
4778
  [#4406]: https://github.com/DataDog/dd-trace-rb/issues/4406
4750
4779
  [#4411]: https://github.com/DataDog/dd-trace-rb/issues/4411
@@ -4755,6 +4784,7 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4755
4784
  [#4433]: https://github.com/DataDog/dd-trace-rb/issues/4433
4756
4785
  [#4437]: https://github.com/DataDog/dd-trace-rb/issues/4437
4757
4786
  [#4455]: https://github.com/DataDog/dd-trace-rb/issues/4455
4787
+ [#4460]: https://github.com/DataDog/dd-trace-rb/issues/4460
4758
4788
  [#4473]: https://github.com/DataDog/dd-trace-rb/issues/4473
4759
4789
  [#4493]: https://github.com/DataDog/dd-trace-rb/issues/4493
4760
4790
  [#4497]: https://github.com/DataDog/dd-trace-rb/issues/4497
@@ -4787,14 +4817,27 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4787
4817
  [#4616]: https://github.com/DataDog/dd-trace-rb/issues/4616
4788
4818
  [#4625]: https://github.com/DataDog/dd-trace-rb/issues/4625
4789
4819
  [#4644]: https://github.com/DataDog/dd-trace-rb/issues/4644
4820
+ [#4651]: https://github.com/DataDog/dd-trace-rb/issues/4651
4790
4821
  [#4653]: https://github.com/DataDog/dd-trace-rb/issues/4653
4791
4822
  [#4656]: https://github.com/DataDog/dd-trace-rb/issues/4656
4792
4823
  [#4665]: https://github.com/DataDog/dd-trace-rb/issues/4665
4824
+ [#4668]: https://github.com/DataDog/dd-trace-rb/issues/4668
4793
4825
  [#4669]: https://github.com/DataDog/dd-trace-rb/issues/4669
4794
4826
  [#4672]: https://github.com/DataDog/dd-trace-rb/issues/4672
4795
4827
  [#4673]: https://github.com/DataDog/dd-trace-rb/issues/4673
4796
4828
  [#4678]: https://github.com/DataDog/dd-trace-rb/issues/4678
4797
4829
  [#4679]: https://github.com/DataDog/dd-trace-rb/issues/4679
4830
+ [#4688]: https://github.com/DataDog/dd-trace-rb/issues/4688
4831
+ [#4697]: https://github.com/DataDog/dd-trace-rb/issues/4697
4832
+ [#4699]: https://github.com/DataDog/dd-trace-rb/issues/4699
4833
+ [#4718]: https://github.com/DataDog/dd-trace-rb/issues/4718
4834
+ [#4721]: https://github.com/DataDog/dd-trace-rb/issues/4721
4835
+ [#4728]: https://github.com/DataDog/dd-trace-rb/issues/4728
4836
+ [#4731]: https://github.com/DataDog/dd-trace-rb/issues/4731
4837
+ [#4743]: https://github.com/DataDog/dd-trace-rb/issues/4743
4838
+ [#4745]: https://github.com/DataDog/dd-trace-rb/issues/4745
4839
+ [#4756]: https://github.com/DataDog/dd-trace-rb/issues/4756
4840
+ [#4757]: https://github.com/DataDog/dd-trace-rb/issues/4757
4798
4841
  [@AdrianLC]: https://github.com/AdrianLC
4799
4842
  [@Azure7111]: https://github.com/Azure7111
4800
4843
  [@BabyGroot]: https://github.com/BabyGroot
@@ -122,7 +122,11 @@ typedef struct {
122
122
  // Others
123
123
 
124
124
  // Used to detect/avoid nested sampling, e.g. when on_newobj_event gets triggered by a memory allocation
125
- // that happens during another sample.
125
+ // that happens during another sample, or when the signal handler gets triggered while we're already in the middle of
126
+ // sampling.
127
+ //
128
+ // @ivoanjo: Right now we always sample inside `safely_call`; if that ever changes, this flag may need to become
129
+ // volatile/atomic/have some barriers to ensure it's visible during e.g. signal handlers.
126
130
  bool during_sample;
127
131
 
128
132
  #ifndef NO_GVL_INSTRUMENTATION
@@ -145,16 +149,6 @@ typedef struct {
145
149
  // How many times we actually tried to interrupt a thread for sampling
146
150
  unsigned int interrupt_thread_attempts;
147
151
 
148
- // # Stats for the results of calling rb_postponed_job_register_one
149
- // The same function was already waiting to be executed
150
- unsigned int postponed_job_skipped_already_existed;
151
- // The function was added to the queue successfully
152
- unsigned int postponed_job_success;
153
- // The queue was full
154
- unsigned int postponed_job_full;
155
- // The function returned an unknown result code
156
- unsigned int postponed_job_unknown_result;
157
-
158
152
  // # CPU/Walltime sampling stats
159
153
  // How many times we actually CPU/wall sampled
160
154
  unsigned int cpu_sampled;
@@ -589,25 +583,17 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
589
583
  return;
590
584
  }
591
585
 
592
- // We implicitly assume there can be no concurrent nor nested calls to handle_sampling_signal because
593
- // a) we get triggered using SIGPROF, and the docs state a second SIGPROF will not interrupt an existing one
586
+ // We assume there can be no concurrent nor nested calls to handle_sampling_signal because
587
+ // a) we get triggered using SIGPROF, and the docs state a second SIGPROF will not interrupt an existing one (see sigaction docs on sa_mask)
594
588
  // b) we validate we are in the thread that has the global VM lock; if a different thread gets a signal, it will return early
595
589
  // because it will not have the global VM lock
596
590
 
597
- // Note: rb_postponed_job_register_one ensures that if there's a previous sample_from_postponed_job queued for execution
598
- // then we will not queue a second one. It does this by doing a linear scan on the existing jobs; in the future we
599
- // may want to implement that check ourselves.
600
-
601
591
  state->stats.signal_handler_enqueued_sample++;
602
592
 
603
- // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
604
- // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
605
593
  #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
606
594
  rb_postponed_job_trigger(sample_from_postponed_job_handle);
607
- state->stats.postponed_job_success++; // Always succeeds
608
595
  #else
609
-
610
- // This is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
596
+ // Passing in `gc_finalize_deferred_workaround` is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
611
597
  //
612
598
  // TL;DR the `rb_postponed_job_register_one` API is not atomic (which is why it got replaced by `rb_postponed_job_trigger`)
613
599
  // and in rare cases can cause VM crashes.
@@ -631,20 +617,7 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
631
617
  //
632
618
  // Thus, our workaround is simple: we pass in objspace as our argument, just in case the clobbering happens.
633
619
  // In the happy path, we never use this argument so it makes no difference. In the buggy path, we avoid crashing the VM.
634
- int result = rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
635
-
636
- // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
637
- // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
638
- switch (result) {
639
- case 0:
640
- state->stats.postponed_job_full++; break;
641
- case 1:
642
- state->stats.postponed_job_success++; break;
643
- case 2:
644
- state->stats.postponed_job_skipped_already_existed++; break;
645
- default:
646
- state->stats.postponed_job_unknown_result++;
647
- }
620
+ rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
648
621
  #endif
649
622
  }
650
623
 
@@ -714,6 +687,8 @@ static void interrupt_sampling_trigger_loop(void *state_ptr) {
714
687
  atomic_store(&state->should_run, false);
715
688
  }
716
689
 
690
+ // Note: If we ever want to get rid of the postponed job execution, remember not to clobber Ruby exceptions, as
691
+ // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
717
692
  static void sample_from_postponed_job(DDTRACE_UNUSED void *_unused) {
718
693
  cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
719
694
 
@@ -1020,10 +995,6 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
1020
995
  ID2SYM(rb_intern("simulated_signal_delivery")), /* => */ UINT2NUM(state->stats.simulated_signal_delivery),
1021
996
  ID2SYM(rb_intern("signal_handler_enqueued_sample")), /* => */ UINT2NUM(state->stats.signal_handler_enqueued_sample),
1022
997
  ID2SYM(rb_intern("signal_handler_wrong_thread")), /* => */ UINT2NUM(state->stats.signal_handler_wrong_thread),
1023
- ID2SYM(rb_intern("postponed_job_skipped_already_existed")), /* => */ UINT2NUM(state->stats.postponed_job_skipped_already_existed),
1024
- ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
1025
- ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
1026
- ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
1027
998
  ID2SYM(rb_intern("interrupt_thread_attempts")), /* => */ UINT2NUM(state->stats.interrupt_thread_attempts),
1028
999
 
1029
1000
  // CPU Stats
@@ -1073,8 +1044,7 @@ void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
1073
1044
 
1074
1045
  state->stats.simulated_signal_delivery++;
1075
1046
 
1076
- // @ivoanjo: We could instead directly call sample_from_postponed_job, but I chose to go through the signal handler
1077
- // so that the simulated case is as close to the original one as well (including any metrics increases, etc).
1047
+ // `handle_sampling_signal` does a few extra things on top of `sample_from_postponed_job`, which is why we don't shortcut here
1078
1048
  handle_sampling_signal(0, NULL, NULL);
1079
1049
 
1080
1050
  return NULL; // Unused
@@ -1207,10 +1177,6 @@ static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *u
1207
1177
  &state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
1208
1178
  );
1209
1179
 
1210
- // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
1211
- // invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
1212
- // to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
1213
- // defined as not being able to allocate) sets this.
1214
1180
  state->during_sample = true;
1215
1181
 
1216
1182
  // Rescue against any exceptions that happen during sampling
@@ -1,9 +1,21 @@
1
1
  #include <ruby.h>
2
2
  #include <ruby/debug.h>
3
- #include "extconf.h"
4
- #include "helpers.h"
5
- #include "libdatadog_helpers.h"
6
- #include "ruby_helpers.h"
3
+ #include <ruby/st.h>
4
+
5
+ #include "extconf.h" // This is needed for the HAVE_DLADDR and friends below
6
+
7
+ // For dladdr/dladdr1
8
+ #if defined(HAVE_DLADDR1) || defined(HAVE_DLADDR)
9
+ #ifndef _GNU_SOURCE
10
+ #define _GNU_SOURCE
11
+ #endif
12
+ #include <dlfcn.h>
13
+ #ifdef HAVE_DLADDR1
14
+ #include <link.h>
15
+ #endif
16
+ #endif
17
+
18
+ #include "datadog_ruby_common.h"
7
19
  #include "private_vm_api_access.h"
8
20
  #include "stack_recorder.h"
9
21
  #include "collectors_stack.h"
@@ -11,18 +23,22 @@
11
23
  // Gathers stack traces from running threads, storing them in a StackRecorder instance
12
24
  // This file implements the native bits of the Datadog::Profiling::Collectors::Stack class
13
25
 
14
- static VALUE missing_string = Qnil;
15
-
16
- // Used as scratch space during sampling
17
- struct sampling_buffer { // Note: typedef'd in the header to sampling_buffer
18
- uint16_t max_frames;
19
- ddog_prof_Location *locations;
20
- frame_info *stack_buffer;
21
- };
22
-
26
+ static VALUE _native_filenames_available(DDTRACE_UNUSED VALUE self);
27
+ static VALUE _native_ruby_native_filename(DDTRACE_UNUSED VALUE self);
23
28
  static VALUE _native_sample(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
24
29
  static VALUE native_sample_do(VALUE args);
25
30
  static VALUE native_sample_ensure(VALUE args);
31
+ static void set_file_info_for_cfunc(
32
+ ddog_CharSlice *filename_slice,
33
+ int *line,
34
+ ddog_CharSlice last_ruby_frame_filename,
35
+ int last_ruby_line,
36
+ void *function,
37
+ bool top_of_the_stack,
38
+ bool native_filenames_enabled,
39
+ st_table *native_filenames_cache
40
+ );
41
+ static const char *get_or_compute_native_filename(void *function, st_table *native_filenames_cache);
26
42
  static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer* buffer, char *frames_omitted_message, int frames_omitted_message_size);
27
43
  static void record_placeholder_stack_in_native_code(VALUE recorder_instance, sample_values values, sample_labels labels);
28
44
  static void maybe_trim_template_random_ids(ddog_CharSlice *name_slice, ddog_CharSlice *filename_slice);
@@ -32,16 +48,47 @@ static void maybe_trim_template_random_ids(ddog_CharSlice *name_slice, ddog_Char
32
48
  extern VALUE rb_iseq_path(const VALUE);
33
49
  extern VALUE rb_iseq_base_label(const VALUE);
34
50
 
51
+ // NULL if dladdr is not available or we weren't able to get the native filename for the Ruby VM
52
+ static const char *ruby_native_filename = NULL;
53
+
35
54
  void collectors_stack_init(VALUE profiling_module) {
36
55
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
37
56
  VALUE collectors_stack_class = rb_define_class_under(collectors_module, "Stack", rb_cObject);
57
+
58
+ rb_define_singleton_method(collectors_stack_class, "_native_filenames_available?", _native_filenames_available, 0);
59
+ rb_define_singleton_method(collectors_stack_class, "_native_ruby_native_filename", _native_ruby_native_filename, 0);
60
+
38
61
  // Hosts methods used for testing the native code using RSpec
39
62
  VALUE testing_module = rb_define_module_under(collectors_stack_class, "Testing");
40
63
 
41
64
  rb_define_singleton_method(testing_module, "_native_sample", _native_sample, -1);
42
65
 
43
- missing_string = rb_str_new2("");
44
- rb_global_variable(&missing_string);
66
+ #if defined(HAVE_DLADDR1) || defined(HAVE_DLADDR)
67
+ // To be able to detect when a frame is coming from Ruby, we record here its filename as returned by dladdr.
68
+ // We expect this same pointer to be returned by dladdr for all frames coming from Ruby.
69
+ //
70
+ // Small note: Creating/deleting the cache is a bit awkward here, but it seems like a bigger footgun to allow
71
+ // `get_or_compute_native_filename` to run without a cache, since we never expect that to happen during sampling. So it seems
72
+ // like a reasonable trade-off to force callers to always figure that out.
73
+ st_table *temporary_cache = st_init_numtable();
74
+ const char *native_filename = get_or_compute_native_filename(rb_ary_new, temporary_cache);
75
+ if (native_filename != NULL && native_filename[0] != '\0') {
76
+ ruby_native_filename = native_filename;
77
+ }
78
+ st_free_table(temporary_cache);
79
+ #endif
80
+ }
81
+
82
+ static VALUE _native_filenames_available(DDTRACE_UNUSED VALUE self) {
83
+ #if defined(HAVE_DLADDR1) || defined(HAVE_DLADDR)
84
+ return ruby_native_filename != NULL ? Qtrue : Qfalse;
85
+ #else
86
+ return Qfalse;
87
+ #endif
88
+ }
89
+
90
+ static VALUE _native_ruby_native_filename(DDTRACE_UNUSED VALUE self) {
91
+ return ruby_native_filename != NULL ? rb_utf8_str_new_cstr(ruby_native_filename) : Qnil;
45
92
  }
46
93
 
47
94
  typedef struct {
@@ -52,6 +99,8 @@ typedef struct {
52
99
  VALUE thread;
53
100
  ddog_prof_Location *locations;
54
101
  sampling_buffer *buffer;
102
+ bool native_filenames_enabled;
103
+ st_table *native_filenames_cache;
55
104
  } native_sample_args;
56
105
 
57
106
  // This method exists only to enable testing Datadog::Profiling::Collectors::Stack behavior using RSpec.
@@ -73,10 +122,15 @@ static VALUE _native_sample(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) {
73
122
  VALUE max_frames = rb_hash_lookup2(options, ID2SYM(rb_intern("max_frames")), INT2NUM(400));
74
123
  VALUE in_gc = rb_hash_lookup2(options, ID2SYM(rb_intern("in_gc")), Qfalse);
75
124
  VALUE is_gvl_waiting_state = rb_hash_lookup2(options, ID2SYM(rb_intern("is_gvl_waiting_state")), Qfalse);
125
+ VALUE native_filenames_enabled = rb_hash_lookup2(options, ID2SYM(rb_intern("native_filenames_enabled")), Qfalse);
76
126
 
77
127
  ENFORCE_TYPE(metric_values_hash, T_HASH);
78
128
  ENFORCE_TYPE(labels_array, T_ARRAY);
79
129
  ENFORCE_TYPE(numeric_labels_array, T_ARRAY);
130
+ ENFORCE_TYPE(max_frames, T_FIXNUM);
131
+ ENFORCE_BOOLEAN(in_gc);
132
+ ENFORCE_BOOLEAN(is_gvl_waiting_state);
133
+ ENFORCE_BOOLEAN(native_filenames_enabled);
80
134
 
81
135
  VALUE zero = INT2NUM(0);
82
136
  VALUE heap_sample = rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("heap_sample"), Qfalse);
@@ -119,7 +173,8 @@ static VALUE _native_sample(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) {
119
173
  int max_frames_requested = sampling_buffer_check_max_frames(NUM2INT(max_frames));
120
174
 
121
175
  ddog_prof_Location *locations = ruby_xcalloc(max_frames_requested, sizeof(ddog_prof_Location));
122
- sampling_buffer *buffer = sampling_buffer_new(max_frames_requested, locations);
176
+ sampling_buffer buffer;
177
+ sampling_buffer_initialize(&buffer, max_frames_requested, locations);
123
178
 
124
179
  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = labels_count};
125
180
 
@@ -130,7 +185,9 @@ static VALUE _native_sample(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) {
130
185
  .labels = (sample_labels) {.labels = slice_labels, .state_label = state_label, .is_gvl_waiting_state = is_gvl_waiting_state == Qtrue},
131
186
  .thread = thread,
132
187
  .locations = locations,
133
- .buffer = buffer,
188
+ .buffer = &buffer,
189
+ .native_filenames_enabled = native_filenames_enabled == Qtrue,
190
+ .native_filenames_cache = st_init_numtable(),
134
191
  };
135
192
 
136
193
  return rb_ensure(native_sample_do, (VALUE) &args_struct, native_sample_ensure, (VALUE) &args_struct);
@@ -152,7 +209,9 @@ static VALUE native_sample_do(VALUE args) {
152
209
  args_struct->buffer,
153
210
  args_struct->recorder_instance,
154
211
  args_struct->values,
155
- args_struct->labels
212
+ args_struct->labels,
213
+ args_struct->native_filenames_enabled,
214
+ args_struct->native_filenames_cache
156
215
  );
157
216
  }
158
217
 
@@ -164,6 +223,7 @@ static VALUE native_sample_ensure(VALUE args) {
164
223
 
165
224
  ruby_xfree(args_struct->locations);
166
225
  sampling_buffer_free(args_struct->buffer);
226
+ st_free_table(args_struct->native_filenames_cache);
167
227
 
168
228
  return Qtrue;
169
229
  }
@@ -184,14 +244,15 @@ void sample_thread(
184
244
  sampling_buffer* buffer,
185
245
  VALUE recorder_instance,
186
246
  sample_values values,
187
- sample_labels labels
247
+ sample_labels labels,
248
+ bool native_filenames_enabled,
249
+ st_table *native_filenames_cache
188
250
  ) {
189
- int captured_frames = ddtrace_rb_profile_frames(
190
- thread,
191
- 0 /* stack starting depth */,
192
- buffer->max_frames,
193
- buffer->stack_buffer
194
- );
251
+ // If we already prepared a sample, we use it below; if not, we prepare it now.
252
+ if (!buffer->pending_sample) prepare_sample_thread(thread, buffer);
253
+
254
+ buffer->pending_sample = false;
255
+ int captured_frames = buffer->pending_sample_result;
195
256
 
196
257
  if (captured_frames == PLACEHOLDER_STACK_IN_NATIVE_CODE) {
197
258
  record_placeholder_stack_in_native_code(recorder_instance, values, labels);
@@ -211,7 +272,7 @@ void sample_thread(
211
272
  // on the stack that is below (e.g. directly or indirectly has called) the native method.
212
273
  // Thus, we keep that frame here to be able to replicate that behavior.
213
274
  // (This is why we also iterate the sampling buffers backwards below -- so that it's easier to keep the last_ruby_frame_filename)
214
- VALUE last_ruby_frame_filename = Qnil;
275
+ ddog_CharSlice last_ruby_frame_filename = DDOG_CHARSLICE_C("");
215
276
  int last_ruby_line = 0;
216
277
 
217
278
  ddog_prof_Label *state_label = labels.state_label;
@@ -230,31 +291,39 @@ void sample_thread(
230
291
  }
231
292
 
232
293
  for (int i = captured_frames - 1; i >= 0; i--) {
233
- VALUE name, filename;
294
+ ddog_CharSlice name_slice, filename_slice;
234
295
  int line;
296
+ bool top_of_the_stack = i == 0;
235
297
 
236
298
  if (buffer->stack_buffer[i].is_ruby_frame) {
237
- name = rb_iseq_base_label(buffer->stack_buffer[i].as.ruby_frame.iseq);
238
- filename = rb_iseq_path(buffer->stack_buffer[i].as.ruby_frame.iseq);
299
+ VALUE name = rb_iseq_base_label(buffer->stack_buffer[i].as.ruby_frame.iseq);
300
+ VALUE filename = rb_iseq_path(buffer->stack_buffer[i].as.ruby_frame.iseq);
301
+
302
+ name_slice = NIL_P(name) ? DDOG_CHARSLICE_C("") : char_slice_from_ruby_string(name);
303
+ filename_slice = NIL_P(filename) ? DDOG_CHARSLICE_C("") : char_slice_from_ruby_string(filename);
239
304
  line = buffer->stack_buffer[i].as.ruby_frame.line;
240
305
 
241
- last_ruby_frame_filename = filename;
306
+ last_ruby_frame_filename = filename_slice;
242
307
  last_ruby_line = line;
243
308
  } else {
244
- name = rb_id2str(buffer->stack_buffer[i].as.native_frame.method_id);
245
- filename = last_ruby_frame_filename;
246
- line = last_ruby_line;
309
+ VALUE name = rb_id2str(buffer->stack_buffer[i].as.native_frame.method_id);
310
+
311
+ name_slice = NIL_P(name) ? DDOG_CHARSLICE_C("") : char_slice_from_ruby_string(name);
312
+
313
+ set_file_info_for_cfunc(
314
+ &filename_slice,
315
+ &line,
316
+ last_ruby_frame_filename,
317
+ last_ruby_line,
318
+ buffer->stack_buffer[i].as.native_frame.function,
319
+ top_of_the_stack,
320
+ native_filenames_enabled,
321
+ native_filenames_cache
322
+ );
247
323
  }
248
324
 
249
- name = NIL_P(name) ? missing_string : name;
250
- filename = NIL_P(filename) ? missing_string : filename;
251
-
252
- ddog_CharSlice name_slice = char_slice_from_ruby_string(name);
253
- ddog_CharSlice filename_slice = char_slice_from_ruby_string(filename);
254
-
255
325
  maybe_trim_template_random_ids(&name_slice, &filename_slice);
256
326
 
257
- bool top_of_the_stack = i == 0;
258
327
 
259
328
  // When there's only wall-time in a sample, this means that the thread was not active in the sampled period.
260
329
  if (top_of_the_stack && only_wall_time) {
@@ -324,6 +393,94 @@ void sample_thread(
324
393
  );
325
394
  }
326
395
 
396
+ #if defined(HAVE_DLADDR1) || defined(HAVE_DLADDR)
397
+ static void set_file_info_for_cfunc(
398
+ ddog_CharSlice *filename_slice,
399
+ int *line,
400
+ ddog_CharSlice last_ruby_frame_filename,
401
+ int last_ruby_line,
402
+ void *function,
403
+ bool top_of_the_stack,
404
+ bool native_filenames_enabled,
405
+ st_table *native_filenames_cache
406
+ ) {
407
+ if (native_filenames_enabled) {
408
+ const char *native_filename = get_or_compute_native_filename(function, native_filenames_cache);
409
+ if (native_filename && native_filename[0] != '\0' &&
410
+ // Using the ruby_native_filename at the top of the stack has a weird effect on the "top methods" table because
411
+ // e.g. we don't have classnames for methods. This is especially visible in the allocations profile, e.g.
412
+ // what a surprise, you're telling me "libruby.so:new" is the top method always?
413
+ //
414
+ // Until we have a better way of dealing with that, we don't do this replacement for the top frame.
415
+ //
416
+ // Also, dladdr is expected to always return the same pointer to the ruby_native_filename, so that's why we're
417
+ // comparing only pointer values and not the string contents.
418
+ (native_filename != ruby_native_filename || !top_of_the_stack)
419
+ ) {
420
+ *filename_slice = (ddog_CharSlice) {.ptr = native_filename, .len = strlen(native_filename)};
421
+ // Explicitly set the line to 0 as it has no meaning on a native library (e.g. an .so is built of many source files)
422
+ // and anyway often that debug info is not available.
423
+ *line = 0;
424
+ return;
425
+ }
426
+ }
427
+
428
+ *filename_slice = last_ruby_frame_filename;
429
+ *line = last_ruby_line;
430
+ }
431
+
432
+ // `native_filenames_cache` is used to cache native filename lookup results (Map[void *function_pointer, char *filename])
433
+ //
434
+ // Caching this information is safe because there's no API in Ruby to "unrequire" a native extension. Thus, if we see a
435
+ // frame on the **Ruby** stack with a given `function`, then that `function` was registered with the Ruby VM and
436
+ // belongs to a Ruby extension, so a lot of other bad things would happen if it was dlclosed.
437
+ static const char *get_or_compute_native_filename(void *function, st_table *native_filenames_cache) {
438
+ const char *cached_filename = NULL;
439
+ st_lookup(native_filenames_cache, (st_data_t) function, (st_data_t *) &cached_filename);
440
+ if (cached_filename != NULL) return cached_filename;
441
+
442
+ Dl_info info;
443
+ const char *native_filename = NULL;
444
+ #ifdef HAVE_DLADDR1
445
+ struct link_map *extra_info = NULL;
446
+ if (dladdr1(function, &info, (void **) &extra_info, RTLD_DL_LINKMAP) != 0 && extra_info != NULL) {
447
+ native_filename = extra_info->l_name != NULL ? extra_info->l_name : info.dli_fname;
448
+ }
449
+ #elif defined(HAVE_DLADDR)
450
+ if (dladdr(function, &info) != 0) {
451
+ native_filename = info.dli_fname;
452
+ }
453
+ #endif
454
+
455
+ // We explicitly use an empty string here so as to cache lookups that somehow "failed". Otherwise we would keep trying them every time.
456
+ if (native_filename == NULL) native_filename = "";
457
+
458
+ // An st_table is what Ruby uses for its own hashtables. This allows us to get an easy estimate of the size of the cache:
459
+ // `ObjectSpace.memsize_of((0..100000).map { |it| [it, nil] }.to_h)` => 4194400 bytes as of Ruby 3.2 so that seems reasonable?
460
+ // Note: `st_table_size()` is available from Ruby 3.2+ but not before
461
+ if (native_filenames_cache->num_entries >= 100000) {
462
+ st_clear(native_filenames_cache);
463
+ }
464
+
465
+ st_insert(native_filenames_cache, (st_data_t) function, (st_data_t) native_filename);
466
+ return native_filename;
467
+ }
468
+ #else
469
+ static void set_file_info_for_cfunc(
470
+ ddog_CharSlice *filename_slice,
471
+ int *line,
472
+ ddog_CharSlice last_ruby_frame_filename,
473
+ int last_ruby_line,
474
+ DDTRACE_UNUSED void *function,
475
+ DDTRACE_UNUSED bool top_of_the_stack,
476
+ DDTRACE_UNUSED bool native_filenames_enabled,
477
+ DDTRACE_UNUSED st_table *native_filenames_cache
478
+ ) {
479
+ *filename_slice = last_ruby_frame_filename;
480
+ *line = last_ruby_line;
481
+ }
482
+ #endif
483
+
327
484
  // Rails's ActionView likes to dynamically generate method names with suffixed hashes/ids, resulting in methods with
328
485
  // names such as:
329
486
  // * "_app_views_layouts_explore_html_haml__2304485752546535910_211320" (__number_number suffix -- two underscores)
@@ -340,6 +497,7 @@ static void maybe_trim_template_random_ids(ddog_CharSlice *name_slice, ddog_Char
340
497
  if (filename_slice->len < 3 || memcmp(filename_slice->ptr + filename_slice->len - 3, ".rb", 3) == 0) return;
341
498
 
342
499
  if (name_slice->len > 1024) return;
500
+ if (name_slice->len == 0) return;
343
501
 
344
502
  int pos = ((int) name_slice->len) - 1;
345
503
 
@@ -439,30 +597,50 @@ void record_placeholder_stack(
439
597
  );
440
598
  }
441
599
 
600
+ void prepare_sample_thread(VALUE thread, sampling_buffer *buffer) {
601
+ buffer->pending_sample = true;
602
+ buffer->pending_sample_result = ddtrace_rb_profile_frames(thread, 0, buffer->max_frames, buffer->stack_buffer);
603
+ }
604
+
442
605
  uint16_t sampling_buffer_check_max_frames(int max_frames) {
443
606
  if (max_frames < 5) rb_raise(rb_eArgError, "Invalid max_frames: value must be >= 5");
444
607
  if (max_frames > MAX_FRAMES_LIMIT) rb_raise(rb_eArgError, "Invalid max_frames: value must be <= " MAX_FRAMES_LIMIT_AS_STRING);
445
608
  return max_frames;
446
609
  }
447
610
 
448
- sampling_buffer *sampling_buffer_new(uint16_t max_frames, ddog_prof_Location *locations) {
611
+ void sampling_buffer_initialize(sampling_buffer *buffer, uint16_t max_frames, ddog_prof_Location *locations) {
449
612
  sampling_buffer_check_max_frames(max_frames);
450
613
 
451
- // Note: never returns NULL; if out of memory, it calls the Ruby out-of-memory handlers
452
- sampling_buffer* buffer = ruby_xcalloc(1, sizeof(sampling_buffer));
453
-
454
614
  buffer->max_frames = max_frames;
455
615
  buffer->locations = locations;
456
616
  buffer->stack_buffer = ruby_xcalloc(max_frames, sizeof(frame_info));
457
-
458
- return buffer;
617
+ buffer->pending_sample = false;
618
+ buffer->pending_sample_result = 0;
459
619
  }
460
620
 
461
621
  void sampling_buffer_free(sampling_buffer *buffer) {
462
- if (buffer == NULL) rb_raise(rb_eArgError, "sampling_buffer_free called with NULL buffer");
622
+ if (buffer->max_frames == 0 || buffer->locations == NULL || buffer->stack_buffer == NULL) {
623
+ rb_raise(rb_eArgError, "sampling_buffer_free called with invalid buffer");
624
+ }
463
625
 
464
- // buffer->locations are owned by whoever called sampling_buffer_new, not us
465
626
  ruby_xfree(buffer->stack_buffer);
627
+ // Note: buffer->locations are owned by whoever called sampling_buffer_initialize, not by the buffer itself
628
+
629
+ buffer->max_frames = 0;
630
+ buffer->locations = NULL;
631
+ buffer->stack_buffer = NULL;
632
+ buffer->pending_sample = false;
633
+ buffer->pending_sample_result = 0;
634
+ }
466
635
 
467
- ruby_xfree(buffer);
636
+ void sampling_buffer_mark(sampling_buffer *buffer) {
637
+ if (!sampling_buffer_needs_marking(buffer)) {
638
+ rb_bug("sampling_buffer_mark called with no pending sample. `sampling_buffer_needs_marking` should be used before calling mark.");
639
+ }
640
+
641
+ for (int i = 0; i < buffer->pending_sample_result; i++) {
642
+ if (buffer->stack_buffer[i].is_ruby_frame) {
643
+ rb_gc_mark(buffer->stack_buffer[i].as.ruby_frame.iseq);
644
+ }
645
+ }
468
646
  }