datadog 2.17.0 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +90 -1
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +63 -56
  4. data/ext/datadog_profiling_native_extension/collectors_stack.c +263 -76
  5. data/ext/datadog_profiling_native_extension/collectors_stack.h +20 -3
  6. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +62 -12
  7. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
  8. data/ext/datadog_profiling_native_extension/extconf.rb +7 -0
  9. data/ext/datadog_profiling_native_extension/heap_recorder.c +239 -363
  10. data/ext/datadog_profiling_native_extension/heap_recorder.h +4 -6
  11. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +22 -0
  12. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +8 -5
  13. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +38 -26
  14. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +6 -4
  15. data/ext/datadog_profiling_native_extension/ruby_helpers.c +1 -13
  16. data/ext/datadog_profiling_native_extension/ruby_helpers.h +3 -11
  17. data/ext/datadog_profiling_native_extension/stack_recorder.c +154 -57
  18. data/ext/libdatadog_api/extconf.rb +2 -2
  19. data/ext/libdatadog_api/library_config.c +54 -12
  20. data/ext/libdatadog_api/library_config.h +6 -0
  21. data/ext/libdatadog_api/process_discovery.c +2 -7
  22. data/ext/libdatadog_extconf_helpers.rb +1 -1
  23. data/lib/datadog/appsec/api_security/lru_cache.rb +9 -2
  24. data/lib/datadog/appsec/api_security/route_extractor.rb +71 -0
  25. data/lib/datadog/appsec/api_security/sampler.rb +59 -0
  26. data/lib/datadog/appsec/api_security.rb +14 -0
  27. data/lib/datadog/appsec/assets/waf_rules/recommended.json +257 -85
  28. data/lib/datadog/appsec/assets/waf_rules/strict.json +10 -78
  29. data/lib/datadog/appsec/component.rb +30 -54
  30. data/lib/datadog/appsec/configuration/settings.rb +60 -2
  31. data/lib/datadog/appsec/context.rb +6 -6
  32. data/lib/datadog/appsec/contrib/devise/tracking_middleware.rb +1 -1
  33. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +27 -16
  34. data/lib/datadog/appsec/instrumentation/gateway/argument.rb +1 -1
  35. data/lib/datadog/appsec/processor/rule_loader.rb +5 -6
  36. data/lib/datadog/appsec/remote.rb +15 -55
  37. data/lib/datadog/appsec/security_engine/engine.rb +194 -0
  38. data/lib/datadog/appsec/security_engine/runner.rb +10 -11
  39. data/lib/datadog/appsec.rb +4 -7
  40. data/lib/datadog/core/configuration/agent_settings.rb +52 -0
  41. data/lib/datadog/core/configuration/agent_settings_resolver.rb +1 -43
  42. data/lib/datadog/core/configuration/components.rb +2 -4
  43. data/lib/datadog/core/configuration/option.rb +9 -9
  44. data/lib/datadog/core/configuration/settings.rb +42 -10
  45. data/lib/datadog/core/configuration/stable_config.rb +1 -2
  46. data/lib/datadog/core/crashtracking/tag_builder.rb +4 -22
  47. data/lib/datadog/core/process_discovery/tracer_memfd.rb +15 -0
  48. data/lib/datadog/core/process_discovery.rb +5 -1
  49. data/lib/datadog/core/remote/configuration/repository.rb +12 -0
  50. data/lib/datadog/core/tag_builder.rb +56 -0
  51. data/lib/datadog/core/telemetry/component.rb +8 -4
  52. data/lib/datadog/core/telemetry/event/app_client_configuration_change.rb +1 -0
  53. data/lib/datadog/core/telemetry/event/app_started.rb +148 -40
  54. data/lib/datadog/core/telemetry/logger.rb +5 -4
  55. data/lib/datadog/core/telemetry/logging.rb +11 -5
  56. data/lib/datadog/core/transport/http/adapters/net.rb +17 -2
  57. data/lib/datadog/core/transport/http/builder.rb +2 -2
  58. data/lib/datadog/core/transport/http/env.rb +8 -0
  59. data/lib/datadog/core/utils.rb +7 -0
  60. data/lib/datadog/di/instrumenter.rb +48 -5
  61. data/lib/datadog/di/probe_notification_builder.rb +37 -42
  62. data/lib/datadog/di/probe_notifier_worker.rb +9 -1
  63. data/lib/datadog/di/serializer.rb +10 -2
  64. data/lib/datadog/di/transport/http/input.rb +10 -0
  65. data/lib/datadog/di/transport/input.rb +10 -2
  66. data/lib/datadog/di.rb +0 -6
  67. data/lib/datadog/kit/appsec/events/v2.rb +195 -0
  68. data/lib/datadog/profiling/collectors/code_provenance.rb +17 -8
  69. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -0
  70. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
  71. data/lib/datadog/profiling/collectors/info.rb +41 -0
  72. data/lib/datadog/profiling/collectors/thread_context.rb +16 -1
  73. data/lib/datadog/profiling/component.rb +8 -9
  74. data/lib/datadog/profiling/exporter.rb +9 -3
  75. data/lib/datadog/profiling/ext.rb +0 -12
  76. data/lib/datadog/profiling/http_transport.rb +2 -2
  77. data/lib/datadog/profiling/profiler.rb +2 -0
  78. data/lib/datadog/profiling/scheduler.rb +2 -1
  79. data/lib/datadog/profiling/sequence_tracker.rb +44 -0
  80. data/lib/datadog/profiling/stack_recorder.rb +5 -5
  81. data/lib/datadog/profiling/tag_builder.rb +7 -37
  82. data/lib/datadog/profiling/tasks/setup.rb +2 -0
  83. data/lib/datadog/profiling.rb +1 -0
  84. data/lib/datadog/single_step_instrument.rb +9 -0
  85. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +15 -0
  86. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +19 -12
  87. data/lib/datadog/tracing/contrib/action_pack/ext.rb +2 -0
  88. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +7 -1
  89. data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +13 -0
  90. data/lib/datadog/tracing/contrib/lograge/patcher.rb +4 -2
  91. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +16 -6
  92. data/lib/datadog/tracing/contrib/rails/patcher.rb +4 -1
  93. data/lib/datadog/tracing/contrib/rails/runner.rb +61 -40
  94. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  95. data/lib/datadog/tracing/contrib/sidekiq/server_tracer.rb +5 -2
  96. data/lib/datadog/tracing/diagnostics/environment_logger.rb +3 -1
  97. data/lib/datadog/tracing/span_event.rb +1 -1
  98. data/lib/datadog/tracing/span_operation.rb +22 -0
  99. data/lib/datadog/tracing/sync_writer.rb +1 -1
  100. data/lib/datadog/tracing/trace_operation.rb +12 -4
  101. data/lib/datadog/tracing/tracer.rb +6 -2
  102. data/lib/datadog/version.rb +1 -1
  103. data/lib/datadog.rb +7 -0
  104. metadata +14 -10
  105. data/lib/datadog/appsec/assets/waf_rules/processors.json +0 -321
  106. data/lib/datadog/appsec/assets/waf_rules/scanners.json +0 -1023
  107. data/lib/datadog/appsec/processor/rule_merger.rb +0 -171
  108. data/lib/datadog/appsec/processor.rb +0 -107
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 28053ce1bca4cce2032cb068c38e99a56524e6ccecec3068c4d26d1e750f9790
4
- data.tar.gz: 29b6b57640638184370c5176e4da94e74c9770bae8afd7884d1c2baea782350d
3
+ metadata.gz: 9c533682b9a96989e1ad8d4eb96339af4bffdb5fd9cbfe447bd0a034bc387c03
4
+ data.tar.gz: 5d7808aa6b7fd5f9c68e453fef4fff8c5345c62fb26a7f7839d630ec0da2fe9f
5
5
  SHA512:
6
- metadata.gz: caf719b7c9a2e5ee3f47ad0c3e02670b90b1f76bfa5de56c10f69169c97b827f4b9e81df822e548d1390f152f15e756c54c200e9f35904138e7c672af99ef30b
7
- data.tar.gz: eda75252942b8c7d278bab14da78c7752b17b136a3f0bd9ed145512a64b3fb8acb110a8cbbfb280b2bcf7c558409ce83b5d311fce5f019ff34516ceb3883255c
6
+ metadata.gz: 6b35d7ef1ce2f9e565727037f9e5b329d019202ff20c01a15bb5096cf14253b3ab9812d4fc84fc23e265ec2e7e0096e34acc6a845ad510b215ef7df4093107e7
7
+ data.tar.gz: 72fff247b51de201e4373638627fa419a3a679979f880dfa44c73d1c9a7c062991746045d41cbe5fa47d824b025fb9cad2f34c0139e034df1b85ddb4a07bccd9
data/CHANGELOG.md CHANGED
@@ -2,6 +2,62 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [2.19.0] - 2025-07-24
6
+
7
+ ### Added
8
+
9
+ * AppSec: Added Business Logic Events SDK v2. ([#4802][])
10
+ * Tracing: Add `record_exception` API to capture and attach error information to spans via span events. ([#4771][])
11
+ * Tracing: Add `:cache_store` option to ActiveSupport integration to allow tracing only specified cache backends. ([#4693][])
12
+ * SSI: Rework SSI from the ground up. ([#4366][])
13
+
14
+ ### Changed
15
+
16
+ * Profiling: Switch profiler stack truncation strategy and improve sampling performance ([#4819][])
17
+ * Profiling: Report GC tuning environment variables with profiles ([#4813][])
18
+ * Profiling: Tag profiles with sequence number ([#4794][])
19
+ * Profiling: Enable sample from inside signal handler by default on modern Rubies ([#4786][], [#4785][])
20
+
21
+ ### Fixed
22
+
23
+ * Core: Fix emitting duplicate warnings on agent configuration mismatch ([#4814][])
24
+ * AppSec: Fix an error in AppSec route extractor for not-found routes in Rails 8 ([#4793][])
25
+ * Profiling: Add workaround for Ruby VM bug ([#4787][])
26
+ * Profiling: Fix checking for dladdr in profiling ([#4783][])
27
+ * Profiling: Fix potential profiler compilation issue. ([#4783][])
28
+ * Tracing: The mysql integration now only sets the `db.name` tag if there is a valid value ([#4776][])
29
+ * Tracing: The Rails Runner instrumentation should now create Rails Runner spans. ([#4681][])
30
+ * Tracing: Fix sampling rules and sample rate reporting in environment logger. ([#4772][])
31
+
32
+ ### Removed
33
+
34
+ ## [2.18.0] - 2025-07-03
35
+
36
+ ### Added
37
+
38
+ * AppSec: Add API Security with new sampling algorithm to the Rack, Rails, Sinatra and Grape frameworks ([#4699][])
39
+ * Core: Add support for tracer configuration through Fleet Automation ([#4651][])
40
+ * Tracing: Add Rails view render times and database query times to the spans, when available ([#4697][])
41
+ * Core: Add Service Discovery capability, enabling the tracer to reliably determine which processes are instrumented
42
+
43
+ ### Changed
44
+
45
+ * AppSec: Enable API Security feature by default ([#4756][])
46
+ * AppSec: Improve overall threat detection ([#4757][])
47
+ * AppSec: Deprecate `config.appsec.ip_passlist`, `config.appsec.ip_denylist`, and `config.appsec.user_id_denylist` ([#4757][])
48
+ * Profiling: Graduate Heap Profiling from alpha to preview ([#4401][], [#4460][])
49
+ * Dynamic Instrumentation: Add instance variable capture to method and line probes ([#4668][])
50
+ * Profiling: Add capturing of native filenames in backtraces ([#4745][])
51
+ * Profiling: Clean up profiler logging ([#4728][])
52
+ * Tracing: Add support for `DD_RUNTIME_METRICS_RUNTIME_ID_ENABLED` to tag runtime metrics with the current runtime ID, allowing correlation across processes ([#4718][])
53
+ * Tracing: Improve performance of `http.route` tag computation for Rails ([#4688][])
54
+
55
+ ### Fixed
56
+
57
+ * AppSec: Fix authenticated users tracking in anonymization mode for Devise ([#4731][])
58
+ * Dynamic Instrumentation: Fix source code display in dynamic instrumentation/live debugger ([#4743][])
59
+ * Profiling: Fix profiler stopping due to bug in heap profiling serialization ([#4721][])
60
+
5
61
  ## [2.17.0] - 2025-06-02
6
62
 
7
63
  ### Added
@@ -3241,7 +3297,9 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
3241
3297
  Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
3242
3298
 
3243
3299
 
3244
- [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.17.0...master
3300
+ [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.19.0...master
3301
+ [2.19.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.18.0...v2.19.0
3302
+ [2.18.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.17.0...v2.18.0
3245
3303
  [2.17.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.16.0...v2.17.0
3246
3304
  [2.16.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.15.0...v2.16.0
3247
3305
  [2.15.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.14.0...v2.15.0
@@ -4742,9 +4800,11 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4742
4800
  [#4353]: https://github.com/DataDog/dd-trace-rb/issues/4353
4743
4801
  [#4360]: https://github.com/DataDog/dd-trace-rb/issues/4360
4744
4802
  [#4363]: https://github.com/DataDog/dd-trace-rb/issues/4363
4803
+ [#4366]: https://github.com/DataDog/dd-trace-rb/issues/4366
4745
4804
  [#4391]: https://github.com/DataDog/dd-trace-rb/issues/4391
4746
4805
  [#4398]: https://github.com/DataDog/dd-trace-rb/issues/4398
4747
4806
  [#4399]: https://github.com/DataDog/dd-trace-rb/issues/4399
4807
+ [#4401]: https://github.com/DataDog/dd-trace-rb/issues/4401
4748
4808
  [#4403]: https://github.com/DataDog/dd-trace-rb/issues/4403
4749
4809
  [#4406]: https://github.com/DataDog/dd-trace-rb/issues/4406
4750
4810
  [#4411]: https://github.com/DataDog/dd-trace-rb/issues/4411
@@ -4755,6 +4815,7 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4755
4815
  [#4433]: https://github.com/DataDog/dd-trace-rb/issues/4433
4756
4816
  [#4437]: https://github.com/DataDog/dd-trace-rb/issues/4437
4757
4817
  [#4455]: https://github.com/DataDog/dd-trace-rb/issues/4455
4818
+ [#4460]: https://github.com/DataDog/dd-trace-rb/issues/4460
4758
4819
  [#4473]: https://github.com/DataDog/dd-trace-rb/issues/4473
4759
4820
  [#4493]: https://github.com/DataDog/dd-trace-rb/issues/4493
4760
4821
  [#4497]: https://github.com/DataDog/dd-trace-rb/issues/4497
@@ -4787,14 +4848,42 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4787
4848
  [#4616]: https://github.com/DataDog/dd-trace-rb/issues/4616
4788
4849
  [#4625]: https://github.com/DataDog/dd-trace-rb/issues/4625
4789
4850
  [#4644]: https://github.com/DataDog/dd-trace-rb/issues/4644
4851
+ [#4651]: https://github.com/DataDog/dd-trace-rb/issues/4651
4790
4852
  [#4653]: https://github.com/DataDog/dd-trace-rb/issues/4653
4791
4853
  [#4656]: https://github.com/DataDog/dd-trace-rb/issues/4656
4792
4854
  [#4665]: https://github.com/DataDog/dd-trace-rb/issues/4665
4855
+ [#4668]: https://github.com/DataDog/dd-trace-rb/issues/4668
4793
4856
  [#4669]: https://github.com/DataDog/dd-trace-rb/issues/4669
4794
4857
  [#4672]: https://github.com/DataDog/dd-trace-rb/issues/4672
4795
4858
  [#4673]: https://github.com/DataDog/dd-trace-rb/issues/4673
4796
4859
  [#4678]: https://github.com/DataDog/dd-trace-rb/issues/4678
4797
4860
  [#4679]: https://github.com/DataDog/dd-trace-rb/issues/4679
4861
+ [#4681]: https://github.com/DataDog/dd-trace-rb/issues/4681
4862
+ [#4688]: https://github.com/DataDog/dd-trace-rb/issues/4688
4863
+ [#4693]: https://github.com/DataDog/dd-trace-rb/issues/4693
4864
+ [#4697]: https://github.com/DataDog/dd-trace-rb/issues/4697
4865
+ [#4699]: https://github.com/DataDog/dd-trace-rb/issues/4699
4866
+ [#4718]: https://github.com/DataDog/dd-trace-rb/issues/4718
4867
+ [#4721]: https://github.com/DataDog/dd-trace-rb/issues/4721
4868
+ [#4728]: https://github.com/DataDog/dd-trace-rb/issues/4728
4869
+ [#4731]: https://github.com/DataDog/dd-trace-rb/issues/4731
4870
+ [#4743]: https://github.com/DataDog/dd-trace-rb/issues/4743
4871
+ [#4745]: https://github.com/DataDog/dd-trace-rb/issues/4745
4872
+ [#4756]: https://github.com/DataDog/dd-trace-rb/issues/4756
4873
+ [#4757]: https://github.com/DataDog/dd-trace-rb/issues/4757
4874
+ [#4771]: https://github.com/DataDog/dd-trace-rb/issues/4771
4875
+ [#4772]: https://github.com/DataDog/dd-trace-rb/issues/4772
4876
+ [#4776]: https://github.com/DataDog/dd-trace-rb/issues/4776
4877
+ [#4783]: https://github.com/DataDog/dd-trace-rb/issues/4783
4878
+ [#4785]: https://github.com/DataDog/dd-trace-rb/issues/4785
4879
+ [#4786]: https://github.com/DataDog/dd-trace-rb/issues/4786
4880
+ [#4787]: https://github.com/DataDog/dd-trace-rb/issues/4787
4881
+ [#4793]: https://github.com/DataDog/dd-trace-rb/issues/4793
4882
+ [#4794]: https://github.com/DataDog/dd-trace-rb/issues/4794
4883
+ [#4802]: https://github.com/DataDog/dd-trace-rb/issues/4802
4884
+ [#4813]: https://github.com/DataDog/dd-trace-rb/issues/4813
4885
+ [#4814]: https://github.com/DataDog/dd-trace-rb/issues/4814
4886
+ [#4819]: https://github.com/DataDog/dd-trace-rb/issues/4819
4798
4887
  [@AdrianLC]: https://github.com/AdrianLC
4799
4888
  [@Azure7111]: https://github.com/Azure7111
4800
4889
  [@BabyGroot]: https://github.com/BabyGroot
@@ -102,6 +102,7 @@ typedef struct {
102
102
  bool allocation_counting_enabled;
103
103
  bool gvl_profiling_enabled;
104
104
  bool skip_idle_samples_for_testing;
105
+ bool sighandler_sampling_enabled;
105
106
  VALUE self_instance;
106
107
  VALUE thread_context_collector_instance;
107
108
  VALUE idle_sampling_helper_instance;
@@ -122,7 +123,11 @@ typedef struct {
122
123
  // Others
123
124
 
124
125
  // Used to detect/avoid nested sampling, e.g. when on_newobj_event gets triggered by a memory allocation
125
- // that happens during another sample.
126
+ // that happens during another sample, or when the signal handler gets triggered while we're already in the middle of
127
+ // sampling.
128
+ //
129
+ // @ivoanjo: Right now we always sample inside `safely_call`; if that ever changes, this flag may need to become
130
+ // volatile/atomic/have some barriers to ensure it's visible during e.g. signal handlers.
126
131
  bool during_sample;
127
132
 
128
133
  #ifndef NO_GVL_INSTRUMENTATION
@@ -138,23 +143,15 @@ typedef struct {
138
143
  unsigned int trigger_simulated_signal_delivery_attempts;
139
144
  // How many times we actually simulated signal delivery
140
145
  unsigned int simulated_signal_delivery;
141
- // How many times we actually called rb_postponed_job_register_one from a signal handler
146
+ // How many times we actually called rb_postponed_job_register_one from the signal handler
142
147
  unsigned int signal_handler_enqueued_sample;
148
+ // How many times we prepared a sample (sampled directly) from the signal handler
149
+ unsigned int signal_handler_prepared_sample;
143
150
  // How many times the signal handler was called from the wrong thread
144
151
  unsigned int signal_handler_wrong_thread;
145
152
  // How many times we actually tried to interrupt a thread for sampling
146
153
  unsigned int interrupt_thread_attempts;
147
154
 
148
- // # Stats for the results of calling rb_postponed_job_register_one
149
- // The same function was already waiting to be executed
150
- unsigned int postponed_job_skipped_already_existed;
151
- // The function was added to the queue successfully
152
- unsigned int postponed_job_success;
153
- // The queue was full
154
- unsigned int postponed_job_full;
155
- // The function returned an unknown result code
156
- unsigned int postponed_job_unknown_result;
157
-
158
155
  // # CPU/Walltime sampling stats
159
156
  // How many times we actually CPU/wall sampled
160
157
  unsigned int cpu_sampled;
@@ -238,6 +235,8 @@ static void after_gvl_running_from_postponed_job(DDTRACE_UNUSED void *_unused);
238
235
  #endif
239
236
  static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance);
240
237
  static VALUE _native_gvl_profiling_hook_active(DDTRACE_UNUSED VALUE self, VALUE instance);
238
+ static inline void during_sample_enter(cpu_and_wall_time_worker_state* state);
239
+ static inline void during_sample_exit(cpu_and_wall_time_worker_state* state);
241
240
 
242
241
  // We're using `on_newobj_event` function with `rb_add_event_hook2`, which requires in its public signature a function
243
242
  // with signature `rb_event_hook_func_t` which doesn't match `on_newobj_event`.
@@ -362,6 +361,7 @@ static VALUE _native_new(VALUE klass) {
362
361
  state->allocation_counting_enabled = false;
363
362
  state->gvl_profiling_enabled = false;
364
363
  state->skip_idle_samples_for_testing = false;
364
+ state->sighandler_sampling_enabled = false;
365
365
  state->thread_context_collector_instance = Qnil;
366
366
  state->idle_sampling_helper_instance = Qnil;
367
367
  state->owner_thread = Qnil;
@@ -372,7 +372,7 @@ static VALUE _native_new(VALUE klass) {
372
372
  state->failure_exception = Qnil;
373
373
  state->stop_thread = Qnil;
374
374
 
375
- state->during_sample = false;
375
+ during_sample_exit(state);
376
376
 
377
377
  #ifndef NO_GVL_INSTRUMENTATION
378
378
  state->gvl_profiling_hook = NULL;
@@ -404,6 +404,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
404
404
  VALUE allocation_counting_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("allocation_counting_enabled")));
405
405
  VALUE gvl_profiling_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("gvl_profiling_enabled")));
406
406
  VALUE skip_idle_samples_for_testing = rb_hash_fetch(options, ID2SYM(rb_intern("skip_idle_samples_for_testing")));
407
+ VALUE sighandler_sampling_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("sighandler_sampling_enabled")));
407
408
 
408
409
  ENFORCE_BOOLEAN(gc_profiling_enabled);
409
410
  ENFORCE_BOOLEAN(no_signals_workaround_enabled);
@@ -413,6 +414,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
413
414
  ENFORCE_BOOLEAN(allocation_counting_enabled);
414
415
  ENFORCE_BOOLEAN(gvl_profiling_enabled);
415
416
  ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
417
+ ENFORCE_BOOLEAN(sighandler_sampling_enabled)
416
418
 
417
419
  cpu_and_wall_time_worker_state *state;
418
420
  TypedData_Get_Struct(self_instance, cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
@@ -424,6 +426,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
424
426
  state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
425
427
  state->gvl_profiling_enabled = (gvl_profiling_enabled == Qtrue);
426
428
  state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
429
+ state->sighandler_sampling_enabled = (sighandler_sampling_enabled == Qtrue);
427
430
 
428
431
  double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
429
432
  if (!state->allocation_profiling_enabled) {
@@ -589,25 +592,30 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
589
592
  return;
590
593
  }
591
594
 
592
- // We implicitly assume there can be no concurrent nor nested calls to handle_sampling_signal because
593
- // a) we get triggered using SIGPROF, and the docs state a second SIGPROF will not interrupt an existing one
595
+ // We assume there can be no concurrent nor nested calls to handle_sampling_signal because
596
+ // a) we get triggered using SIGPROF, and the docs state a second SIGPROF will not interrupt an existing one (see sigaction docs on sa_mask)
594
597
  // b) we validate we are in the thread that has the global VM lock; if a different thread gets a signal, it will return early
595
598
  // because it will not have the global VM lock
596
599
 
597
- // Note: rb_postponed_job_register_one ensures that if there's a previous sample_from_postponed_job queued for execution
598
- // then we will not queue a second one. It does this by doing a linear scan on the existing jobs; in the future we
599
- // may want to implement that check ourselves.
600
-
601
600
  state->stats.signal_handler_enqueued_sample++;
602
601
 
603
- // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
604
- // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
602
+ bool sample_from_signal_handler =
603
+ state->sighandler_sampling_enabled &&
604
+ // Don't sample if we're already in the middle of processing a sample
605
+ !state->during_sample;
606
+
607
+ if (sample_from_signal_handler) {
608
+ // Buffer current stack trace. Note that this will not actually record the sample, for that we still need to wait
609
+ // until the postponed job below gets run.
610
+ bool prepared = thread_context_collector_prepare_sample_inside_signal_handler(state->thread_context_collector_instance);
611
+
612
+ if (prepared) state->stats.signal_handler_prepared_sample++;
613
+ }
614
+
605
615
  #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
606
616
  rb_postponed_job_trigger(sample_from_postponed_job_handle);
607
- state->stats.postponed_job_success++; // Always succeeds
608
617
  #else
609
-
610
- // This is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
618
+ // Passing in `gc_finalize_deferred_workaround` is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
611
619
  //
612
620
  // TL;DR the `rb_postponed_job_register_one` API is not atomic (which is why it got replaced by `rb_postponed_job_trigger`)
613
621
  // and in rare cases can cause VM crashes.
@@ -631,20 +639,7 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
631
639
  //
632
640
  // Thus, our workaround is simple: we pass in objspace as our argument, just in case the clobbering happens.
633
641
  // In the happy path, we never use this argument so it makes no difference. In the buggy path, we avoid crashing the VM.
634
- int result = rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
635
-
636
- // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
637
- // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
638
- switch (result) {
639
- case 0:
640
- state->stats.postponed_job_full++; break;
641
- case 1:
642
- state->stats.postponed_job_success++; break;
643
- case 2:
644
- state->stats.postponed_job_skipped_already_existed++; break;
645
- default:
646
- state->stats.postponed_job_unknown_result++;
647
- }
642
+ rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
648
643
  #endif
649
644
  }
650
645
 
@@ -714,6 +709,8 @@ static void interrupt_sampling_trigger_loop(void *state_ptr) {
714
709
  atomic_store(&state->should_run, false);
715
710
  }
716
711
 
712
+ // Note: If we ever want to get rid of the postponed job execution, remember not to clobber Ruby exceptions, as
713
+ // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
717
714
  static void sample_from_postponed_job(DDTRACE_UNUSED void *_unused) {
718
715
  cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
719
716
 
@@ -726,12 +723,12 @@ static void sample_from_postponed_job(DDTRACE_UNUSED void *_unused) {
726
723
  return; // We're not on the main Ractor; we currently don't support profiling non-main Ractors
727
724
  }
728
725
 
729
- state->during_sample = true;
726
+ during_sample_enter(state);
730
727
 
731
728
  // Rescue against any exceptions that happen during sampling
732
729
  safely_call(rescued_sample_from_postponed_job, state->self_instance, state->self_instance);
733
730
 
734
- state->during_sample = false;
731
+ during_sample_exit(state);
735
732
  }
736
733
 
737
734
  static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
@@ -937,11 +934,11 @@ static void after_gc_from_postponed_job(DDTRACE_UNUSED void *_unused) {
937
934
  return; // We're not on the main Ractor; we currently don't support profiling non-main Ractors
938
935
  }
939
936
 
940
- state->during_sample = true;
937
+ during_sample_enter(state);
941
938
 
942
939
  safely_call(thread_context_collector_sample_after_gc, state->thread_context_collector_instance, state->self_instance);
943
940
 
944
- state->during_sample = false;
941
+ during_sample_exit(state);
945
942
  }
946
943
 
947
944
  // Equivalent to Ruby begin/rescue call, where we call a C function and jump to the exception handler if an
@@ -1019,11 +1016,8 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
1019
1016
  ID2SYM(rb_intern("trigger_simulated_signal_delivery_attempts")), /* => */ UINT2NUM(state->stats.trigger_simulated_signal_delivery_attempts),
1020
1017
  ID2SYM(rb_intern("simulated_signal_delivery")), /* => */ UINT2NUM(state->stats.simulated_signal_delivery),
1021
1018
  ID2SYM(rb_intern("signal_handler_enqueued_sample")), /* => */ UINT2NUM(state->stats.signal_handler_enqueued_sample),
1019
+ ID2SYM(rb_intern("signal_handler_prepared_sample")), /* => */ UINT2NUM(state->stats.signal_handler_prepared_sample),
1022
1020
  ID2SYM(rb_intern("signal_handler_wrong_thread")), /* => */ UINT2NUM(state->stats.signal_handler_wrong_thread),
1023
- ID2SYM(rb_intern("postponed_job_skipped_already_existed")), /* => */ UINT2NUM(state->stats.postponed_job_skipped_already_existed),
1024
- ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
1025
- ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
1026
- ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
1027
1021
  ID2SYM(rb_intern("interrupt_thread_attempts")), /* => */ UINT2NUM(state->stats.interrupt_thread_attempts),
1028
1022
 
1029
1023
  // CPU Stats
@@ -1073,8 +1067,7 @@ void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
1073
1067
 
1074
1068
  state->stats.simulated_signal_delivery++;
1075
1069
 
1076
- // @ivoanjo: We could instead directly call sample_from_postponed_job, but I chose to go through the signal handler
1077
- // so that the simulated case is as close to the original one as well (including any metrics increases, etc).
1070
+ // `handle_sampling_signal` does a few things extra on top of `sample_from_postponed_job` so that's why we don't shortcut here
1078
1071
  handle_sampling_signal(0, NULL, NULL);
1079
1072
 
1080
1073
  return NULL; // Unused
@@ -1207,11 +1200,7 @@ static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *u
1207
1200
  &state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
1208
1201
  );
1209
1202
 
1210
- // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
1211
- // invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
1212
- // to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
1213
- // defined as not being able to allocate) sets this.
1214
- state->during_sample = true;
1203
+ during_sample_enter(state);
1215
1204
 
1216
1205
  // Rescue against any exceptions that happen during sampling
1217
1206
  safely_call(rescued_sample_allocation, Qnil, state->self_instance);
@@ -1232,7 +1221,7 @@ static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *u
1232
1221
 
1233
1222
  state->stats.allocation_sampled++;
1234
1223
 
1235
- state->during_sample = false;
1224
+ during_sample_exit(state);
1236
1225
  }
1237
1226
 
1238
1227
  static void disable_tracepoints(cpu_and_wall_time_worker_state *state) {
@@ -1373,12 +1362,12 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {
1373
1362
  // This can potentially happen if the CpuAndWallTimeWorker was stopped while the postponed job was waiting to be executed; nothing to do
1374
1363
  if (state == NULL) return;
1375
1364
 
1376
- state->during_sample = true;
1365
+ during_sample_enter(state);
1377
1366
 
1378
1367
  // Rescue against any exceptions that happen during sampling
1379
1368
  safely_call(rescued_after_gvl_running_from_postponed_job, state->self_instance, state->self_instance);
1380
1369
 
1381
- state->during_sample = false;
1370
+ during_sample_exit(state);
1382
1371
  }
1383
1372
 
1384
1373
  static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance) {
@@ -1414,3 +1403,21 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {
1414
1403
  return Qfalse;
1415
1404
  }
1416
1405
  #endif
1406
+
1407
+ static inline void during_sample_enter(cpu_and_wall_time_worker_state* state) {
1408
+ // Tell the compiler it's not allowed to reorder the `during_sample` flag with anything that happens after.
1409
+ //
1410
+ // In a few cases, we may be checking this flag from a signal handler, so we need to make sure the compiler didn't
1411
+ // get clever and reorder things in such a way that makes us miss the flag update.
1412
+ //
1413
+ // See https://github.com/ruby/ruby/pull/11036 for a similar change made to the Ruby VM with more context.
1414
+ state->during_sample = true;
1415
+ atomic_signal_fence(memory_order_seq_cst);
1416
+ }
1417
+
1418
+ static inline void during_sample_exit(cpu_and_wall_time_worker_state* state) {
1419
+ // See `during_sample_enter` for more context; in this case we set the fence before to make sure anything that
1420
+ // happens before the fence is not reordered with the flag update.
1421
+ atomic_signal_fence(memory_order_seq_cst);
1422
+ state->during_sample = false;
1423
+ }