datadog 2.17.0 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +90 -1
- data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +63 -56
- data/ext/datadog_profiling_native_extension/collectors_stack.c +263 -76
- data/ext/datadog_profiling_native_extension/collectors_stack.h +20 -3
- data/ext/datadog_profiling_native_extension/collectors_thread_context.c +62 -12
- data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
- data/ext/datadog_profiling_native_extension/extconf.rb +7 -0
- data/ext/datadog_profiling_native_extension/heap_recorder.c +239 -363
- data/ext/datadog_profiling_native_extension/heap_recorder.h +4 -6
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +22 -0
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +8 -5
- data/ext/datadog_profiling_native_extension/private_vm_api_access.c +38 -26
- data/ext/datadog_profiling_native_extension/private_vm_api_access.h +6 -4
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +1 -13
- data/ext/datadog_profiling_native_extension/ruby_helpers.h +3 -11
- data/ext/datadog_profiling_native_extension/stack_recorder.c +154 -57
- data/ext/libdatadog_api/extconf.rb +2 -2
- data/ext/libdatadog_api/library_config.c +54 -12
- data/ext/libdatadog_api/library_config.h +6 -0
- data/ext/libdatadog_api/process_discovery.c +2 -7
- data/ext/libdatadog_extconf_helpers.rb +1 -1
- data/lib/datadog/appsec/api_security/lru_cache.rb +9 -2
- data/lib/datadog/appsec/api_security/route_extractor.rb +71 -0
- data/lib/datadog/appsec/api_security/sampler.rb +59 -0
- data/lib/datadog/appsec/api_security.rb +14 -0
- data/lib/datadog/appsec/assets/waf_rules/recommended.json +257 -85
- data/lib/datadog/appsec/assets/waf_rules/strict.json +10 -78
- data/lib/datadog/appsec/component.rb +30 -54
- data/lib/datadog/appsec/configuration/settings.rb +60 -2
- data/lib/datadog/appsec/context.rb +6 -6
- data/lib/datadog/appsec/contrib/devise/tracking_middleware.rb +1 -1
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +27 -16
- data/lib/datadog/appsec/instrumentation/gateway/argument.rb +1 -1
- data/lib/datadog/appsec/processor/rule_loader.rb +5 -6
- data/lib/datadog/appsec/remote.rb +15 -55
- data/lib/datadog/appsec/security_engine/engine.rb +194 -0
- data/lib/datadog/appsec/security_engine/runner.rb +10 -11
- data/lib/datadog/appsec.rb +4 -7
- data/lib/datadog/core/configuration/agent_settings.rb +52 -0
- data/lib/datadog/core/configuration/agent_settings_resolver.rb +1 -43
- data/lib/datadog/core/configuration/components.rb +2 -4
- data/lib/datadog/core/configuration/option.rb +9 -9
- data/lib/datadog/core/configuration/settings.rb +42 -10
- data/lib/datadog/core/configuration/stable_config.rb +1 -2
- data/lib/datadog/core/crashtracking/tag_builder.rb +4 -22
- data/lib/datadog/core/process_discovery/tracer_memfd.rb +15 -0
- data/lib/datadog/core/process_discovery.rb +5 -1
- data/lib/datadog/core/remote/configuration/repository.rb +12 -0
- data/lib/datadog/core/tag_builder.rb +56 -0
- data/lib/datadog/core/telemetry/component.rb +8 -4
- data/lib/datadog/core/telemetry/event/app_client_configuration_change.rb +1 -0
- data/lib/datadog/core/telemetry/event/app_started.rb +148 -40
- data/lib/datadog/core/telemetry/logger.rb +5 -4
- data/lib/datadog/core/telemetry/logging.rb +11 -5
- data/lib/datadog/core/transport/http/adapters/net.rb +17 -2
- data/lib/datadog/core/transport/http/builder.rb +2 -2
- data/lib/datadog/core/transport/http/env.rb +8 -0
- data/lib/datadog/core/utils.rb +7 -0
- data/lib/datadog/di/instrumenter.rb +48 -5
- data/lib/datadog/di/probe_notification_builder.rb +37 -42
- data/lib/datadog/di/probe_notifier_worker.rb +9 -1
- data/lib/datadog/di/serializer.rb +10 -2
- data/lib/datadog/di/transport/http/input.rb +10 -0
- data/lib/datadog/di/transport/input.rb +10 -2
- data/lib/datadog/di.rb +0 -6
- data/lib/datadog/kit/appsec/events/v2.rb +195 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +17 -8
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -0
- data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
- data/lib/datadog/profiling/collectors/info.rb +41 -0
- data/lib/datadog/profiling/collectors/thread_context.rb +16 -1
- data/lib/datadog/profiling/component.rb +8 -9
- data/lib/datadog/profiling/exporter.rb +9 -3
- data/lib/datadog/profiling/ext.rb +0 -12
- data/lib/datadog/profiling/http_transport.rb +2 -2
- data/lib/datadog/profiling/profiler.rb +2 -0
- data/lib/datadog/profiling/scheduler.rb +2 -1
- data/lib/datadog/profiling/sequence_tracker.rb +44 -0
- data/lib/datadog/profiling/stack_recorder.rb +5 -5
- data/lib/datadog/profiling/tag_builder.rb +7 -37
- data/lib/datadog/profiling/tasks/setup.rb +2 -0
- data/lib/datadog/profiling.rb +1 -0
- data/lib/datadog/single_step_instrument.rb +9 -0
- data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +15 -0
- data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +19 -12
- data/lib/datadog/tracing/contrib/action_pack/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +7 -1
- data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +13 -0
- data/lib/datadog/tracing/contrib/lograge/patcher.rb +4 -2
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +16 -6
- data/lib/datadog/tracing/contrib/rails/patcher.rb +4 -1
- data/lib/datadog/tracing/contrib/rails/runner.rb +61 -40
- data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sidekiq/server_tracer.rb +5 -2
- data/lib/datadog/tracing/diagnostics/environment_logger.rb +3 -1
- data/lib/datadog/tracing/span_event.rb +1 -1
- data/lib/datadog/tracing/span_operation.rb +22 -0
- data/lib/datadog/tracing/sync_writer.rb +1 -1
- data/lib/datadog/tracing/trace_operation.rb +12 -4
- data/lib/datadog/tracing/tracer.rb +6 -2
- data/lib/datadog/version.rb +1 -1
- data/lib/datadog.rb +7 -0
- metadata +14 -10
- data/lib/datadog/appsec/assets/waf_rules/processors.json +0 -321
- data/lib/datadog/appsec/assets/waf_rules/scanners.json +0 -1023
- data/lib/datadog/appsec/processor/rule_merger.rb +0 -171
- data/lib/datadog/appsec/processor.rb +0 -107
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c533682b9a96989e1ad8d4eb96339af4bffdb5fd9cbfe447bd0a034bc387c03
|
4
|
+
data.tar.gz: 5d7808aa6b7fd5f9c68e453fef4fff8c5345c62fb26a7f7839d630ec0da2fe9f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b35d7ef1ce2f9e565727037f9e5b329d019202ff20c01a15bb5096cf14253b3ab9812d4fc84fc23e265ec2e7e0096e34acc6a845ad510b215ef7df4093107e7
|
7
|
+
data.tar.gz: 72fff247b51de201e4373638627fa419a3a679979f880dfa44c73d1c9a7c062991746045d41cbe5fa47d824b025fb9cad2f34c0139e034df1b85ddb4a07bccd9
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,62 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
+
## [2.19.0] - 2025-07-24
|
6
|
+
|
7
|
+
### Added
|
8
|
+
|
9
|
+
* AppSec: Added Business Logic Events SDK v2. ([#4802][])
|
10
|
+
* Tracing: Add `record_exception` API to capture and attach error information to spans via span events. ([#4771][])
|
11
|
+
* Tracing: Add `:cache_store` option to ActiveSupport integration to allow tracing only specified cache backends. ([#4693][])
|
12
|
+
* SSI: Rework SSI from the ground up. ([#4366][])
|
13
|
+
|
14
|
+
### Changed
|
15
|
+
|
16
|
+
* Profiling: Switch profiler stack truncation strategy and improve sampling performance ([#4819][])
|
17
|
+
* Profiling: Report GC tuning environment variables with profiles ([#4813][])
|
18
|
+
* Profiling: Tag profiles with sequence number ([#4794][])
|
19
|
+
* Profiling: Enable sample from inside signal handler by default on modern Rubies ([#4786][], [#4785][])
|
20
|
+
|
21
|
+
### Fixed
|
22
|
+
|
23
|
+
* Core: Fix emitting duplicate warnings on agent configuration mismatch ([#4814][])
|
24
|
+
* Appsec: Fix an error in AppSec route extractor for not-found routes in Rails 8 ([#4793][])
|
25
|
+
* Profiling: Add workaround for Ruby VM bug ([#4787][])
|
26
|
+
* Profiling: Fix checking for dladdr in profiling ([#4783][])
|
27
|
+
* Profiling: Fix potential profiler compilation issue. ([#4783][])
|
28
|
+
* Tracing: The mysql integration now only sets the `db.name` tag if there is a valid value ([#4776][])
|
29
|
+
* Tracing: The Rails Runner instrumentation should now create Rails Runner spans. ([#4681][])
|
30
|
+
* Tracing: Fix sampling rules and sample rate reporting in environment logger. ([#4772][])
|
31
|
+
|
32
|
+
### Removed
|
33
|
+
|
34
|
+
## [2.18.0] - 2025-07-03
|
35
|
+
|
36
|
+
### Added
|
37
|
+
|
38
|
+
* AppSec: Add API Security with new sampling algorithm to the Rack, Rails, Sinatra and Grape frameworks ([#4699][])
|
39
|
+
* Core: Add support for tracer configuration through Fleet Automation ([#4651][])
|
40
|
+
* Tracing: Add Rails view render times and database query times to the spans, when available ([#4697][])
|
41
|
+
* Core: Add Service Discovery capability, enabling the tracer to reliably determine which processes are instrumented
|
42
|
+
|
43
|
+
### Changed
|
44
|
+
|
45
|
+
* AppSec: Enable API Security feature by default ([#4756][])
|
46
|
+
* AppSec: Improve overall threat detection ([#4757][])
|
47
|
+
* AppSec: Deprecate `config.appsec.ip_passlist`, `config.appsec.ip_denylist`, and `config.appsec.user_id_denylist` ([#4757][])
|
48
|
+
* Profiling: Graduate Heap Profiling from alpha to preview ([#4401][], [#4460][])
|
49
|
+
* Dynamic Instrumentation: Add instance variable capture to method and line probes ([#4668][])
|
50
|
+
* Profiling: Add capturing of native filenames in backtraces ([#4745][])
|
51
|
+
* Profiling: Clean up profiler logging ([#4728][])
|
52
|
+
* Tracing: Add support for `DD_RUNTIME_METRICS_RUNTIME_ID_ENABLED` to tag runtime metrics with the current runtime ID, allowing correlation across processes ([#4718][])
|
53
|
+
* Tracing: Improve performance of `http.route` tag computation for Rails ([#4688][])
|
54
|
+
|
55
|
+
### Fixed
|
56
|
+
|
57
|
+
* AppSec: Fix authenticated users tracking in anonymization mode for Devise ([#4731][])
|
58
|
+
* Dynamic Instrumentation: Fix source code display in dynamic instrumentation/live debugger ([#4743][])
|
59
|
+
* Profiling: Fix profiler stopping due to bug in heap profiling serialization ([#4721][])
|
60
|
+
|
5
61
|
## [2.17.0] - 2025-06-02
|
6
62
|
|
7
63
|
### Added
|
@@ -3241,7 +3297,9 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
|
|
3241
3297
|
Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
3242
3298
|
|
3243
3299
|
|
3244
|
-
[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.17.0...master
|
3300
|
+
[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.19.0...master
|
3301
|
+
[2.19.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.18.0...v2.19.0
|
3302
|
+
[2.18.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.17.0...v2.18.0
|
3245
3303
|
[2.17.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.16.0...v2.17.0
|
3246
3304
|
[2.16.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.15.0...v2.16.0
|
3247
3305
|
[2.15.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.14.0...v2.15.0
|
@@ -4742,9 +4800,11 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
|
4742
4800
|
[#4353]: https://github.com/DataDog/dd-trace-rb/issues/4353
|
4743
4801
|
[#4360]: https://github.com/DataDog/dd-trace-rb/issues/4360
|
4744
4802
|
[#4363]: https://github.com/DataDog/dd-trace-rb/issues/4363
|
4803
|
+
[#4366]: https://github.com/DataDog/dd-trace-rb/issues/4366
|
4745
4804
|
[#4391]: https://github.com/DataDog/dd-trace-rb/issues/4391
|
4746
4805
|
[#4398]: https://github.com/DataDog/dd-trace-rb/issues/4398
|
4747
4806
|
[#4399]: https://github.com/DataDog/dd-trace-rb/issues/4399
|
4807
|
+
[#4401]: https://github.com/DataDog/dd-trace-rb/issues/4401
|
4748
4808
|
[#4403]: https://github.com/DataDog/dd-trace-rb/issues/4403
|
4749
4809
|
[#4406]: https://github.com/DataDog/dd-trace-rb/issues/4406
|
4750
4810
|
[#4411]: https://github.com/DataDog/dd-trace-rb/issues/4411
|
@@ -4755,6 +4815,7 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
|
4755
4815
|
[#4433]: https://github.com/DataDog/dd-trace-rb/issues/4433
|
4756
4816
|
[#4437]: https://github.com/DataDog/dd-trace-rb/issues/4437
|
4757
4817
|
[#4455]: https://github.com/DataDog/dd-trace-rb/issues/4455
|
4818
|
+
[#4460]: https://github.com/DataDog/dd-trace-rb/issues/4460
|
4758
4819
|
[#4473]: https://github.com/DataDog/dd-trace-rb/issues/4473
|
4759
4820
|
[#4493]: https://github.com/DataDog/dd-trace-rb/issues/4493
|
4760
4821
|
[#4497]: https://github.com/DataDog/dd-trace-rb/issues/4497
|
@@ -4787,14 +4848,42 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
|
4787
4848
|
[#4616]: https://github.com/DataDog/dd-trace-rb/issues/4616
|
4788
4849
|
[#4625]: https://github.com/DataDog/dd-trace-rb/issues/4625
|
4789
4850
|
[#4644]: https://github.com/DataDog/dd-trace-rb/issues/4644
|
4851
|
+
[#4651]: https://github.com/DataDog/dd-trace-rb/issues/4651
|
4790
4852
|
[#4653]: https://github.com/DataDog/dd-trace-rb/issues/4653
|
4791
4853
|
[#4656]: https://github.com/DataDog/dd-trace-rb/issues/4656
|
4792
4854
|
[#4665]: https://github.com/DataDog/dd-trace-rb/issues/4665
|
4855
|
+
[#4668]: https://github.com/DataDog/dd-trace-rb/issues/4668
|
4793
4856
|
[#4669]: https://github.com/DataDog/dd-trace-rb/issues/4669
|
4794
4857
|
[#4672]: https://github.com/DataDog/dd-trace-rb/issues/4672
|
4795
4858
|
[#4673]: https://github.com/DataDog/dd-trace-rb/issues/4673
|
4796
4859
|
[#4678]: https://github.com/DataDog/dd-trace-rb/issues/4678
|
4797
4860
|
[#4679]: https://github.com/DataDog/dd-trace-rb/issues/4679
|
4861
|
+
[#4681]: https://github.com/DataDog/dd-trace-rb/issues/4681
|
4862
|
+
[#4688]: https://github.com/DataDog/dd-trace-rb/issues/4688
|
4863
|
+
[#4693]: https://github.com/DataDog/dd-trace-rb/issues/4693
|
4864
|
+
[#4697]: https://github.com/DataDog/dd-trace-rb/issues/4697
|
4865
|
+
[#4699]: https://github.com/DataDog/dd-trace-rb/issues/4699
|
4866
|
+
[#4718]: https://github.com/DataDog/dd-trace-rb/issues/4718
|
4867
|
+
[#4721]: https://github.com/DataDog/dd-trace-rb/issues/4721
|
4868
|
+
[#4728]: https://github.com/DataDog/dd-trace-rb/issues/4728
|
4869
|
+
[#4731]: https://github.com/DataDog/dd-trace-rb/issues/4731
|
4870
|
+
[#4743]: https://github.com/DataDog/dd-trace-rb/issues/4743
|
4871
|
+
[#4745]: https://github.com/DataDog/dd-trace-rb/issues/4745
|
4872
|
+
[#4756]: https://github.com/DataDog/dd-trace-rb/issues/4756
|
4873
|
+
[#4757]: https://github.com/DataDog/dd-trace-rb/issues/4757
|
4874
|
+
[#4771]: https://github.com/DataDog/dd-trace-rb/issues/4771
|
4875
|
+
[#4772]: https://github.com/DataDog/dd-trace-rb/issues/4772
|
4876
|
+
[#4776]: https://github.com/DataDog/dd-trace-rb/issues/4776
|
4877
|
+
[#4783]: https://github.com/DataDog/dd-trace-rb/issues/4783
|
4878
|
+
[#4785]: https://github.com/DataDog/dd-trace-rb/issues/4785
|
4879
|
+
[#4786]: https://github.com/DataDog/dd-trace-rb/issues/4786
|
4880
|
+
[#4787]: https://github.com/DataDog/dd-trace-rb/issues/4787
|
4881
|
+
[#4793]: https://github.com/DataDog/dd-trace-rb/issues/4793
|
4882
|
+
[#4794]: https://github.com/DataDog/dd-trace-rb/issues/4794
|
4883
|
+
[#4802]: https://github.com/DataDog/dd-trace-rb/issues/4802
|
4884
|
+
[#4813]: https://github.com/DataDog/dd-trace-rb/issues/4813
|
4885
|
+
[#4814]: https://github.com/DataDog/dd-trace-rb/issues/4814
|
4886
|
+
[#4819]: https://github.com/DataDog/dd-trace-rb/issues/4819
|
4798
4887
|
[@AdrianLC]: https://github.com/AdrianLC
|
4799
4888
|
[@Azure7111]: https://github.com/Azure7111
|
4800
4889
|
[@BabyGroot]: https://github.com/BabyGroot
|
@@ -102,6 +102,7 @@ typedef struct {
|
|
102
102
|
bool allocation_counting_enabled;
|
103
103
|
bool gvl_profiling_enabled;
|
104
104
|
bool skip_idle_samples_for_testing;
|
105
|
+
bool sighandler_sampling_enabled;
|
105
106
|
VALUE self_instance;
|
106
107
|
VALUE thread_context_collector_instance;
|
107
108
|
VALUE idle_sampling_helper_instance;
|
@@ -122,7 +123,11 @@ typedef struct {
|
|
122
123
|
// Others
|
123
124
|
|
124
125
|
// Used to detect/avoid nested sampling, e.g. when on_newobj_event gets triggered by a memory allocation
|
125
|
-
// that happens during another sample
|
126
|
+
// that happens during another sample, or when the signal handler gets triggered while we're already in the middle of
|
127
|
+
// sampling.
|
128
|
+
//
|
129
|
+
// @ivoanjo: Right now we always sample inside `safely_call`; if that ever changes, this flag may need to become
|
130
|
+
// volatile/atomic/have some barriers to ensure it's visible during e.g. signal handlers.
|
126
131
|
bool during_sample;
|
127
132
|
|
128
133
|
#ifndef NO_GVL_INSTRUMENTATION
|
@@ -138,23 +143,15 @@ typedef struct {
|
|
138
143
|
unsigned int trigger_simulated_signal_delivery_attempts;
|
139
144
|
// How many times we actually simulated signal delivery
|
140
145
|
unsigned int simulated_signal_delivery;
|
141
|
-
// How many times we actually called rb_postponed_job_register_one from
|
146
|
+
// How many times we actually called rb_postponed_job_register_one from the signal handler
|
142
147
|
unsigned int signal_handler_enqueued_sample;
|
148
|
+
// How many times we prepared a sample (sampled directly) from the signal handler
|
149
|
+
unsigned int signal_handler_prepared_sample;
|
143
150
|
// How many times the signal handler was called from the wrong thread
|
144
151
|
unsigned int signal_handler_wrong_thread;
|
145
152
|
// How many times we actually tried to interrupt a thread for sampling
|
146
153
|
unsigned int interrupt_thread_attempts;
|
147
154
|
|
148
|
-
// # Stats for the results of calling rb_postponed_job_register_one
|
149
|
-
// The same function was already waiting to be executed
|
150
|
-
unsigned int postponed_job_skipped_already_existed;
|
151
|
-
// The function was added to the queue successfully
|
152
|
-
unsigned int postponed_job_success;
|
153
|
-
// The queue was full
|
154
|
-
unsigned int postponed_job_full;
|
155
|
-
// The function returned an unknown result code
|
156
|
-
unsigned int postponed_job_unknown_result;
|
157
|
-
|
158
155
|
// # CPU/Walltime sampling stats
|
159
156
|
// How many times we actually CPU/wall sampled
|
160
157
|
unsigned int cpu_sampled;
|
@@ -238,6 +235,8 @@ static void after_gvl_running_from_postponed_job(DDTRACE_UNUSED void *_unused);
|
|
238
235
|
#endif
|
239
236
|
static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance);
|
240
237
|
static VALUE _native_gvl_profiling_hook_active(DDTRACE_UNUSED VALUE self, VALUE instance);
|
238
|
+
static inline void during_sample_enter(cpu_and_wall_time_worker_state* state);
|
239
|
+
static inline void during_sample_exit(cpu_and_wall_time_worker_state* state);
|
241
240
|
|
242
241
|
// We're using `on_newobj_event` function with `rb_add_event_hook2`, which requires in its public signature a function
|
243
242
|
// with signature `rb_event_hook_func_t` which doesn't match `on_newobj_event`.
|
@@ -362,6 +361,7 @@ static VALUE _native_new(VALUE klass) {
|
|
362
361
|
state->allocation_counting_enabled = false;
|
363
362
|
state->gvl_profiling_enabled = false;
|
364
363
|
state->skip_idle_samples_for_testing = false;
|
364
|
+
state->sighandler_sampling_enabled = false;
|
365
365
|
state->thread_context_collector_instance = Qnil;
|
366
366
|
state->idle_sampling_helper_instance = Qnil;
|
367
367
|
state->owner_thread = Qnil;
|
@@ -372,7 +372,7 @@ static VALUE _native_new(VALUE klass) {
|
|
372
372
|
state->failure_exception = Qnil;
|
373
373
|
state->stop_thread = Qnil;
|
374
374
|
|
375
|
-
state->during_sample = false;
|
375
|
+
during_sample_exit(state);
|
376
376
|
|
377
377
|
#ifndef NO_GVL_INSTRUMENTATION
|
378
378
|
state->gvl_profiling_hook = NULL;
|
@@ -404,6 +404,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
|
|
404
404
|
VALUE allocation_counting_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("allocation_counting_enabled")));
|
405
405
|
VALUE gvl_profiling_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("gvl_profiling_enabled")));
|
406
406
|
VALUE skip_idle_samples_for_testing = rb_hash_fetch(options, ID2SYM(rb_intern("skip_idle_samples_for_testing")));
|
407
|
+
VALUE sighandler_sampling_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("sighandler_sampling_enabled")));
|
407
408
|
|
408
409
|
ENFORCE_BOOLEAN(gc_profiling_enabled);
|
409
410
|
ENFORCE_BOOLEAN(no_signals_workaround_enabled);
|
@@ -413,6 +414,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
|
|
413
414
|
ENFORCE_BOOLEAN(allocation_counting_enabled);
|
414
415
|
ENFORCE_BOOLEAN(gvl_profiling_enabled);
|
415
416
|
ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
|
417
|
+
ENFORCE_BOOLEAN(sighandler_sampling_enabled)
|
416
418
|
|
417
419
|
cpu_and_wall_time_worker_state *state;
|
418
420
|
TypedData_Get_Struct(self_instance, cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
@@ -424,6 +426,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
|
|
424
426
|
state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
|
425
427
|
state->gvl_profiling_enabled = (gvl_profiling_enabled == Qtrue);
|
426
428
|
state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
|
429
|
+
state->sighandler_sampling_enabled = (sighandler_sampling_enabled == Qtrue);
|
427
430
|
|
428
431
|
double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
|
429
432
|
if (!state->allocation_profiling_enabled) {
|
@@ -589,25 +592,30 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
|
|
589
592
|
return;
|
590
593
|
}
|
591
594
|
|
592
|
-
// We
|
593
|
-
// a) we get triggered using SIGPROF, and the docs state a second SIGPROF will not interrupt an existing one
|
595
|
+
// We assume there can be no concurrent nor nested calls to handle_sampling_signal because
|
596
|
+
// a) we get triggered using SIGPROF, and the docs state a second SIGPROF will not interrupt an existing one (see sigaction docs on sa_mask)
|
594
597
|
// b) we validate we are in the thread that has the global VM lock; if a different thread gets a signal, it will return early
|
595
598
|
// because it will not have the global VM lock
|
596
599
|
|
597
|
-
// Note: rb_postponed_job_register_one ensures that if there's a previous sample_from_postponed_job queued for execution
|
598
|
-
// then we will not queue a second one. It does this by doing a linear scan on the existing jobs; in the future we
|
599
|
-
// may want to implement that check ourselves.
|
600
|
-
|
601
600
|
state->stats.signal_handler_enqueued_sample++;
|
602
601
|
|
603
|
-
|
604
|
-
|
602
|
+
bool sample_from_signal_handler =
|
603
|
+
state->sighandler_sampling_enabled &&
|
604
|
+
// Don't sample if we're already in the middle of processing a sample
|
605
|
+
!state->during_sample;
|
606
|
+
|
607
|
+
if (sample_from_signal_handler) {
|
608
|
+
// Buffer current stack trace. Note that this will not actually record the sample, for that we still need to wait
|
609
|
+
// until the postponed job below gets run.
|
610
|
+
bool prepared = thread_context_collector_prepare_sample_inside_signal_handler(state->thread_context_collector_instance);
|
611
|
+
|
612
|
+
if (prepared) state->stats.signal_handler_prepared_sample++;
|
613
|
+
}
|
614
|
+
|
605
615
|
#ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
|
606
616
|
rb_postponed_job_trigger(sample_from_postponed_job_handle);
|
607
|
-
state->stats.postponed_job_success++; // Always succeeds
|
608
617
|
#else
|
609
|
-
|
610
|
-
// This is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
|
618
|
+
// Passing in `gc_finalize_deferred_workaround` is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
|
611
619
|
//
|
612
620
|
// TL;DR the `rb_postponed_job_register_one` API is not atomic (which is why it got replaced by `rb_postponed_job_trigger`)
|
613
621
|
// and in rare cases can cause VM crashes.
|
@@ -631,20 +639,7 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
|
|
631
639
|
//
|
632
640
|
// Thus, our workaround is simple: we pass in objspace as our argument, just in case the clobbering happens.
|
633
641
|
// In the happy path, we never use this argument so it makes no difference. In the buggy path, we avoid crashing the VM.
|
634
|
-
|
635
|
-
|
636
|
-
// Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
|
637
|
-
// seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
|
638
|
-
switch (result) {
|
639
|
-
case 0:
|
640
|
-
state->stats.postponed_job_full++; break;
|
641
|
-
case 1:
|
642
|
-
state->stats.postponed_job_success++; break;
|
643
|
-
case 2:
|
644
|
-
state->stats.postponed_job_skipped_already_existed++; break;
|
645
|
-
default:
|
646
|
-
state->stats.postponed_job_unknown_result++;
|
647
|
-
}
|
642
|
+
rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
|
648
643
|
#endif
|
649
644
|
}
|
650
645
|
|
@@ -714,6 +709,8 @@ static void interrupt_sampling_trigger_loop(void *state_ptr) {
|
|
714
709
|
atomic_store(&state->should_run, false);
|
715
710
|
}
|
716
711
|
|
712
|
+
// Note: If we ever want to get rid of the postponed job execution, remember not to clobber Ruby exceptions, as
|
713
|
+
// this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
|
717
714
|
static void sample_from_postponed_job(DDTRACE_UNUSED void *_unused) {
|
718
715
|
cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
719
716
|
|
@@ -726,12 +723,12 @@ static void sample_from_postponed_job(DDTRACE_UNUSED void *_unused) {
|
|
726
723
|
return; // We're not on the main Ractor; we currently don't support profiling non-main Ractors
|
727
724
|
}
|
728
725
|
|
729
|
-
state->during_sample = true;
|
726
|
+
during_sample_enter(state);
|
730
727
|
|
731
728
|
// Rescue against any exceptions that happen during sampling
|
732
729
|
safely_call(rescued_sample_from_postponed_job, state->self_instance, state->self_instance);
|
733
730
|
|
734
|
-
state->during_sample = false;
|
731
|
+
during_sample_exit(state);
|
735
732
|
}
|
736
733
|
|
737
734
|
static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
|
@@ -937,11 +934,11 @@ static void after_gc_from_postponed_job(DDTRACE_UNUSED void *_unused) {
|
|
937
934
|
return; // We're not on the main Ractor; we currently don't support profiling non-main Ractors
|
938
935
|
}
|
939
936
|
|
940
|
-
state->during_sample = true;
|
937
|
+
during_sample_enter(state);
|
941
938
|
|
942
939
|
safely_call(thread_context_collector_sample_after_gc, state->thread_context_collector_instance, state->self_instance);
|
943
940
|
|
944
|
-
state->during_sample = false;
|
941
|
+
during_sample_exit(state);
|
945
942
|
}
|
946
943
|
|
947
944
|
// Equivalent to Ruby begin/rescue call, where we call a C function and jump to the exception handler if an
|
@@ -1019,11 +1016,8 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
1019
1016
|
ID2SYM(rb_intern("trigger_simulated_signal_delivery_attempts")), /* => */ UINT2NUM(state->stats.trigger_simulated_signal_delivery_attempts),
|
1020
1017
|
ID2SYM(rb_intern("simulated_signal_delivery")), /* => */ UINT2NUM(state->stats.simulated_signal_delivery),
|
1021
1018
|
ID2SYM(rb_intern("signal_handler_enqueued_sample")), /* => */ UINT2NUM(state->stats.signal_handler_enqueued_sample),
|
1019
|
+
ID2SYM(rb_intern("signal_handler_prepared_sample")), /* => */ UINT2NUM(state->stats.signal_handler_prepared_sample),
|
1022
1020
|
ID2SYM(rb_intern("signal_handler_wrong_thread")), /* => */ UINT2NUM(state->stats.signal_handler_wrong_thread),
|
1023
|
-
ID2SYM(rb_intern("postponed_job_skipped_already_existed")), /* => */ UINT2NUM(state->stats.postponed_job_skipped_already_existed),
|
1024
|
-
ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
|
1025
|
-
ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
|
1026
|
-
ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
|
1027
1021
|
ID2SYM(rb_intern("interrupt_thread_attempts")), /* => */ UINT2NUM(state->stats.interrupt_thread_attempts),
|
1028
1022
|
|
1029
1023
|
// CPU Stats
|
@@ -1073,8 +1067,7 @@ void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
|
|
1073
1067
|
|
1074
1068
|
state->stats.simulated_signal_delivery++;
|
1075
1069
|
|
1076
|
-
//
|
1077
|
-
// so that the simulated case is as close to the original one as well (including any metrics increases, etc).
|
1070
|
+
// `handle_sampling_signal` does a few things extra on top of `sample_from_postponed_job` so that's why we don't shortcut here
|
1078
1071
|
handle_sampling_signal(0, NULL, NULL);
|
1079
1072
|
|
1080
1073
|
return NULL; // Unused
|
@@ -1207,11 +1200,7 @@ static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *u
|
|
1207
1200
|
&state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
|
1208
1201
|
);
|
1209
1202
|
|
1210
|
-
|
1211
|
-
// invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
|
1212
|
-
// to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
|
1213
|
-
// defined as not being able to allocate) sets this.
|
1214
|
-
state->during_sample = true;
|
1203
|
+
during_sample_enter(state);
|
1215
1204
|
|
1216
1205
|
// Rescue against any exceptions that happen during sampling
|
1217
1206
|
safely_call(rescued_sample_allocation, Qnil, state->self_instance);
|
@@ -1232,7 +1221,7 @@ static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *u
|
|
1232
1221
|
|
1233
1222
|
state->stats.allocation_sampled++;
|
1234
1223
|
|
1235
|
-
state->during_sample = false;
|
1224
|
+
during_sample_exit(state);
|
1236
1225
|
}
|
1237
1226
|
|
1238
1227
|
static void disable_tracepoints(cpu_and_wall_time_worker_state *state) {
|
@@ -1373,12 +1362,12 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {
|
|
1373
1362
|
// This can potentially happen if the CpuAndWallTimeWorker was stopped while the postponed job was waiting to be executed; nothing to do
|
1374
1363
|
if (state == NULL) return;
|
1375
1364
|
|
1376
|
-
state->during_sample = true;
|
1365
|
+
during_sample_enter(state);
|
1377
1366
|
|
1378
1367
|
// Rescue against any exceptions that happen during sampling
|
1379
1368
|
safely_call(rescued_after_gvl_running_from_postponed_job, state->self_instance, state->self_instance);
|
1380
1369
|
|
1381
|
-
state->during_sample = false;
|
1370
|
+
during_sample_exit(state);
|
1382
1371
|
}
|
1383
1372
|
|
1384
1373
|
static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance) {
|
@@ -1414,3 +1403,21 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {
|
|
1414
1403
|
return Qfalse;
|
1415
1404
|
}
|
1416
1405
|
#endif
|
1406
|
+
|
1407
|
+
static inline void during_sample_enter(cpu_and_wall_time_worker_state* state) {
|
1408
|
+
// Tell the compiler it's not allowed to reorder the `during_sample` flag with anything that happens after.
|
1409
|
+
//
|
1410
|
+
// In a few cases, we may be checking this flag from a signal handler, so we need to make sure the compiler didn't
|
1411
|
+
// get clever and reordered things in such a way that makes us miss the flag update.
|
1412
|
+
//
|
1413
|
+
// See https://github.com/ruby/ruby/pull/11036 for a similar change made to the Ruby VM with more context.
|
1414
|
+
state->during_sample = true;
|
1415
|
+
atomic_signal_fence(memory_order_seq_cst);
|
1416
|
+
}
|
1417
|
+
|
1418
|
+
static inline void during_sample_exit(cpu_and_wall_time_worker_state* state) {
|
1419
|
+
// See `during_sample_enter` for more context; in this case we set the fence before to make sure anything that
|
1420
|
+
// happens before the fence is not reordered with the flag update.
|
1421
|
+
atomic_signal_fence(memory_order_seq_cst);
|
1422
|
+
state->during_sample = false;
|
1423
|
+
}
|