datadog 2.3.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +64 -2
- data/ext/datadog_profiling_loader/datadog_profiling_loader.c +9 -1
- data/ext/datadog_profiling_loader/extconf.rb +10 -22
- data/ext/datadog_profiling_native_extension/NativeExtensionDesign.md +3 -3
- data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +198 -41
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +4 -2
- data/ext/datadog_profiling_native_extension/collectors_stack.c +89 -46
- data/ext/datadog_profiling_native_extension/collectors_thread_context.c +645 -107
- data/ext/datadog_profiling_native_extension/collectors_thread_context.h +15 -1
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +0 -27
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +0 -4
- data/ext/datadog_profiling_native_extension/extconf.rb +42 -25
- data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +50 -0
- data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +75 -0
- data/ext/datadog_profiling_native_extension/heap_recorder.c +194 -34
- data/ext/datadog_profiling_native_extension/heap_recorder.h +11 -0
- data/ext/datadog_profiling_native_extension/http_transport.c +38 -6
- data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +1 -1
- data/ext/datadog_profiling_native_extension/private_vm_api_access.c +53 -2
- data/ext/datadog_profiling_native_extension/private_vm_api_access.h +3 -0
- data/ext/datadog_profiling_native_extension/profiling.c +1 -1
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +14 -11
- data/ext/datadog_profiling_native_extension/stack_recorder.c +58 -22
- data/ext/datadog_profiling_native_extension/stack_recorder.h +2 -0
- data/ext/libdatadog_api/crashtracker.c +20 -18
- data/ext/libdatadog_api/datadog_ruby_common.c +0 -27
- data/ext/libdatadog_api/datadog_ruby_common.h +0 -4
- data/ext/libdatadog_extconf_helpers.rb +1 -1
- data/lib/datadog/appsec/assets/waf_rules/recommended.json +2184 -108
- data/lib/datadog/appsec/assets/waf_rules/strict.json +1430 -2
- data/lib/datadog/appsec/component.rb +29 -8
- data/lib/datadog/appsec/configuration/settings.rb +10 -2
- data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +1 -0
- data/lib/datadog/appsec/contrib/devise/patcher/rememberable_patch.rb +21 -0
- data/lib/datadog/appsec/contrib/devise/patcher.rb +12 -2
- data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +0 -14
- data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +67 -31
- data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +14 -15
- data/lib/datadog/appsec/contrib/graphql/integration.rb +14 -1
- data/lib/datadog/appsec/contrib/graphql/reactive/multiplex.rb +7 -20
- data/lib/datadog/appsec/contrib/rack/gateway/request.rb +2 -5
- data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +9 -15
- data/lib/datadog/appsec/contrib/rack/reactive/request.rb +6 -18
- data/lib/datadog/appsec/contrib/rack/reactive/request_body.rb +7 -20
- data/lib/datadog/appsec/contrib/rack/reactive/response.rb +5 -18
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +3 -1
- data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +3 -5
- data/lib/datadog/appsec/contrib/rails/reactive/action.rb +5 -18
- data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +6 -10
- data/lib/datadog/appsec/contrib/sinatra/reactive/routed.rb +7 -20
- data/lib/datadog/appsec/event.rb +25 -1
- data/lib/datadog/appsec/ext.rb +4 -0
- data/lib/datadog/appsec/monitor/gateway/watcher.rb +3 -5
- data/lib/datadog/appsec/monitor/reactive/set_user.rb +7 -20
- data/lib/datadog/appsec/processor/context.rb +109 -0
- data/lib/datadog/appsec/processor/rule_loader.rb +3 -1
- data/lib/datadog/appsec/processor/rule_merger.rb +33 -15
- data/lib/datadog/appsec/processor.rb +42 -107
- data/lib/datadog/appsec/rate_limiter.rb +25 -40
- data/lib/datadog/appsec/remote.rb +7 -3
- data/lib/datadog/appsec/scope.rb +1 -4
- data/lib/datadog/appsec/utils/trace_operation.rb +15 -0
- data/lib/datadog/appsec/utils.rb +2 -0
- data/lib/datadog/appsec.rb +3 -2
- data/lib/datadog/core/configuration/agent_settings_resolver.rb +26 -25
- data/lib/datadog/core/configuration/components.rb +4 -3
- data/lib/datadog/core/configuration/settings.rb +96 -5
- data/lib/datadog/core/configuration.rb +1 -3
- data/lib/datadog/core/crashtracking/component.rb +9 -6
- data/lib/datadog/core/environment/execution.rb +5 -5
- data/lib/datadog/core/environment/yjit.rb +5 -0
- data/lib/datadog/core/metrics/client.rb +7 -0
- data/lib/datadog/core/rate_limiter.rb +183 -0
- data/lib/datadog/core/remote/client/capabilities.rb +4 -3
- data/lib/datadog/core/remote/component.rb +4 -2
- data/lib/datadog/core/remote/negotiation.rb +4 -4
- data/lib/datadog/core/remote/tie.rb +2 -0
- data/lib/datadog/core/remote/transport/http.rb +5 -0
- data/lib/datadog/core/remote/worker.rb +1 -1
- data/lib/datadog/core/runtime/ext.rb +1 -0
- data/lib/datadog/core/runtime/metrics.rb +5 -1
- data/lib/datadog/core/semaphore.rb +35 -0
- data/lib/datadog/core/telemetry/component.rb +2 -0
- data/lib/datadog/core/telemetry/event.rb +12 -7
- data/lib/datadog/core/telemetry/logger.rb +51 -0
- data/lib/datadog/core/telemetry/logging.rb +50 -14
- data/lib/datadog/core/telemetry/request.rb +13 -1
- data/lib/datadog/core/transport/ext.rb +1 -0
- data/lib/datadog/core/utils/time.rb +12 -0
- data/lib/datadog/core/workers/async.rb +1 -1
- data/lib/datadog/di/code_tracker.rb +166 -0
- data/lib/datadog/di/configuration/settings.rb +163 -0
- data/lib/datadog/di/configuration.rb +11 -0
- data/lib/datadog/di/error.rb +31 -0
- data/lib/datadog/di/extensions.rb +16 -0
- data/lib/datadog/di/instrumenter.rb +301 -0
- data/lib/datadog/di/probe.rb +162 -0
- data/lib/datadog/di/probe_builder.rb +47 -0
- data/lib/datadog/di/probe_notification_builder.rb +207 -0
- data/lib/datadog/di/probe_notifier_worker.rb +244 -0
- data/lib/datadog/di/redactor.rb +188 -0
- data/lib/datadog/di/serializer.rb +215 -0
- data/lib/datadog/di/transport.rb +67 -0
- data/lib/datadog/di/utils.rb +39 -0
- data/lib/datadog/di.rb +57 -0
- data/lib/datadog/opentelemetry/sdk/propagator.rb +2 -0
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +12 -10
- data/lib/datadog/profiling/collectors/info.rb +12 -3
- data/lib/datadog/profiling/collectors/thread_context.rb +32 -8
- data/lib/datadog/profiling/component.rb +21 -4
- data/lib/datadog/profiling/http_transport.rb +6 -1
- data/lib/datadog/profiling/scheduler.rb +2 -0
- data/lib/datadog/profiling/stack_recorder.rb +40 -9
- data/lib/datadog/single_step_instrument.rb +12 -0
- data/lib/datadog/tracing/component.rb +13 -0
- data/lib/datadog/tracing/contrib/action_cable/instrumentation.rb +8 -12
- data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +5 -0
- data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +78 -0
- data/lib/datadog/tracing/contrib/action_pack/action_dispatch/patcher.rb +33 -0
- data/lib/datadog/tracing/contrib/action_pack/patcher.rb +2 -0
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +4 -0
- data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +3 -1
- data/lib/datadog/tracing/contrib/active_record/events/sql.rb +3 -1
- data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +5 -1
- data/lib/datadog/tracing/contrib/aws/instrumentation.rb +5 -0
- data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +6 -1
- data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +4 -0
- data/lib/datadog/tracing/contrib/excon/middleware.rb +3 -0
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +12 -0
- data/lib/datadog/tracing/contrib/grape/endpoint.rb +24 -2
- data/lib/datadog/tracing/contrib/graphql/patcher.rb +9 -12
- data/lib/datadog/tracing/contrib/graphql/trace_patcher.rb +3 -3
- data/lib/datadog/tracing/contrib/graphql/tracing_patcher.rb +3 -3
- data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +13 -9
- data/lib/datadog/tracing/contrib/graphql/unified_trace_patcher.rb +6 -3
- data/lib/datadog/tracing/contrib/http/circuit_breaker.rb +9 -0
- data/lib/datadog/tracing/contrib/http/instrumentation.rb +22 -15
- data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +10 -5
- data/lib/datadog/tracing/contrib/httpclient/patcher.rb +1 -14
- data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +9 -0
- data/lib/datadog/tracing/contrib/httprb/patcher.rb +1 -14
- data/lib/datadog/tracing/contrib/lograge/patcher.rb +1 -2
- data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +2 -0
- data/lib/datadog/tracing/contrib/opensearch/patcher.rb +13 -6
- data/lib/datadog/tracing/contrib/patcher.rb +2 -1
- data/lib/datadog/tracing/contrib/presto/patcher.rb +1 -13
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +27 -0
- data/lib/datadog/tracing/contrib/rails/runner.rb +1 -1
- data/lib/datadog/tracing/contrib/redis/tags.rb +4 -0
- data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +3 -0
- data/lib/datadog/tracing/contrib/sinatra/tracer.rb +4 -0
- data/lib/datadog/tracing/contrib/stripe/request.rb +3 -2
- data/lib/datadog/tracing/distributed/propagation.rb +7 -0
- data/lib/datadog/tracing/metadata/ext.rb +2 -0
- data/lib/datadog/tracing/remote.rb +5 -2
- data/lib/datadog/tracing/sampling/matcher.rb +6 -1
- data/lib/datadog/tracing/sampling/rate_sampler.rb +1 -1
- data/lib/datadog/tracing/sampling/rule.rb +2 -0
- data/lib/datadog/tracing/sampling/rule_sampler.rb +15 -9
- data/lib/datadog/tracing/sampling/span/ext.rb +1 -1
- data/lib/datadog/tracing/sampling/span/rule.rb +2 -2
- data/lib/datadog/tracing/trace_operation.rb +26 -2
- data/lib/datadog/tracing/tracer.rb +29 -22
- data/lib/datadog/tracing/transport/http/client.rb +1 -0
- data/lib/datadog/tracing/transport/http.rb +4 -0
- data/lib/datadog/tracing/transport/io/client.rb +1 -0
- data/lib/datadog/tracing/workers/trace_writer.rb +1 -1
- data/lib/datadog/tracing/workers.rb +2 -2
- data/lib/datadog/tracing/writer.rb +26 -28
- data/lib/datadog/version.rb +1 -1
- metadata +40 -15
- data/lib/datadog/tracing/sampling/rate_limiter.rb +0 -185
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1aa5227f68791f80fe6deb7f5a8b53e5adff15a7e443f78788f827888945c83
|
4
|
+
data.tar.gz: 7931209b3685744b1aa17f709766d9a5da91b5b34987a8ac975032ebee0a7caa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '081740cc882d48f62d18f8a823696339346ad574f4ac5c67cd64430ee37f347ec33d6c536e3790f3697ea3a6cb72f2275a6b5b14808e48a4246cc20786fcc20b'
|
7
|
+
data.tar.gz: c51018d1655343c234e45d72ab5dc9a27ebc465e253462eec483faf862e3c6fde0bbba6d4f3c1ca6656a6d7c33525a5942481a69beee7a7d889cbbe75c900ff7
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,47 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
+
## [2.5.0] - 2024-11-05
|
6
|
+
|
7
|
+
### Added
|
8
|
+
|
9
|
+
* Performance: Profiling: Add setting to lower heap profiling memory use/latency by cleaning up young objects after Ruby GC ([#4020][])
|
10
|
+
|
11
|
+
### Changed
|
12
|
+
|
13
|
+
* Core: Replace the `debase-ruby_core_source` gem with the `datadog-ruby_core_source` ([#4014][])
|
14
|
+
* Core: Upgrade to `libdatadog` 13.1 ([#3997][])
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
|
18
|
+
* Fix `undefined method` error for Rails runner ([#3996][])
|
19
|
+
* Apply version tag only to spans that use the global/default service name ([#4027][])
|
20
|
+
* Ensure UDS takes precedence over HTTP when both Agent configurations defined ([#4024][])
|
21
|
+
* Remove duplicate leading slash in resource name for Grape routes ([#4033][])
|
22
|
+
|
23
|
+
## [2.4.0] - 2024-10-11
|
24
|
+
|
25
|
+
### Added
|
26
|
+
|
27
|
+
* Core: Allow changing sampling rate for customer defined tags and resources ([#3956][])
|
28
|
+
* Profiling: Add GVL profiling for Ruby 3.2+ as a preview feature ([#3929][])
|
29
|
+
* Profiling: Otel: Add preview support for correlating profiling with otel ruby gem ([#3984][])
|
30
|
+
* Tracing: AppSec: Add http.route tag to Rails, Grape, and Sinatra integrations ([#3849][])
|
31
|
+
* Tracing: Add capabilities to remote config: tracing sample rate, tracing logs injection, tracing http header tags ([#3888][])
|
32
|
+
* AppSec: Add a force disable of AppSec feature when using Ruby >= 3.3 with old FFI gem version ([#3969][])
|
33
|
+
|
34
|
+
### Changed
|
35
|
+
|
36
|
+
* AppSec: Improve PII compliance ([#3857][])
|
37
|
+
* AppSec: Integrations: Improve accuracy of login tracking for Devise ([#3867][])
|
38
|
+
* Crashtracking feature is now disabled by default ([#3970][])
|
39
|
+
|
40
|
+
### Fixed
|
41
|
+
|
42
|
+
* AppSec: Integrations: Fix GraphQL instrumentation for query fragments ([#3887][])
|
43
|
+
* Bug: Profiling: Fix (small) memory leak in profiler when forking ([#3852][])
|
44
|
+
* Tracing: Integrations: Fix GraphQL integration reconfiguration ([#3859][])
|
45
|
+
|
5
46
|
## [2.3.0] - 2024-08-22
|
6
47
|
|
7
48
|
### Added
|
@@ -2962,7 +3003,9 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
|
|
2962
3003
|
Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
2963
3004
|
|
2964
3005
|
|
2965
|
-
[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.3.0...master
|
3006
|
+
[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.5.0...master
|
3007
|
+
[2.5.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.4.0...v2.5.0
|
3008
|
+
[2.4.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.3.0...v2.4.0
|
2966
3009
|
[2.3.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.2.0...v2.3.0
|
2967
3010
|
[2.2.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.1.0...v2.2.0
|
2968
3011
|
[2.1.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.0.0...v2.1.0
|
@@ -4383,6 +4426,25 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
|
4383
4426
|
[#3837]: https://github.com/DataDog/dd-trace-rb/issues/3837
|
4384
4427
|
[#3839]: https://github.com/DataDog/dd-trace-rb/issues/3839
|
4385
4428
|
[#3841]: https://github.com/DataDog/dd-trace-rb/issues/3841
|
4429
|
+
[#3849]: https://github.com/DataDog/dd-trace-rb/issues/3849
|
4430
|
+
[#3852]: https://github.com/DataDog/dd-trace-rb/issues/3852
|
4431
|
+
[#3857]: https://github.com/DataDog/dd-trace-rb/issues/3857
|
4432
|
+
[#3859]: https://github.com/DataDog/dd-trace-rb/issues/3859
|
4433
|
+
[#3867]: https://github.com/DataDog/dd-trace-rb/issues/3867
|
4434
|
+
[#3887]: https://github.com/DataDog/dd-trace-rb/issues/3887
|
4435
|
+
[#3888]: https://github.com/DataDog/dd-trace-rb/issues/3888
|
4436
|
+
[#3929]: https://github.com/DataDog/dd-trace-rb/issues/3929
|
4437
|
+
[#3956]: https://github.com/DataDog/dd-trace-rb/issues/3956
|
4438
|
+
[#3969]: https://github.com/DataDog/dd-trace-rb/issues/3969
|
4439
|
+
[#3970]: https://github.com/DataDog/dd-trace-rb/issues/3970
|
4440
|
+
[#3984]: https://github.com/DataDog/dd-trace-rb/issues/3984
|
4441
|
+
[#3996]: https://github.com/DataDog/dd-trace-rb/issues/3996
|
4442
|
+
[#3997]: https://github.com/DataDog/dd-trace-rb/issues/3997
|
4443
|
+
[#4014]: https://github.com/DataDog/dd-trace-rb/issues/4014
|
4444
|
+
[#4020]: https://github.com/DataDog/dd-trace-rb/issues/4020
|
4445
|
+
[#4024]: https://github.com/DataDog/dd-trace-rb/issues/4024
|
4446
|
+
[#4027]: https://github.com/DataDog/dd-trace-rb/issues/4027
|
4447
|
+
[#4033]: https://github.com/DataDog/dd-trace-rb/issues/4033
|
4386
4448
|
[@AdrianLC]: https://github.com/AdrianLC
|
4387
4449
|
[@Azure7111]: https://github.com/Azure7111
|
4388
4450
|
[@BabyGroot]: https://github.com/BabyGroot
|
@@ -4534,4 +4596,4 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
|
4534
4596
|
[@y-yagi]: https://github.com/y-yagi
|
4535
4597
|
[@yujideveloper]: https://github.com/yujideveloper
|
4536
4598
|
[@yukimurasawa]: https://github.com/yukimurasawa
|
4537
|
-
[@zachmccormick]: https://github.com/zachmccormick
|
4599
|
+
[@zachmccormick]: https://github.com/zachmccormick
|
@@ -65,7 +65,15 @@ static VALUE _native_load(DDTRACE_UNUSED VALUE self, VALUE ruby_path, VALUE ruby
|
|
65
65
|
char *path = StringValueCStr(ruby_path);
|
66
66
|
char *init_name = StringValueCStr(ruby_init_name);
|
67
67
|
|
68
|
-
|
68
|
+
int dlopen_flags = RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND;
|
69
|
+
|
70
|
+
#if defined(__has_feature)
|
71
|
+
#if __has_feature(address_sanitizer)
|
72
|
+
dlopen_flags &= ~RTLD_DEEPBIND; // Not supported by ASAN
|
73
|
+
#endif
|
74
|
+
#endif
|
75
|
+
|
76
|
+
void *handle = dlopen(path, dlopen_flags);
|
69
77
|
|
70
78
|
VALUE failure_details = Qnil;
|
71
79
|
|
@@ -1,5 +1,4 @@
|
|
1
1
|
# rubocop:disable Style/StderrPuts
|
2
|
-
# rubocop:disable Style/GlobalVars
|
3
2
|
|
4
3
|
if RUBY_ENGINE != "ruby" || Gem.win_platform?
|
5
4
|
$stderr.puts(
|
@@ -12,38 +11,28 @@ end
|
|
12
11
|
|
13
12
|
require "mkmf"
|
14
13
|
|
15
|
-
# mkmf on modern Rubies actually has an append_cflags that does something similar
|
16
|
-
# (see https://github.com/ruby/ruby/pull/5760), but as usual we need a bit more boilerplate to deal with legacy Rubies
|
17
|
-
def add_compiler_flag(flag)
|
18
|
-
if try_cflags(flag)
|
19
|
-
$CFLAGS << " " << flag
|
20
|
-
else
|
21
|
-
$stderr.puts("WARNING: '#{flag}' not accepted by compiler, skipping it")
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
14
|
# Because we can't control what compiler versions our customers use, shipping with -Werror by default is a no-go.
|
26
15
|
# But we can enable it in CI, so that we quickly spot any new warnings that just got introduced.
|
27
|
-
|
16
|
+
append_cflags "-Werror" if ENV["DATADOG_GEM_CI"] == "true"
|
28
17
|
|
29
18
|
# Older gcc releases may not default to C99 and we need to ask for this. This is also used:
|
30
19
|
# * by upstream Ruby -- search for gnu99 in the codebase
|
31
20
|
# * by msgpack, another datadog gem dependency
|
32
21
|
# (https://github.com/msgpack/msgpack-ruby/blob/18ce08f6d612fe973843c366ac9a0b74c4e50599/ext/msgpack/extconf.rb#L8)
|
33
|
-
|
22
|
+
append_cflags "-std=gnu99"
|
34
23
|
|
35
24
|
# Gets really noisy when we include the MJIT header, let's omit it (TODO: Use #pragma GCC diagnostic instead?)
|
36
|
-
|
25
|
+
append_cflags "-Wno-unused-function"
|
37
26
|
|
38
27
|
# Allow defining variables at any point in a function
|
39
|
-
|
28
|
+
append_cflags "-Wno-declaration-after-statement"
|
40
29
|
|
41
30
|
# If we forget to include a Ruby header, the function call may still appear to work, but then
|
42
31
|
# cause a segfault later. Let's ensure that never happens.
|
43
|
-
|
32
|
+
append_cflags "-Werror-implicit-function-declaration"
|
44
33
|
|
45
34
|
# Warn on unused parameters to functions. Use `DDTRACE_UNUSED` to mark things as known-to-not-be-used.
|
46
|
-
|
35
|
+
append_cflags "-Wunused-parameter"
|
47
36
|
|
48
37
|
# The native extension is not intended to expose any symbols/functions for other native libraries to use;
|
49
38
|
# the sole exception being `Init_datadog_profiling_loader` which needs to be visible for Ruby to call it when
|
@@ -51,14 +40,14 @@ add_compiler_flag "-Wunused-parameter"
|
|
51
40
|
#
|
52
41
|
# By setting this compiler flag, we tell it to assume that everything is private unless explicitly stated.
|
53
42
|
# For more details see https://gcc.gnu.org/wiki/Visibility
|
54
|
-
|
43
|
+
append_cflags "-fvisibility=hidden"
|
55
44
|
|
56
45
|
# Avoid legacy C definitions
|
57
|
-
|
46
|
+
append_cflags "-Wold-style-definition"
|
58
47
|
|
59
48
|
# Enable all other compiler warnings
|
60
|
-
|
61
|
-
|
49
|
+
append_cflags "-Wall"
|
50
|
+
append_cflags "-Wextra"
|
62
51
|
|
63
52
|
# Tag the native extension library with the Ruby version and Ruby platform.
|
64
53
|
# This makes it easier for development (avoids "oops I forgot to rebuild when I switched my Ruby") and ensures that
|
@@ -68,5 +57,4 @@ EXTENSION_NAME = "datadog_profiling_loader.#{RUBY_VERSION}_#{RUBY_PLATFORM}".fre
|
|
68
57
|
|
69
58
|
create_makefile(EXTENSION_NAME)
|
70
59
|
|
71
|
-
# rubocop:enable Style/GlobalVars
|
72
60
|
# rubocop:enable Style/StderrPuts
|
@@ -68,7 +68,7 @@ internal types, structures and functions).
|
|
68
68
|
Because these private header files are not included in regular Ruby installations, we have two different workarounds:
|
69
69
|
|
70
70
|
1. for Ruby versions 2.6 to 3.2 we make use of the Ruby private MJIT header
|
71
|
-
2. for Ruby versions < 2.6 and > 3.2 we make use of the `debase-ruby_core_source` gem
|
71
|
+
2. for Ruby versions < 2.6 and > 3.2 we make use of the `datadog-ruby_core_source` gem
|
72
72
|
|
73
73
|
Functions which make use of these headers are defined in the <private_vm_api_access.c> file.
|
74
74
|
|
@@ -91,9 +91,9 @@ version. e.g. `rb_mjit_min_header-2.7.4.h`.
|
|
91
91
|
|
92
92
|
This header was removed in Ruby 3.3.
|
93
93
|
|
94
|
-
### Approach 2: Using the `debase-ruby_core_source` gem
|
94
|
+
### Approach 2: Using the `datadog-ruby_core_source` gem
|
95
95
|
|
96
|
-
The [`
|
96
|
+
The [`datadog-ruby_core_source`](https://github.com/DataDog/datadog-ruby_core_source) contains almost no code;
|
97
97
|
instead, it just contains per-Ruby-version folders with the private VM headers (`.h`) files for that version.
|
98
98
|
|
99
99
|
Thus, even though a regular Ruby installation does not include these files, we can access the copy inside this gem.
|
@@ -17,13 +17,6 @@
|
|
17
17
|
#include "setup_signal_handler.h"
|
18
18
|
#include "time_helpers.h"
|
19
19
|
|
20
|
-
#define ERR_CLOCK_FAIL "failed to get clock time"
|
21
|
-
|
22
|
-
// Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
|
23
|
-
// See https://docs.google.com/document/d/1lWLB714wlLBBq6T4xZyAc4a5wtWhSmr4-hgiPKeErlA/edit#heading=h.ugp0zxcj5iqh
|
24
|
-
// (Datadog-only link) for research backing the choice of this value.
|
25
|
-
unsigned int MAX_ALLOC_WEIGHT = 10000;
|
26
|
-
|
27
20
|
// Used to trigger the execution of Collectors::ThreadState, which implements all of the sampling logic
|
28
21
|
// itself; this class only implements the "when to do it" part.
|
29
22
|
//
|
@@ -83,11 +76,19 @@ unsigned int MAX_ALLOC_WEIGHT = 10000;
|
|
83
76
|
//
|
84
77
|
// ---
|
85
78
|
|
79
|
+
#define ERR_CLOCK_FAIL "failed to get clock time"
|
80
|
+
|
81
|
+
// Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
|
82
|
+
// See https://docs.google.com/document/d/1lWLB714wlLBBq6T4xZyAc4a5wtWhSmr4-hgiPKeErlA/edit#heading=h.ugp0zxcj5iqh
|
83
|
+
// (Datadog-only link) for research backing the choice of this value.
|
84
|
+
unsigned int MAX_ALLOC_WEIGHT = 10000;
|
85
|
+
|
86
86
|
#ifndef NO_POSTPONED_TRIGGER
|
87
87
|
// Used to call the rb_postponed_job_trigger from Ruby 3.3+. These get initialized in
|
88
88
|
// `collectors_cpu_and_wall_time_worker_init` below and always get reused after that.
|
89
89
|
static rb_postponed_job_handle_t sample_from_postponed_job_handle;
|
90
90
|
static rb_postponed_job_handle_t after_gc_from_postponed_job_handle;
|
91
|
+
static rb_postponed_job_handle_t after_gvl_running_from_postponed_job_handle;
|
91
92
|
#endif
|
92
93
|
|
93
94
|
// Contains state for a single CpuAndWallTimeWorker instance
|
@@ -99,6 +100,7 @@ struct cpu_and_wall_time_worker_state {
|
|
99
100
|
bool dynamic_sampling_rate_enabled;
|
100
101
|
bool allocation_profiling_enabled;
|
101
102
|
bool allocation_counting_enabled;
|
103
|
+
bool gvl_profiling_enabled;
|
102
104
|
bool skip_idle_samples_for_testing;
|
103
105
|
VALUE self_instance;
|
104
106
|
VALUE thread_context_collector_instance;
|
@@ -123,6 +125,11 @@ struct cpu_and_wall_time_worker_state {
|
|
123
125
|
// that happens during another sample.
|
124
126
|
bool during_sample;
|
125
127
|
|
128
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
129
|
+
// Only set when sampling is active (gets created at start and cleaned on stop)
|
130
|
+
rb_internal_thread_event_hook_t *gvl_profiling_hook;
|
131
|
+
#endif
|
132
|
+
|
126
133
|
struct stats {
|
127
134
|
// # Generic stats
|
128
135
|
// How many times we tried to trigger a sample
|
@@ -169,23 +176,21 @@ struct cpu_and_wall_time_worker_state {
|
|
169
176
|
uint64_t allocation_sampling_time_ns_total;
|
170
177
|
// How many times we saw allocations being done inside a sample
|
171
178
|
unsigned int allocations_during_sample;
|
179
|
+
|
180
|
+
// # GVL profiling stats
|
181
|
+
// How many times we triggered the after_gvl_running sampling
|
182
|
+
unsigned int after_gvl_running;
|
183
|
+
// How many times we skipped the after_gvl_running sampling
|
184
|
+
unsigned int gvl_dont_sample;
|
185
|
+
// Min/max/total wall-time spent on gvl sampling
|
186
|
+
uint64_t gvl_sampling_time_ns_min;
|
187
|
+
uint64_t gvl_sampling_time_ns_max;
|
188
|
+
uint64_t gvl_sampling_time_ns_total;
|
172
189
|
} stats;
|
173
190
|
};
|
174
191
|
|
175
192
|
static VALUE _native_new(VALUE klass);
|
176
|
-
static VALUE _native_initialize(
|
177
|
-
DDTRACE_UNUSED VALUE _self,
|
178
|
-
VALUE self_instance,
|
179
|
-
VALUE thread_context_collector_instance,
|
180
|
-
VALUE gc_profiling_enabled,
|
181
|
-
VALUE idle_sampling_helper_instance,
|
182
|
-
VALUE no_signals_workaround_enabled,
|
183
|
-
VALUE dynamic_sampling_rate_enabled,
|
184
|
-
VALUE dynamic_sampling_rate_overhead_target_percentage,
|
185
|
-
VALUE allocation_profiling_enabled,
|
186
|
-
VALUE allocation_counting_enabled,
|
187
|
-
VALUE skip_idle_samples_for_testing
|
188
|
-
);
|
193
|
+
static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
|
189
194
|
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
|
190
195
|
static VALUE _native_sampling_loop(VALUE self, VALUE instance);
|
191
196
|
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE worker_thread);
|
@@ -227,6 +232,12 @@ static void delayed_error(struct cpu_and_wall_time_worker_state *state, const ch
|
|
227
232
|
static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg);
|
228
233
|
static VALUE _native_hold_signals(DDTRACE_UNUSED VALUE self);
|
229
234
|
static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self);
|
235
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
236
|
+
static void on_gvl_event(rb_event_flag_t event_id, const rb_internal_thread_event_data_t *event_data, DDTRACE_UNUSED void *_unused);
|
237
|
+
static void after_gvl_running_from_postponed_job(DDTRACE_UNUSED void *_unused);
|
238
|
+
#endif
|
239
|
+
static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance);
|
240
|
+
static VALUE _native_gvl_profiling_hook_active(DDTRACE_UNUSED VALUE self, VALUE instance);
|
230
241
|
|
231
242
|
// We're using `on_newobj_event` function with `rb_add_event_hook2`, which requires in its public signature a function
|
232
243
|
// with signature `rb_event_hook_func_t` which doesn't match `on_newobj_event`.
|
@@ -272,8 +283,13 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
272
283
|
int unused_flags = 0;
|
273
284
|
sample_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, sample_from_postponed_job, NULL);
|
274
285
|
after_gc_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, after_gc_from_postponed_job, NULL);
|
286
|
+
after_gvl_running_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, after_gvl_running_from_postponed_job, NULL);
|
275
287
|
|
276
|
-
if (
|
288
|
+
if (
|
289
|
+
sample_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID ||
|
290
|
+
after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID ||
|
291
|
+
after_gvl_running_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID
|
292
|
+
) {
|
277
293
|
rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
|
278
294
|
}
|
279
295
|
#else
|
@@ -295,7 +311,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
295
311
|
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
|
296
312
|
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
|
297
313
|
|
298
|
-
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize,
|
314
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, -1);
|
299
315
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
|
300
316
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
|
301
317
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
|
@@ -306,8 +322,6 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
306
322
|
rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
|
307
323
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_hold_signals", _native_hold_signals, 0);
|
308
324
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_resume_signals", _native_resume_signals, 0);
|
309
|
-
// TODO: Remove `_native_is_running` from `testing_module` (should be in class) once `prof-correctness` has been updated to not need it
|
310
|
-
rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
|
311
325
|
rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
|
312
326
|
rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
|
313
327
|
rb_define_singleton_method(testing_module, "_native_trigger_sample", _native_trigger_sample, 0);
|
@@ -317,6 +331,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
317
331
|
rb_define_singleton_method(testing_module, "_native_is_sigprof_blocked_in_current_thread", _native_is_sigprof_blocked_in_current_thread, 0);
|
318
332
|
rb_define_singleton_method(testing_module, "_native_with_blocked_sigprof", _native_with_blocked_sigprof, 0);
|
319
333
|
rb_define_singleton_method(testing_module, "_native_delayed_error", _native_delayed_error, 2);
|
334
|
+
rb_define_singleton_method(testing_module, "_native_gvl_profiling_hook_active", _native_gvl_profiling_hook_active, 1);
|
320
335
|
}
|
321
336
|
|
322
337
|
// This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_worker_state
|
@@ -345,6 +360,7 @@ static VALUE _native_new(VALUE klass) {
|
|
345
360
|
state->dynamic_sampling_rate_enabled = true;
|
346
361
|
state->allocation_profiling_enabled = false;
|
347
362
|
state->allocation_counting_enabled = false;
|
363
|
+
state->gvl_profiling_enabled = false;
|
348
364
|
state->skip_idle_samples_for_testing = false;
|
349
365
|
state->thread_context_collector_instance = Qnil;
|
350
366
|
state->idle_sampling_helper_instance = Qnil;
|
@@ -358,6 +374,10 @@ static VALUE _native_new(VALUE klass) {
|
|
358
374
|
|
359
375
|
state->during_sample = false;
|
360
376
|
|
377
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
378
|
+
state->gvl_profiling_hook = NULL;
|
379
|
+
#endif
|
380
|
+
|
361
381
|
reset_stats_not_thread_safe(state);
|
362
382
|
discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation", now);
|
363
383
|
|
@@ -368,25 +388,30 @@ static VALUE _native_new(VALUE klass) {
|
|
368
388
|
return state->self_instance = TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
|
369
389
|
}
|
370
390
|
|
371
|
-
static VALUE _native_initialize(
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
VALUE
|
377
|
-
VALUE
|
378
|
-
VALUE
|
379
|
-
VALUE
|
380
|
-
VALUE
|
381
|
-
VALUE
|
382
|
-
VALUE
|
383
|
-
)
|
391
|
+
static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) {
|
392
|
+
VALUE options;
|
393
|
+
rb_scan_args(argc, argv, "0:", &options);
|
394
|
+
if (options == Qnil) options = rb_hash_new();
|
395
|
+
|
396
|
+
VALUE self_instance = rb_hash_fetch(options, ID2SYM(rb_intern("self_instance")));
|
397
|
+
VALUE thread_context_collector_instance = rb_hash_fetch(options, ID2SYM(rb_intern("thread_context_collector")));
|
398
|
+
VALUE gc_profiling_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("gc_profiling_enabled")));
|
399
|
+
VALUE idle_sampling_helper_instance = rb_hash_fetch(options, ID2SYM(rb_intern("idle_sampling_helper")));
|
400
|
+
VALUE no_signals_workaround_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("no_signals_workaround_enabled")));
|
401
|
+
VALUE dynamic_sampling_rate_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("dynamic_sampling_rate_enabled")));
|
402
|
+
VALUE dynamic_sampling_rate_overhead_target_percentage = rb_hash_fetch(options, ID2SYM(rb_intern("dynamic_sampling_rate_overhead_target_percentage")));
|
403
|
+
VALUE allocation_profiling_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("allocation_profiling_enabled")));
|
404
|
+
VALUE allocation_counting_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("allocation_counting_enabled")));
|
405
|
+
VALUE gvl_profiling_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("gvl_profiling_enabled")));
|
406
|
+
VALUE skip_idle_samples_for_testing = rb_hash_fetch(options, ID2SYM(rb_intern("skip_idle_samples_for_testing")));
|
407
|
+
|
384
408
|
ENFORCE_BOOLEAN(gc_profiling_enabled);
|
385
409
|
ENFORCE_BOOLEAN(no_signals_workaround_enabled);
|
386
410
|
ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
|
387
411
|
ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
|
388
412
|
ENFORCE_BOOLEAN(allocation_profiling_enabled);
|
389
413
|
ENFORCE_BOOLEAN(allocation_counting_enabled);
|
414
|
+
ENFORCE_BOOLEAN(gvl_profiling_enabled);
|
390
415
|
ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
|
391
416
|
|
392
417
|
struct cpu_and_wall_time_worker_state *state;
|
@@ -397,6 +422,7 @@ static VALUE _native_initialize(
|
|
397
422
|
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
|
398
423
|
state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
|
399
424
|
state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
|
425
|
+
state->gvl_profiling_enabled = (gvl_profiling_enabled == Qtrue);
|
400
426
|
state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
|
401
427
|
|
402
428
|
double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
|
@@ -781,6 +807,27 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
|
|
781
807
|
;
|
782
808
|
}
|
783
809
|
|
810
|
+
if (state->gvl_profiling_enabled) {
|
811
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
812
|
+
#ifdef USE_GVL_PROFILING_3_2_WORKAROUNDS
|
813
|
+
gvl_profiling_state_thread_tracking_workaround();
|
814
|
+
#endif
|
815
|
+
|
816
|
+
state->gvl_profiling_hook = rb_internal_thread_add_event_hook(
|
817
|
+
on_gvl_event,
|
818
|
+
(
|
819
|
+
// For now we're only asking for these events, even though there's more
|
820
|
+
// (e.g. check docs or gvl-tracing gem)
|
821
|
+
RUBY_INTERNAL_THREAD_EVENT_READY /* waiting for gvl */ |
|
822
|
+
RUBY_INTERNAL_THREAD_EVENT_RESUMED /* running/runnable */
|
823
|
+
),
|
824
|
+
NULL
|
825
|
+
);
|
826
|
+
#else
|
827
|
+
rb_raise(rb_eArgError, "GVL profiling is not supported in this Ruby version");
|
828
|
+
#endif
|
829
|
+
}
|
830
|
+
|
784
831
|
// Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
|
785
832
|
rb_funcall(instance, rb_intern("signal_running"), 0);
|
786
833
|
|
@@ -892,7 +939,6 @@ static void after_gc_from_postponed_job(DDTRACE_UNUSED void *_unused) {
|
|
892
939
|
|
893
940
|
state->during_sample = true;
|
894
941
|
|
895
|
-
// Trigger sampling using the Collectors::ThreadState; rescue against any exceptions that happen during sampling
|
896
942
|
safely_call(thread_context_collector_sample_after_gc, state->thread_context_collector_instance, state->self_instance);
|
897
943
|
|
898
944
|
state->during_sample = false;
|
@@ -999,6 +1045,14 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
999
1045
|
ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.allocation_sampling_time_ns_total, state->stats.allocation_sampled),
|
1000
1046
|
ID2SYM(rb_intern("allocation_sampler_snapshot")), /* => */ allocation_sampler_snapshot,
|
1001
1047
|
ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
|
1048
|
+
|
1049
|
+
// GVL profiling stats
|
1050
|
+
ID2SYM(rb_intern("after_gvl_running")), /* => */ UINT2NUM(state->stats.after_gvl_running),
|
1051
|
+
ID2SYM(rb_intern("gvl_dont_sample")), /* => */ UINT2NUM(state->stats.gvl_dont_sample),
|
1052
|
+
ID2SYM(rb_intern("gvl_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
|
1053
|
+
ID2SYM(rb_intern("gvl_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_max, > 0, ULL2NUM),
|
1054
|
+
ID2SYM(rb_intern("gvl_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_total, > 0, ULL2NUM),
|
1055
|
+
ID2SYM(rb_intern("gvl_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.gvl_sampling_time_ns_total, state->stats.after_gvl_running),
|
1002
1056
|
};
|
1003
1057
|
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
|
1004
1058
|
return stats_as_hash;
|
@@ -1036,8 +1090,10 @@ static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *s
|
|
1036
1090
|
// Given the expected infrequency of resetting (~once per 60s profile) and the auxiliary/non-critical nature of these stats
|
1037
1091
|
// this momentary loss of accuracy is deemed acceptable to keep overhead to a minimum.
|
1038
1092
|
state->stats = (struct stats) {
|
1039
|
-
|
1040
|
-
.
|
1093
|
+
// All these values are initialized to their highest value possible since we always take the min between existing and latest sample
|
1094
|
+
.cpu_sampling_time_ns_min = UINT64_MAX,
|
1095
|
+
.allocation_sampling_time_ns_min = UINT64_MAX,
|
1096
|
+
.gvl_sampling_time_ns_min = UINT64_MAX,
|
1041
1097
|
};
|
1042
1098
|
}
|
1043
1099
|
|
@@ -1173,7 +1229,15 @@ static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) {
|
|
1173
1229
|
if (state->gc_tracepoint != Qnil) {
|
1174
1230
|
rb_tracepoint_disable(state->gc_tracepoint);
|
1175
1231
|
}
|
1232
|
+
|
1176
1233
|
rb_remove_event_hook_with_data(on_newobj_event_as_hook, state->self_instance);
|
1234
|
+
|
1235
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
1236
|
+
if (state->gvl_profiling_hook) {
|
1237
|
+
rb_internal_thread_remove_event_hook(state->gvl_profiling_hook);
|
1238
|
+
state->gvl_profiling_hook = NULL;
|
1239
|
+
}
|
1240
|
+
#endif
|
1177
1241
|
}
|
1178
1242
|
|
1179
1243
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
|
@@ -1211,7 +1275,8 @@ static VALUE rescued_sample_allocation(DDTRACE_UNUSED VALUE unused) {
|
|
1211
1275
|
thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
|
1212
1276
|
// ...but we still represent the skipped samples in the profile, thus the data will account for all allocations.
|
1213
1277
|
if (weight < allocations_since_last_sample) {
|
1214
|
-
|
1278
|
+
uint32_t skipped_samples = (uint32_t) uint64_min_of(allocations_since_last_sample - weight, UINT32_MAX);
|
1279
|
+
thread_context_collector_sample_skipped_allocation_samples(state->thread_context_collector_instance, skipped_samples);
|
1215
1280
|
}
|
1216
1281
|
|
1217
1282
|
// Return a dummy VALUE because we're called from rb_rescue2 which requires it
|
@@ -1247,3 +1312,95 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {
|
|
1247
1312
|
unblock_sigprof_signal_handler_from_running_in_current_thread();
|
1248
1313
|
return Qtrue;
|
1249
1314
|
}
|
1315
|
+
|
1316
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
1317
|
+
static void on_gvl_event(rb_event_flag_t event_id, const rb_internal_thread_event_data_t *event_data, DDTRACE_UNUSED void *_unused) {
|
1318
|
+
// Be very careful about touching the `state` here or doing anything at all:
|
1319
|
+
// This function gets called without the GVL, and potentially from background Ractors!
|
1320
|
+
//
|
1321
|
+
// In fact, the `target_thread` that this event is about may not even be the current thread. (So be careful with thread locals that
|
1322
|
+
// are not directly tied to the `target_thread` object and the like)
|
1323
|
+
gvl_profiling_thread target_thread = thread_from_event(event_data);
|
1324
|
+
|
1325
|
+
if (event_id == RUBY_INTERNAL_THREAD_EVENT_READY) { /* waiting for gvl */
|
1326
|
+
thread_context_collector_on_gvl_waiting(target_thread);
|
1327
|
+
} else if (event_id == RUBY_INTERNAL_THREAD_EVENT_RESUMED) { /* running/runnable */
|
1328
|
+
// Interesting note: A RUBY_INTERNAL_THREAD_EVENT_RESUMED is guaranteed to be called with the GVL being acquired.
|
1329
|
+
// (And... I think target_thread will be == rb_thread_current()?)
|
1330
|
+
//
|
1331
|
+
// But we're not sure if we're on the main Ractor yet. The thread context collector can actually help here:
|
1332
|
+
// it tags threads it's tracking, so if a thread is tagged then by definition we know that thread belongs to the main
|
1333
|
+
// Ractor. Thus, if we get an ON_GVL_RUNNING_UNKNOWN result we shouldn't touch any state, but otherwise we're good to go.
|
1334
|
+
|
1335
|
+
#ifdef USE_GVL_PROFILING_3_2_WORKAROUNDS
|
1336
|
+
target_thread = gvl_profiling_state_maybe_initialize();
|
1337
|
+
#endif
|
1338
|
+
|
1339
|
+
on_gvl_running_result result = thread_context_collector_on_gvl_running(target_thread);
|
1340
|
+
|
1341
|
+
if (result == ON_GVL_RUNNING_SAMPLE) {
|
1342
|
+
#ifndef NO_POSTPONED_TRIGGER
|
1343
|
+
rb_postponed_job_trigger(after_gvl_running_from_postponed_job_handle);
|
1344
|
+
#else
|
1345
|
+
rb_postponed_job_register_one(0, after_gvl_running_from_postponed_job, NULL);
|
1346
|
+
#endif
|
1347
|
+
} else if (result == ON_GVL_RUNNING_DONT_SAMPLE) {
|
1348
|
+
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
1349
|
+
|
1350
|
+
if (state == NULL) return; // This should not happen, but just in case...
|
1351
|
+
|
1352
|
+
state->stats.gvl_dont_sample++;
|
1353
|
+
}
|
1354
|
+
} else {
|
1355
|
+
// This is a very delicate time and it's hard for us to raise an exception so let's at least complain to stderr
|
1356
|
+
fprintf(stderr, "[ddtrace] Unexpected value in on_gvl_event (%d)\n", event_id);
|
1357
|
+
}
|
1358
|
+
}
|
1359
|
+
|
1360
|
+
static void after_gvl_running_from_postponed_job(DDTRACE_UNUSED void *_unused) {
|
1361
|
+
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
1362
|
+
|
1363
|
+
// This can potentially happen if the CpuAndWallTimeWorker was stopped while the postponed job was waiting to be executed; nothing to do
|
1364
|
+
if (state == NULL) return;
|
1365
|
+
|
1366
|
+
state->during_sample = true;
|
1367
|
+
|
1368
|
+
// Rescue against any exceptions that happen during sampling
|
1369
|
+
safely_call(rescued_after_gvl_running_from_postponed_job, state->self_instance, state->self_instance);
|
1370
|
+
|
1371
|
+
state->during_sample = false;
|
1372
|
+
}
|
1373
|
+
|
1374
|
+
static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance) {
|
1375
|
+
struct cpu_and_wall_time_worker_state *state;
|
1376
|
+
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
1377
|
+
|
1378
|
+
long wall_time_ns_before_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
1379
|
+
thread_context_collector_sample_after_gvl_running(state->thread_context_collector_instance, rb_thread_current(), wall_time_ns_before_sample);
|
1380
|
+
long wall_time_ns_after_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
1381
|
+
|
1382
|
+
long delta_ns = wall_time_ns_after_sample - wall_time_ns_before_sample;
|
1383
|
+
|
1384
|
+
// Guard against wall-time going backwards, see https://github.com/DataDog/dd-trace-rb/pull/2336 for discussion.
|
1385
|
+
uint64_t sampling_time_ns = delta_ns < 0 ? 0 : delta_ns;
|
1386
|
+
|
1387
|
+
state->stats.gvl_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.gvl_sampling_time_ns_min);
|
1388
|
+
state->stats.gvl_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.gvl_sampling_time_ns_max);
|
1389
|
+
state->stats.gvl_sampling_time_ns_total += sampling_time_ns;
|
1390
|
+
|
1391
|
+
state->stats.after_gvl_running++;
|
1392
|
+
|
1393
|
+
return Qnil;
|
1394
|
+
}
|
1395
|
+
|
1396
|
+
static VALUE _native_gvl_profiling_hook_active(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
1397
|
+
struct cpu_and_wall_time_worker_state *state;
|
1398
|
+
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
1399
|
+
|
1400
|
+
return state->gvl_profiling_hook != NULL ? Qtrue : Qfalse;
|
1401
|
+
}
|
1402
|
+
#else
|
1403
|
+
static VALUE _native_gvl_profiling_hook_active(DDTRACE_UNUSED VALUE self, DDTRACE_UNUSED VALUE instance) {
|
1404
|
+
return Qfalse;
|
1405
|
+
}
|
1406
|
+
#endif
|
@@ -92,7 +92,7 @@ double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler) {
|
|
92
92
|
return sampler->sampling_probability * 100.;
|
93
93
|
}
|
94
94
|
|
95
|
-
|
95
|
+
unsigned long discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler) {
|
96
96
|
return sampler->events_since_last_sample;
|
97
97
|
}
|
98
98
|
|
@@ -259,7 +259,9 @@ void discrete_dynamic_sampler_readjust(discrete_dynamic_sampler *sampler, long n
|
|
259
259
|
// are so big they don't fit into the sampling_interval. In both cases let's just disable sampling until next readjustment
|
260
260
|
// by setting interval to 0.
|
261
261
|
double sampling_interval = sampler->sampling_probability == 0 ? 0 : ceil(1.0 / sampler->sampling_probability);
|
262
|
-
|
262
|
+
// NOTE: We use UINT32_MAX instead of ULONG_MAX here to avoid clang warnings; in practice, we shouldn't ever hit
|
263
|
+
// such high sampling intervals.
|
264
|
+
sampler->sampling_interval = sampling_interval > UINT32_MAX ? 0 : sampling_interval;
|
263
265
|
|
264
266
|
#ifdef DD_DEBUG
|
265
267
|
double allocs_in_60s = sampler->events_per_ns * 1e9 * 60;
|