datadog 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +51 -2
- data/ext/datadog_profiling_loader/extconf.rb +15 -15
- data/ext/datadog_profiling_native_extension/clock_id.h +1 -0
- data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +1 -2
- data/ext/datadog_profiling_native_extension/clock_id_noop.c +1 -2
- data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +113 -43
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +49 -26
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +34 -4
- data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +4 -0
- data/ext/datadog_profiling_native_extension/collectors_stack.c +49 -37
- data/ext/datadog_profiling_native_extension/collectors_stack.h +2 -2
- data/ext/datadog_profiling_native_extension/collectors_thread_context.c +81 -19
- data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +110 -0
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +57 -0
- data/ext/datadog_profiling_native_extension/extconf.rb +65 -60
- data/ext/datadog_profiling_native_extension/heap_recorder.c +34 -6
- data/ext/datadog_profiling_native_extension/heap_recorder.h +3 -1
- data/ext/datadog_profiling_native_extension/helpers.h +6 -17
- data/ext/datadog_profiling_native_extension/http_transport.c +3 -3
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +0 -86
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +2 -23
- data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +61 -172
- data/ext/datadog_profiling_native_extension/private_vm_api_access.c +64 -138
- data/ext/datadog_profiling_native_extension/private_vm_api_access.h +17 -11
- data/ext/datadog_profiling_native_extension/profiling.c +0 -2
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +0 -33
- data/ext/datadog_profiling_native_extension/ruby_helpers.h +1 -26
- data/ext/datadog_profiling_native_extension/setup_signal_handler.h +1 -0
- data/ext/datadog_profiling_native_extension/stack_recorder.c +14 -2
- data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
- data/ext/datadog_profiling_native_extension/time_helpers.c +0 -15
- data/ext/datadog_profiling_native_extension/time_helpers.h +36 -6
- data/ext/{datadog_profiling_native_extension → libdatadog_api}/crashtracker.c +19 -6
- data/ext/libdatadog_api/datadog_ruby_common.c +110 -0
- data/ext/libdatadog_api/datadog_ruby_common.h +57 -0
- data/ext/libdatadog_api/extconf.rb +108 -0
- data/ext/libdatadog_api/macos_development.md +26 -0
- data/ext/libdatadog_extconf_helpers.rb +130 -0
- data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +49 -0
- data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +73 -0
- data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +68 -0
- data/lib/datadog/appsec/contrib/graphql/integration.rb +41 -0
- data/lib/datadog/appsec/contrib/graphql/patcher.rb +37 -0
- data/lib/datadog/appsec/contrib/graphql/reactive/multiplex.rb +59 -0
- data/lib/datadog/appsec/contrib/rack/gateway/request.rb +1 -1
- data/lib/datadog/appsec/processor/actions.rb +1 -1
- data/lib/datadog/appsec/response.rb +15 -1
- data/lib/datadog/appsec.rb +1 -0
- data/lib/datadog/core/configuration/components.rb +14 -12
- data/lib/datadog/core/configuration/settings.rb +54 -7
- data/lib/datadog/core/crashtracking/agent_base_url.rb +21 -0
- data/lib/datadog/core/crashtracking/component.rb +111 -0
- data/lib/datadog/core/crashtracking/tag_builder.rb +39 -0
- data/lib/datadog/core/diagnostics/environment_logger.rb +8 -11
- data/lib/datadog/core/telemetry/component.rb +49 -2
- data/lib/datadog/core/telemetry/emitter.rb +9 -11
- data/lib/datadog/core/telemetry/event.rb +32 -1
- data/lib/datadog/core/telemetry/ext.rb +1 -0
- data/lib/datadog/core/telemetry/http/adapters/net.rb +10 -12
- data/lib/datadog/core/telemetry/http/ext.rb +3 -0
- data/lib/datadog/core/telemetry/http/transport.rb +38 -9
- data/lib/datadog/core/telemetry/logging.rb +35 -0
- data/lib/datadog/core/utils/at_fork_monkey_patch.rb +102 -0
- data/lib/datadog/kit/appsec/events.rb +2 -4
- data/lib/datadog/opentelemetry/sdk/span_processor.rb +10 -0
- data/lib/datadog/opentelemetry/sdk/trace/span.rb +23 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +7 -7
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +17 -17
- data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +11 -13
- data/lib/datadog/profiling/collectors/info.rb +3 -3
- data/lib/datadog/profiling/collectors/thread_context.rb +4 -2
- data/lib/datadog/profiling/component.rb +69 -91
- data/lib/datadog/profiling/exporter.rb +3 -3
- data/lib/datadog/profiling/ext/dir_monkey_patches.rb +3 -3
- data/lib/datadog/profiling/ext.rb +21 -21
- data/lib/datadog/profiling/flush.rb +1 -1
- data/lib/datadog/profiling/http_transport.rb +8 -6
- data/lib/datadog/profiling/load_native_extension.rb +5 -5
- data/lib/datadog/profiling/preload.rb +1 -1
- data/lib/datadog/profiling/profiler.rb +5 -8
- data/lib/datadog/profiling/scheduler.rb +31 -25
- data/lib/datadog/profiling/tag_builder.rb +2 -2
- data/lib/datadog/profiling/tasks/exec.rb +5 -5
- data/lib/datadog/profiling/tasks/setup.rb +16 -35
- data/lib/datadog/profiling.rb +4 -5
- data/lib/datadog/tracing/contrib/active_record/events/sql.rb +1 -0
- data/lib/datadog/tracing/contrib/ext.rb +14 -0
- data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +1 -1
- data/lib/datadog/tracing/contrib/graphql/unified_trace_patcher.rb +4 -1
- data/lib/datadog/tracing/contrib/lograge/patcher.rb +16 -0
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +17 -13
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +4 -1
- data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +28 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +5 -1
- data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +22 -10
- data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +4 -1
- data/lib/datadog/tracing/diagnostics/environment_logger.rb +14 -16
- data/lib/datadog/tracing/metadata/errors.rb +9 -1
- data/lib/datadog/tracing/metadata/ext.rb +4 -0
- data/lib/datadog/tracing/pipeline/span_filter.rb +2 -2
- data/lib/datadog/tracing/span.rb +9 -2
- data/lib/datadog/tracing/span_event.rb +41 -0
- data/lib/datadog/tracing/span_operation.rb +6 -2
- data/lib/datadog/tracing/transport/serializable_trace.rb +3 -0
- data/lib/datadog/version.rb +1 -1
- metadata +28 -10
- data/lib/datadog/profiling/crashtracker.rb +0 -91
- data/lib/datadog/profiling/ext/forking.rb +0 -98
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d6610c8ef7e86c023f8a3fca884807bd7e9cf6b84fc6cbdd79b98e8a8762c2e
|
4
|
+
data.tar.gz: 236fafc4b8e2c809552d97c8eb025654c0ef4279685c741ad9504b2af8265b94
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e633db76f69b5d151629cde5e1b7024a6bea43343aa348ea50857d5e74d049d2b6a833be252c6c1c7aee95976805a795e1607a81d42e8c3246126abd9811777d
|
7
|
+
data.tar.gz: fb2bcf3803689d8e499f2266e7d063f1c5b9b7d08bb28b5d27f81bd16e7e373c01c1d5224027d43229a5ca609f66c5179aa521ef272f347d069ca73e3613bdc3
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,34 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
+
## [2.3.0] - 2024-08-22
|
6
|
+
|
7
|
+
### Added
|
8
|
+
|
9
|
+
* Core: Support agentless telemetry ([#3779][])
|
10
|
+
* Tracing: Add support for span events ([#3776][])
|
11
|
+
* Tracing: Add tags to enable inferred service dependencies for databases ([#3789][])
|
12
|
+
* Tracing: Emit log message and instructions for incompatible Lograge setup ([#3812][], [#3839][])
|
13
|
+
* Tracing: Add `append_comment` option to append SQL comment propagation for `mysql`, `pg` and `trilogy` ([#3809][])
|
14
|
+
* AppSec: Add threat detection and protection for `graphql` ([#3769][], [#3814][])
|
15
|
+
|
16
|
+
### Changed
|
17
|
+
|
18
|
+
* Core: Enable crashtracking by default ([#3826][])
|
19
|
+
* Profiling: Reduce allocation overhead ([#3805][], [#3797][])
|
20
|
+
* Profiling: Speed up stack sampling ([#3837][])
|
21
|
+
* Profiling: Upgrade to libdatadog 11 ([#3799][])
|
22
|
+
* Profiling: Disable allocation counting feature by default ([#3798][])
|
23
|
+
* Profiling: Reduce the maximum biased result for allocation samples ([#3793][])
|
24
|
+
* Tracing: Reduce noisy integration logs ([#3785][])
|
25
|
+
|
26
|
+
### Fixed
|
27
|
+
|
28
|
+
* Tracing: Fix `require` issue for `graphql` ([#3813][])
|
29
|
+
* AppSec: Fix an error when parsing http headers with integer value ([#3790][])
|
30
|
+
* AppSec: Fix an error when tracking login failure without `user_id` ([#3841][])
|
31
|
+
* Fix a syntax error for Ruby < 2.4 during single step instrumentation ([#3795][])
|
32
|
+
|
5
33
|
## [2.2.0] - 2024-07-11
|
6
34
|
|
7
35
|
### Added
|
@@ -2934,7 +2962,8 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
|
|
2934
2962
|
Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
2935
2963
|
|
2936
2964
|
|
2937
|
-
[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.
|
2965
|
+
[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.3.0...master
|
2966
|
+
[2.3.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.2.0...v2.3.0
|
2938
2967
|
[2.2.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.1.0...v2.2.0
|
2939
2968
|
[2.1.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.0.0...v2.1.0
|
2940
2969
|
[2.0.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.0.0.rc1...v2.0.0
|
@@ -4331,9 +4360,29 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
|
4331
4360
|
[#3753]: https://github.com/DataDog/dd-trace-rb/issues/3753
|
4332
4361
|
[#3757]: https://github.com/DataDog/dd-trace-rb/issues/3757
|
4333
4362
|
[#3759]: https://github.com/DataDog/dd-trace-rb/issues/3759
|
4363
|
+
[#3769]: https://github.com/DataDog/dd-trace-rb/issues/3769
|
4334
4364
|
[#3770]: https://github.com/DataDog/dd-trace-rb/issues/3770
|
4335
4365
|
[#3772]: https://github.com/DataDog/dd-trace-rb/issues/3772
|
4336
4366
|
[#3774]: https://github.com/DataDog/dd-trace-rb/issues/3774
|
4367
|
+
[#3776]: https://github.com/DataDog/dd-trace-rb/issues/3776
|
4368
|
+
[#3779]: https://github.com/DataDog/dd-trace-rb/issues/3779
|
4369
|
+
[#3785]: https://github.com/DataDog/dd-trace-rb/issues/3785
|
4370
|
+
[#3789]: https://github.com/DataDog/dd-trace-rb/issues/3789
|
4371
|
+
[#3790]: https://github.com/DataDog/dd-trace-rb/issues/3790
|
4372
|
+
[#3793]: https://github.com/DataDog/dd-trace-rb/issues/3793
|
4373
|
+
[#3795]: https://github.com/DataDog/dd-trace-rb/issues/3795
|
4374
|
+
[#3797]: https://github.com/DataDog/dd-trace-rb/issues/3797
|
4375
|
+
[#3798]: https://github.com/DataDog/dd-trace-rb/issues/3798
|
4376
|
+
[#3799]: https://github.com/DataDog/dd-trace-rb/issues/3799
|
4377
|
+
[#3805]: https://github.com/DataDog/dd-trace-rb/issues/3805
|
4378
|
+
[#3809]: https://github.com/DataDog/dd-trace-rb/issues/3809
|
4379
|
+
[#3812]: https://github.com/DataDog/dd-trace-rb/issues/3812
|
4380
|
+
[#3813]: https://github.com/DataDog/dd-trace-rb/issues/3813
|
4381
|
+
[#3814]: https://github.com/DataDog/dd-trace-rb/issues/3814
|
4382
|
+
[#3826]: https://github.com/DataDog/dd-trace-rb/issues/3826
|
4383
|
+
[#3837]: https://github.com/DataDog/dd-trace-rb/issues/3837
|
4384
|
+
[#3839]: https://github.com/DataDog/dd-trace-rb/issues/3839
|
4385
|
+
[#3841]: https://github.com/DataDog/dd-trace-rb/issues/3841
|
4337
4386
|
[@AdrianLC]: https://github.com/AdrianLC
|
4338
4387
|
[@Azure7111]: https://github.com/Azure7111
|
4339
4388
|
[@BabyGroot]: https://github.com/BabyGroot
|
@@ -4485,4 +4534,4 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
|
4485
4534
|
[@y-yagi]: https://github.com/y-yagi
|
4486
4535
|
[@yujideveloper]: https://github.com/yujideveloper
|
4487
4536
|
[@yukimurasawa]: https://github.com/yukimurasawa
|
4488
|
-
[@zachmccormick]: https://github.com/zachmccormick
|
4537
|
+
[@zachmccormick]: https://github.com/zachmccormick
|
@@ -1,22 +1,22 @@
|
|
1
1
|
# rubocop:disable Style/StderrPuts
|
2
2
|
# rubocop:disable Style/GlobalVars
|
3
3
|
|
4
|
-
if RUBY_ENGINE !=
|
4
|
+
if RUBY_ENGINE != "ruby" || Gem.win_platform?
|
5
5
|
$stderr.puts(
|
6
|
-
|
6
|
+
"WARN: Skipping build of Datadog profiling loader. See Datadog profiling native extension note for details."
|
7
7
|
)
|
8
8
|
|
9
|
-
File.write(
|
9
|
+
File.write("Makefile", "all install clean: # dummy makefile that does nothing")
|
10
10
|
exit
|
11
11
|
end
|
12
12
|
|
13
|
-
require
|
13
|
+
require "mkmf"
|
14
14
|
|
15
15
|
# mkmf on modern Rubies actually has an append_cflags that does something similar
|
16
16
|
# (see https://github.com/ruby/ruby/pull/5760), but as usual we need a bit more boilerplate to deal with legacy Rubies
|
17
17
|
def add_compiler_flag(flag)
|
18
18
|
if try_cflags(flag)
|
19
|
-
$CFLAGS <<
|
19
|
+
$CFLAGS << " " << flag
|
20
20
|
else
|
21
21
|
$stderr.puts("WARNING: '#{flag}' not accepted by compiler, skipping it")
|
22
22
|
end
|
@@ -24,26 +24,26 @@ end
|
|
24
24
|
|
25
25
|
# Because we can't control what compiler versions our customers use, shipping with -Werror by default is a no-go.
|
26
26
|
# But we can enable it in CI, so that we quickly spot any new warnings that just got introduced.
|
27
|
-
add_compiler_flag
|
27
|
+
add_compiler_flag "-Werror" if ENV["DATADOG_GEM_CI"] == "true"
|
28
28
|
|
29
29
|
# Older gcc releases may not default to C99 and we need to ask for this. This is also used:
|
30
30
|
# * by upstream Ruby -- search for gnu99 in the codebase
|
31
31
|
# * by msgpack, another datadog gem dependency
|
32
32
|
# (https://github.com/msgpack/msgpack-ruby/blob/18ce08f6d612fe973843c366ac9a0b74c4e50599/ext/msgpack/extconf.rb#L8)
|
33
|
-
add_compiler_flag
|
33
|
+
add_compiler_flag "-std=gnu99"
|
34
34
|
|
35
35
|
# Gets really noisy when we include the MJIT header, let's omit it (TODO: Use #pragma GCC diagnostic instead?)
|
36
|
-
add_compiler_flag
|
36
|
+
add_compiler_flag "-Wno-unused-function"
|
37
37
|
|
38
38
|
# Allow defining variables at any point in a function
|
39
|
-
add_compiler_flag
|
39
|
+
add_compiler_flag "-Wno-declaration-after-statement"
|
40
40
|
|
41
41
|
# If we forget to include a Ruby header, the function call may still appear to work, but then
|
42
42
|
# cause a segfault later. Let's ensure that never happens.
|
43
|
-
add_compiler_flag
|
43
|
+
add_compiler_flag "-Werror-implicit-function-declaration"
|
44
44
|
|
45
45
|
# Warn on unused parameters to functions. Use `DDTRACE_UNUSED` to mark things as known-to-not-be-used.
|
46
|
-
add_compiler_flag
|
46
|
+
add_compiler_flag "-Wunused-parameter"
|
47
47
|
|
48
48
|
# The native extension is not intended to expose any symbols/functions for other native libraries to use;
|
49
49
|
# the sole exception being `Init_datadog_profiling_loader` which needs to be visible for Ruby to call it when
|
@@ -51,14 +51,14 @@ add_compiler_flag '-Wunused-parameter'
|
|
51
51
|
#
|
52
52
|
# By setting this compiler flag, we tell it to assume that everything is private unless explicitly stated.
|
53
53
|
# For more details see https://gcc.gnu.org/wiki/Visibility
|
54
|
-
add_compiler_flag
|
54
|
+
add_compiler_flag "-fvisibility=hidden"
|
55
55
|
|
56
56
|
# Avoid legacy C definitions
|
57
|
-
add_compiler_flag
|
57
|
+
add_compiler_flag "-Wold-style-definition"
|
58
58
|
|
59
59
|
# Enable all other compiler warnings
|
60
|
-
add_compiler_flag
|
61
|
-
add_compiler_flag
|
60
|
+
add_compiler_flag "-Wall"
|
61
|
+
add_compiler_flag "-Wextra"
|
62
62
|
|
63
63
|
# Tag the native extension library with the Ruby version and Ruby platform.
|
64
64
|
# This makes it easier for development (avoids "oops I forgot to rebuild when I switched my Ruby") and ensures that
|
@@ -7,11 +7,10 @@
|
|
7
7
|
#include <pthread.h>
|
8
8
|
#include <time.h>
|
9
9
|
#include <errno.h>
|
10
|
-
#include <ruby.h>
|
11
10
|
|
11
|
+
#include "clock_id.h"
|
12
12
|
#include "helpers.h"
|
13
13
|
#include "private_vm_api_access.h"
|
14
|
-
#include "clock_id.h"
|
15
14
|
#include "time_helpers.h"
|
16
15
|
|
17
16
|
// Validate that our home-cooked pthread_id_for() matches pthread_self() for the current thread
|
@@ -20,7 +20,9 @@
|
|
20
20
|
#define ERR_CLOCK_FAIL "failed to get clock time"
|
21
21
|
|
22
22
|
// Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
|
23
|
-
|
23
|
+
// See https://docs.google.com/document/d/1lWLB714wlLBBq6T4xZyAc4a5wtWhSmr4-hgiPKeErlA/edit#heading=h.ugp0zxcj5iqh
|
24
|
+
// (Datadog-only link) for research backing the choice of this value.
|
25
|
+
unsigned int MAX_ALLOC_WEIGHT = 10000;
|
24
26
|
|
25
27
|
// Used to trigger the execution of Collectors::ThreadState, which implements all of the sampling logic
|
26
28
|
// itself; this class only implements the "when to do it" part.
|
@@ -96,6 +98,7 @@ struct cpu_and_wall_time_worker_state {
|
|
96
98
|
bool no_signals_workaround_enabled;
|
97
99
|
bool dynamic_sampling_rate_enabled;
|
98
100
|
bool allocation_profiling_enabled;
|
101
|
+
bool allocation_counting_enabled;
|
99
102
|
bool skip_idle_samples_for_testing;
|
100
103
|
VALUE self_instance;
|
101
104
|
VALUE thread_context_collector_instance;
|
@@ -104,7 +107,6 @@ struct cpu_and_wall_time_worker_state {
|
|
104
107
|
dynamic_sampling_rate_state cpu_dynamic_sampling_rate;
|
105
108
|
discrete_dynamic_sampler allocation_sampler;
|
106
109
|
VALUE gc_tracepoint; // Used to get gc start/finish information
|
107
|
-
VALUE object_allocation_tracepoint; // Used to get allocation counts and allocation profiling
|
108
110
|
|
109
111
|
// These are mutable and used to signal things between the worker thread and other threads
|
110
112
|
|
@@ -117,7 +119,7 @@ struct cpu_and_wall_time_worker_state {
|
|
117
119
|
|
118
120
|
// Others
|
119
121
|
|
120
|
-
// Used to detect/avoid nested sampling, e.g. when
|
122
|
+
// Used to detect/avoid nested sampling, e.g. when on_newobj_event gets triggered by a memory allocation
|
121
123
|
// that happens during another sample.
|
122
124
|
bool during_sample;
|
123
125
|
|
@@ -181,6 +183,7 @@ static VALUE _native_initialize(
|
|
181
183
|
VALUE dynamic_sampling_rate_enabled,
|
182
184
|
VALUE dynamic_sampling_rate_overhead_target_percentage,
|
183
185
|
VALUE allocation_profiling_enabled,
|
186
|
+
VALUE allocation_counting_enabled,
|
184
187
|
VALUE skip_idle_samples_for_testing
|
185
188
|
);
|
186
189
|
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
|
@@ -216,7 +219,7 @@ static void grab_gvl_and_sample(void);
|
|
216
219
|
static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state);
|
217
220
|
static void sleep_for(uint64_t time_ns);
|
218
221
|
static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self);
|
219
|
-
static void on_newobj_event(VALUE
|
222
|
+
static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *unused2);
|
220
223
|
static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state);
|
221
224
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
|
222
225
|
static VALUE rescued_sample_allocation(VALUE tracepoint_data);
|
@@ -225,6 +228,20 @@ static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VA
|
|
225
228
|
static VALUE _native_hold_signals(DDTRACE_UNUSED VALUE self);
|
226
229
|
static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self);
|
227
230
|
|
231
|
+
// We're using `on_newobj_event` function with `rb_add_event_hook2`, which requires in its public signature a function
|
232
|
+
// with signature `rb_event_hook_func_t` which doesn't match `on_newobj_event`.
|
233
|
+
//
|
234
|
+
// But in practice, because we pass the `RUBY_EVENT_HOOK_FLAG_RAW_ARG` flag to `rb_add_event_hook2`, it casts the
|
235
|
+
// expected signature into a `rb_event_hook_raw_arg_func_t`:
|
236
|
+
// > typedef void (*rb_event_hook_raw_arg_func_t)(VALUE data, const rb_trace_arg_t *arg); (from vm_trace.c)
|
237
|
+
// which does match `on_newobj_event`.
|
238
|
+
//
|
239
|
+
// So TL;DR we're just doing this here to avoid the warning and explain why the apparent mismatch in function signatures.
|
240
|
+
#pragma GCC diagnostic push
|
241
|
+
#pragma GCC diagnostic ignored "-Wcast-function-type"
|
242
|
+
static const rb_event_hook_func_t on_newobj_event_as_hook = (rb_event_hook_func_t) on_newobj_event;
|
243
|
+
#pragma GCC diagnostic pop
|
244
|
+
|
228
245
|
// Note on sampler global state safety:
|
229
246
|
//
|
230
247
|
// Both `active_sampler_instance` and `active_sampler_instance_state` are **GLOBAL** state. Be careful when accessing
|
@@ -278,7 +295,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
278
295
|
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
|
279
296
|
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
|
280
297
|
|
281
|
-
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize,
|
298
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10);
|
282
299
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
|
283
300
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
|
284
301
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
|
@@ -316,6 +333,8 @@ static const rb_data_type_t cpu_and_wall_time_worker_typed_data = {
|
|
316
333
|
};
|
317
334
|
|
318
335
|
static VALUE _native_new(VALUE klass) {
|
336
|
+
long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
337
|
+
|
319
338
|
struct cpu_and_wall_time_worker_state *state = ruby_xcalloc(1, sizeof(struct cpu_and_wall_time_worker_state));
|
320
339
|
|
321
340
|
// Note: Any exceptions raised from this note until the TypedData_Wrap_Struct call will lead to the state memory
|
@@ -325,13 +344,13 @@ static VALUE _native_new(VALUE klass) {
|
|
325
344
|
state->no_signals_workaround_enabled = false;
|
326
345
|
state->dynamic_sampling_rate_enabled = true;
|
327
346
|
state->allocation_profiling_enabled = false;
|
347
|
+
state->allocation_counting_enabled = false;
|
328
348
|
state->skip_idle_samples_for_testing = false;
|
329
349
|
state->thread_context_collector_instance = Qnil;
|
330
350
|
state->idle_sampling_helper_instance = Qnil;
|
331
351
|
state->owner_thread = Qnil;
|
332
352
|
dynamic_sampling_rate_init(&state->cpu_dynamic_sampling_rate);
|
333
353
|
state->gc_tracepoint = Qnil;
|
334
|
-
state->object_allocation_tracepoint = Qnil;
|
335
354
|
|
336
355
|
atomic_init(&state->should_run, false);
|
337
356
|
state->failure_exception = Qnil;
|
@@ -340,15 +359,12 @@ static VALUE _native_new(VALUE klass) {
|
|
340
359
|
state->during_sample = false;
|
341
360
|
|
342
361
|
reset_stats_not_thread_safe(state);
|
343
|
-
|
344
|
-
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
345
|
-
if (now == 0) {
|
346
|
-
ruby_xfree(state);
|
347
|
-
rb_raise(rb_eRuntimeError, ERR_CLOCK_FAIL);
|
348
|
-
}
|
349
|
-
|
350
362
|
discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation", now);
|
351
363
|
|
364
|
+
// Note: As of this writing, no new Ruby objects get created and stored in the state. If that ever changes, remember
|
365
|
+
// to keep them on the stack and mark them with RB_GC_GUARD -- otherwise it's possible for a GC to run and
|
366
|
+
// since the instance representing the state does not yet exist, such objects will not get marked.
|
367
|
+
|
352
368
|
return state->self_instance = TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
|
353
369
|
}
|
354
370
|
|
@@ -362,6 +378,7 @@ static VALUE _native_initialize(
|
|
362
378
|
VALUE dynamic_sampling_rate_enabled,
|
363
379
|
VALUE dynamic_sampling_rate_overhead_target_percentage,
|
364
380
|
VALUE allocation_profiling_enabled,
|
381
|
+
VALUE allocation_counting_enabled,
|
365
382
|
VALUE skip_idle_samples_for_testing
|
366
383
|
) {
|
367
384
|
ENFORCE_BOOLEAN(gc_profiling_enabled);
|
@@ -369,6 +386,7 @@ static VALUE _native_initialize(
|
|
369
386
|
ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
|
370
387
|
ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
|
371
388
|
ENFORCE_BOOLEAN(allocation_profiling_enabled);
|
389
|
+
ENFORCE_BOOLEAN(allocation_counting_enabled);
|
372
390
|
ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
|
373
391
|
|
374
392
|
struct cpu_and_wall_time_worker_state *state;
|
@@ -378,6 +396,7 @@ static VALUE _native_initialize(
|
|
378
396
|
state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
|
379
397
|
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
|
380
398
|
state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
|
399
|
+
state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
|
381
400
|
state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
|
382
401
|
|
383
402
|
double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
|
@@ -394,7 +413,6 @@ static VALUE _native_initialize(
|
|
394
413
|
state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
|
395
414
|
state->idle_sampling_helper_instance = idle_sampling_helper_instance;
|
396
415
|
state->gc_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_GC_ENTER | RUBY_INTERNAL_EVENT_GC_EXIT, on_gc_event, NULL /* unused */);
|
397
|
-
state->object_allocation_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_NEWOBJ, on_newobj_event, NULL /* unused */);
|
398
416
|
|
399
417
|
return Qtrue;
|
400
418
|
}
|
@@ -409,7 +427,6 @@ static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr) {
|
|
409
427
|
rb_gc_mark(state->failure_exception);
|
410
428
|
rb_gc_mark(state->stop_thread);
|
411
429
|
rb_gc_mark(state->gc_tracepoint);
|
412
|
-
rb_gc_mark(state->object_allocation_tracepoint);
|
413
430
|
}
|
414
431
|
|
415
432
|
// Called in a background thread created in CpuAndWallTimeWorker#start
|
@@ -755,7 +772,14 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
|
|
755
772
|
// because they may raise exceptions.
|
756
773
|
install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal");
|
757
774
|
if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
|
758
|
-
if (state->allocation_profiling_enabled)
|
775
|
+
if (state->allocation_profiling_enabled) {
|
776
|
+
rb_add_event_hook2(
|
777
|
+
on_newobj_event_as_hook,
|
778
|
+
RUBY_INTERNAL_EVENT_NEWOBJ,
|
779
|
+
state->self_instance,
|
780
|
+
RUBY_EVENT_HOOK_FLAG_SAFE | RUBY_EVENT_HOOK_FLAG_RAW_ARG)
|
781
|
+
;
|
782
|
+
}
|
759
783
|
|
760
784
|
// Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
|
761
785
|
rb_funcall(instance, rb_intern("signal_running"), 0);
|
@@ -1036,46 +1060,87 @@ static void sleep_for(uint64_t time_ns) {
|
|
1036
1060
|
}
|
1037
1061
|
|
1038
1062
|
static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
|
1039
|
-
|
1063
|
+
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state;
|
1064
|
+
|
1065
|
+
bool are_allocations_being_tracked = state != NULL && state->allocation_profiling_enabled && state->allocation_counting_enabled;
|
1040
1066
|
|
1041
1067
|
return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
|
1042
1068
|
}
|
1043
1069
|
|
1044
|
-
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1052
|
-
}
|
1070
|
+
#define HANDLE_CLOCK_FAILURE(call) ({ \
|
1071
|
+
long _result = (call); \
|
1072
|
+
if (_result == 0) { \
|
1073
|
+
delayed_error(state, ERR_CLOCK_FAIL); \
|
1074
|
+
return; \
|
1075
|
+
} \
|
1076
|
+
_result; \
|
1077
|
+
})
|
1053
1078
|
|
1079
|
+
// Implements memory-related profiling events. This function is called by Ruby via the `rb_add_event_hook2`
|
1080
|
+
// when the RUBY_INTERNAL_EVENT_NEWOBJ event is triggered.
|
1081
|
+
//
|
1082
|
+
// When allocation sampling is enabled, this function gets called for almost all* objects allocated by the Ruby VM.
|
1083
|
+
// (*In some weird cases the VM may skip this tracepoint.)
|
1084
|
+
//
|
1085
|
+
// At a high level, there's two paths through this function:
|
1086
|
+
// 1. should_sample == false -> return
|
1087
|
+
// 2. should_sample == true -> sample
|
1088
|
+
//
|
1089
|
+
// On big applications, path 1. is the hottest, since we don't sample every object. So it's quite important for it to
|
1090
|
+
// be as fast as possible.
|
1091
|
+
//
|
1092
|
+
// NOTE: You may be wondering why we don't use any of the arguments to this function. It turns out it's possible to just
|
1093
|
+
// call `rb_tracearg_from_tracepoint(anything)` anywhere during this function or its callees to get the data, so that's
|
1094
|
+
// why it's not being passed as an argument.
|
1095
|
+
static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *unused2) {
|
1054
1096
|
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
1055
1097
|
|
1056
1098
|
// This should not happen in a normal situation because the tracepoint is always enabled after the instance is set
|
1057
1099
|
// and disabled before it is cleared, but just in case...
|
1058
1100
|
if (state == NULL) return;
|
1059
1101
|
|
1060
|
-
|
1102
|
+
if (RB_UNLIKELY(state->allocation_counting_enabled)) {
|
1103
|
+
// Update thread-local allocation count
|
1104
|
+
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
|
1105
|
+
allocation_count = 0;
|
1106
|
+
} else {
|
1107
|
+
allocation_count++;
|
1108
|
+
}
|
1109
|
+
}
|
1110
|
+
|
1111
|
+
// In rare cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
|
1061
1112
|
// sample, so we just return early
|
1062
1113
|
if (state->during_sample) {
|
1063
1114
|
state->stats.allocations_during_sample++;
|
1064
1115
|
return;
|
1065
1116
|
}
|
1066
1117
|
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1118
|
+
// Hot path: Dynamic sampling rate is usually enabled and the sampling decision is usually false
|
1119
|
+
if (RB_LIKELY(state->dynamic_sampling_rate_enabled && !discrete_dynamic_sampler_should_sample(&state->allocation_sampler))) {
|
1120
|
+
state->stats.allocation_skipped++;
|
1121
|
+
|
1122
|
+
coarse_instant now = monotonic_coarse_wall_time_now_ns();
|
1123
|
+
HANDLE_CLOCK_FAILURE(now.timestamp_ns);
|
1124
|
+
|
1125
|
+
bool needs_readjust = discrete_dynamic_sampler_skipped_sample(&state->allocation_sampler, now);
|
1126
|
+
if (RB_UNLIKELY(needs_readjust)) {
|
1127
|
+
// We rarely readjust, so this is a cold path
|
1128
|
+
// Also, while above we used the cheaper monotonic_coarse, for this call we want the regular monotonic call,
|
1129
|
+
// which is why we end up getting time "again".
|
1130
|
+
discrete_dynamic_sampler_readjust(
|
1131
|
+
&state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
|
1132
|
+
);
|
1076
1133
|
}
|
1134
|
+
|
1135
|
+
return;
|
1077
1136
|
}
|
1078
1137
|
|
1138
|
+
// From here on, we've decided to go ahead with the sample, which is way less common than skipping it
|
1139
|
+
|
1140
|
+
discrete_dynamic_sampler_before_sample(
|
1141
|
+
&state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
|
1142
|
+
);
|
1143
|
+
|
1079
1144
|
// @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
|
1080
1145
|
// invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
|
1081
1146
|
// to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
|
@@ -1083,7 +1148,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
|
|
1083
1148
|
state->during_sample = true;
|
1084
1149
|
|
1085
1150
|
// Rescue against any exceptions that happen during sampling
|
1086
|
-
safely_call(rescued_sample_allocation,
|
1151
|
+
safely_call(rescued_sample_allocation, Qnil, state->self_instance);
|
1087
1152
|
|
1088
1153
|
if (state->dynamic_sampling_rate_enabled) {
|
1089
1154
|
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
@@ -1108,9 +1173,7 @@ static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) {
|
|
1108
1173
|
if (state->gc_tracepoint != Qnil) {
|
1109
1174
|
rb_tracepoint_disable(state->gc_tracepoint);
|
1110
1175
|
}
|
1111
|
-
|
1112
|
-
rb_tracepoint_disable(state->object_allocation_tracepoint);
|
1113
|
-
}
|
1176
|
+
rb_remove_event_hook_with_data(on_newobj_event_as_hook, state->self_instance);
|
1114
1177
|
}
|
1115
1178
|
|
1116
1179
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
|
@@ -1126,13 +1189,14 @@ static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
|
|
1126
1189
|
}
|
1127
1190
|
}
|
1128
1191
|
|
1129
|
-
static VALUE rescued_sample_allocation(VALUE
|
1192
|
+
static VALUE rescued_sample_allocation(DDTRACE_UNUSED VALUE unused) {
|
1130
1193
|
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
1131
1194
|
|
1132
1195
|
// This should not happen in a normal situation because on_newobj_event already checked for this, but just in case...
|
1133
1196
|
if (state == NULL) return Qnil;
|
1134
1197
|
|
1135
|
-
|
1198
|
+
// If we're getting called from inside a tracepoint/event hook, Ruby exposes the data using this function.
|
1199
|
+
rb_trace_arg_t *data = rb_tracearg_from_tracepoint(Qnil);
|
1136
1200
|
VALUE new_object = rb_tracearg_object(data);
|
1137
1201
|
|
1138
1202
|
unsigned long allocations_since_last_sample = state->dynamic_sampling_rate_enabled ?
|
@@ -1140,9 +1204,15 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
|
|
1140
1204
|
discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) :
|
1141
1205
|
// if we aren't, then we're sampling every event
|
1142
1206
|
1;
|
1143
|
-
|
1207
|
+
|
1208
|
+
// To control bias from sampling, we clamp the maximum weight attributed to a single allocation sample. This avoids
|
1209
|
+
// assigning a very large number to a sample, if for instance the dynamic sampling mechanism chose a really big interval.
|
1144
1210
|
unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample;
|
1145
1211
|
thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
|
1212
|
+
// ...but we still represent the skipped samples in the profile, thus the data will account for all allocations.
|
1213
|
+
if (weight < allocations_since_last_sample) {
|
1214
|
+
thread_context_collector_sample_skipped_allocation_samples(state->thread_context_collector_instance, allocations_since_last_sample - weight);
|
1215
|
+
}
|
1146
1216
|
|
1147
1217
|
// Return a dummy VALUE because we're called from rb_rescue2 which requires it
|
1148
1218
|
return Qnil;
|