datadog 2.2.0 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +51 -2
- data/ext/datadog_profiling_loader/extconf.rb +15 -15
- data/ext/datadog_profiling_native_extension/clock_id.h +1 -0
- data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +1 -2
- data/ext/datadog_profiling_native_extension/clock_id_noop.c +1 -2
- data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +113 -43
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +49 -26
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +34 -4
- data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +4 -0
- data/ext/datadog_profiling_native_extension/collectors_stack.c +49 -37
- data/ext/datadog_profiling_native_extension/collectors_stack.h +2 -2
- data/ext/datadog_profiling_native_extension/collectors_thread_context.c +81 -19
- data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +110 -0
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +57 -0
- data/ext/datadog_profiling_native_extension/extconf.rb +65 -60
- data/ext/datadog_profiling_native_extension/heap_recorder.c +34 -6
- data/ext/datadog_profiling_native_extension/heap_recorder.h +3 -1
- data/ext/datadog_profiling_native_extension/helpers.h +6 -17
- data/ext/datadog_profiling_native_extension/http_transport.c +3 -3
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +0 -86
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +2 -23
- data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +61 -172
- data/ext/datadog_profiling_native_extension/private_vm_api_access.c +64 -138
- data/ext/datadog_profiling_native_extension/private_vm_api_access.h +17 -11
- data/ext/datadog_profiling_native_extension/profiling.c +0 -2
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +0 -33
- data/ext/datadog_profiling_native_extension/ruby_helpers.h +1 -26
- data/ext/datadog_profiling_native_extension/setup_signal_handler.h +1 -0
- data/ext/datadog_profiling_native_extension/stack_recorder.c +14 -2
- data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
- data/ext/datadog_profiling_native_extension/time_helpers.c +0 -15
- data/ext/datadog_profiling_native_extension/time_helpers.h +36 -6
- data/ext/{datadog_profiling_native_extension → libdatadog_api}/crashtracker.c +19 -6
- data/ext/libdatadog_api/datadog_ruby_common.c +110 -0
- data/ext/libdatadog_api/datadog_ruby_common.h +57 -0
- data/ext/libdatadog_api/extconf.rb +108 -0
- data/ext/libdatadog_api/macos_development.md +26 -0
- data/ext/libdatadog_extconf_helpers.rb +130 -0
- data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +49 -0
- data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +73 -0
- data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +68 -0
- data/lib/datadog/appsec/contrib/graphql/integration.rb +41 -0
- data/lib/datadog/appsec/contrib/graphql/patcher.rb +37 -0
- data/lib/datadog/appsec/contrib/graphql/reactive/multiplex.rb +59 -0
- data/lib/datadog/appsec/contrib/rack/gateway/request.rb +1 -1
- data/lib/datadog/appsec/processor/actions.rb +1 -1
- data/lib/datadog/appsec/response.rb +15 -1
- data/lib/datadog/appsec.rb +1 -0
- data/lib/datadog/core/configuration/components.rb +14 -12
- data/lib/datadog/core/configuration/settings.rb +54 -7
- data/lib/datadog/core/crashtracking/agent_base_url.rb +21 -0
- data/lib/datadog/core/crashtracking/component.rb +111 -0
- data/lib/datadog/core/crashtracking/tag_builder.rb +39 -0
- data/lib/datadog/core/diagnostics/environment_logger.rb +8 -11
- data/lib/datadog/core/telemetry/component.rb +49 -2
- data/lib/datadog/core/telemetry/emitter.rb +9 -11
- data/lib/datadog/core/telemetry/event.rb +32 -1
- data/lib/datadog/core/telemetry/ext.rb +1 -0
- data/lib/datadog/core/telemetry/http/adapters/net.rb +10 -12
- data/lib/datadog/core/telemetry/http/ext.rb +3 -0
- data/lib/datadog/core/telemetry/http/transport.rb +38 -9
- data/lib/datadog/core/telemetry/logging.rb +35 -0
- data/lib/datadog/core/utils/at_fork_monkey_patch.rb +102 -0
- data/lib/datadog/kit/appsec/events.rb +2 -4
- data/lib/datadog/opentelemetry/sdk/span_processor.rb +10 -0
- data/lib/datadog/opentelemetry/sdk/trace/span.rb +23 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +7 -7
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +17 -17
- data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +11 -13
- data/lib/datadog/profiling/collectors/info.rb +3 -3
- data/lib/datadog/profiling/collectors/thread_context.rb +4 -2
- data/lib/datadog/profiling/component.rb +69 -91
- data/lib/datadog/profiling/exporter.rb +3 -3
- data/lib/datadog/profiling/ext/dir_monkey_patches.rb +3 -3
- data/lib/datadog/profiling/ext.rb +21 -21
- data/lib/datadog/profiling/flush.rb +1 -1
- data/lib/datadog/profiling/http_transport.rb +8 -6
- data/lib/datadog/profiling/load_native_extension.rb +5 -5
- data/lib/datadog/profiling/preload.rb +1 -1
- data/lib/datadog/profiling/profiler.rb +5 -8
- data/lib/datadog/profiling/scheduler.rb +31 -25
- data/lib/datadog/profiling/tag_builder.rb +2 -2
- data/lib/datadog/profiling/tasks/exec.rb +5 -5
- data/lib/datadog/profiling/tasks/setup.rb +16 -35
- data/lib/datadog/profiling.rb +4 -5
- data/lib/datadog/tracing/contrib/active_record/events/sql.rb +1 -0
- data/lib/datadog/tracing/contrib/ext.rb +14 -0
- data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +1 -1
- data/lib/datadog/tracing/contrib/graphql/unified_trace_patcher.rb +4 -1
- data/lib/datadog/tracing/contrib/lograge/patcher.rb +16 -0
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +17 -13
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +4 -1
- data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +28 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +5 -1
- data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +22 -10
- data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +4 -1
- data/lib/datadog/tracing/diagnostics/environment_logger.rb +14 -16
- data/lib/datadog/tracing/metadata/errors.rb +9 -1
- data/lib/datadog/tracing/metadata/ext.rb +4 -0
- data/lib/datadog/tracing/pipeline/span_filter.rb +2 -2
- data/lib/datadog/tracing/span.rb +9 -2
- data/lib/datadog/tracing/span_event.rb +41 -0
- data/lib/datadog/tracing/span_operation.rb +6 -2
- data/lib/datadog/tracing/transport/serializable_trace.rb +3 -0
- data/lib/datadog/version.rb +1 -1
- metadata +28 -10
- data/lib/datadog/profiling/crashtracker.rb +0 -91
- data/lib/datadog/profiling/ext/forking.rb +0 -98
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d6610c8ef7e86c023f8a3fca884807bd7e9cf6b84fc6cbdd79b98e8a8762c2e
|
4
|
+
data.tar.gz: 236fafc4b8e2c809552d97c8eb025654c0ef4279685c741ad9504b2af8265b94
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e633db76f69b5d151629cde5e1b7024a6bea43343aa348ea50857d5e74d049d2b6a833be252c6c1c7aee95976805a795e1607a81d42e8c3246126abd9811777d
|
7
|
+
data.tar.gz: fb2bcf3803689d8e499f2266e7d063f1c5b9b7d08bb28b5d27f81bd16e7e373c01c1d5224027d43229a5ca609f66c5179aa521ef272f347d069ca73e3613bdc3
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,34 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
+
## [2.3.0] - 2024-08-22
|
6
|
+
|
7
|
+
### Added
|
8
|
+
|
9
|
+
* Core: Support agentless telemetry ([#3779][])
|
10
|
+
* Tracing: Add support for span events ([#3776][])
|
11
|
+
* Tracing: Add tags to enable inferred service dependencies for databases ([#3789][])
|
12
|
+
* Tracing: Emit log message and instructions for incompatible Lograge setup ([#3812][], [#3839][])
|
13
|
+
* Tracing: Add `append_comment` option to append SQL comment propagation for `mysql`, `pg` and `trilogy` ([#3809][])
|
14
|
+
* AppSec: Add threat detection and protection for `graphql` ([#3769][], [#3814][])
|
15
|
+
|
16
|
+
### Changed
|
17
|
+
|
18
|
+
* Core: Enable crashtracking by default ([#3826][])
|
19
|
+
* Profiling: Reduce allocation overhead ([#3805][], [#3797][])
|
20
|
+
* Profiling: Speed up stack sampling ([#3837][])
|
21
|
+
* Profiling: Upgrade to libdatadog 11 ([#3799][])
|
22
|
+
* Profiling: Disable allocation counting feature by default ([#3798][])
|
23
|
+
* Profiling: Reduce the maximum biased result for allocation samples ([#3793][])
|
24
|
+
* Tracing: Reduce noisy integration logs ([#3785][])
|
25
|
+
|
26
|
+
### Fixed
|
27
|
+
|
28
|
+
* Tracing: Fix `require` issue for `graphql` ([#3813][])
|
29
|
+
* AppSec: Fix an error when parsing http headers with integer value ([#3790][])
|
30
|
+
* AppSec: Fix an error when tracking login failure without `user_id` ([#3841][])
|
31
|
+
* Fix a syntax error for Ruby < 2.4 during single step instrumentation ([#3795][])
|
32
|
+
|
5
33
|
## [2.2.0] - 2024-07-11
|
6
34
|
|
7
35
|
### Added
|
@@ -2934,7 +2962,8 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
|
|
2934
2962
|
Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
2935
2963
|
|
2936
2964
|
|
2937
|
-
[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.2.0...master
|
2965
|
+
[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.3.0...master
|
2966
|
+
[2.3.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.2.0...v2.3.0
|
2938
2967
|
[2.2.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.1.0...v2.2.0
|
2939
2968
|
[2.1.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.0.0...v2.1.0
|
2940
2969
|
[2.0.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.0.0.rc1...v2.0.0
|
@@ -4331,9 +4360,29 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
|
4331
4360
|
[#3753]: https://github.com/DataDog/dd-trace-rb/issues/3753
|
4332
4361
|
[#3757]: https://github.com/DataDog/dd-trace-rb/issues/3757
|
4333
4362
|
[#3759]: https://github.com/DataDog/dd-trace-rb/issues/3759
|
4363
|
+
[#3769]: https://github.com/DataDog/dd-trace-rb/issues/3769
|
4334
4364
|
[#3770]: https://github.com/DataDog/dd-trace-rb/issues/3770
|
4335
4365
|
[#3772]: https://github.com/DataDog/dd-trace-rb/issues/3772
|
4336
4366
|
[#3774]: https://github.com/DataDog/dd-trace-rb/issues/3774
|
4367
|
+
[#3776]: https://github.com/DataDog/dd-trace-rb/issues/3776
|
4368
|
+
[#3779]: https://github.com/DataDog/dd-trace-rb/issues/3779
|
4369
|
+
[#3785]: https://github.com/DataDog/dd-trace-rb/issues/3785
|
4370
|
+
[#3789]: https://github.com/DataDog/dd-trace-rb/issues/3789
|
4371
|
+
[#3790]: https://github.com/DataDog/dd-trace-rb/issues/3790
|
4372
|
+
[#3793]: https://github.com/DataDog/dd-trace-rb/issues/3793
|
4373
|
+
[#3795]: https://github.com/DataDog/dd-trace-rb/issues/3795
|
4374
|
+
[#3797]: https://github.com/DataDog/dd-trace-rb/issues/3797
|
4375
|
+
[#3798]: https://github.com/DataDog/dd-trace-rb/issues/3798
|
4376
|
+
[#3799]: https://github.com/DataDog/dd-trace-rb/issues/3799
|
4377
|
+
[#3805]: https://github.com/DataDog/dd-trace-rb/issues/3805
|
4378
|
+
[#3809]: https://github.com/DataDog/dd-trace-rb/issues/3809
|
4379
|
+
[#3812]: https://github.com/DataDog/dd-trace-rb/issues/3812
|
4380
|
+
[#3813]: https://github.com/DataDog/dd-trace-rb/issues/3813
|
4381
|
+
[#3814]: https://github.com/DataDog/dd-trace-rb/issues/3814
|
4382
|
+
[#3826]: https://github.com/DataDog/dd-trace-rb/issues/3826
|
4383
|
+
[#3837]: https://github.com/DataDog/dd-trace-rb/issues/3837
|
4384
|
+
[#3839]: https://github.com/DataDog/dd-trace-rb/issues/3839
|
4385
|
+
[#3841]: https://github.com/DataDog/dd-trace-rb/issues/3841
|
4337
4386
|
[@AdrianLC]: https://github.com/AdrianLC
|
4338
4387
|
[@Azure7111]: https://github.com/Azure7111
|
4339
4388
|
[@BabyGroot]: https://github.com/BabyGroot
|
@@ -4485,4 +4534,4 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
|
|
4485
4534
|
[@y-yagi]: https://github.com/y-yagi
|
4486
4535
|
[@yujideveloper]: https://github.com/yujideveloper
|
4487
4536
|
[@yukimurasawa]: https://github.com/yukimurasawa
|
4488
|
-
[@zachmccormick]: https://github.com/zachmccormick
|
4537
|
+
[@zachmccormick]: https://github.com/zachmccormick
|
@@ -1,22 +1,22 @@
|
|
1
1
|
# rubocop:disable Style/StderrPuts
|
2
2
|
# rubocop:disable Style/GlobalVars
|
3
3
|
|
4
|
-
if RUBY_ENGINE != 'ruby' || Gem.win_platform?
|
4
|
+
if RUBY_ENGINE != "ruby" || Gem.win_platform?
|
5
5
|
$stderr.puts(
|
6
|
-
|
6
|
+
"WARN: Skipping build of Datadog profiling loader. See Datadog profiling native extension note for details."
|
7
7
|
)
|
8
8
|
|
9
|
-
File.write('Makefile', 'all install clean: # dummy makefile that does nothing')
|
9
|
+
File.write("Makefile", "all install clean: # dummy makefile that does nothing")
|
10
10
|
exit
|
11
11
|
end
|
12
12
|
|
13
|
-
require 'mkmf'
|
13
|
+
require "mkmf"
|
14
14
|
|
15
15
|
# mkmf on modern Rubies actually has an append_cflags that does something similar
|
16
16
|
# (see https://github.com/ruby/ruby/pull/5760), but as usual we need a bit more boilerplate to deal with legacy Rubies
|
17
17
|
def add_compiler_flag(flag)
|
18
18
|
if try_cflags(flag)
|
19
|
-
$CFLAGS << ' ' << flag
|
19
|
+
$CFLAGS << " " << flag
|
20
20
|
else
|
21
21
|
$stderr.puts("WARNING: '#{flag}' not accepted by compiler, skipping it")
|
22
22
|
end
|
@@ -24,26 +24,26 @@ end
|
|
24
24
|
|
25
25
|
# Because we can't control what compiler versions our customers use, shipping with -Werror by default is a no-go.
|
26
26
|
# But we can enable it in CI, so that we quickly spot any new warnings that just got introduced.
|
27
|
-
add_compiler_flag
|
27
|
+
add_compiler_flag "-Werror" if ENV["DATADOG_GEM_CI"] == "true"
|
28
28
|
|
29
29
|
# Older gcc releases may not default to C99 and we need to ask for this. This is also used:
|
30
30
|
# * by upstream Ruby -- search for gnu99 in the codebase
|
31
31
|
# * by msgpack, another datadog gem dependency
|
32
32
|
# (https://github.com/msgpack/msgpack-ruby/blob/18ce08f6d612fe973843c366ac9a0b74c4e50599/ext/msgpack/extconf.rb#L8)
|
33
|
-
add_compiler_flag '-std=gnu99'
|
33
|
+
add_compiler_flag "-std=gnu99"
|
34
34
|
|
35
35
|
# Gets really noisy when we include the MJIT header, let's omit it (TODO: Use #pragma GCC diagnostic instead?)
|
36
|
-
add_compiler_flag '-Wno-unused-function'
|
36
|
+
add_compiler_flag "-Wno-unused-function"
|
37
37
|
|
38
38
|
# Allow defining variables at any point in a function
|
39
|
-
add_compiler_flag '-Wno-declaration-after-statement'
|
39
|
+
add_compiler_flag "-Wno-declaration-after-statement"
|
40
40
|
|
41
41
|
# If we forget to include a Ruby header, the function call may still appear to work, but then
|
42
42
|
# cause a segfault later. Let's ensure that never happens.
|
43
|
-
add_compiler_flag '-Werror-implicit-function-declaration'
|
43
|
+
add_compiler_flag "-Werror-implicit-function-declaration"
|
44
44
|
|
45
45
|
# Warn on unused parameters to functions. Use `DDTRACE_UNUSED` to mark things as known-to-not-be-used.
|
46
|
-
add_compiler_flag '-Wunused-parameter'
|
46
|
+
add_compiler_flag "-Wunused-parameter"
|
47
47
|
|
48
48
|
# The native extension is not intended to expose any symbols/functions for other native libraries to use;
|
49
49
|
# the sole exception being `Init_datadog_profiling_loader` which needs to be visible for Ruby to call it when
|
@@ -51,14 +51,14 @@ add_compiler_flag '-Wunused-parameter'
|
|
51
51
|
#
|
52
52
|
# By setting this compiler flag, we tell it to assume that everything is private unless explicitly stated.
|
53
53
|
# For more details see https://gcc.gnu.org/wiki/Visibility
|
54
|
-
add_compiler_flag '-fvisibility=hidden'
|
54
|
+
add_compiler_flag "-fvisibility=hidden"
|
55
55
|
|
56
56
|
# Avoid legacy C definitions
|
57
|
-
add_compiler_flag '-Wold-style-definition'
|
57
|
+
add_compiler_flag "-Wold-style-definition"
|
58
58
|
|
59
59
|
# Enable all other compiler warnings
|
60
|
-
add_compiler_flag
|
61
|
-
add_compiler_flag
|
60
|
+
add_compiler_flag "-Wall"
|
61
|
+
add_compiler_flag "-Wextra"
|
62
62
|
|
63
63
|
# Tag the native extension library with the Ruby version and Ruby platform.
|
64
64
|
# This makes it easier for development (avoids "oops I forgot to rebuild when I switched my Ruby") and ensures that
|
@@ -7,11 +7,10 @@
|
|
7
7
|
#include <pthread.h>
|
8
8
|
#include <time.h>
|
9
9
|
#include <errno.h>
|
10
|
-
#include <ruby.h>
|
11
10
|
|
11
|
+
#include "clock_id.h"
|
12
12
|
#include "helpers.h"
|
13
13
|
#include "private_vm_api_access.h"
|
14
|
-
#include "clock_id.h"
|
15
14
|
#include "time_helpers.h"
|
16
15
|
|
17
16
|
// Validate that our home-cooked pthread_id_for() matches pthread_self() for the current thread
|
@@ -20,7 +20,9 @@
|
|
20
20
|
#define ERR_CLOCK_FAIL "failed to get clock time"
|
21
21
|
|
22
22
|
// Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
|
23
|
-
|
23
|
+
// See https://docs.google.com/document/d/1lWLB714wlLBBq6T4xZyAc4a5wtWhSmr4-hgiPKeErlA/edit#heading=h.ugp0zxcj5iqh
|
24
|
+
// (Datadog-only link) for research backing the choice of this value.
|
25
|
+
unsigned int MAX_ALLOC_WEIGHT = 10000;
|
24
26
|
|
25
27
|
// Used to trigger the execution of Collectors::ThreadState, which implements all of the sampling logic
|
26
28
|
// itself; this class only implements the "when to do it" part.
|
@@ -96,6 +98,7 @@ struct cpu_and_wall_time_worker_state {
|
|
96
98
|
bool no_signals_workaround_enabled;
|
97
99
|
bool dynamic_sampling_rate_enabled;
|
98
100
|
bool allocation_profiling_enabled;
|
101
|
+
bool allocation_counting_enabled;
|
99
102
|
bool skip_idle_samples_for_testing;
|
100
103
|
VALUE self_instance;
|
101
104
|
VALUE thread_context_collector_instance;
|
@@ -104,7 +107,6 @@ struct cpu_and_wall_time_worker_state {
|
|
104
107
|
dynamic_sampling_rate_state cpu_dynamic_sampling_rate;
|
105
108
|
discrete_dynamic_sampler allocation_sampler;
|
106
109
|
VALUE gc_tracepoint; // Used to get gc start/finish information
|
107
|
-
VALUE object_allocation_tracepoint; // Used to get allocation counts and allocation profiling
|
108
110
|
|
109
111
|
// These are mutable and used to signal things between the worker thread and other threads
|
110
112
|
|
@@ -117,7 +119,7 @@ struct cpu_and_wall_time_worker_state {
|
|
117
119
|
|
118
120
|
// Others
|
119
121
|
|
120
|
-
// Used to detect/avoid nested sampling, e.g. when
|
122
|
+
// Used to detect/avoid nested sampling, e.g. when on_newobj_event gets triggered by a memory allocation
|
121
123
|
// that happens during another sample.
|
122
124
|
bool during_sample;
|
123
125
|
|
@@ -181,6 +183,7 @@ static VALUE _native_initialize(
|
|
181
183
|
VALUE dynamic_sampling_rate_enabled,
|
182
184
|
VALUE dynamic_sampling_rate_overhead_target_percentage,
|
183
185
|
VALUE allocation_profiling_enabled,
|
186
|
+
VALUE allocation_counting_enabled,
|
184
187
|
VALUE skip_idle_samples_for_testing
|
185
188
|
);
|
186
189
|
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
|
@@ -216,7 +219,7 @@ static void grab_gvl_and_sample(void);
|
|
216
219
|
static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state);
|
217
220
|
static void sleep_for(uint64_t time_ns);
|
218
221
|
static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self);
|
219
|
-
static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
|
222
|
+
static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *unused2);
|
220
223
|
static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state);
|
221
224
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
|
222
225
|
static VALUE rescued_sample_allocation(VALUE tracepoint_data);
|
@@ -225,6 +228,20 @@ static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VA
|
|
225
228
|
static VALUE _native_hold_signals(DDTRACE_UNUSED VALUE self);
|
226
229
|
static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self);
|
227
230
|
|
231
|
+
// We're using `on_newobj_event` function with `rb_add_event_hook2`, which requires in its public signature a function
|
232
|
+
// with signature `rb_event_hook_func_t` which doesn't match `on_newobj_event`.
|
233
|
+
//
|
234
|
+
// But in practice, because we pass the `RUBY_EVENT_HOOK_FLAG_RAW_ARG` flag to `rb_add_event_hook2`, it casts the
|
235
|
+
// expected signature into a `rb_event_hook_raw_arg_func_t`:
|
236
|
+
// > typedef void (*rb_event_hook_raw_arg_func_t)(VALUE data, const rb_trace_arg_t *arg); (from vm_trace.c)
|
237
|
+
// which does match `on_newobj_event`.
|
238
|
+
//
|
239
|
+
// So TL;DR we're just doing this here to avoid the warning and explain why the apparent mismatch in function signatures.
|
240
|
+
#pragma GCC diagnostic push
|
241
|
+
#pragma GCC diagnostic ignored "-Wcast-function-type"
|
242
|
+
static const rb_event_hook_func_t on_newobj_event_as_hook = (rb_event_hook_func_t) on_newobj_event;
|
243
|
+
#pragma GCC diagnostic pop
|
244
|
+
|
228
245
|
// Note on sampler global state safety:
|
229
246
|
//
|
230
247
|
// Both `active_sampler_instance` and `active_sampler_instance_state` are **GLOBAL** state. Be careful when accessing
|
@@ -278,7 +295,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
278
295
|
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
|
279
296
|
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
|
280
297
|
|
281
|
-
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
|
298
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10);
|
282
299
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
|
283
300
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
|
284
301
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
|
@@ -316,6 +333,8 @@ static const rb_data_type_t cpu_and_wall_time_worker_typed_data = {
|
|
316
333
|
};
|
317
334
|
|
318
335
|
static VALUE _native_new(VALUE klass) {
|
336
|
+
long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
337
|
+
|
319
338
|
struct cpu_and_wall_time_worker_state *state = ruby_xcalloc(1, sizeof(struct cpu_and_wall_time_worker_state));
|
320
339
|
|
321
340
|
// Note: Any exceptions raised from this note until the TypedData_Wrap_Struct call will lead to the state memory
|
@@ -325,13 +344,13 @@ static VALUE _native_new(VALUE klass) {
|
|
325
344
|
state->no_signals_workaround_enabled = false;
|
326
345
|
state->dynamic_sampling_rate_enabled = true;
|
327
346
|
state->allocation_profiling_enabled = false;
|
347
|
+
state->allocation_counting_enabled = false;
|
328
348
|
state->skip_idle_samples_for_testing = false;
|
329
349
|
state->thread_context_collector_instance = Qnil;
|
330
350
|
state->idle_sampling_helper_instance = Qnil;
|
331
351
|
state->owner_thread = Qnil;
|
332
352
|
dynamic_sampling_rate_init(&state->cpu_dynamic_sampling_rate);
|
333
353
|
state->gc_tracepoint = Qnil;
|
334
|
-
state->object_allocation_tracepoint = Qnil;
|
335
354
|
|
336
355
|
atomic_init(&state->should_run, false);
|
337
356
|
state->failure_exception = Qnil;
|
@@ -340,15 +359,12 @@ static VALUE _native_new(VALUE klass) {
|
|
340
359
|
state->during_sample = false;
|
341
360
|
|
342
361
|
reset_stats_not_thread_safe(state);
|
343
|
-
|
344
|
-
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
345
|
-
if (now == 0) {
|
346
|
-
ruby_xfree(state);
|
347
|
-
rb_raise(rb_eRuntimeError, ERR_CLOCK_FAIL);
|
348
|
-
}
|
349
|
-
|
350
362
|
discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation", now);
|
351
363
|
|
364
|
+
// Note: As of this writing, no new Ruby objects get created and stored in the state. If that ever changes, remember
|
365
|
+
// to keep them on the stack and mark them with RB_GC_GUARD -- otherwise it's possible for a GC to run and
|
366
|
+
// since the instance representing the state does not yet exist, such objects will not get marked.
|
367
|
+
|
352
368
|
return state->self_instance = TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
|
353
369
|
}
|
354
370
|
|
@@ -362,6 +378,7 @@ static VALUE _native_initialize(
|
|
362
378
|
VALUE dynamic_sampling_rate_enabled,
|
363
379
|
VALUE dynamic_sampling_rate_overhead_target_percentage,
|
364
380
|
VALUE allocation_profiling_enabled,
|
381
|
+
VALUE allocation_counting_enabled,
|
365
382
|
VALUE skip_idle_samples_for_testing
|
366
383
|
) {
|
367
384
|
ENFORCE_BOOLEAN(gc_profiling_enabled);
|
@@ -369,6 +386,7 @@ static VALUE _native_initialize(
|
|
369
386
|
ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
|
370
387
|
ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
|
371
388
|
ENFORCE_BOOLEAN(allocation_profiling_enabled);
|
389
|
+
ENFORCE_BOOLEAN(allocation_counting_enabled);
|
372
390
|
ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
|
373
391
|
|
374
392
|
struct cpu_and_wall_time_worker_state *state;
|
@@ -378,6 +396,7 @@ static VALUE _native_initialize(
|
|
378
396
|
state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
|
379
397
|
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
|
380
398
|
state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
|
399
|
+
state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
|
381
400
|
state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
|
382
401
|
|
383
402
|
double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
|
@@ -394,7 +413,6 @@ static VALUE _native_initialize(
|
|
394
413
|
state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
|
395
414
|
state->idle_sampling_helper_instance = idle_sampling_helper_instance;
|
396
415
|
state->gc_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_GC_ENTER | RUBY_INTERNAL_EVENT_GC_EXIT, on_gc_event, NULL /* unused */);
|
397
|
-
state->object_allocation_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_NEWOBJ, on_newobj_event, NULL /* unused */);
|
398
416
|
|
399
417
|
return Qtrue;
|
400
418
|
}
|
@@ -409,7 +427,6 @@ static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr) {
|
|
409
427
|
rb_gc_mark(state->failure_exception);
|
410
428
|
rb_gc_mark(state->stop_thread);
|
411
429
|
rb_gc_mark(state->gc_tracepoint);
|
412
|
-
rb_gc_mark(state->object_allocation_tracepoint);
|
413
430
|
}
|
414
431
|
|
415
432
|
// Called in a background thread created in CpuAndWallTimeWorker#start
|
@@ -755,7 +772,14 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
|
|
755
772
|
// because they may raise exceptions.
|
756
773
|
install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal");
|
757
774
|
if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
|
758
|
-
if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
|
775
|
+
if (state->allocation_profiling_enabled) {
|
776
|
+
rb_add_event_hook2(
|
777
|
+
on_newobj_event_as_hook,
|
778
|
+
RUBY_INTERNAL_EVENT_NEWOBJ,
|
779
|
+
state->self_instance,
|
780
|
+
RUBY_EVENT_HOOK_FLAG_SAFE | RUBY_EVENT_HOOK_FLAG_RAW_ARG)
|
781
|
+
;
|
782
|
+
}
|
759
783
|
|
760
784
|
// Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
|
761
785
|
rb_funcall(instance, rb_intern("signal_running"), 0);
|
@@ -1036,46 +1060,87 @@ static void sleep_for(uint64_t time_ns) {
|
|
1036
1060
|
}
|
1037
1061
|
|
1038
1062
|
static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
|
1039
|
-
|
1063
|
+
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state;
|
1064
|
+
|
1065
|
+
bool are_allocations_being_tracked = state != NULL && state->allocation_profiling_enabled && state->allocation_counting_enabled;
|
1040
1066
|
|
1041
1067
|
return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
|
1042
1068
|
}
|
1043
1069
|
|
1044
|
-
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1052
|
-
}
|
1070
|
+
#define HANDLE_CLOCK_FAILURE(call) ({ \
|
1071
|
+
long _result = (call); \
|
1072
|
+
if (_result == 0) { \
|
1073
|
+
delayed_error(state, ERR_CLOCK_FAIL); \
|
1074
|
+
return; \
|
1075
|
+
} \
|
1076
|
+
_result; \
|
1077
|
+
})
|
1053
1078
|
|
1079
|
+
// Implements memory-related profiling events. This function is called by Ruby via the `rb_add_event_hook2`
|
1080
|
+
// when the RUBY_INTERNAL_EVENT_NEWOBJ event is triggered.
|
1081
|
+
//
|
1082
|
+
// When allocation sampling is enabled, this function gets called for almost all* objects allocated by the Ruby VM.
|
1083
|
+
// (*In some weird cases the VM may skip this tracepoint.)
|
1084
|
+
//
|
1085
|
+
// At a high level, there's two paths through this function:
|
1086
|
+
// 1. should_sample == false -> return
|
1087
|
+
// 2. should_sample == true -> sample
|
1088
|
+
//
|
1089
|
+
// On big applications, path 1. is the hottest, since we don't sample every object. So it's quite important for it to
|
1090
|
+
// be as fast as possible.
|
1091
|
+
//
|
1092
|
+
// NOTE: You may be wondering why we don't use any of the arguments to this function. It turns out it's possible to just
|
1093
|
+
// call `rb_tracearg_from_tracepoint(anything)` anywhere during this function or its callees to get the data, so that's
|
1094
|
+
// why it's not being passed as an argument.
|
1095
|
+
static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *unused2) {
|
1054
1096
|
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
1055
1097
|
|
1056
1098
|
// This should not happen in a normal situation because the tracepoint is always enabled after the instance is set
|
1057
1099
|
// and disabled before it is cleared, but just in case...
|
1058
1100
|
if (state == NULL) return;
|
1059
1101
|
|
1060
|
-
|
1102
|
+
if (RB_UNLIKELY(state->allocation_counting_enabled)) {
|
1103
|
+
// Update thread-local allocation count
|
1104
|
+
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
|
1105
|
+
allocation_count = 0;
|
1106
|
+
} else {
|
1107
|
+
allocation_count++;
|
1108
|
+
}
|
1109
|
+
}
|
1110
|
+
|
1111
|
+
// In rare cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
|
1061
1112
|
// sample, so we just return early
|
1062
1113
|
if (state->during_sample) {
|
1063
1114
|
state->stats.allocations_during_sample++;
|
1064
1115
|
return;
|
1065
1116
|
}
|
1066
1117
|
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1118
|
+
// Hot path: Dynamic sampling rate is usually enabled and the sampling decision is usually false
|
1119
|
+
if (RB_LIKELY(state->dynamic_sampling_rate_enabled && !discrete_dynamic_sampler_should_sample(&state->allocation_sampler))) {
|
1120
|
+
state->stats.allocation_skipped++;
|
1121
|
+
|
1122
|
+
coarse_instant now = monotonic_coarse_wall_time_now_ns();
|
1123
|
+
HANDLE_CLOCK_FAILURE(now.timestamp_ns);
|
1124
|
+
|
1125
|
+
bool needs_readjust = discrete_dynamic_sampler_skipped_sample(&state->allocation_sampler, now);
|
1126
|
+
if (RB_UNLIKELY(needs_readjust)) {
|
1127
|
+
// We rarely readjust, so this is a cold path
|
1128
|
+
// Also, while above we used the cheaper monotonic_coarse, for this call we want the regular monotonic call,
|
1129
|
+
// which is why we end up getting time "again".
|
1130
|
+
discrete_dynamic_sampler_readjust(
|
1131
|
+
&state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
|
1132
|
+
);
|
1076
1133
|
}
|
1134
|
+
|
1135
|
+
return;
|
1077
1136
|
}
|
1078
1137
|
|
1138
|
+
// From here on, we've decided to go ahead with the sample, which is way less common than skipping it
|
1139
|
+
|
1140
|
+
discrete_dynamic_sampler_before_sample(
|
1141
|
+
&state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
|
1142
|
+
);
|
1143
|
+
|
1079
1144
|
// @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
|
1080
1145
|
// invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
|
1081
1146
|
// to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
|
@@ -1083,7 +1148,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
|
|
1083
1148
|
state->during_sample = true;
|
1084
1149
|
|
1085
1150
|
// Rescue against any exceptions that happen during sampling
|
1086
|
-
safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
|
1151
|
+
safely_call(rescued_sample_allocation, Qnil, state->self_instance);
|
1087
1152
|
|
1088
1153
|
if (state->dynamic_sampling_rate_enabled) {
|
1089
1154
|
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
@@ -1108,9 +1173,7 @@ static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) {
|
|
1108
1173
|
if (state->gc_tracepoint != Qnil) {
|
1109
1174
|
rb_tracepoint_disable(state->gc_tracepoint);
|
1110
1175
|
}
|
1111
|
-
|
1112
|
-
rb_tracepoint_disable(state->object_allocation_tracepoint);
|
1113
|
-
}
|
1176
|
+
rb_remove_event_hook_with_data(on_newobj_event_as_hook, state->self_instance);
|
1114
1177
|
}
|
1115
1178
|
|
1116
1179
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
|
@@ -1126,13 +1189,14 @@ static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
|
|
1126
1189
|
}
|
1127
1190
|
}
|
1128
1191
|
|
1129
|
-
static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
|
1192
|
+
static VALUE rescued_sample_allocation(DDTRACE_UNUSED VALUE unused) {
|
1130
1193
|
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
1131
1194
|
|
1132
1195
|
// This should not happen in a normal situation because on_newobj_event already checked for this, but just in case...
|
1133
1196
|
if (state == NULL) return Qnil;
|
1134
1197
|
|
1135
|
-
|
1198
|
+
// If we're getting called from inside a tracepoint/event hook, Ruby exposes the data using this function.
|
1199
|
+
rb_trace_arg_t *data = rb_tracearg_from_tracepoint(Qnil);
|
1136
1200
|
VALUE new_object = rb_tracearg_object(data);
|
1137
1201
|
|
1138
1202
|
unsigned long allocations_since_last_sample = state->dynamic_sampling_rate_enabled ?
|
@@ -1140,9 +1204,15 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
|
|
1140
1204
|
discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) :
|
1141
1205
|
// if we aren't, then we're sampling every event
|
1142
1206
|
1;
|
1143
|
-
|
1207
|
+
|
1208
|
+
// To control bias from sampling, we clamp the maximum weight attributed to a single allocation sample. This avoids
|
1209
|
+
// assigning a very large number to a sample, if for instance the dynamic sampling mechanism chose a really big interval.
|
1144
1210
|
unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample;
|
1145
1211
|
thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
|
1212
|
+
// ...but we still represent the skipped samples in the profile, thus the data will account for all allocations.
|
1213
|
+
if (weight < allocations_since_last_sample) {
|
1214
|
+
thread_context_collector_sample_skipped_allocation_samples(state->thread_context_collector_instance, allocations_since_last_sample - weight);
|
1215
|
+
}
|
1146
1216
|
|
1147
1217
|
// Return a dummy VALUE because we're called from rb_rescue2 which requires it
|
1148
1218
|
return Qnil;
|