ddtrace 1.18.0 → 1.23.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +228 -2
- data/LICENSE-3rdparty.csv +1 -1
- data/bin/ddprofrb +15 -0
- data/bin/ddtracerb +3 -1
- data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
- data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +312 -117
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +422 -0
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +101 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +22 -14
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +4 -0
- data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
- data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +43 -102
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +10 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +272 -136
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +2 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +28 -7
- data/ext/datadog_profiling_native_extension/heap_recorder.c +1047 -0
- data/ext/datadog_profiling_native_extension/heap_recorder.h +166 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +6 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +20 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +11 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +50 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +19 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +18 -1
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +267 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +33 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +476 -58
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +3 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +2 -0
- data/lib/datadog/appsec/contrib/devise/tracking.rb +8 -0
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +45 -14
- data/lib/datadog/appsec/event.rb +1 -1
- data/lib/datadog/auto_instrument.rb +3 -0
- data/lib/datadog/core/configuration/components.rb +7 -6
- data/lib/datadog/core/configuration/option.rb +8 -6
- data/lib/datadog/core/configuration/settings.rb +259 -60
- data/lib/datadog/core/configuration.rb +20 -4
- data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
- data/lib/datadog/core/environment/class_count.rb +6 -6
- data/lib/datadog/core/environment/git.rb +25 -0
- data/lib/datadog/core/environment/identity.rb +18 -48
- data/lib/datadog/core/environment/platform.rb +7 -1
- data/lib/datadog/core/git/ext.rb +2 -23
- data/lib/datadog/core/remote/client/capabilities.rb +1 -1
- data/lib/datadog/core/remote/component.rb +25 -12
- data/lib/datadog/core/remote/ext.rb +1 -0
- data/lib/datadog/core/remote/negotiation.rb +2 -2
- data/lib/datadog/core/remote/tie/tracing.rb +39 -0
- data/lib/datadog/core/remote/tie.rb +27 -0
- data/lib/datadog/core/remote/transport/http/config.rb +1 -1
- data/lib/datadog/core/remote/worker.rb +7 -4
- data/lib/datadog/core/telemetry/client.rb +18 -10
- data/lib/datadog/core/telemetry/emitter.rb +9 -13
- data/lib/datadog/core/telemetry/event.rb +247 -56
- data/lib/datadog/core/telemetry/ext.rb +4 -0
- data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
- data/lib/datadog/core/telemetry/http/ext.rb +4 -1
- data/lib/datadog/core/telemetry/http/response.rb +4 -0
- data/lib/datadog/core/telemetry/http/transport.rb +9 -4
- data/lib/datadog/core/telemetry/request.rb +59 -0
- data/lib/datadog/core/transport/ext.rb +2 -0
- data/lib/datadog/core/utils/url.rb +25 -0
- data/lib/datadog/opentelemetry/sdk/propagator.rb +3 -2
- data/lib/datadog/opentelemetry.rb +3 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +36 -12
- data/lib/datadog/profiling/collectors/info.rb +101 -0
- data/lib/datadog/profiling/component.rb +210 -34
- data/lib/datadog/profiling/exporter.rb +23 -6
- data/lib/datadog/profiling/ext.rb +2 -0
- data/lib/datadog/profiling/flush.rb +6 -3
- data/lib/datadog/profiling/http_transport.rb +5 -1
- data/lib/datadog/profiling/load_native_extension.rb +19 -6
- data/lib/datadog/profiling/native_extension.rb +1 -1
- data/lib/datadog/profiling/scheduler.rb +4 -6
- data/lib/datadog/profiling/stack_recorder.rb +19 -4
- data/lib/datadog/profiling/tag_builder.rb +5 -0
- data/lib/datadog/profiling/tasks/exec.rb +3 -3
- data/lib/datadog/profiling/tasks/help.rb +3 -3
- data/lib/datadog/profiling.rb +13 -2
- data/lib/datadog/tracing/configuration/ext.rb +0 -1
- data/lib/datadog/tracing/configuration/settings.rb +2 -1
- data/lib/datadog/tracing/contrib/action_cable/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_cable/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/action_mailer/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
- data/lib/datadog/tracing/contrib/action_mailer/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/action_pack/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_pack/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/action_view/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_view/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_job/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_job/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_model_serializers/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_model_serializers/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
- data/lib/datadog/tracing/contrib/active_record/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_record/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_support/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/analytics.rb +0 -1
- data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/aws/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
- data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
- data/lib/datadog/tracing/contrib/configurable.rb +1 -1
- data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/delayed_job/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/delayed_job/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/extensions.rb +6 -2
- data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +7 -0
- data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +1 -1
- data/lib/datadog/tracing/contrib/grape/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/grape/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/graphql/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/http/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/http/distributed/fetcher.rb +2 -2
- data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/kafka/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/kafka/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
- data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/opensearch/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/pg/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
- data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/presto/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/qless/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/qless/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/que/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/racecar/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/racecar/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +9 -2
- data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
- data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rails/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rake/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rake/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/redis/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/redis/instrumentation.rb +2 -2
- data/lib/datadog/tracing/contrib/redis/patcher.rb +34 -21
- data/lib/datadog/tracing/contrib/resque/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/resque/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/roda/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/roda/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sequel/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sequel/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/shoryuken/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/shoryuken/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sinatra/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sneakers/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sneakers/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/stripe/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/stripe/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sucker_punch/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sucker_punch/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +58 -0
- data/lib/datadog/tracing/contrib/trilogy/ext.rb +27 -0
- data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +94 -0
- data/lib/datadog/tracing/contrib/trilogy/integration.rb +43 -0
- data/lib/datadog/tracing/contrib/trilogy/patcher.rb +31 -0
- data/lib/datadog/tracing/contrib.rb +1 -0
- data/lib/datadog/tracing/sampling/matcher.rb +23 -3
- data/lib/datadog/tracing/sampling/rule.rb +7 -2
- data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
- data/lib/datadog/tracing/trace_operation.rb +1 -2
- data/lib/datadog/tracing/transport/http.rb +1 -0
- data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
- data/lib/datadog/tracing.rb +8 -2
- data/lib/ddtrace/version.rb +2 -2
- metadata +71 -61
- data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
- data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -110
- data/lib/datadog/core/telemetry/collector.rb +0 -240
- data/lib/datadog/core/telemetry/v1/app_event.rb +0 -52
- data/lib/datadog/core/telemetry/v1/application.rb +0 -92
- data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
- data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
- data/lib/datadog/core/telemetry/v1/host.rb +0 -59
- data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
- data/lib/datadog/core/telemetry/v1/product.rb +0 -36
- data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
- data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
- data/lib/datadog/core/telemetry/v2/request.rb +0 -29
- data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
@@ -3,6 +3,7 @@
|
|
3
3
|
#include "collectors_thread_context.h"
|
4
4
|
#include "clock_id.h"
|
5
5
|
#include "collectors_stack.h"
|
6
|
+
#include "collectors_gc_profiling_helper.h"
|
6
7
|
#include "helpers.h"
|
7
8
|
#include "libdatadog_helpers.h"
|
8
9
|
#include "private_vm_api_access.h"
|
@@ -37,24 +38,29 @@
|
|
37
38
|
// When `thread_context_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
|
38
39
|
// context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
|
39
40
|
//
|
40
|
-
// While
|
41
|
-
//
|
41
|
+
// While `cpu_time_at_gc_start_ns` is set, regular samples (if any) do not account for cpu-time any time that passes
|
42
|
+
// after this timestamp. The idea is that this cpu-time will be blamed separately on GC, and not on the user thread.
|
43
|
+
// Wall-time accounting is not affected by this (e.g. we still record 60 seconds every 60 seconds).
|
42
44
|
//
|
43
|
-
// (Regular samples can still account for the time between the previous sample and the start of GC.)
|
45
|
+
// (Regular samples can still account for the cpu-time between the previous sample and the start of GC.)
|
44
46
|
//
|
45
|
-
// When `thread_context_collector_on_gc_finish` gets called, the
|
46
|
-
//
|
47
|
+
// When `thread_context_collector_on_gc_finish` gets called, the cpu-time and wall-time spent during GC get recorded
|
48
|
+
// into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns`
|
49
|
+
// of the thread that did GC also gets adjusted to avoid double-accounting.)
|
47
50
|
//
|
48
|
-
// Finally, when `thread_context_collector_sample_after_gc` gets called,
|
51
|
+
// Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded with a stack having
|
52
|
+
// a single placeholder `Garbage Collection` frame. This sample gets
|
53
|
+
// assigned the cpu-time and wall-time that was recorded between calls to `on_gc_start` and `on_gc_finish`, as well
|
54
|
+
// as metadata for the last GC.
|
49
55
|
//
|
50
|
-
//
|
51
|
-
//
|
52
|
-
//
|
53
|
-
//
|
54
|
-
//
|
55
|
-
//
|
56
|
-
//
|
57
|
-
//
|
56
|
+
// Note that the Ruby GC does not usually do all of the GC work in one go. Instead, it breaks it up into smaller steps
|
57
|
+
// so that the application can keep doing user work in between GC steps.
|
58
|
+
// The `on_gc_start` / `on_gc_finish` will trigger each time the VM executes these smaller steps, and on a benchmark
|
59
|
+
// that executes `Object.new` in a loop, I measured more than 50k of these steps per second (!!).
|
60
|
+
// Creating this many events for every GC step is a lot of overhead, so instead `on_gc_finish` coalesces time
|
61
|
+
// spent in GC and only flushes it at most every 10 ms/every complete GC collection. This reduces the amount of
|
62
|
+
// individual GC events we need to record. We use the latest GC metadata for this event, reflecting the last GC that
|
63
|
+
// happened in the coalesced period.
|
58
64
|
//
|
59
65
|
// In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
|
60
66
|
// discovered that we needed to factor the sampling work away from `thread_context_collector_on_gc_finish` and into a
|
@@ -68,6 +74,7 @@
|
|
68
74
|
#define IS_WALL_TIME true
|
69
75
|
#define IS_NOT_WALL_TIME false
|
70
76
|
#define MISSING_TRACER_CONTEXT_KEY 0
|
77
|
+
#define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)
|
71
78
|
|
72
79
|
static ID at_active_span_id; // id of :@active_span in Ruby
|
73
80
|
static ID at_active_trace_id; // id of :@active_trace in Ruby
|
@@ -75,6 +82,9 @@ static ID at_id_id; // id of :@id in Ruby
|
|
75
82
|
static ID at_resource_id; // id of :@resource in Ruby
|
76
83
|
static ID at_root_span_id; // id of :@root_span in Ruby
|
77
84
|
static ID at_type_id; // id of :@type in Ruby
|
85
|
+
static ID at_otel_values_id; // id of :@otel_values in Ruby
|
86
|
+
static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
|
87
|
+
static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby
|
78
88
|
|
79
89
|
// Contains state for a single ThreadContext instance
|
80
90
|
struct thread_context_collector_state {
|
@@ -107,6 +117,8 @@ struct thread_context_collector_state {
|
|
107
117
|
monotonic_to_system_epoch_state time_converter_state;
|
108
118
|
// Used to identify the main thread, to give it a fallback name
|
109
119
|
VALUE main_thread;
|
120
|
+
// Used when extracting trace identifiers from otel spans. Lazily initialized.
|
121
|
+
VALUE otel_current_span_key;
|
110
122
|
|
111
123
|
struct stats {
|
112
124
|
// Track how many garbage collection samples we've taken.
|
@@ -114,6 +126,14 @@ struct thread_context_collector_state {
|
|
114
126
|
// See thread_context_collector_on_gc_start for details
|
115
127
|
unsigned int gc_samples_missed_due_to_missing_context;
|
116
128
|
} stats;
|
129
|
+
|
130
|
+
struct {
|
131
|
+
unsigned long accumulated_cpu_time_ns;
|
132
|
+
unsigned long accumulated_wall_time_ns;
|
133
|
+
|
134
|
+
long wall_time_at_previous_gc_ns; // Will be INVALID_TIME unless there's accumulated time above
|
135
|
+
long wall_time_at_last_flushed_gc_event_ns; // Starts at 0 and then will always be valid
|
136
|
+
} gc_tracking;
|
117
137
|
};
|
118
138
|
|
119
139
|
// Tracks per-thread state
|
@@ -127,15 +147,10 @@ struct per_thread_context {
|
|
127
147
|
long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
|
128
148
|
|
129
149
|
struct {
|
130
|
-
// Both of these fields are set by on_gc_start and kept until
|
150
|
+
// Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
|
131
151
|
// Outside of this window, they will be INVALID_TIME.
|
132
152
|
long cpu_time_at_start_ns;
|
133
153
|
long wall_time_at_start_ns;
|
134
|
-
|
135
|
-
// Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
|
136
|
-
// Outside of this window, they will be INVALID_TIME.
|
137
|
-
long cpu_time_at_finish_ns;
|
138
|
-
long wall_time_at_finish_ns;
|
139
154
|
} gc_tracking;
|
140
155
|
};
|
141
156
|
|
@@ -180,7 +195,6 @@ static void trigger_sample_for_thread(
|
|
180
195
|
VALUE stack_from_thread,
|
181
196
|
struct per_thread_context *thread_context,
|
182
197
|
sample_values values,
|
183
|
-
sample_type type,
|
184
198
|
long current_monotonic_wall_time_ns,
|
185
199
|
ddog_CharSlice *ruby_vm_type,
|
186
200
|
ddog_CharSlice *class_name
|
@@ -193,6 +207,7 @@ static VALUE _native_inspect(VALUE self, VALUE collector_instance);
|
|
193
207
|
static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
|
194
208
|
static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
|
195
209
|
static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state);
|
210
|
+
static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state);
|
196
211
|
static void remove_context_for_dead_threads(struct thread_context_collector_state *state);
|
197
212
|
static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
|
198
213
|
static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
|
@@ -200,13 +215,22 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
|
|
200
215
|
static long cpu_time_now_ns(struct per_thread_context *thread_context);
|
201
216
|
static long thread_id_for(VALUE thread);
|
202
217
|
static VALUE _native_stats(VALUE self, VALUE collector_instance);
|
218
|
+
static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
|
203
219
|
static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
|
204
|
-
static bool should_collect_resource(VALUE
|
220
|
+
static bool should_collect_resource(VALUE root_span);
|
205
221
|
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
|
206
222
|
static VALUE thread_list(struct thread_context_collector_state *state);
|
207
223
|
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
|
208
224
|
static VALUE _native_new_empty_thread(VALUE self);
|
209
|
-
ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
|
225
|
+
static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
|
226
|
+
static void ddtrace_otel_trace_identifiers_for(
|
227
|
+
struct thread_context_collector_state *state,
|
228
|
+
VALUE *active_trace,
|
229
|
+
VALUE *root_span,
|
230
|
+
VALUE *numeric_span_id,
|
231
|
+
VALUE active_span,
|
232
|
+
VALUE otel_values
|
233
|
+
);
|
210
234
|
|
211
235
|
void collectors_thread_context_init(VALUE profiling_module) {
|
212
236
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
@@ -235,6 +259,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
235
259
|
rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
|
236
260
|
rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
|
237
261
|
rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
|
262
|
+
rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
|
238
263
|
rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
|
239
264
|
|
240
265
|
at_active_span_id = rb_intern_const("@active_span");
|
@@ -243,6 +268,11 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
243
268
|
at_resource_id = rb_intern_const("@resource");
|
244
269
|
at_root_span_id = rb_intern_const("@root_span");
|
245
270
|
at_type_id = rb_intern_const("@type");
|
271
|
+
at_otel_values_id = rb_intern_const("@otel_values");
|
272
|
+
at_parent_span_id_id = rb_intern_const("@parent_span_id");
|
273
|
+
at_datadog_trace_id = rb_intern_const("@datadog_trace");
|
274
|
+
|
275
|
+
gc_profiling_init();
|
246
276
|
}
|
247
277
|
|
248
278
|
// This structure is used to define a Ruby object that stores a pointer to a struct thread_context_collector_state
|
@@ -268,6 +298,7 @@ static void thread_context_collector_typed_data_mark(void *state_ptr) {
|
|
268
298
|
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
|
269
299
|
rb_gc_mark(state->thread_list_buffer);
|
270
300
|
rb_gc_mark(state->main_thread);
|
301
|
+
rb_gc_mark(state->otel_current_span_key);
|
271
302
|
}
|
272
303
|
|
273
304
|
static void thread_context_collector_typed_data_free(void *state_ptr) {
|
@@ -320,6 +351,9 @@ static VALUE _native_new(VALUE klass) {
|
|
320
351
|
state->allocation_type_enabled = true;
|
321
352
|
state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
|
322
353
|
state->main_thread = rb_thread_main();
|
354
|
+
state->otel_current_span_key = Qnil;
|
355
|
+
state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
|
356
|
+
state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
|
323
357
|
|
324
358
|
return TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
|
325
359
|
}
|
@@ -465,7 +499,11 @@ void update_metrics_and_sample(
|
|
465
499
|
long wall_time_elapsed_ns = update_time_since_previous_sample(
|
466
500
|
&thread_context->wall_time_at_previous_sample_ns,
|
467
501
|
current_monotonic_wall_time_ns,
|
468
|
-
|
502
|
+
// We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
|
503
|
+
// accounting to change during GC.
|
504
|
+
// E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing
|
505
|
+
// GC or not.
|
506
|
+
INVALID_TIME,
|
469
507
|
IS_WALL_TIME
|
470
508
|
);
|
471
509
|
|
@@ -475,7 +513,6 @@ void update_metrics_and_sample(
|
|
475
513
|
stack_from_thread,
|
476
514
|
thread_context,
|
477
515
|
(sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
|
478
|
-
SAMPLE_REGULAR,
|
479
516
|
current_monotonic_wall_time_ns,
|
480
517
|
NULL,
|
481
518
|
NULL
|
@@ -484,7 +521,7 @@ void update_metrics_and_sample(
|
|
484
521
|
|
485
522
|
// This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
|
486
523
|
// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
|
487
|
-
// create
|
524
|
+
// create an event including the cpu/wall time spent in garbage collector work.
|
488
525
|
//
|
489
526
|
// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
|
490
527
|
// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
|
@@ -509,27 +546,14 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
|
|
509
546
|
return;
|
510
547
|
}
|
511
548
|
|
512
|
-
//
|
513
|
-
//
|
514
|
-
// When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
|
515
|
-
// called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
|
516
|
-
// before we can actually run that method.
|
517
|
-
//
|
518
|
-
// We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
|
519
|
-
// `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
|
520
|
-
// then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
|
521
|
-
// there was a single, longer GC period.
|
522
|
-
if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
|
523
|
-
thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
|
524
|
-
|
525
|
-
// Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
|
549
|
+
// Here we record the wall-time first and in on_gc_finish we record it second to try to avoid having wall-time be slightly < cpu-time
|
526
550
|
thread_context->gc_tracking.wall_time_at_start_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
527
551
|
thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
|
528
552
|
}
|
529
553
|
|
530
554
|
// This function gets called when Ruby has finished running the Garbage Collector on the current thread.
|
531
|
-
// It
|
532
|
-
// create
|
555
|
+
// It records the cpu/wall-time observed during GC, which will be used to later
|
556
|
+
// create an event including the cpu/wall time spent from the start of garbage collector work until now.
|
533
557
|
//
|
534
558
|
// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
|
535
559
|
// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
|
@@ -537,9 +561,9 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
|
|
537
561
|
//
|
538
562
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
539
563
|
// Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
|
540
|
-
|
564
|
+
bool thread_context_collector_on_gc_finish(VALUE self_instance) {
|
541
565
|
struct thread_context_collector_state *state;
|
542
|
-
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
|
566
|
+
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
|
543
567
|
// This should never fail if the above check passes
|
544
568
|
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
545
569
|
|
@@ -547,29 +571,70 @@ void thread_context_collector_on_gc_finish(VALUE self_instance) {
|
|
547
571
|
|
548
572
|
// If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
|
549
573
|
// how often this happens -- see on_gc_start.
|
550
|
-
if (thread_context == NULL) return;
|
574
|
+
if (thread_context == NULL) return false;
|
551
575
|
|
552
|
-
|
553
|
-
|
576
|
+
long cpu_time_at_start_ns = thread_context->gc_tracking.cpu_time_at_start_ns;
|
577
|
+
long wall_time_at_start_ns = thread_context->gc_tracking.wall_time_at_start_ns;
|
578
|
+
|
579
|
+
if (cpu_time_at_start_ns == INVALID_TIME && wall_time_at_start_ns == INVALID_TIME) {
|
554
580
|
// If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
|
555
581
|
// context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
|
556
582
|
// do nothing.
|
557
|
-
return;
|
583
|
+
return false;
|
584
|
+
}
|
585
|
+
|
586
|
+
// Mark thread as no longer in GC
|
587
|
+
thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
|
588
|
+
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
|
589
|
+
|
590
|
+
// Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time
|
591
|
+
long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
|
592
|
+
long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
593
|
+
|
594
|
+
// If our end timestamp is not OK, we bail out
|
595
|
+
if (wall_time_at_finish_ns == 0) return false;
|
596
|
+
|
597
|
+
long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - cpu_time_at_start_ns;
|
598
|
+
long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - wall_time_at_start_ns;
|
599
|
+
|
600
|
+
// Wall-time can go backwards if the system clock gets changed (and we observed spurious jumps back on macOS as well)
|
601
|
+
// so let's ensure we don't get negative values for time deltas.
|
602
|
+
gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0);
|
603
|
+
gc_wall_time_elapsed_ns = long_max_of(gc_wall_time_elapsed_ns, 0);
|
604
|
+
|
605
|
+
if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
|
606
|
+
state->gc_tracking.accumulated_cpu_time_ns = 0;
|
607
|
+
state->gc_tracking.accumulated_wall_time_ns = 0;
|
608
|
+
}
|
609
|
+
|
610
|
+
state->gc_tracking.accumulated_cpu_time_ns += gc_cpu_time_elapsed_ns;
|
611
|
+
state->gc_tracking.accumulated_wall_time_ns += gc_wall_time_elapsed_ns;
|
612
|
+
state->gc_tracking.wall_time_at_previous_gc_ns = wall_time_at_finish_ns;
|
613
|
+
|
614
|
+
// Update cpu-time accounting so it doesn't include the cpu-time spent in GC during the next sample
|
615
|
+
// We don't update the wall-time because we don't subtract the wall-time spent in GC (see call to
|
616
|
+
// `update_time_since_previous_sample` for wall-time in `update_metrics_and_sample`).
|
617
|
+
if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
|
618
|
+
thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
|
558
619
|
}
|
559
620
|
|
560
|
-
//
|
561
|
-
|
562
|
-
|
621
|
+
// Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
|
622
|
+
// on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
|
623
|
+
// samples first.
|
624
|
+
bool over_flush_time_treshold =
|
625
|
+
(wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
|
626
|
+
|
627
|
+
if (over_flush_time_treshold) {
|
628
|
+
return true;
|
629
|
+
} else {
|
630
|
+
return gc_profiling_has_major_gc_finished();
|
631
|
+
}
|
563
632
|
}
|
564
633
|
|
565
|
-
// This function gets called
|
634
|
+
// This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
|
566
635
|
// It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
|
567
636
|
// GC-related tracking.
|
568
637
|
//
|
569
|
-
// Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
|
570
|
-
// and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
|
571
|
-
// set on their context.
|
572
|
-
//
|
573
638
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
574
639
|
// Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
|
575
640
|
// Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
|
@@ -578,70 +643,45 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
|
|
578
643
|
struct thread_context_collector_state *state;
|
579
644
|
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
580
645
|
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
const long thread_count = RARRAY_LEN(threads);
|
585
|
-
for (long i = 0; i < thread_count; i++) {
|
586
|
-
VALUE thread = RARRAY_AREF(threads, i);
|
587
|
-
struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
|
646
|
+
if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
|
647
|
+
rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
|
648
|
+
}
|
588
649
|
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
|
593
|
-
thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
|
594
|
-
) continue; // Ignore threads with no/incomplete garbage collection data
|
595
|
-
|
596
|
-
sampled_any_thread = true;
|
597
|
-
|
598
|
-
long gc_cpu_time_elapsed_ns =
|
599
|
-
thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
|
600
|
-
long gc_wall_time_elapsed_ns =
|
601
|
-
thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
|
602
|
-
|
603
|
-
// We don't expect non-wall time to go backwards, so let's flag this as a bug
|
604
|
-
if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
|
605
|
-
// Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
|
606
|
-
// was a bug.
|
607
|
-
// @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
|
608
|
-
// https://github.com/DataDog/dd-trace-rb/pull/2336.
|
609
|
-
if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
|
610
|
-
|
611
|
-
if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
|
612
|
-
// Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
|
613
|
-
// come up with a crazy value for the frame
|
614
|
-
rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
|
615
|
-
}
|
650
|
+
int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
|
651
|
+
ddog_prof_Label labels[max_labels_needed_for_gc];
|
652
|
+
uint8_t label_pos = gc_profiling_set_metadata(labels, max_labels_needed_for_gc);
|
616
653
|
|
617
|
-
|
618
|
-
state,
|
619
|
-
/* thread: */ thread,
|
620
|
-
/* stack_from_thread: */ thread,
|
621
|
-
thread_context,
|
622
|
-
(sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
|
623
|
-
SAMPLE_IN_GC,
|
624
|
-
INVALID_TIME, // For now we're not collecting timestamps for these events
|
625
|
-
NULL,
|
626
|
-
NULL
|
627
|
-
);
|
654
|
+
ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
|
628
655
|
|
629
|
-
|
630
|
-
|
631
|
-
thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
|
632
|
-
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
|
633
|
-
thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
|
656
|
+
// The end_timestamp_ns is treated specially by libdatadog and that's why it's not added as a ddog_prof_Label
|
657
|
+
int64_t end_timestamp_ns = 0;
|
634
658
|
|
635
|
-
|
636
|
-
|
637
|
-
thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
|
638
|
-
}
|
639
|
-
if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
|
640
|
-
thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
|
641
|
-
}
|
659
|
+
if (state->timeline_enabled) {
|
660
|
+
end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns);
|
642
661
|
}
|
643
662
|
|
644
|
-
|
663
|
+
record_placeholder_stack(
|
664
|
+
state->sampling_buffer,
|
665
|
+
state->recorder_instance,
|
666
|
+
(sample_values) {
|
667
|
+
// This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
|
668
|
+
// timeline duration.
|
669
|
+
// This is done to enable two use-cases:
|
670
|
+
// * regular cpu/wall-time makes this event show up as a regular stack in the flamegraph
|
671
|
+
// * the timeline duration is used when the event shows up in the timeline
|
672
|
+
.cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns,
|
673
|
+
.cpu_or_wall_samples = 1,
|
674
|
+
.wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
|
675
|
+
.timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
|
676
|
+
},
|
677
|
+
(sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns},
|
678
|
+
DDOG_CHARSLICE_C("Garbage Collection")
|
679
|
+
);
|
680
|
+
|
681
|
+
state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns;
|
682
|
+
state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
|
683
|
+
|
684
|
+
state->stats.gc_samples++;
|
645
685
|
|
646
686
|
// Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
|
647
687
|
return Qnil;
|
@@ -653,7 +693,6 @@ static void trigger_sample_for_thread(
|
|
653
693
|
VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
|
654
694
|
struct per_thread_context *thread_context,
|
655
695
|
sample_values values,
|
656
|
-
sample_type type,
|
657
696
|
long current_monotonic_wall_time_ns,
|
658
697
|
// These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
|
659
698
|
ddog_CharSlice *ruby_vm_type,
|
@@ -776,8 +815,7 @@ static void trigger_sample_for_thread(
|
|
776
815
|
state->sampling_buffer,
|
777
816
|
state->recorder_instance,
|
778
817
|
values,
|
779
|
-
(sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
|
780
|
-
type
|
818
|
+
(sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
|
781
819
|
);
|
782
820
|
}
|
783
821
|
|
@@ -874,9 +912,7 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_c
|
|
874
912
|
|
875
913
|
// These will only be used during a GC operation
|
876
914
|
thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
|
877
|
-
thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
|
878
915
|
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
|
879
|
-
thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
|
880
916
|
}
|
881
917
|
|
882
918
|
static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
@@ -901,6 +937,8 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
|
|
901
937
|
state->time_converter_state.delta_to_epoch_ns
|
902
938
|
));
|
903
939
|
rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
|
940
|
+
rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
|
941
|
+
rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
|
904
942
|
|
905
943
|
return result;
|
906
944
|
}
|
@@ -927,9 +965,7 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
|
|
927
965
|
ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),
|
928
966
|
|
929
967
|
ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
|
930
|
-
ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
|
931
968
|
ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
|
932
|
-
ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
|
933
969
|
};
|
934
970
|
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);
|
935
971
|
|
@@ -947,6 +983,19 @@ static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state) {
|
|
947
983
|
return stats_as_hash;
|
948
984
|
}
|
949
985
|
|
986
|
+
static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state) {
|
987
|
+
// Update this when modifying state struct (gc_tracking inner struct)
|
988
|
+
VALUE result = rb_hash_new();
|
989
|
+
VALUE arguments[] = {
|
990
|
+
ID2SYM(rb_intern("accumulated_cpu_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_cpu_time_ns),
|
991
|
+
ID2SYM(rb_intern("accumulated_wall_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_wall_time_ns),
|
992
|
+
ID2SYM(rb_intern("wall_time_at_previous_gc_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_previous_gc_ns),
|
993
|
+
ID2SYM(rb_intern("wall_time_at_last_flushed_gc_event_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_last_flushed_gc_event_ns),
|
994
|
+
};
|
995
|
+
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(result, arguments[i], arguments[i+1]);
|
996
|
+
return result;
|
997
|
+
}
|
998
|
+
|
950
999
|
static void remove_context_for_dead_threads(struct thread_context_collector_state *state) {
|
951
1000
|
st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
|
952
1001
|
}
|
@@ -1049,8 +1098,6 @@ VALUE enforce_thread_context_collector_instance(VALUE object) {
|
|
1049
1098
|
|
1050
1099
|
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
1051
1100
|
// It SHOULD NOT be used for other purposes.
|
1052
|
-
//
|
1053
|
-
// Returns the whole contents of the per_thread_context structs being tracked.
|
1054
1101
|
static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
1055
1102
|
struct thread_context_collector_state *state;
|
1056
1103
|
TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
@@ -1058,6 +1105,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance)
|
|
1058
1105
|
return stats_as_ruby_hash(state);
|
1059
1106
|
}
|
1060
1107
|
|
1108
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
1109
|
+
// It SHOULD NOT be used for other purposes.
|
1110
|
+
static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
1111
|
+
struct thread_context_collector_state *state;
|
1112
|
+
TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
1113
|
+
|
1114
|
+
return gc_tracking_as_ruby_hash(state);
|
1115
|
+
}
|
1116
|
+
|
1061
1117
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
1062
1118
|
static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
|
1063
1119
|
if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
|
@@ -1070,10 +1126,19 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
|
|
1070
1126
|
|
1071
1127
|
VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
|
1072
1128
|
VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
|
1073
|
-
|
1129
|
+
// Note: On Ruby 3.x `rb_attr_get` is exactly the same as `rb_ivar_get`. For Ruby 2.x, the difference is that
|
1130
|
+
// `rb_ivar_get` can trigger "warning: instance variable @otel_values not initialized" if warnings are enabled and
|
1131
|
+
// opentelemetry is not in use, whereas `rb_attr_get` does the lookup without generating the warning.
|
1132
|
+
VALUE otel_values = rb_attr_get(active_trace, at_otel_values_id /* @otel_values */);
|
1133
|
+
|
1134
|
+
VALUE numeric_span_id = Qnil;
|
1135
|
+
|
1136
|
+
if (otel_values != Qnil) ddtrace_otel_trace_identifiers_for(state, &active_trace, &root_span, &numeric_span_id, active_span, otel_values);
|
1137
|
+
|
1138
|
+
if (root_span == Qnil || (active_span == Qnil && numeric_span_id == Qnil)) return;
|
1074
1139
|
|
1075
1140
|
VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
|
1076
|
-
|
1141
|
+
if (active_span != Qnil && numeric_span_id == Qnil) numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
|
1077
1142
|
if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
|
1078
1143
|
|
1079
1144
|
trace_identifiers_result->local_root_span_id = NUM2ULL(numeric_local_root_span_id);
|
@@ -1081,10 +1146,7 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
|
|
1081
1146
|
|
1082
1147
|
trace_identifiers_result->valid = true;
|
1083
1148
|
|
1084
|
-
if (!state->endpoint_collection_enabled) return;
|
1085
|
-
|
1086
|
-
VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
|
1087
|
-
if (root_span_type == Qnil || !should_collect_resource(root_span_type)) return;
|
1149
|
+
if (!state->endpoint_collection_enabled || !should_collect_resource(root_span)) return;
|
1088
1150
|
|
1089
1151
|
VALUE trace_resource = rb_ivar_get(active_trace, at_resource_id /* @resource */);
|
1090
1152
|
if (RB_TYPE_P(trace_resource, T_STRING)) {
|
@@ -1095,21 +1157,32 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
|
|
1095
1157
|
}
|
1096
1158
|
}
|
1097
1159
|
|
1098
|
-
// We
|
1160
|
+
// We opt-in to collecting the resource for spans of types:
|
1099
1161
|
// * 'web', for web requests
|
1100
|
-
// * proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request)
|
1162
|
+
// * 'proxy', used by the rack integration with request_queuing: true (i.e. it also represents a web request)
|
1163
|
+
// * 'worker', used for sidekiq and similar background job processors
|
1101
1164
|
//
|
1102
|
-
//
|
1165
|
+
// Over time, this list may be expanded.
|
1103
1166
|
// Resources MUST NOT include personal identifiable information (PII); this should not be the case with
|
1104
1167
|
// ddtrace integrations, but worth mentioning just in case :)
|
1105
|
-
static bool should_collect_resource(VALUE
|
1168
|
+
static bool should_collect_resource(VALUE root_span) {
|
1169
|
+
VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
|
1170
|
+
if (root_span_type == Qnil) return false;
|
1106
1171
|
ENFORCE_TYPE(root_span_type, T_STRING);
|
1107
1172
|
|
1108
1173
|
int root_span_type_length = RSTRING_LEN(root_span_type);
|
1109
1174
|
const char *root_span_type_value = StringValuePtr(root_span_type);
|
1110
1175
|
|
1111
|
-
|
1176
|
+
bool is_web_request =
|
1177
|
+
(root_span_type_length == strlen("web") && (memcmp("web", root_span_type_value, strlen("web")) == 0)) ||
|
1112
1178
|
(root_span_type_length == strlen("proxy") && (memcmp("proxy", root_span_type_value, strlen("proxy")) == 0));
|
1179
|
+
|
1180
|
+
if (is_web_request) return true;
|
1181
|
+
|
1182
|
+
bool is_worker_request =
|
1183
|
+
(root_span_type_length == strlen("worker") && (memcmp("worker", root_span_type_value, strlen("worker")) == 0));
|
1184
|
+
|
1185
|
+
return is_worker_request;
|
1113
1186
|
}
|
1114
1187
|
|
1115
1188
|
// After the Ruby VM forks, this method gets called in the child process to clean up any leftover state from the parent.
|
@@ -1210,13 +1283,14 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
|
|
1210
1283
|
}
|
1211
1284
|
}
|
1212
1285
|
|
1286
|
+
track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);
|
1287
|
+
|
1213
1288
|
trigger_sample_for_thread(
|
1214
1289
|
state,
|
1215
1290
|
/* thread: */ current_thread,
|
1216
1291
|
/* stack_from_thread: */ current_thread,
|
1217
1292
|
get_or_create_context_for(current_thread, state),
|
1218
1293
|
(sample_values) {.alloc_samples = sample_weight},
|
1219
|
-
SAMPLE_REGULAR,
|
1220
1294
|
INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
|
1221
1295
|
&ruby_vm_type,
|
1222
1296
|
optional_class_name
|
@@ -1239,7 +1313,7 @@ static VALUE _native_new_empty_thread(DDTRACE_UNUSED VALUE self) {
|
|
1239
1313
|
return rb_thread_create(new_empty_thread_inner, NULL);
|
1240
1314
|
}
|
1241
1315
|
|
1242
|
-
ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
|
1316
|
+
static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
|
1243
1317
|
switch (type) {
|
1244
1318
|
case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("Object");
|
1245
1319
|
case(RUBY_T_CLASS ): return DDOG_CHARSLICE_C("Class");
|
@@ -1264,3 +1338,65 @@ ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
|
|
1264
1338
|
default: return DDOG_CHARSLICE_C("(VM Internal, Missing class)");
|
1265
1339
|
}
|
1266
1340
|
}
|
1341
|
+
|
1342
|
+
static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
|
1343
|
+
if (state->otel_current_span_key == Qnil) {
|
1344
|
+
VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog"));
|
1345
|
+
VALUE opentelemetry_module = rb_const_get(datadog_module, rb_intern("OpenTelemetry"));
|
1346
|
+
VALUE api_module = rb_const_get(opentelemetry_module, rb_intern("API"));
|
1347
|
+
VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
|
1348
|
+
VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
|
1349
|
+
|
1350
|
+
if (current_span_key == Qnil) {
|
1351
|
+
rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
|
1352
|
+
}
|
1353
|
+
|
1354
|
+
state->otel_current_span_key = current_span_key;
|
1355
|
+
}
|
1356
|
+
|
1357
|
+
return state->otel_current_span_key;
|
1358
|
+
}
|
1359
|
+
|
1360
|
+
// This method gets used when ddtrace is being used indirectly via the otel APIs. Information gets stored slightly
|
1361
|
+
// differently, and this codepath handles it.
|
1362
|
+
static void ddtrace_otel_trace_identifiers_for(
|
1363
|
+
struct thread_context_collector_state *state,
|
1364
|
+
VALUE *active_trace,
|
1365
|
+
VALUE *root_span,
|
1366
|
+
VALUE *numeric_span_id,
|
1367
|
+
VALUE active_span,
|
1368
|
+
VALUE otel_values
|
1369
|
+
) {
|
1370
|
+
VALUE resolved_numeric_span_id =
|
1371
|
+
active_span == Qnil ?
|
1372
|
+
// For traces started from otel spans, the span id will be empty, and the @parent_span_id has the right value
|
1373
|
+
rb_ivar_get(*active_trace, at_parent_span_id_id /* @parent_span_id */) :
|
1374
|
+
// Regular span created by ddtrace
|
1375
|
+
rb_ivar_get(active_span, at_id_id /* @id */);
|
1376
|
+
|
1377
|
+
if (resolved_numeric_span_id == Qnil) return;
|
1378
|
+
|
1379
|
+
VALUE otel_current_span_key = get_otel_current_span_key(state);
|
1380
|
+
VALUE current_trace = *active_trace;
|
1381
|
+
|
1382
|
+
// ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
|
1383
|
+
// trace and span representing it. Each ddtrace trace is then connected to the previous otel span, forming a linked
|
1384
|
+
// list. The local root span is going to be the trace/span we find at the end of this linked list.
|
1385
|
+
while (otel_values != Qnil) {
|
1386
|
+
VALUE otel_span = rb_hash_lookup(otel_values, otel_current_span_key);
|
1387
|
+
if (otel_span == Qnil) break;
|
1388
|
+
VALUE next_trace = rb_ivar_get(otel_span, at_datadog_trace_id);
|
1389
|
+
if (next_trace == Qnil) break;
|
1390
|
+
|
1391
|
+
current_trace = next_trace;
|
1392
|
+
otel_values = rb_ivar_get(current_trace, at_otel_values_id /* @otel_values */);
|
1393
|
+
}
|
1394
|
+
|
1395
|
+
// We found the last trace in the linked list. This contains the local root span
|
1396
|
+
VALUE resolved_root_span = rb_ivar_get(current_trace, at_root_span_id /* @root_span */);
|
1397
|
+
if (resolved_root_span == Qnil) return;
|
1398
|
+
|
1399
|
+
*root_span = resolved_root_span;
|
1400
|
+
*active_trace = current_trace;
|
1401
|
+
*numeric_span_id = resolved_numeric_span_id;
|
1402
|
+
}
|