ddtrace 1.18.0 → 1.23.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +228 -2
- data/LICENSE-3rdparty.csv +1 -1
- data/bin/ddprofrb +15 -0
- data/bin/ddtracerb +3 -1
- data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
- data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +312 -117
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +422 -0
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +101 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +22 -14
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +4 -0
- data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
- data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +43 -102
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +10 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +272 -136
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +2 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +28 -7
- data/ext/datadog_profiling_native_extension/heap_recorder.c +1047 -0
- data/ext/datadog_profiling_native_extension/heap_recorder.h +166 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +6 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +20 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +11 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +50 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +19 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +18 -1
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +267 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +33 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +476 -58
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +3 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +2 -0
- data/lib/datadog/appsec/contrib/devise/tracking.rb +8 -0
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +45 -14
- data/lib/datadog/appsec/event.rb +1 -1
- data/lib/datadog/auto_instrument.rb +3 -0
- data/lib/datadog/core/configuration/components.rb +7 -6
- data/lib/datadog/core/configuration/option.rb +8 -6
- data/lib/datadog/core/configuration/settings.rb +259 -60
- data/lib/datadog/core/configuration.rb +20 -4
- data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
- data/lib/datadog/core/environment/class_count.rb +6 -6
- data/lib/datadog/core/environment/git.rb +25 -0
- data/lib/datadog/core/environment/identity.rb +18 -48
- data/lib/datadog/core/environment/platform.rb +7 -1
- data/lib/datadog/core/git/ext.rb +2 -23
- data/lib/datadog/core/remote/client/capabilities.rb +1 -1
- data/lib/datadog/core/remote/component.rb +25 -12
- data/lib/datadog/core/remote/ext.rb +1 -0
- data/lib/datadog/core/remote/negotiation.rb +2 -2
- data/lib/datadog/core/remote/tie/tracing.rb +39 -0
- data/lib/datadog/core/remote/tie.rb +27 -0
- data/lib/datadog/core/remote/transport/http/config.rb +1 -1
- data/lib/datadog/core/remote/worker.rb +7 -4
- data/lib/datadog/core/telemetry/client.rb +18 -10
- data/lib/datadog/core/telemetry/emitter.rb +9 -13
- data/lib/datadog/core/telemetry/event.rb +247 -56
- data/lib/datadog/core/telemetry/ext.rb +4 -0
- data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
- data/lib/datadog/core/telemetry/http/ext.rb +4 -1
- data/lib/datadog/core/telemetry/http/response.rb +4 -0
- data/lib/datadog/core/telemetry/http/transport.rb +9 -4
- data/lib/datadog/core/telemetry/request.rb +59 -0
- data/lib/datadog/core/transport/ext.rb +2 -0
- data/lib/datadog/core/utils/url.rb +25 -0
- data/lib/datadog/opentelemetry/sdk/propagator.rb +3 -2
- data/lib/datadog/opentelemetry.rb +3 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +36 -12
- data/lib/datadog/profiling/collectors/info.rb +101 -0
- data/lib/datadog/profiling/component.rb +210 -34
- data/lib/datadog/profiling/exporter.rb +23 -6
- data/lib/datadog/profiling/ext.rb +2 -0
- data/lib/datadog/profiling/flush.rb +6 -3
- data/lib/datadog/profiling/http_transport.rb +5 -1
- data/lib/datadog/profiling/load_native_extension.rb +19 -6
- data/lib/datadog/profiling/native_extension.rb +1 -1
- data/lib/datadog/profiling/scheduler.rb +4 -6
- data/lib/datadog/profiling/stack_recorder.rb +19 -4
- data/lib/datadog/profiling/tag_builder.rb +5 -0
- data/lib/datadog/profiling/tasks/exec.rb +3 -3
- data/lib/datadog/profiling/tasks/help.rb +3 -3
- data/lib/datadog/profiling.rb +13 -2
- data/lib/datadog/tracing/configuration/ext.rb +0 -1
- data/lib/datadog/tracing/configuration/settings.rb +2 -1
- data/lib/datadog/tracing/contrib/action_cable/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_cable/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/action_mailer/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
- data/lib/datadog/tracing/contrib/action_mailer/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/action_pack/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_pack/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/action_view/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_view/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_job/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_job/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_model_serializers/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_model_serializers/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
- data/lib/datadog/tracing/contrib/active_record/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_record/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_support/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/analytics.rb +0 -1
- data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/aws/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
- data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
- data/lib/datadog/tracing/contrib/configurable.rb +1 -1
- data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/delayed_job/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/delayed_job/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/extensions.rb +6 -2
- data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +7 -0
- data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +1 -1
- data/lib/datadog/tracing/contrib/grape/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/grape/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/graphql/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/http/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/http/distributed/fetcher.rb +2 -2
- data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/kafka/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/kafka/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
- data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/opensearch/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/pg/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
- data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/presto/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/qless/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/qless/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/que/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/racecar/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/racecar/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +9 -2
- data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
- data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rails/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rake/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rake/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/redis/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/redis/instrumentation.rb +2 -2
- data/lib/datadog/tracing/contrib/redis/patcher.rb +34 -21
- data/lib/datadog/tracing/contrib/resque/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/resque/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/roda/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/roda/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sequel/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sequel/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/shoryuken/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/shoryuken/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sinatra/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sneakers/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sneakers/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/stripe/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/stripe/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sucker_punch/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sucker_punch/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +58 -0
- data/lib/datadog/tracing/contrib/trilogy/ext.rb +27 -0
- data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +94 -0
- data/lib/datadog/tracing/contrib/trilogy/integration.rb +43 -0
- data/lib/datadog/tracing/contrib/trilogy/patcher.rb +31 -0
- data/lib/datadog/tracing/contrib.rb +1 -0
- data/lib/datadog/tracing/sampling/matcher.rb +23 -3
- data/lib/datadog/tracing/sampling/rule.rb +7 -2
- data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
- data/lib/datadog/tracing/trace_operation.rb +1 -2
- data/lib/datadog/tracing/transport/http.rb +1 -0
- data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
- data/lib/datadog/tracing.rb +8 -2
- data/lib/ddtrace/version.rb +2 -2
- metadata +71 -61
- data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
- data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -110
- data/lib/datadog/core/telemetry/collector.rb +0 -240
- data/lib/datadog/core/telemetry/v1/app_event.rb +0 -52
- data/lib/datadog/core/telemetry/v1/application.rb +0 -92
- data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
- data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
- data/lib/datadog/core/telemetry/v1/host.rb +0 -59
- data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
- data/lib/datadog/core/telemetry/v1/product.rb +0 -36
- data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
- data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
- data/lib/datadog/core/telemetry/v2/request.rb +0 -29
- data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
#include "collectors_thread_context.h"
|
|
4
4
|
#include "clock_id.h"
|
|
5
5
|
#include "collectors_stack.h"
|
|
6
|
+
#include "collectors_gc_profiling_helper.h"
|
|
6
7
|
#include "helpers.h"
|
|
7
8
|
#include "libdatadog_helpers.h"
|
|
8
9
|
#include "private_vm_api_access.h"
|
|
@@ -37,24 +38,29 @@
|
|
|
37
38
|
// When `thread_context_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
|
|
38
39
|
// context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
|
|
39
40
|
//
|
|
40
|
-
// While
|
|
41
|
-
//
|
|
41
|
+
// While `cpu_time_at_gc_start_ns` is set, regular samples (if any) do not account for cpu-time any time that passes
|
|
42
|
+
// after this timestamp. The idea is that this cpu-time will be blamed separately on GC, and not on the user thread.
|
|
43
|
+
// Wall-time accounting is not affected by this (e.g. we still record 60 seconds every 60 seconds).
|
|
42
44
|
//
|
|
43
|
-
// (Regular samples can still account for the time between the previous sample and the start of GC.)
|
|
45
|
+
// (Regular samples can still account for the cpu-time between the previous sample and the start of GC.)
|
|
44
46
|
//
|
|
45
|
-
// When `thread_context_collector_on_gc_finish` gets called, the
|
|
46
|
-
//
|
|
47
|
+
// When `thread_context_collector_on_gc_finish` gets called, the cpu-time and wall-time spent during GC gets recorded
|
|
48
|
+
// into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns`
|
|
49
|
+
// of the thread that did GC also gets adjusted to avoid double-accounting.)
|
|
47
50
|
//
|
|
48
|
-
// Finally, when `thread_context_collector_sample_after_gc` gets called,
|
|
51
|
+
// Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded with a stack having
|
|
52
|
+
// a single placeholder `Garbage Collection` frame. This sample gets
|
|
53
|
+
// assigned the cpu-time and wall-time that was recorded between calls to `on_gc_start` and `on_gc_finish`, as well
|
|
54
|
+
// as metadata for the last GC.
|
|
49
55
|
//
|
|
50
|
-
//
|
|
51
|
-
//
|
|
52
|
-
//
|
|
53
|
-
//
|
|
54
|
-
//
|
|
55
|
-
//
|
|
56
|
-
//
|
|
57
|
-
//
|
|
56
|
+
// Note that the Ruby GC does not usually do all of the GC work in one go. Instead, it breaks it up into smaller steps
|
|
57
|
+
// so that the application can keep doing user work in between GC steps.
|
|
58
|
+
// The `on_gc_start` / `on_gc_finish` will trigger each time the VM executes these smaller steps, and on a benchmark
|
|
59
|
+
// that executes `Object.new` in a loop, I measured more than 50k of these steps per second (!!).
|
|
60
|
+
// Creating this many events for every GC step is a lot of overhead, so instead `on_gc_finish` coalesces time
|
|
61
|
+
// spent in GC and only flushes it at most every 10 ms/every complete GC collection. This reduces the amount of
|
|
62
|
+
// individual GC events we need to record. We use the latest GC metadata for this event, reflecting the last GC that
|
|
63
|
+
// happened in the coalesced period.
|
|
58
64
|
//
|
|
59
65
|
// In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
|
|
60
66
|
// discovered that we needed to factor the sampling work away from `thread_context_collector_on_gc_finish` and into a
|
|
@@ -68,6 +74,7 @@
|
|
|
68
74
|
#define IS_WALL_TIME true
|
|
69
75
|
#define IS_NOT_WALL_TIME false
|
|
70
76
|
#define MISSING_TRACER_CONTEXT_KEY 0
|
|
77
|
+
#define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)
|
|
71
78
|
|
|
72
79
|
static ID at_active_span_id; // id of :@active_span in Ruby
|
|
73
80
|
static ID at_active_trace_id; // id of :@active_trace in Ruby
|
|
@@ -75,6 +82,9 @@ static ID at_id_id; // id of :@id in Ruby
|
|
|
75
82
|
static ID at_resource_id; // id of :@resource in Ruby
|
|
76
83
|
static ID at_root_span_id; // id of :@root_span in Ruby
|
|
77
84
|
static ID at_type_id; // id of :@type in Ruby
|
|
85
|
+
static ID at_otel_values_id; // id of :@otel_values in Ruby
|
|
86
|
+
static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
|
|
87
|
+
static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby
|
|
78
88
|
|
|
79
89
|
// Contains state for a single ThreadContext instance
|
|
80
90
|
struct thread_context_collector_state {
|
|
@@ -107,6 +117,8 @@ struct thread_context_collector_state {
|
|
|
107
117
|
monotonic_to_system_epoch_state time_converter_state;
|
|
108
118
|
// Used to identify the main thread, to give it a fallback name
|
|
109
119
|
VALUE main_thread;
|
|
120
|
+
// Used when extracting trace identifiers from otel spans. Lazily initialized.
|
|
121
|
+
VALUE otel_current_span_key;
|
|
110
122
|
|
|
111
123
|
struct stats {
|
|
112
124
|
// Track how many garbage collection samples we've taken.
|
|
@@ -114,6 +126,14 @@ struct thread_context_collector_state {
|
|
|
114
126
|
// See thread_context_collector_on_gc_start for details
|
|
115
127
|
unsigned int gc_samples_missed_due_to_missing_context;
|
|
116
128
|
} stats;
|
|
129
|
+
|
|
130
|
+
struct {
|
|
131
|
+
unsigned long accumulated_cpu_time_ns;
|
|
132
|
+
unsigned long accumulated_wall_time_ns;
|
|
133
|
+
|
|
134
|
+
long wall_time_at_previous_gc_ns; // Will be INVALID_TIME unless there's accumulated time above
|
|
135
|
+
long wall_time_at_last_flushed_gc_event_ns; // Starts at 0 and then will always be valid
|
|
136
|
+
} gc_tracking;
|
|
117
137
|
};
|
|
118
138
|
|
|
119
139
|
// Tracks per-thread state
|
|
@@ -127,15 +147,10 @@ struct per_thread_context {
|
|
|
127
147
|
long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
|
|
128
148
|
|
|
129
149
|
struct {
|
|
130
|
-
// Both of these fields are set by on_gc_start and kept until
|
|
150
|
+
// Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
|
|
131
151
|
// Outside of this window, they will be INVALID_TIME.
|
|
132
152
|
long cpu_time_at_start_ns;
|
|
133
153
|
long wall_time_at_start_ns;
|
|
134
|
-
|
|
135
|
-
// Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
|
|
136
|
-
// Outside of this window, they will be INVALID_TIME.
|
|
137
|
-
long cpu_time_at_finish_ns;
|
|
138
|
-
long wall_time_at_finish_ns;
|
|
139
154
|
} gc_tracking;
|
|
140
155
|
};
|
|
141
156
|
|
|
@@ -180,7 +195,6 @@ static void trigger_sample_for_thread(
|
|
|
180
195
|
VALUE stack_from_thread,
|
|
181
196
|
struct per_thread_context *thread_context,
|
|
182
197
|
sample_values values,
|
|
183
|
-
sample_type type,
|
|
184
198
|
long current_monotonic_wall_time_ns,
|
|
185
199
|
ddog_CharSlice *ruby_vm_type,
|
|
186
200
|
ddog_CharSlice *class_name
|
|
@@ -193,6 +207,7 @@ static VALUE _native_inspect(VALUE self, VALUE collector_instance);
|
|
|
193
207
|
static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
|
|
194
208
|
static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
|
|
195
209
|
static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state);
|
|
210
|
+
static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state);
|
|
196
211
|
static void remove_context_for_dead_threads(struct thread_context_collector_state *state);
|
|
197
212
|
static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
|
|
198
213
|
static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
|
|
@@ -200,13 +215,22 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
|
|
|
200
215
|
static long cpu_time_now_ns(struct per_thread_context *thread_context);
|
|
201
216
|
static long thread_id_for(VALUE thread);
|
|
202
217
|
static VALUE _native_stats(VALUE self, VALUE collector_instance);
|
|
218
|
+
static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
|
|
203
219
|
static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
|
|
204
|
-
static bool should_collect_resource(VALUE
|
|
220
|
+
static bool should_collect_resource(VALUE root_span);
|
|
205
221
|
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
|
|
206
222
|
static VALUE thread_list(struct thread_context_collector_state *state);
|
|
207
223
|
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
|
|
208
224
|
static VALUE _native_new_empty_thread(VALUE self);
|
|
209
|
-
ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
|
|
225
|
+
static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
|
|
226
|
+
static void ddtrace_otel_trace_identifiers_for(
|
|
227
|
+
struct thread_context_collector_state *state,
|
|
228
|
+
VALUE *active_trace,
|
|
229
|
+
VALUE *root_span,
|
|
230
|
+
VALUE *numeric_span_id,
|
|
231
|
+
VALUE active_span,
|
|
232
|
+
VALUE otel_values
|
|
233
|
+
);
|
|
210
234
|
|
|
211
235
|
void collectors_thread_context_init(VALUE profiling_module) {
|
|
212
236
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
|
@@ -235,6 +259,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
|
235
259
|
rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
|
|
236
260
|
rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
|
|
237
261
|
rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
|
|
262
|
+
rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
|
|
238
263
|
rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
|
|
239
264
|
|
|
240
265
|
at_active_span_id = rb_intern_const("@active_span");
|
|
@@ -243,6 +268,11 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
|
243
268
|
at_resource_id = rb_intern_const("@resource");
|
|
244
269
|
at_root_span_id = rb_intern_const("@root_span");
|
|
245
270
|
at_type_id = rb_intern_const("@type");
|
|
271
|
+
at_otel_values_id = rb_intern_const("@otel_values");
|
|
272
|
+
at_parent_span_id_id = rb_intern_const("@parent_span_id");
|
|
273
|
+
at_datadog_trace_id = rb_intern_const("@datadog_trace");
|
|
274
|
+
|
|
275
|
+
gc_profiling_init();
|
|
246
276
|
}
|
|
247
277
|
|
|
248
278
|
// This structure is used to define a Ruby object that stores a pointer to a struct thread_context_collector_state
|
|
@@ -268,6 +298,7 @@ static void thread_context_collector_typed_data_mark(void *state_ptr) {
|
|
|
268
298
|
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
|
|
269
299
|
rb_gc_mark(state->thread_list_buffer);
|
|
270
300
|
rb_gc_mark(state->main_thread);
|
|
301
|
+
rb_gc_mark(state->otel_current_span_key);
|
|
271
302
|
}
|
|
272
303
|
|
|
273
304
|
static void thread_context_collector_typed_data_free(void *state_ptr) {
|
|
@@ -320,6 +351,9 @@ static VALUE _native_new(VALUE klass) {
|
|
|
320
351
|
state->allocation_type_enabled = true;
|
|
321
352
|
state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
|
|
322
353
|
state->main_thread = rb_thread_main();
|
|
354
|
+
state->otel_current_span_key = Qnil;
|
|
355
|
+
state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
|
|
356
|
+
state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
|
|
323
357
|
|
|
324
358
|
return TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
|
|
325
359
|
}
|
|
@@ -465,7 +499,11 @@ void update_metrics_and_sample(
|
|
|
465
499
|
long wall_time_elapsed_ns = update_time_since_previous_sample(
|
|
466
500
|
&thread_context->wall_time_at_previous_sample_ns,
|
|
467
501
|
current_monotonic_wall_time_ns,
|
|
468
|
-
|
|
502
|
+
// We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
|
|
503
|
+
// accounting to change during GC.
|
|
504
|
+
// E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing
|
|
505
|
+
// GC or not.
|
|
506
|
+
INVALID_TIME,
|
|
469
507
|
IS_WALL_TIME
|
|
470
508
|
);
|
|
471
509
|
|
|
@@ -475,7 +513,6 @@ void update_metrics_and_sample(
|
|
|
475
513
|
stack_from_thread,
|
|
476
514
|
thread_context,
|
|
477
515
|
(sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
|
|
478
|
-
SAMPLE_REGULAR,
|
|
479
516
|
current_monotonic_wall_time_ns,
|
|
480
517
|
NULL,
|
|
481
518
|
NULL
|
|
@@ -484,7 +521,7 @@ void update_metrics_and_sample(
|
|
|
484
521
|
|
|
485
522
|
// This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
|
|
486
523
|
// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
|
|
487
|
-
// create
|
|
524
|
+
// create an event including the cpu/wall time spent in garbage collector work.
|
|
488
525
|
//
|
|
489
526
|
// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
|
|
490
527
|
// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
|
|
@@ -509,27 +546,14 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
|
|
|
509
546
|
return;
|
|
510
547
|
}
|
|
511
548
|
|
|
512
|
-
//
|
|
513
|
-
//
|
|
514
|
-
// When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
|
|
515
|
-
// called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
|
|
516
|
-
// before we can actually run that method.
|
|
517
|
-
//
|
|
518
|
-
// We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
|
|
519
|
-
// `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
|
|
520
|
-
// then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
|
|
521
|
-
// there was a single, longer GC period.
|
|
522
|
-
if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
|
|
523
|
-
thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
|
|
524
|
-
|
|
525
|
-
// Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
|
|
549
|
+
// Here we record the wall-time first and in on_gc_finish we record it second to try to avoid having wall-time be slightly < cpu-time
|
|
526
550
|
thread_context->gc_tracking.wall_time_at_start_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
527
551
|
thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
|
|
528
552
|
}
|
|
529
553
|
|
|
530
554
|
// This function gets called when Ruby has finished running the Garbage Collector on the current thread.
|
|
531
|
-
// It
|
|
532
|
-
// create
|
|
555
|
+
// It records the cpu/wall-time observed during GC, which will be used to later
|
|
556
|
+
// create an event including the cpu/wall time spent from the start of garbage collector work until now.
|
|
533
557
|
//
|
|
534
558
|
// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
|
|
535
559
|
// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
|
|
@@ -537,9 +561,9 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
|
|
|
537
561
|
//
|
|
538
562
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
|
539
563
|
// Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
|
|
540
|
-
|
|
564
|
+
bool thread_context_collector_on_gc_finish(VALUE self_instance) {
|
|
541
565
|
struct thread_context_collector_state *state;
|
|
542
|
-
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
|
|
566
|
+
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
|
|
543
567
|
// This should never fail if the above check passes
|
|
544
568
|
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
545
569
|
|
|
@@ -547,29 +571,70 @@ void thread_context_collector_on_gc_finish(VALUE self_instance) {
|
|
|
547
571
|
|
|
548
572
|
// If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
|
|
549
573
|
// how often this happens -- see on_gc_start.
|
|
550
|
-
if (thread_context == NULL) return;
|
|
574
|
+
if (thread_context == NULL) return false;
|
|
551
575
|
|
|
552
|
-
|
|
553
|
-
|
|
576
|
+
long cpu_time_at_start_ns = thread_context->gc_tracking.cpu_time_at_start_ns;
|
|
577
|
+
long wall_time_at_start_ns = thread_context->gc_tracking.wall_time_at_start_ns;
|
|
578
|
+
|
|
579
|
+
if (cpu_time_at_start_ns == INVALID_TIME && wall_time_at_start_ns == INVALID_TIME) {
|
|
554
580
|
// If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
|
|
555
581
|
// context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
|
|
556
582
|
// do nothing.
|
|
557
|
-
return;
|
|
583
|
+
return false;
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
// Mark thread as no longer in GC
|
|
587
|
+
thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
|
|
588
|
+
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
|
|
589
|
+
|
|
590
|
+
// Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time
|
|
591
|
+
long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
|
|
592
|
+
long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
593
|
+
|
|
594
|
+
// If our end timestamp is not OK, we bail out
|
|
595
|
+
if (wall_time_at_finish_ns == 0) return false;
|
|
596
|
+
|
|
597
|
+
long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - cpu_time_at_start_ns;
|
|
598
|
+
long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - wall_time_at_start_ns;
|
|
599
|
+
|
|
600
|
+
// Wall-time can go backwards if the system clock gets changed (and we observed spurious jumps back on macOS as well)
|
|
601
|
+
// so let's ensure we don't get negative values for time deltas.
|
|
602
|
+
gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0);
|
|
603
|
+
gc_wall_time_elapsed_ns = long_max_of(gc_wall_time_elapsed_ns, 0);
|
|
604
|
+
|
|
605
|
+
if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
|
|
606
|
+
state->gc_tracking.accumulated_cpu_time_ns = 0;
|
|
607
|
+
state->gc_tracking.accumulated_wall_time_ns = 0;
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
state->gc_tracking.accumulated_cpu_time_ns += gc_cpu_time_elapsed_ns;
|
|
611
|
+
state->gc_tracking.accumulated_wall_time_ns += gc_wall_time_elapsed_ns;
|
|
612
|
+
state->gc_tracking.wall_time_at_previous_gc_ns = wall_time_at_finish_ns;
|
|
613
|
+
|
|
614
|
+
// Update cpu-time accounting so it doesn't include the cpu-time spent in GC during the next sample
|
|
615
|
+
// We don't update the wall-time because we don't subtract the wall-time spent in GC (see call to
|
|
616
|
+
// `update_time_since_previous_sample` for wall-time in `update_metrics_and_sample`).
|
|
617
|
+
if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
|
|
618
|
+
thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
|
|
558
619
|
}
|
|
559
620
|
|
|
560
|
-
//
|
|
561
|
-
|
|
562
|
-
|
|
621
|
+
// Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
|
|
622
|
+
// on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
|
|
623
|
+
// samples first.
|
|
624
|
+
bool over_flush_time_treshold =
|
|
625
|
+
(wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
|
|
626
|
+
|
|
627
|
+
if (over_flush_time_treshold) {
|
|
628
|
+
return true;
|
|
629
|
+
} else {
|
|
630
|
+
return gc_profiling_has_major_gc_finished();
|
|
631
|
+
}
|
|
563
632
|
}
|
|
564
633
|
|
|
565
|
-
// This function gets called
|
|
634
|
+
// This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
|
|
566
635
|
// It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
|
|
567
636
|
// GC-related tracking.
|
|
568
637
|
//
|
|
569
|
-
// Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
|
|
570
|
-
// and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
|
|
571
|
-
// set on their context.
|
|
572
|
-
//
|
|
573
638
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
|
574
639
|
// Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
|
|
575
640
|
// Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
|
|
@@ -578,70 +643,45 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
|
|
|
578
643
|
struct thread_context_collector_state *state;
|
|
579
644
|
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
580
645
|
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
const long thread_count = RARRAY_LEN(threads);
|
|
585
|
-
for (long i = 0; i < thread_count; i++) {
|
|
586
|
-
VALUE thread = RARRAY_AREF(threads, i);
|
|
587
|
-
struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
|
|
646
|
+
if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
|
|
647
|
+
rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
|
|
648
|
+
}
|
|
588
649
|
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
|
|
593
|
-
thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
|
|
594
|
-
) continue; // Ignore threads with no/incomplete garbage collection data
|
|
595
|
-
|
|
596
|
-
sampled_any_thread = true;
|
|
597
|
-
|
|
598
|
-
long gc_cpu_time_elapsed_ns =
|
|
599
|
-
thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
|
|
600
|
-
long gc_wall_time_elapsed_ns =
|
|
601
|
-
thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
|
|
602
|
-
|
|
603
|
-
// We don't expect non-wall time to go backwards, so let's flag this as a bug
|
|
604
|
-
if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
|
|
605
|
-
// Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
|
|
606
|
-
// was a bug.
|
|
607
|
-
// @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
|
|
608
|
-
// https://github.com/DataDog/dd-trace-rb/pull/2336.
|
|
609
|
-
if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
|
|
610
|
-
|
|
611
|
-
if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
|
|
612
|
-
// Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
|
|
613
|
-
// come up with a crazy value for the frame
|
|
614
|
-
rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
|
|
615
|
-
}
|
|
650
|
+
int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
|
|
651
|
+
ddog_prof_Label labels[max_labels_needed_for_gc];
|
|
652
|
+
uint8_t label_pos = gc_profiling_set_metadata(labels, max_labels_needed_for_gc);
|
|
616
653
|
|
|
617
|
-
|
|
618
|
-
state,
|
|
619
|
-
/* thread: */ thread,
|
|
620
|
-
/* stack_from_thread: */ thread,
|
|
621
|
-
thread_context,
|
|
622
|
-
(sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
|
|
623
|
-
SAMPLE_IN_GC,
|
|
624
|
-
INVALID_TIME, // For now we're not collecting timestamps for these events
|
|
625
|
-
NULL,
|
|
626
|
-
NULL
|
|
627
|
-
);
|
|
654
|
+
ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
|
|
628
655
|
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
|
|
632
|
-
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
|
|
633
|
-
thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
|
|
656
|
+
// The end_timestamp_ns is treated specially by libdatadog and that's why it's not added as a ddog_prof_Label
|
|
657
|
+
int64_t end_timestamp_ns = 0;
|
|
634
658
|
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
|
|
638
|
-
}
|
|
639
|
-
if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
|
|
640
|
-
thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
|
|
641
|
-
}
|
|
659
|
+
if (state->timeline_enabled) {
|
|
660
|
+
end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns);
|
|
642
661
|
}
|
|
643
662
|
|
|
644
|
-
|
|
663
|
+
record_placeholder_stack(
|
|
664
|
+
state->sampling_buffer,
|
|
665
|
+
state->recorder_instance,
|
|
666
|
+
(sample_values) {
|
|
667
|
+
// This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
|
|
668
|
+
// timeline duration.
|
|
669
|
+
// This is done to enable two use-cases:
|
|
670
|
+
// * regular cpu/wall-time makes this event show up as a regular stack in the flamegraph
|
|
671
|
+
// * the timeline duration is used when the event shows up in the timeline
|
|
672
|
+
.cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns,
|
|
673
|
+
.cpu_or_wall_samples = 1,
|
|
674
|
+
.wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
|
|
675
|
+
.timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
|
|
676
|
+
},
|
|
677
|
+
(sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns},
|
|
678
|
+
DDOG_CHARSLICE_C("Garbage Collection")
|
|
679
|
+
);
|
|
680
|
+
|
|
681
|
+
state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns;
|
|
682
|
+
state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
|
|
683
|
+
|
|
684
|
+
state->stats.gc_samples++;
|
|
645
685
|
|
|
646
686
|
// Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
|
|
647
687
|
return Qnil;
|
|
@@ -653,7 +693,6 @@ static void trigger_sample_for_thread(
|
|
|
653
693
|
VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
|
|
654
694
|
struct per_thread_context *thread_context,
|
|
655
695
|
sample_values values,
|
|
656
|
-
sample_type type,
|
|
657
696
|
long current_monotonic_wall_time_ns,
|
|
658
697
|
// These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
|
|
659
698
|
ddog_CharSlice *ruby_vm_type,
|
|
@@ -776,8 +815,7 @@ static void trigger_sample_for_thread(
|
|
|
776
815
|
state->sampling_buffer,
|
|
777
816
|
state->recorder_instance,
|
|
778
817
|
values,
|
|
779
|
-
(sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
|
|
780
|
-
type
|
|
818
|
+
(sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
|
|
781
819
|
);
|
|
782
820
|
}
|
|
783
821
|
|
|
@@ -874,9 +912,7 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_c
|
|
|
874
912
|
|
|
875
913
|
// These will only be used during a GC operation
|
|
876
914
|
thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
|
|
877
|
-
thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
|
|
878
915
|
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
|
|
879
|
-
thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
|
|
880
916
|
}
|
|
881
917
|
|
|
882
918
|
static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
|
@@ -901,6 +937,8 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
|
|
|
901
937
|
state->time_converter_state.delta_to_epoch_ns
|
|
902
938
|
));
|
|
903
939
|
rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
|
|
940
|
+
rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
|
|
941
|
+
rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
|
|
904
942
|
|
|
905
943
|
return result;
|
|
906
944
|
}
|
|
@@ -927,9 +965,7 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
|
|
|
927
965
|
ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),
|
|
928
966
|
|
|
929
967
|
ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
|
|
930
|
-
ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
|
|
931
968
|
ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
|
|
932
|
-
ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
|
|
933
969
|
};
|
|
934
970
|
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);
|
|
935
971
|
|
|
@@ -947,6 +983,19 @@ static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state) {
|
|
|
947
983
|
return stats_as_hash;
|
|
948
984
|
}
|
|
949
985
|
|
|
986
|
+
static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state) {
|
|
987
|
+
// Update this when modifying state struct (gc_tracking inner struct)
|
|
988
|
+
VALUE result = rb_hash_new();
|
|
989
|
+
VALUE arguments[] = {
|
|
990
|
+
ID2SYM(rb_intern("accumulated_cpu_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_cpu_time_ns),
|
|
991
|
+
ID2SYM(rb_intern("accumulated_wall_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_wall_time_ns),
|
|
992
|
+
ID2SYM(rb_intern("wall_time_at_previous_gc_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_previous_gc_ns),
|
|
993
|
+
ID2SYM(rb_intern("wall_time_at_last_flushed_gc_event_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_last_flushed_gc_event_ns),
|
|
994
|
+
};
|
|
995
|
+
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(result, arguments[i], arguments[i+1]);
|
|
996
|
+
return result;
|
|
997
|
+
}
|
|
998
|
+
|
|
950
999
|
static void remove_context_for_dead_threads(struct thread_context_collector_state *state) {
|
|
951
1000
|
st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
|
|
952
1001
|
}
|
|
@@ -1049,8 +1098,6 @@ VALUE enforce_thread_context_collector_instance(VALUE object) {
|
|
|
1049
1098
|
|
|
1050
1099
|
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
|
1051
1100
|
// It SHOULD NOT be used for other purposes.
|
|
1052
|
-
//
|
|
1053
|
-
// Returns the whole contents of the per_thread_context structs being tracked.
|
|
1054
1101
|
static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
|
1055
1102
|
struct thread_context_collector_state *state;
|
|
1056
1103
|
TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
@@ -1058,6 +1105,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance)
|
|
|
1058
1105
|
return stats_as_ruby_hash(state);
|
|
1059
1106
|
}
|
|
1060
1107
|
|
|
1108
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
|
1109
|
+
// It SHOULD NOT be used for other purposes.
|
|
1110
|
+
static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
|
1111
|
+
struct thread_context_collector_state *state;
|
|
1112
|
+
TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1113
|
+
|
|
1114
|
+
return gc_tracking_as_ruby_hash(state);
|
|
1115
|
+
}
|
|
1116
|
+
|
|
1061
1117
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
|
1062
1118
|
static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
|
|
1063
1119
|
if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
|
|
@@ -1070,10 +1126,19 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
|
|
|
1070
1126
|
|
|
1071
1127
|
VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
|
|
1072
1128
|
VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
|
|
1073
|
-
|
|
1129
|
+
// Note: On Ruby 3.x `rb_attr_get` is exactly the same as `rb_ivar_get`. For Ruby 2.x, the difference is that
|
|
1130
|
+
// `rb_ivar_get` can trigger "warning: instance variable @otel_values not initialized" if warnings are enabled and
|
|
1131
|
+
// opentelemetry is not in use, whereas `rb_attr_get` does the lookup without generating the warning.
|
|
1132
|
+
VALUE otel_values = rb_attr_get(active_trace, at_otel_values_id /* @otel_values */);
|
|
1133
|
+
|
|
1134
|
+
VALUE numeric_span_id = Qnil;
|
|
1135
|
+
|
|
1136
|
+
if (otel_values != Qnil) ddtrace_otel_trace_identifiers_for(state, &active_trace, &root_span, &numeric_span_id, active_span, otel_values);
|
|
1137
|
+
|
|
1138
|
+
if (root_span == Qnil || (active_span == Qnil && numeric_span_id == Qnil)) return;
|
|
1074
1139
|
|
|
1075
1140
|
VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
|
|
1076
|
-
|
|
1141
|
+
if (active_span != Qnil && numeric_span_id == Qnil) numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
|
|
1077
1142
|
if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
|
|
1078
1143
|
|
|
1079
1144
|
trace_identifiers_result->local_root_span_id = NUM2ULL(numeric_local_root_span_id);
|
|
@@ -1081,10 +1146,7 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
|
|
|
1081
1146
|
|
|
1082
1147
|
trace_identifiers_result->valid = true;
|
|
1083
1148
|
|
|
1084
|
-
if (!state->endpoint_collection_enabled) return;
|
|
1085
|
-
|
|
1086
|
-
VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
|
|
1087
|
-
if (root_span_type == Qnil || !should_collect_resource(root_span_type)) return;
|
|
1149
|
+
if (!state->endpoint_collection_enabled || !should_collect_resource(root_span)) return;
|
|
1088
1150
|
|
|
1089
1151
|
VALUE trace_resource = rb_ivar_get(active_trace, at_resource_id /* @resource */);
|
|
1090
1152
|
if (RB_TYPE_P(trace_resource, T_STRING)) {
|
|
@@ -1095,21 +1157,32 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
|
|
|
1095
1157
|
}
|
|
1096
1158
|
}
|
|
1097
1159
|
|
|
1098
|
-
// We
|
|
1160
|
+
// We opt-in to collecting the resource for spans of types:
|
|
1099
1161
|
// * 'web', for web requests
|
|
1100
|
-
// * proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request)
|
|
1162
|
+
// * 'proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request)
|
|
1163
|
+
// * 'worker', used for sidekiq and similar background job processors
|
|
1101
1164
|
//
|
|
1102
|
-
//
|
|
1165
|
+
// Over time, this list may be expanded.
|
|
1103
1166
|
// Resources MUST NOT include personally identifiable information (PII); this should not be the case with
|
|
1104
1167
|
// ddtrace integrations, but worth mentioning just in case :)
|
|
1105
|
-
static bool should_collect_resource(VALUE
|
|
1168
|
+
static bool should_collect_resource(VALUE root_span) {
|
|
1169
|
+
VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
|
|
1170
|
+
if (root_span_type == Qnil) return false;
|
|
1106
1171
|
ENFORCE_TYPE(root_span_type, T_STRING);
|
|
1107
1172
|
|
|
1108
1173
|
int root_span_type_length = RSTRING_LEN(root_span_type);
|
|
1109
1174
|
const char *root_span_type_value = StringValuePtr(root_span_type);
|
|
1110
1175
|
|
|
1111
|
-
|
|
1176
|
+
bool is_web_request =
|
|
1177
|
+
(root_span_type_length == strlen("web") && (memcmp("web", root_span_type_value, strlen("web")) == 0)) ||
|
|
1112
1178
|
(root_span_type_length == strlen("proxy") && (memcmp("proxy", root_span_type_value, strlen("proxy")) == 0));
|
|
1179
|
+
|
|
1180
|
+
if (is_web_request) return true;
|
|
1181
|
+
|
|
1182
|
+
bool is_worker_request =
|
|
1183
|
+
(root_span_type_length == strlen("worker") && (memcmp("worker", root_span_type_value, strlen("worker")) == 0));
|
|
1184
|
+
|
|
1185
|
+
return is_worker_request;
|
|
1113
1186
|
}
|
|
1114
1187
|
|
|
1115
1188
|
// After the Ruby VM forks, this method gets called in the child process to clean up any leftover state from the parent.
|
|
@@ -1210,13 +1283,14 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
|
|
|
1210
1283
|
}
|
|
1211
1284
|
}
|
|
1212
1285
|
|
|
1286
|
+
track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);
|
|
1287
|
+
|
|
1213
1288
|
trigger_sample_for_thread(
|
|
1214
1289
|
state,
|
|
1215
1290
|
/* thread: */ current_thread,
|
|
1216
1291
|
/* stack_from_thread: */ current_thread,
|
|
1217
1292
|
get_or_create_context_for(current_thread, state),
|
|
1218
1293
|
(sample_values) {.alloc_samples = sample_weight},
|
|
1219
|
-
SAMPLE_REGULAR,
|
|
1220
1294
|
INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
|
|
1221
1295
|
&ruby_vm_type,
|
|
1222
1296
|
optional_class_name
|
|
@@ -1239,7 +1313,7 @@ static VALUE _native_new_empty_thread(DDTRACE_UNUSED VALUE self) {
|
|
|
1239
1313
|
return rb_thread_create(new_empty_thread_inner, NULL);
|
|
1240
1314
|
}
|
|
1241
1315
|
|
|
1242
|
-
ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
|
|
1316
|
+
static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
|
|
1243
1317
|
switch (type) {
|
|
1244
1318
|
case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("Object");
|
|
1245
1319
|
case(RUBY_T_CLASS ): return DDOG_CHARSLICE_C("Class");
|
|
@@ -1264,3 +1338,65 @@ ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
|
|
|
1264
1338
|
default: return DDOG_CHARSLICE_C("(VM Internal, Missing class)");
|
|
1265
1339
|
}
|
|
1266
1340
|
}
|
|
1341
|
+
|
|
1342
|
+
static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
|
|
1343
|
+
if (state->otel_current_span_key == Qnil) {
|
|
1344
|
+
VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog"));
|
|
1345
|
+
VALUE opentelemetry_module = rb_const_get(datadog_module, rb_intern("OpenTelemetry"));
|
|
1346
|
+
VALUE api_module = rb_const_get(opentelemetry_module, rb_intern("API"));
|
|
1347
|
+
VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
|
|
1348
|
+
VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
|
|
1349
|
+
|
|
1350
|
+
if (current_span_key == Qnil) {
|
|
1351
|
+
rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
|
|
1352
|
+
}
|
|
1353
|
+
|
|
1354
|
+
state->otel_current_span_key = current_span_key;
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
return state->otel_current_span_key;
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
// This method gets used when ddtrace is being used indirectly via the otel APIs. Information gets stored slightly
|
|
1361
|
+
// differently, and this codepath handles it.
|
|
1362
|
+
static void ddtrace_otel_trace_identifiers_for(
|
|
1363
|
+
struct thread_context_collector_state *state,
|
|
1364
|
+
VALUE *active_trace,
|
|
1365
|
+
VALUE *root_span,
|
|
1366
|
+
VALUE *numeric_span_id,
|
|
1367
|
+
VALUE active_span,
|
|
1368
|
+
VALUE otel_values
|
|
1369
|
+
) {
|
|
1370
|
+
VALUE resolved_numeric_span_id =
|
|
1371
|
+
active_span == Qnil ?
|
|
1372
|
+
// For traces started from otel spans, the span id will be empty, and the @parent_span_id has the right value
|
|
1373
|
+
rb_ivar_get(*active_trace, at_parent_span_id_id /* @parent_span_id */) :
|
|
1374
|
+
// Regular span created by ddtrace
|
|
1375
|
+
rb_ivar_get(active_span, at_id_id /* @id */);
|
|
1376
|
+
|
|
1377
|
+
if (resolved_numeric_span_id == Qnil) return;
|
|
1378
|
+
|
|
1379
|
+
VALUE otel_current_span_key = get_otel_current_span_key(state);
|
|
1380
|
+
VALUE current_trace = *active_trace;
|
|
1381
|
+
|
|
1382
|
+
// ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
|
|
1383
|
+
// trace and span representing it. Each ddtrace trace is then connected to the previous otel span, forming a linked
|
|
1384
|
+
// list. The local root span is going to be the trace/span we find at the end of this linked list.
|
|
1385
|
+
while (otel_values != Qnil) {
|
|
1386
|
+
VALUE otel_span = rb_hash_lookup(otel_values, otel_current_span_key);
|
|
1387
|
+
if (otel_span == Qnil) break;
|
|
1388
|
+
VALUE next_trace = rb_ivar_get(otel_span, at_datadog_trace_id);
|
|
1389
|
+
if (next_trace == Qnil) break;
|
|
1390
|
+
|
|
1391
|
+
current_trace = next_trace;
|
|
1392
|
+
otel_values = rb_ivar_get(current_trace, at_otel_values_id /* @otel_values */);
|
|
1393
|
+
}
|
|
1394
|
+
|
|
1395
|
+
// We found the last trace in the linked list. This contains the local root span
|
|
1396
|
+
VALUE resolved_root_span = rb_ivar_get(current_trace, at_root_span_id /* @root_span */);
|
|
1397
|
+
if (resolved_root_span == Qnil) return;
|
|
1398
|
+
|
|
1399
|
+
*root_span = resolved_root_span;
|
|
1400
|
+
*active_trace = current_trace;
|
|
1401
|
+
*numeric_span_id = resolved_numeric_span_id;
|
|
1402
|
+
}
|