datadog 2.2.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +87 -2
- data/ext/datadog_profiling_loader/datadog_profiling_loader.c +9 -1
- data/ext/datadog_profiling_loader/extconf.rb +14 -26
- data/ext/datadog_profiling_native_extension/clock_id.h +1 -0
- data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +1 -2
- data/ext/datadog_profiling_native_extension/clock_id_noop.c +1 -2
- data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +257 -69
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +53 -28
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +34 -4
- data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +4 -0
- data/ext/datadog_profiling_native_extension/collectors_stack.c +136 -81
- data/ext/datadog_profiling_native_extension/collectors_stack.h +2 -2
- data/ext/datadog_profiling_native_extension/collectors_thread_context.c +661 -48
- data/ext/datadog_profiling_native_extension/collectors_thread_context.h +10 -1
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +83 -0
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +53 -0
- data/ext/datadog_profiling_native_extension/extconf.rb +91 -69
- data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +50 -0
- data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +75 -0
- data/ext/datadog_profiling_native_extension/heap_recorder.c +54 -12
- data/ext/datadog_profiling_native_extension/heap_recorder.h +3 -1
- data/ext/datadog_profiling_native_extension/helpers.h +6 -17
- data/ext/datadog_profiling_native_extension/http_transport.c +41 -9
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +0 -86
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +2 -23
- data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +61 -172
- data/ext/datadog_profiling_native_extension/private_vm_api_access.c +116 -139
- data/ext/datadog_profiling_native_extension/private_vm_api_access.h +20 -11
- data/ext/datadog_profiling_native_extension/profiling.c +1 -3
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +0 -33
- data/ext/datadog_profiling_native_extension/ruby_helpers.h +1 -26
- data/ext/datadog_profiling_native_extension/setup_signal_handler.h +1 -0
- data/ext/datadog_profiling_native_extension/stack_recorder.c +14 -2
- data/ext/datadog_profiling_native_extension/stack_recorder.h +2 -0
- data/ext/datadog_profiling_native_extension/time_helpers.c +0 -15
- data/ext/datadog_profiling_native_extension/time_helpers.h +36 -6
- data/ext/{datadog_profiling_native_extension → libdatadog_api}/crashtracker.c +37 -22
- data/ext/libdatadog_api/datadog_ruby_common.c +83 -0
- data/ext/libdatadog_api/datadog_ruby_common.h +53 -0
- data/ext/libdatadog_api/extconf.rb +108 -0
- data/ext/libdatadog_api/macos_development.md +26 -0
- data/ext/libdatadog_extconf_helpers.rb +130 -0
- data/lib/datadog/appsec/assets/waf_rules/recommended.json +2184 -108
- data/lib/datadog/appsec/assets/waf_rules/strict.json +1430 -2
- data/lib/datadog/appsec/component.rb +29 -8
- data/lib/datadog/appsec/configuration/settings.rb +2 -2
- data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +1 -0
- data/lib/datadog/appsec/contrib/devise/patcher/rememberable_patch.rb +21 -0
- data/lib/datadog/appsec/contrib/devise/patcher.rb +12 -2
- data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +35 -0
- data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +109 -0
- data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +71 -0
- data/lib/datadog/appsec/contrib/graphql/integration.rb +54 -0
- data/lib/datadog/appsec/contrib/graphql/patcher.rb +37 -0
- data/lib/datadog/appsec/contrib/graphql/reactive/multiplex.rb +59 -0
- data/lib/datadog/appsec/contrib/rack/gateway/request.rb +3 -6
- data/lib/datadog/appsec/event.rb +1 -1
- data/lib/datadog/appsec/processor/actions.rb +1 -1
- data/lib/datadog/appsec/processor/rule_loader.rb +3 -1
- data/lib/datadog/appsec/processor/rule_merger.rb +33 -15
- data/lib/datadog/appsec/processor.rb +36 -37
- data/lib/datadog/appsec/rate_limiter.rb +25 -40
- data/lib/datadog/appsec/remote.rb +7 -3
- data/lib/datadog/appsec/response.rb +15 -1
- data/lib/datadog/appsec.rb +3 -2
- data/lib/datadog/core/configuration/components.rb +18 -15
- data/lib/datadog/core/configuration/settings.rb +135 -9
- data/lib/datadog/core/crashtracking/agent_base_url.rb +21 -0
- data/lib/datadog/core/crashtracking/component.rb +111 -0
- data/lib/datadog/core/crashtracking/tag_builder.rb +39 -0
- data/lib/datadog/core/diagnostics/environment_logger.rb +8 -11
- data/lib/datadog/core/environment/execution.rb +5 -5
- data/lib/datadog/core/metrics/client.rb +7 -0
- data/lib/datadog/core/rate_limiter.rb +183 -0
- data/lib/datadog/core/remote/client/capabilities.rb +4 -3
- data/lib/datadog/core/remote/component.rb +4 -2
- data/lib/datadog/core/remote/negotiation.rb +4 -4
- data/lib/datadog/core/remote/tie.rb +2 -0
- data/lib/datadog/core/runtime/metrics.rb +1 -1
- data/lib/datadog/core/telemetry/component.rb +51 -2
- data/lib/datadog/core/telemetry/emitter.rb +9 -11
- data/lib/datadog/core/telemetry/event.rb +37 -1
- data/lib/datadog/core/telemetry/ext.rb +1 -0
- data/lib/datadog/core/telemetry/http/adapters/net.rb +10 -12
- data/lib/datadog/core/telemetry/http/ext.rb +3 -0
- data/lib/datadog/core/telemetry/http/transport.rb +38 -9
- data/lib/datadog/core/telemetry/logger.rb +51 -0
- data/lib/datadog/core/telemetry/logging.rb +71 -0
- data/lib/datadog/core/telemetry/request.rb +13 -1
- data/lib/datadog/core/utils/at_fork_monkey_patch.rb +102 -0
- data/lib/datadog/core/utils/time.rb +12 -0
- data/lib/datadog/di/code_tracker.rb +168 -0
- data/lib/datadog/di/configuration/settings.rb +163 -0
- data/lib/datadog/di/configuration.rb +11 -0
- data/lib/datadog/di/error.rb +31 -0
- data/lib/datadog/di/extensions.rb +16 -0
- data/lib/datadog/di/probe.rb +133 -0
- data/lib/datadog/di/probe_builder.rb +41 -0
- data/lib/datadog/di/redactor.rb +188 -0
- data/lib/datadog/di/serializer.rb +193 -0
- data/lib/datadog/di.rb +14 -0
- data/lib/datadog/kit/appsec/events.rb +2 -4
- data/lib/datadog/opentelemetry/sdk/propagator.rb +2 -0
- data/lib/datadog/opentelemetry/sdk/span_processor.rb +10 -0
- data/lib/datadog/opentelemetry/sdk/trace/span.rb +23 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +7 -7
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +28 -26
- data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +11 -13
- data/lib/datadog/profiling/collectors/info.rb +15 -6
- data/lib/datadog/profiling/collectors/thread_context.rb +30 -2
- data/lib/datadog/profiling/component.rb +89 -95
- data/lib/datadog/profiling/exporter.rb +3 -3
- data/lib/datadog/profiling/ext/dir_monkey_patches.rb +3 -3
- data/lib/datadog/profiling/ext.rb +21 -21
- data/lib/datadog/profiling/flush.rb +1 -1
- data/lib/datadog/profiling/http_transport.rb +14 -7
- data/lib/datadog/profiling/load_native_extension.rb +5 -5
- data/lib/datadog/profiling/preload.rb +1 -1
- data/lib/datadog/profiling/profiler.rb +5 -8
- data/lib/datadog/profiling/scheduler.rb +33 -25
- data/lib/datadog/profiling/stack_recorder.rb +3 -0
- data/lib/datadog/profiling/tag_builder.rb +2 -2
- data/lib/datadog/profiling/tasks/exec.rb +5 -5
- data/lib/datadog/profiling/tasks/setup.rb +16 -35
- data/lib/datadog/profiling.rb +4 -5
- data/lib/datadog/single_step_instrument.rb +12 -0
- data/lib/datadog/tracing/contrib/action_cable/instrumentation.rb +8 -12
- data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +5 -0
- data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +78 -0
- data/lib/datadog/tracing/contrib/action_pack/action_dispatch/patcher.rb +33 -0
- data/lib/datadog/tracing/contrib/action_pack/patcher.rb +2 -0
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +4 -0
- data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +3 -1
- data/lib/datadog/tracing/contrib/active_record/events/sql.rb +4 -1
- data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +5 -1
- data/lib/datadog/tracing/contrib/aws/instrumentation.rb +5 -0
- data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +6 -1
- data/lib/datadog/tracing/contrib/ext.rb +14 -0
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +9 -0
- data/lib/datadog/tracing/contrib/grape/endpoint.rb +19 -0
- data/lib/datadog/tracing/contrib/graphql/patcher.rb +9 -12
- data/lib/datadog/tracing/contrib/graphql/trace_patcher.rb +3 -3
- data/lib/datadog/tracing/contrib/graphql/tracing_patcher.rb +3 -3
- data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +14 -10
- data/lib/datadog/tracing/contrib/graphql/unified_trace_patcher.rb +10 -4
- data/lib/datadog/tracing/contrib/http/instrumentation.rb +18 -15
- data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -5
- data/lib/datadog/tracing/contrib/httpclient/patcher.rb +1 -14
- data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +5 -0
- data/lib/datadog/tracing/contrib/httprb/patcher.rb +1 -14
- data/lib/datadog/tracing/contrib/lograge/patcher.rb +15 -0
- data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +2 -0
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +17 -13
- data/lib/datadog/tracing/contrib/opensearch/patcher.rb +13 -6
- data/lib/datadog/tracing/contrib/patcher.rb +2 -1
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +4 -1
- data/lib/datadog/tracing/contrib/presto/patcher.rb +1 -13
- data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +28 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +5 -1
- data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +22 -10
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +27 -0
- data/lib/datadog/tracing/contrib/redis/tags.rb +4 -0
- data/lib/datadog/tracing/contrib/sinatra/tracer.rb +4 -0
- data/lib/datadog/tracing/contrib/stripe/request.rb +3 -2
- data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +4 -1
- data/lib/datadog/tracing/diagnostics/environment_logger.rb +14 -16
- data/lib/datadog/tracing/distributed/propagation.rb +7 -0
- data/lib/datadog/tracing/metadata/errors.rb +9 -1
- data/lib/datadog/tracing/metadata/ext.rb +6 -0
- data/lib/datadog/tracing/pipeline/span_filter.rb +2 -2
- data/lib/datadog/tracing/remote.rb +5 -2
- data/lib/datadog/tracing/sampling/matcher.rb +6 -1
- data/lib/datadog/tracing/sampling/rate_sampler.rb +1 -1
- data/lib/datadog/tracing/sampling/rule.rb +2 -0
- data/lib/datadog/tracing/sampling/rule_sampler.rb +9 -5
- data/lib/datadog/tracing/sampling/span/ext.rb +1 -1
- data/lib/datadog/tracing/sampling/span/rule.rb +2 -2
- data/lib/datadog/tracing/span.rb +9 -2
- data/lib/datadog/tracing/span_event.rb +41 -0
- data/lib/datadog/tracing/span_operation.rb +6 -2
- data/lib/datadog/tracing/trace_operation.rb +26 -2
- data/lib/datadog/tracing/tracer.rb +14 -12
- data/lib/datadog/tracing/transport/http/client.rb +1 -0
- data/lib/datadog/tracing/transport/io/client.rb +1 -0
- data/lib/datadog/tracing/transport/serializable_trace.rb +3 -0
- data/lib/datadog/tracing/workers/trace_writer.rb +1 -1
- data/lib/datadog/tracing/workers.rb +1 -1
- data/lib/datadog/version.rb +1 -1
- metadata +46 -11
- data/lib/datadog/profiling/crashtracker.rb +0 -91
- data/lib/datadog/profiling/ext/forking.rb +0 -98
- data/lib/datadog/tracing/sampling/rate_limiter.rb +0 -185
@@ -76,6 +76,11 @@
|
|
76
76
|
#define MISSING_TRACER_CONTEXT_KEY 0
|
77
77
|
#define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)
|
78
78
|
|
79
|
+
// This is used as a placeholder to mark threads that are allowed to be profiled (enabled)
|
80
|
+
// (e.g. to avoid trying to gvl profile threads that are not from the main Ractor)
|
81
|
+
// and for which there's no data yet
|
82
|
+
#define GVL_WAITING_ENABLED_EMPTY RUBY_FIXNUM_MAX
|
83
|
+
|
79
84
|
static ID at_active_span_id; // id of :@active_span in Ruby
|
80
85
|
static ID at_active_trace_id; // id of :@active_trace in Ruby
|
81
86
|
static ID at_id_id; // id of :@id in Ruby
|
@@ -86,13 +91,34 @@ static ID at_otel_values_id; // id of :@otel_values in Ruby
|
|
86
91
|
static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
|
87
92
|
static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby
|
88
93
|
|
94
|
+
// Used to support reading trace identifiers from the opentelemetry Ruby library when the ddtrace gem tracing
|
95
|
+
// integration is NOT in use.
|
96
|
+
static ID at_span_id_id; // id of :@span_id in Ruby
|
97
|
+
static ID at_trace_id_id; // id of :@trace_id in Ruby
|
98
|
+
static ID at_entries_id; // id of :@entries in Ruby
|
99
|
+
static ID at_context_id; // id of :@context in Ruby
|
100
|
+
static ID at_kind_id; // id of :@kind in Ruby
|
101
|
+
static ID at_name_id; // id of :@name in Ruby
|
102
|
+
static ID server_id; // id of :server in Ruby
|
103
|
+
static ID otel_context_storage_id; // id of :__opentelemetry_context_storage__ in Ruby
|
104
|
+
|
105
|
+
// This is used by `thread_context_collector_on_gvl_running`. Because when that method gets called we're not sure if
|
106
|
+
// it's safe to access the state of the thread context collector, we store this setting as a global value. This does
|
107
|
+
// mean this setting is shared among all thread context collectors, and thus it's "last writer wins".
|
108
|
+
// In production this should not be a problem: there should only be one profiler, which is the last one created,
|
109
|
+
// and that'll be the one that last wrote this setting.
|
110
|
+
static uint32_t global_waiting_for_gvl_threshold_ns = MILLIS_AS_NS(10);
|
111
|
+
|
112
|
+
enum otel_context_enabled {otel_context_enabled_false, otel_context_enabled_only, otel_context_enabled_both};
|
113
|
+
|
89
114
|
// Contains state for a single ThreadContext instance
|
90
115
|
struct thread_context_collector_state {
|
91
116
|
// Note: Places in this file that usually need to be changed when this struct is changed are tagged with
|
92
117
|
// "Update this when modifying state struct"
|
93
118
|
|
94
119
|
// Required by Datadog::Profiling::Collectors::Stack as a scratch buffer during sampling
|
95
|
-
|
120
|
+
ddog_prof_Location *locations;
|
121
|
+
uint16_t max_frames;
|
96
122
|
// Hashmap <Thread Object, struct per_thread_context>
|
97
123
|
st_table *hash_map_per_thread_context;
|
98
124
|
// Datadog::Profiling::StackRecorder instance
|
@@ -111,6 +137,8 @@ struct thread_context_collector_state {
|
|
111
137
|
bool endpoint_collection_enabled;
|
112
138
|
// Used to omit timestamps / timeline events from collected data
|
113
139
|
bool timeline_enabled;
|
140
|
+
// Used to control context collection
|
141
|
+
enum otel_context_enabled otel_context_enabled;
|
114
142
|
// Used to omit class information from collected allocation data
|
115
143
|
bool allocation_type_enabled;
|
116
144
|
// Used when calling monotonic_to_system_epoch_ns
|
@@ -118,6 +146,8 @@ struct thread_context_collector_state {
|
|
118
146
|
// Used to identify the main thread, to give it a fallback name
|
119
147
|
VALUE main_thread;
|
120
148
|
// Used when extracting trace identifiers from otel spans. Lazily initialized.
|
149
|
+
// Qtrue serves as a marker we've not yet extracted it; when we try to extract it, we set it to an object if
|
150
|
+
// successful and Qnil if not.
|
121
151
|
VALUE otel_current_span_key;
|
122
152
|
|
123
153
|
struct stats {
|
@@ -138,6 +168,7 @@ struct thread_context_collector_state {
|
|
138
168
|
|
139
169
|
// Tracks per-thread state
|
140
170
|
struct per_thread_context {
|
171
|
+
sampling_buffer *sampling_buffer;
|
141
172
|
char thread_id[THREAD_ID_LIMIT_CHARS];
|
142
173
|
ddog_CharSlice thread_id_char_slice;
|
143
174
|
char thread_invoke_location[THREAD_INVOKE_LOCATION_LIMIT_CHARS];
|
@@ -162,6 +193,12 @@ struct trace_identifiers {
|
|
162
193
|
VALUE trace_endpoint;
|
163
194
|
};
|
164
195
|
|
196
|
+
struct otel_span {
|
197
|
+
VALUE span;
|
198
|
+
VALUE span_id;
|
199
|
+
VALUE trace_id;
|
200
|
+
};
|
201
|
+
|
165
202
|
static void thread_context_collector_typed_data_mark(void *state_ptr);
|
166
203
|
static void thread_context_collector_typed_data_free(void *state_ptr);
|
167
204
|
static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument);
|
@@ -175,17 +212,20 @@ static VALUE _native_initialize(
|
|
175
212
|
VALUE tracer_context_key,
|
176
213
|
VALUE endpoint_collection_enabled,
|
177
214
|
VALUE timeline_enabled,
|
215
|
+
VALUE waiting_for_gvl_threshold_ns,
|
216
|
+
VALUE otel_context_enabled,
|
178
217
|
VALUE allocation_type_enabled
|
179
218
|
);
|
180
219
|
static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE profiler_overhead_stack_thread);
|
181
220
|
static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
|
182
221
|
static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
|
183
|
-
static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
|
184
|
-
void update_metrics_and_sample(
|
222
|
+
static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state);
|
223
|
+
static void update_metrics_and_sample(
|
185
224
|
struct thread_context_collector_state *state,
|
186
225
|
VALUE thread_being_sampled,
|
187
|
-
VALUE
|
226
|
+
VALUE stack_from_thread,
|
188
227
|
struct per_thread_context *thread_context,
|
228
|
+
sampling_buffer* sampling_buffer,
|
189
229
|
long current_cpu_time_ns,
|
190
230
|
long current_monotonic_wall_time_ns
|
191
231
|
);
|
@@ -194,15 +234,18 @@ static void trigger_sample_for_thread(
|
|
194
234
|
VALUE thread,
|
195
235
|
VALUE stack_from_thread,
|
196
236
|
struct per_thread_context *thread_context,
|
237
|
+
sampling_buffer* sampling_buffer,
|
197
238
|
sample_values values,
|
198
239
|
long current_monotonic_wall_time_ns,
|
199
240
|
ddog_CharSlice *ruby_vm_type,
|
200
|
-
ddog_CharSlice *class_name
|
241
|
+
ddog_CharSlice *class_name,
|
242
|
+
bool is_gvl_waiting_state
|
201
243
|
);
|
202
244
|
static VALUE _native_thread_list(VALUE self);
|
203
245
|
static struct per_thread_context *get_or_create_context_for(VALUE thread, struct thread_context_collector_state *state);
|
204
246
|
static struct per_thread_context *get_context_for(VALUE thread, struct thread_context_collector_state *state);
|
205
247
|
static void initialize_context(VALUE thread, struct per_thread_context *thread_context, struct thread_context_collector_state *state);
|
248
|
+
static void free_context(struct per_thread_context* thread_context);
|
206
249
|
static VALUE _native_inspect(VALUE self, VALUE collector_instance);
|
207
250
|
static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
|
208
251
|
static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
|
@@ -231,6 +274,27 @@ static void ddtrace_otel_trace_identifiers_for(
|
|
231
274
|
VALUE active_span,
|
232
275
|
VALUE otel_values
|
233
276
|
);
|
277
|
+
static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples);
|
278
|
+
static bool handle_gvl_waiting(
|
279
|
+
struct thread_context_collector_state *state,
|
280
|
+
VALUE thread_being_sampled,
|
281
|
+
VALUE stack_from_thread,
|
282
|
+
struct per_thread_context *thread_context,
|
283
|
+
sampling_buffer* sampling_buffer,
|
284
|
+
long current_cpu_time_ns
|
285
|
+
);
|
286
|
+
static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread);
|
287
|
+
static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread);
|
288
|
+
static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread);
|
289
|
+
static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread);
|
290
|
+
static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns);
|
291
|
+
static void otel_without_ddtrace_trace_identifiers_for(
|
292
|
+
struct thread_context_collector_state *state,
|
293
|
+
VALUE thread,
|
294
|
+
struct trace_identifiers *trace_identifiers_result
|
295
|
+
);
|
296
|
+
static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key);
|
297
|
+
static uint64_t otel_span_id_to_uint(VALUE otel_span_id);
|
234
298
|
|
235
299
|
void collectors_thread_context_init(VALUE profiling_module) {
|
236
300
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
@@ -248,19 +312,27 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
248
312
|
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
|
249
313
|
rb_define_alloc_func(collectors_thread_context_class, _native_new);
|
250
314
|
|
251
|
-
rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize,
|
315
|
+
rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, 9);
|
252
316
|
rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1);
|
253
317
|
rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
|
254
318
|
rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2);
|
255
319
|
rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 3);
|
256
320
|
rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
|
257
321
|
rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
|
258
|
-
rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc,
|
322
|
+
rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 2);
|
259
323
|
rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
|
260
324
|
rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
|
261
325
|
rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
|
262
326
|
rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
|
263
327
|
rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
|
328
|
+
rb_define_singleton_method(testing_module, "_native_sample_skipped_allocation_samples", _native_sample_skipped_allocation_samples, 2);
|
329
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
330
|
+
rb_define_singleton_method(testing_module, "_native_on_gvl_waiting", _native_on_gvl_waiting, 1);
|
331
|
+
rb_define_singleton_method(testing_module, "_native_gvl_waiting_at_for", _native_gvl_waiting_at_for, 1);
|
332
|
+
rb_define_singleton_method(testing_module, "_native_on_gvl_running", _native_on_gvl_running, 1);
|
333
|
+
rb_define_singleton_method(testing_module, "_native_sample_after_gvl_running", _native_sample_after_gvl_running, 2);
|
334
|
+
rb_define_singleton_method(testing_module, "_native_apply_delta_to_cpu_time_at_previous_sample_ns", _native_apply_delta_to_cpu_time_at_previous_sample_ns, 3);
|
335
|
+
#endif
|
264
336
|
|
265
337
|
at_active_span_id = rb_intern_const("@active_span");
|
266
338
|
at_active_trace_id = rb_intern_const("@active_trace");
|
@@ -271,6 +343,19 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
271
343
|
at_otel_values_id = rb_intern_const("@otel_values");
|
272
344
|
at_parent_span_id_id = rb_intern_const("@parent_span_id");
|
273
345
|
at_datadog_trace_id = rb_intern_const("@datadog_trace");
|
346
|
+
at_span_id_id = rb_intern_const("@span_id");
|
347
|
+
at_trace_id_id = rb_intern_const("@trace_id");
|
348
|
+
at_entries_id = rb_intern_const("@entries");
|
349
|
+
at_context_id = rb_intern_const("@context");
|
350
|
+
at_kind_id = rb_intern_const("@kind");
|
351
|
+
at_name_id = rb_intern_const("@name");
|
352
|
+
server_id = rb_intern_const("server");
|
353
|
+
otel_context_storage_id = rb_intern_const("__opentelemetry_context_storage__");
|
354
|
+
|
355
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
356
|
+
// This will raise if Ruby already ran out of thread-local keys
|
357
|
+
gvl_profiling_init();
|
358
|
+
#endif
|
274
359
|
|
275
360
|
gc_profiling_init();
|
276
361
|
}
|
@@ -308,7 +393,7 @@ static void thread_context_collector_typed_data_free(void *state_ptr) {
|
|
308
393
|
|
309
394
|
// Important: Remember that we're only guaranteed to see here what's been set in _native_new, aka
|
310
395
|
// pointers that have been set NULL there may still be NULL here.
|
311
|
-
if (state->
|
396
|
+
if (state->locations != NULL) ruby_xfree(state->locations);
|
312
397
|
|
313
398
|
// Free each entry in the map
|
314
399
|
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
|
@@ -327,8 +412,8 @@ static int hash_map_per_thread_context_mark(st_data_t key_thread, DDTRACE_UNUSED
|
|
327
412
|
|
328
413
|
// Used to clear each of the per_thread_contexts inside the hash_map_per_thread_context
|
329
414
|
static int hash_map_per_thread_context_free_values(DDTRACE_UNUSED st_data_t _thread, st_data_t value_per_thread_context, DDTRACE_UNUSED st_data_t _argument) {
|
330
|
-
struct per_thread_context *
|
331
|
-
|
415
|
+
struct per_thread_context *thread_context = (struct per_thread_context*) value_per_thread_context;
|
416
|
+
free_context(thread_context);
|
332
417
|
return ST_CONTINUE;
|
333
418
|
}
|
334
419
|
|
@@ -339,25 +424,39 @@ static VALUE _native_new(VALUE klass) {
|
|
339
424
|
// being leaked.
|
340
425
|
|
341
426
|
// Update this when modifying state struct
|
342
|
-
state->
|
427
|
+
state->locations = NULL;
|
428
|
+
state->max_frames = 0;
|
343
429
|
state->hash_map_per_thread_context =
|
344
430
|
// "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s.
|
345
431
|
st_init_numtable();
|
346
432
|
state->recorder_instance = Qnil;
|
347
433
|
state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;
|
348
|
-
|
434
|
+
VALUE thread_list_buffer = rb_ary_new();
|
435
|
+
state->thread_list_buffer = thread_list_buffer;
|
349
436
|
state->endpoint_collection_enabled = true;
|
350
437
|
state->timeline_enabled = true;
|
438
|
+
state->otel_context_enabled = otel_context_enabled_false;
|
351
439
|
state->allocation_type_enabled = true;
|
352
440
|
state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
|
353
|
-
|
354
|
-
state->
|
441
|
+
VALUE main_thread = rb_thread_main();
|
442
|
+
state->main_thread = main_thread;
|
443
|
+
state->otel_current_span_key = Qtrue;
|
355
444
|
state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
|
356
445
|
state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
|
357
446
|
|
358
|
-
|
447
|
+
// Note: Remember to keep any new allocated objects that get stored in the state also on the stack + mark them with
|
448
|
+
// RB_GC_GUARD -- otherwise it's possible for a GC to run and
|
449
|
+
// since the instance representing the state does not yet exist, such objects will not get marked.
|
450
|
+
|
451
|
+
VALUE instance = TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
|
452
|
+
|
453
|
+
RB_GC_GUARD(thread_list_buffer);
|
454
|
+
RB_GC_GUARD(main_thread); // Arguably not needed, but perhaps can be move in some future Ruby release?
|
455
|
+
|
456
|
+
return instance;
|
359
457
|
}
|
360
458
|
|
459
|
+
// TODO: Convert this to use options like CpuAndWallTimeWorker
|
361
460
|
static VALUE _native_initialize(
|
362
461
|
DDTRACE_UNUSED VALUE _self,
|
363
462
|
VALUE collector_instance,
|
@@ -366,26 +465,38 @@ static VALUE _native_initialize(
|
|
366
465
|
VALUE tracer_context_key,
|
367
466
|
VALUE endpoint_collection_enabled,
|
368
467
|
VALUE timeline_enabled,
|
468
|
+
VALUE waiting_for_gvl_threshold_ns,
|
469
|
+
VALUE otel_context_enabled,
|
369
470
|
VALUE allocation_type_enabled
|
370
471
|
) {
|
371
472
|
ENFORCE_BOOLEAN(endpoint_collection_enabled);
|
372
473
|
ENFORCE_BOOLEAN(timeline_enabled);
|
474
|
+
ENFORCE_TYPE(waiting_for_gvl_threshold_ns, T_FIXNUM);
|
373
475
|
ENFORCE_BOOLEAN(allocation_type_enabled);
|
374
476
|
|
375
477
|
struct thread_context_collector_state *state;
|
376
478
|
TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
377
479
|
|
378
|
-
int max_frames_requested = NUM2INT(max_frames);
|
379
|
-
if (max_frames_requested < 0) rb_raise(rb_eArgError, "Invalid max_frames: value must not be negative");
|
380
|
-
|
381
480
|
// Update this when modifying state struct
|
382
|
-
state->
|
481
|
+
state->max_frames = sampling_buffer_check_max_frames(NUM2INT(max_frames));
|
482
|
+
state->locations = ruby_xcalloc(state->max_frames, sizeof(ddog_prof_Location));
|
383
483
|
// hash_map_per_thread_context is already initialized, nothing to do here
|
384
484
|
state->recorder_instance = enforce_recorder_instance(recorder_instance);
|
385
485
|
state->endpoint_collection_enabled = (endpoint_collection_enabled == Qtrue);
|
386
486
|
state->timeline_enabled = (timeline_enabled == Qtrue);
|
487
|
+
if (otel_context_enabled == Qfalse || otel_context_enabled == Qnil) {
|
488
|
+
state->otel_context_enabled = otel_context_enabled_false;
|
489
|
+
} else if (otel_context_enabled == ID2SYM(rb_intern("only"))) {
|
490
|
+
state->otel_context_enabled = otel_context_enabled_only;
|
491
|
+
} else if (otel_context_enabled == ID2SYM(rb_intern("both"))) {
|
492
|
+
state->otel_context_enabled = otel_context_enabled_both;
|
493
|
+
} else {
|
494
|
+
rb_raise(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled);
|
495
|
+
}
|
387
496
|
state->allocation_type_enabled = (allocation_type_enabled == Qtrue);
|
388
497
|
|
498
|
+
global_waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns);
|
499
|
+
|
389
500
|
if (RTEST(tracer_context_key)) {
|
390
501
|
ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
|
391
502
|
// Note about rb_to_id and dynamic symbols: calling `rb_to_id` prevents symbols from ever being garbage collected.
|
@@ -416,13 +527,22 @@ static VALUE _native_on_gc_start(DDTRACE_UNUSED VALUE self, VALUE collector_inst
|
|
416
527
|
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
417
528
|
// It SHOULD NOT be used for other purposes.
|
418
529
|
static VALUE _native_on_gc_finish(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
|
419
|
-
thread_context_collector_on_gc_finish(collector_instance);
|
530
|
+
(void) !thread_context_collector_on_gc_finish(collector_instance);
|
420
531
|
return Qtrue;
|
421
532
|
}
|
422
533
|
|
423
534
|
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
424
535
|
// It SHOULD NOT be used for other purposes.
|
425
|
-
static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
|
536
|
+
static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state) {
|
537
|
+
ENFORCE_BOOLEAN(reset_monotonic_to_system_state);
|
538
|
+
|
539
|
+
struct thread_context_collector_state *state;
|
540
|
+
TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
541
|
+
|
542
|
+
if (reset_monotonic_to_system_state == Qtrue) {
|
543
|
+
state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
|
544
|
+
}
|
545
|
+
|
426
546
|
thread_context_collector_sample_after_gc(collector_instance);
|
427
547
|
return Qtrue;
|
428
548
|
}
|
@@ -461,6 +581,7 @@ void thread_context_collector_sample(VALUE self_instance, long current_monotonic
|
|
461
581
|
/* thread_being_sampled: */ thread,
|
462
582
|
/* stack_from_thread: */ thread,
|
463
583
|
thread_context,
|
584
|
+
thread_context->sampling_buffer,
|
464
585
|
current_cpu_time_ns,
|
465
586
|
current_monotonic_wall_time_ns
|
466
587
|
);
|
@@ -477,25 +598,33 @@ void thread_context_collector_sample(VALUE self_instance, long current_monotonic
|
|
477
598
|
/* thread_being_sampled: */ current_thread,
|
478
599
|
/* stack_from_thread: */ profiler_overhead_stack_thread,
|
479
600
|
current_thread_context,
|
601
|
+
// Here we use the overhead thread's sampling buffer so as to not invalidate the cache in the buffer of the thread being sampled
|
602
|
+
get_or_create_context_for(profiler_overhead_stack_thread, state)->sampling_buffer,
|
480
603
|
cpu_time_now_ns(current_thread_context),
|
481
604
|
monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
|
482
605
|
);
|
483
606
|
}
|
484
607
|
|
485
|
-
void update_metrics_and_sample(
|
608
|
+
static void update_metrics_and_sample(
|
486
609
|
struct thread_context_collector_state *state,
|
487
610
|
VALUE thread_being_sampled,
|
488
611
|
VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
|
489
612
|
struct per_thread_context *thread_context,
|
613
|
+
sampling_buffer* sampling_buffer,
|
490
614
|
long current_cpu_time_ns,
|
491
615
|
long current_monotonic_wall_time_ns
|
492
616
|
) {
|
493
|
-
|
617
|
+
bool is_gvl_waiting_state =
|
618
|
+
handle_gvl_waiting(state, thread_being_sampled, stack_from_thread, thread_context, sampling_buffer, current_cpu_time_ns);
|
619
|
+
|
620
|
+
// Don't assign/update cpu during "Waiting for GVL"
|
621
|
+
long cpu_time_elapsed_ns = is_gvl_waiting_state ? 0 : update_time_since_previous_sample(
|
494
622
|
&thread_context->cpu_time_at_previous_sample_ns,
|
495
623
|
current_cpu_time_ns,
|
496
624
|
thread_context->gc_tracking.cpu_time_at_start_ns,
|
497
625
|
IS_NOT_WALL_TIME
|
498
626
|
);
|
627
|
+
|
499
628
|
long wall_time_elapsed_ns = update_time_since_previous_sample(
|
500
629
|
&thread_context->wall_time_at_previous_sample_ns,
|
501
630
|
current_monotonic_wall_time_ns,
|
@@ -507,15 +636,32 @@ void update_metrics_and_sample(
|
|
507
636
|
IS_WALL_TIME
|
508
637
|
);
|
509
638
|
|
639
|
+
// A thread enters "Waiting for GVL", well, as the name implies, without the GVL.
|
640
|
+
//
|
641
|
+
// As a consequence, it's possible that a thread enters "Waiting for GVL" in parallel with the current thread working
|
642
|
+
// on sampling, and thus for the `current_monotonic_wall_time_ns` (which is recorded at the start of sampling)
|
643
|
+
// to be < the time at which we started Waiting for GVL.
|
644
|
+
//
|
645
|
+
// All together, this means that when `handle_gvl_waiting` creates an extra sample (see comments on that function for
|
646
|
+
// what the extra sample is), it's possible that there's no more wall-time to be assigned.
|
647
|
+
// Thus, in this case, we don't want to produce a sample representing Waiting for GVL with a wall-time of 0, and
|
648
|
+
// thus we skip creating such a sample.
|
649
|
+
if (is_gvl_waiting_state && wall_time_elapsed_ns == 0) return;
|
650
|
+
// ...you may also wonder: is there any other situation where it makes sense to produce a sample with
|
651
|
+
// wall_time_elapsed_ns == 0? I believe that yes, because the sample still includes a timestamp and a stack, but we
|
652
|
+
// may revisit/change our minds on this in the future.
|
653
|
+
|
510
654
|
trigger_sample_for_thread(
|
511
655
|
state,
|
512
656
|
thread_being_sampled,
|
513
657
|
stack_from_thread,
|
514
658
|
thread_context,
|
659
|
+
sampling_buffer,
|
515
660
|
(sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
|
516
661
|
current_monotonic_wall_time_ns,
|
517
662
|
NULL,
|
518
|
-
NULL
|
663
|
+
NULL,
|
664
|
+
is_gvl_waiting_state
|
519
665
|
);
|
520
666
|
}
|
521
667
|
|
@@ -561,6 +707,7 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
|
|
561
707
|
//
|
562
708
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
563
709
|
// Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
|
710
|
+
__attribute__((warn_unused_result))
|
564
711
|
bool thread_context_collector_on_gc_finish(VALUE self_instance) {
|
565
712
|
struct thread_context_collector_state *state;
|
566
713
|
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
|
@@ -661,7 +808,6 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
|
|
661
808
|
}
|
662
809
|
|
663
810
|
record_placeholder_stack(
|
664
|
-
state->sampling_buffer,
|
665
811
|
state->recorder_instance,
|
666
812
|
(sample_values) {
|
667
813
|
// This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
|
@@ -692,11 +838,13 @@ static void trigger_sample_for_thread(
|
|
692
838
|
VALUE thread,
|
693
839
|
VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
|
694
840
|
struct per_thread_context *thread_context,
|
841
|
+
sampling_buffer* sampling_buffer,
|
695
842
|
sample_values values,
|
696
843
|
long current_monotonic_wall_time_ns,
|
697
844
|
// These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
|
698
845
|
ddog_CharSlice *ruby_vm_type,
|
699
|
-
ddog_CharSlice *class_name
|
846
|
+
ddog_CharSlice *class_name,
|
847
|
+
bool is_gvl_waiting_state
|
700
848
|
) {
|
701
849
|
int max_label_count =
|
702
850
|
1 + // thread id
|
@@ -737,6 +885,11 @@ static void trigger_sample_for_thread(
|
|
737
885
|
struct trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil};
|
738
886
|
trace_identifiers_for(state, thread, &trace_identifiers_result);
|
739
887
|
|
888
|
+
if (!trace_identifiers_result.valid && state->otel_context_enabled != otel_context_enabled_false) {
|
889
|
+
// If we couldn't get something with ddtrace, let's see if we can get some trace identifiers from opentelemetry directly
|
890
|
+
otel_without_ddtrace_trace_identifiers_for(state, thread, &trace_identifiers_result);
|
891
|
+
}
|
892
|
+
|
740
893
|
if (trace_identifiers_result.valid) {
|
741
894
|
labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .num = trace_identifiers_result.local_root_span_id};
|
742
895
|
labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("span id"), .num = trace_identifiers_result.span_id};
|
@@ -812,10 +965,15 @@ static void trigger_sample_for_thread(
|
|
812
965
|
|
813
966
|
sample_thread(
|
814
967
|
stack_from_thread,
|
815
|
-
|
968
|
+
sampling_buffer,
|
816
969
|
state->recorder_instance,
|
817
970
|
values,
|
818
|
-
(sample_labels) {
|
971
|
+
(sample_labels) {
|
972
|
+
.labels = slice_labels,
|
973
|
+
.state_label = state_label,
|
974
|
+
.end_timestamp_ns = end_timestamp_ns,
|
975
|
+
.is_gvl_waiting_state = is_gvl_waiting_state,
|
976
|
+
}
|
819
977
|
);
|
820
978
|
}
|
821
979
|
|
@@ -865,9 +1023,9 @@ static struct per_thread_context *get_context_for(VALUE thread, struct thread_co
|
|
865
1023
|
// to either run Ruby code during sampling (not great), or otherwise use some of the VM private APIs to detect this.
|
866
1024
|
//
|
867
1025
|
static bool is_logging_gem_monkey_patch(VALUE invoke_file_location) {
|
868
|
-
|
1026
|
+
unsigned long logging_gem_path_len = strlen(LOGGING_GEM_PATH);
|
869
1027
|
char *invoke_file = StringValueCStr(invoke_file_location);
|
870
|
-
|
1028
|
+
unsigned long invoke_file_len = strlen(invoke_file);
|
871
1029
|
|
872
1030
|
if (invoke_file_len < logging_gem_path_len) return false;
|
873
1031
|
|
@@ -875,6 +1033,8 @@ static bool is_logging_gem_monkey_patch(VALUE invoke_file_location) {
|
|
875
1033
|
}
|
876
1034
|
|
877
1035
|
static void initialize_context(VALUE thread, struct per_thread_context *thread_context, struct thread_context_collector_state *state) {
|
1036
|
+
thread_context->sampling_buffer = sampling_buffer_new(state->max_frames, state->locations);
|
1037
|
+
|
878
1038
|
snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%"PRIu64" (%lu)", native_thread_id_for(thread), (unsigned long) thread_id_for(thread));
|
879
1039
|
thread_context->thread_id_char_slice = (ddog_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};
|
880
1040
|
|
@@ -913,6 +1073,25 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_c
|
|
913
1073
|
// These will only be used during a GC operation
|
914
1074
|
thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
|
915
1075
|
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
|
1076
|
+
|
1077
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
1078
|
+
// We use this special location to store data that can be accessed without any
|
1079
|
+
// kind of synchronization (e.g. by threads without the GVL).
|
1080
|
+
//
|
1081
|
+
// We set this marker here for two purposes:
|
1082
|
+
// * To make sure there's no stale data from a previous execution of the profiler.
|
1083
|
+
// * To mark threads that are actually being profiled
|
1084
|
+
//
|
1085
|
+
// (Setting this is potentially a race, but what we want is to avoid _stale_ data, so
|
1086
|
+
// if this gets set concurrently with context initialization, then such a value will belong
|
1087
|
+
// to the current profiler instance, so that's OK)
|
1088
|
+
gvl_profiling_state_thread_object_set(thread, GVL_WAITING_ENABLED_EMPTY);
|
1089
|
+
#endif
|
1090
|
+
}
|
1091
|
+
|
1092
|
+
static void free_context(struct per_thread_context* thread_context) {
|
1093
|
+
sampling_buffer_free(thread_context->sampling_buffer);
|
1094
|
+
ruby_xfree(thread_context);
|
916
1095
|
}
|
917
1096
|
|
918
1097
|
static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
@@ -922,6 +1101,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
|
|
922
1101
|
VALUE result = rb_str_new2(" (native state)");
|
923
1102
|
|
924
1103
|
// Update this when modifying state struct
|
1104
|
+
rb_str_concat(result, rb_sprintf(" max_frames=%d", state->max_frames));
|
925
1105
|
rb_str_concat(result, rb_sprintf(" hash_map_per_thread_context=%"PRIsVALUE, per_thread_context_st_table_as_ruby_hash(state)));
|
926
1106
|
rb_str_concat(result, rb_sprintf(" recorder_instance=%"PRIsVALUE, state->recorder_instance));
|
927
1107
|
VALUE tracer_context_key = state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY ? Qnil : ID2SYM(state->tracer_context_key);
|
@@ -930,6 +1110,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
|
|
930
1110
|
rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state)));
|
931
1111
|
rb_str_concat(result, rb_sprintf(" endpoint_collection_enabled=%"PRIsVALUE, state->endpoint_collection_enabled ? Qtrue : Qfalse));
|
932
1112
|
rb_str_concat(result, rb_sprintf(" timeline_enabled=%"PRIsVALUE, state->timeline_enabled ? Qtrue : Qfalse));
|
1113
|
+
rb_str_concat(result, rb_sprintf(" otel_context_enabled=%d", state->otel_context_enabled));
|
933
1114
|
rb_str_concat(result, rb_sprintf(" allocation_type_enabled=%"PRIsVALUE, state->allocation_type_enabled ? Qtrue : Qfalse));
|
934
1115
|
rb_str_concat(result, rb_sprintf(
|
935
1116
|
" time_converter_state={.system_epoch_ns_reference=%ld, .delta_to_epoch_ns=%ld}",
|
@@ -939,6 +1120,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
|
|
939
1120
|
rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
|
940
1121
|
rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
|
941
1122
|
rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
|
1123
|
+
rb_str_concat(result, rb_sprintf(" global_waiting_for_gvl_threshold_ns=%u", global_waiting_for_gvl_threshold_ns));
|
942
1124
|
|
943
1125
|
return result;
|
944
1126
|
}
|
@@ -966,6 +1148,10 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
|
|
966
1148
|
|
967
1149
|
ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
|
968
1150
|
ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
|
1151
|
+
|
1152
|
+
#ifndef NO_GVL_INSTRUMENTATION
|
1153
|
+
ID2SYM(rb_intern("gvl_waiting_at")), /* => */ LONG2NUM(gvl_profiling_state_thread_object_get(thread)),
|
1154
|
+
#endif
|
969
1155
|
};
|
970
1156
|
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);
|
971
1157
|
|
@@ -1006,7 +1192,7 @@ static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context,
|
|
1006
1192
|
|
1007
1193
|
if (is_thread_alive(thread)) return ST_CONTINUE;
|
1008
1194
|
|
1009
|
-
|
1195
|
+
free_context(thread_context);
|
1010
1196
|
return ST_DELETE;
|
1011
1197
|
}
|
1012
1198
|
|
@@ -1116,6 +1302,7 @@ static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_ins
|
|
1116
1302
|
|
1117
1303
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
1118
1304
|
static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
|
1305
|
+
if (state->otel_context_enabled == otel_context_enabled_only) return;
|
1119
1306
|
if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
|
1120
1307
|
|
1121
1308
|
VALUE current_context = rb_thread_local_aref(thread, state->tracer_context_key);
|
@@ -1170,7 +1357,7 @@ static bool should_collect_resource(VALUE root_span) {
|
|
1170
1357
|
if (root_span_type == Qnil) return false;
|
1171
1358
|
ENFORCE_TYPE(root_span_type, T_STRING);
|
1172
1359
|
|
1173
|
-
|
1360
|
+
long root_span_type_length = RSTRING_LEN(root_span_type);
|
1174
1361
|
const char *root_span_type_value = StringValuePtr(root_span_type);
|
1175
1362
|
|
1176
1363
|
bool is_web_request =
|
@@ -1193,6 +1380,9 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector
|
|
1193
1380
|
struct thread_context_collector_state *state;
|
1194
1381
|
TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
1195
1382
|
|
1383
|
+
// Release all context memory before clearing the existing context
|
1384
|
+
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
|
1385
|
+
|
1196
1386
|
st_clear(state->hash_map_per_thread_context);
|
1197
1387
|
|
1198
1388
|
state->stats = (struct stats) {}; // Resets all stats back to zero
|
@@ -1257,7 +1447,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
|
|
1257
1447
|
// Thus, we need to make sure there's actually a class before getting its name.
|
1258
1448
|
|
1259
1449
|
if (klass != 0) {
|
1260
|
-
const char *name =
|
1450
|
+
const char *name = rb_class2name(klass);
|
1261
1451
|
size_t name_length = name != NULL ? strlen(name) : 0;
|
1262
1452
|
|
1263
1453
|
if (name_length > 0) {
|
@@ -1285,15 +1475,19 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
|
|
1285
1475
|
|
1286
1476
|
track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);
|
1287
1477
|
|
1478
|
+
struct per_thread_context *thread_context = get_or_create_context_for(current_thread, state);
|
1479
|
+
|
1288
1480
|
trigger_sample_for_thread(
|
1289
1481
|
state,
|
1290
1482
|
/* thread: */ current_thread,
|
1291
1483
|
/* stack_from_thread: */ current_thread,
|
1292
|
-
|
1293
|
-
|
1484
|
+
thread_context,
|
1485
|
+
thread_context->sampling_buffer,
|
1486
|
+
(sample_values) {.alloc_samples = sample_weight, .alloc_samples_unscaled = 1, .heap_sample = true},
|
1294
1487
|
INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
|
1295
1488
|
&ruby_vm_type,
|
1296
|
-
optional_class_name
|
1489
|
+
optional_class_name,
|
1490
|
+
false
|
1297
1491
|
);
|
1298
1492
|
}
|
1299
1493
|
|
@@ -1339,25 +1533,29 @@ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
|
|
1339
1533
|
}
|
1340
1534
|
}
|
1341
1535
|
|
1536
|
+
// Used to access OpenTelemetry::Trace.const_get(:CURRENT_SPAN_KEY). Will raise exceptions if it fails.
|
1537
|
+
static VALUE read_otel_current_span_key_const(DDTRACE_UNUSED VALUE _unused) {
|
1538
|
+
VALUE opentelemetry_module = rb_const_get(rb_cObject, rb_intern("OpenTelemetry"));
|
1539
|
+
ENFORCE_TYPE(opentelemetry_module, T_MODULE);
|
1540
|
+
VALUE trace_module = rb_const_get(opentelemetry_module, rb_intern("Trace"));
|
1541
|
+
ENFORCE_TYPE(trace_module, T_MODULE);
|
1542
|
+
return rb_const_get(trace_module, rb_intern("CURRENT_SPAN_KEY"));
|
1543
|
+
}
|
1544
|
+
|
1342
1545
|
static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
|
1343
|
-
if (state->otel_current_span_key ==
|
1344
|
-
|
1345
|
-
|
1346
|
-
VALUE
|
1347
|
-
VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
|
1348
|
-
VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
|
1349
|
-
|
1350
|
-
if (current_span_key == Qnil) {
|
1351
|
-
rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
|
1352
|
-
}
|
1546
|
+
if (state->otel_current_span_key == Qtrue) { // Qtrue means we haven't tried to extract it yet
|
1547
|
+
// If this fails, we want to fail gracefully, rather than raise an exception (e.g. if the opentelemetry gem
|
1548
|
+
// gets refactored, we should not fall on our face)
|
1549
|
+
VALUE span_key = rb_protect(read_otel_current_span_key_const, Qnil, NULL);
|
1353
1550
|
|
1354
|
-
|
1551
|
+
// Note that this gets set to Qnil if we failed to extract the correct value, and thus we won't try to extract it again
|
1552
|
+
state->otel_current_span_key = span_key;
|
1355
1553
|
}
|
1356
1554
|
|
1357
1555
|
return state->otel_current_span_key;
|
1358
1556
|
}
|
1359
1557
|
|
1360
|
-
// This method gets used when ddtrace is being used indirectly via the
|
1558
|
+
// This method gets used when ddtrace is being used indirectly via the opentelemetry APIs. Information gets stored slightly
|
1361
1559
|
// differently, and this codepath handles it.
|
1362
1560
|
static void ddtrace_otel_trace_identifiers_for(
|
1363
1561
|
struct thread_context_collector_state *state,
|
@@ -1377,6 +1575,7 @@ static void ddtrace_otel_trace_identifiers_for(
|
|
1377
1575
|
if (resolved_numeric_span_id == Qnil) return;
|
1378
1576
|
|
1379
1577
|
VALUE otel_current_span_key = get_otel_current_span_key(state);
|
1578
|
+
if (otel_current_span_key == Qnil) return;
|
1380
1579
|
VALUE current_trace = *active_trace;
|
1381
1580
|
|
1382
1581
|
// ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
|
@@ -1400,3 +1599,417 @@ static void ddtrace_otel_trace_identifiers_for(
|
|
1400
1599
|
*active_trace = current_trace;
|
1401
1600
|
*numeric_span_id = resolved_numeric_span_id;
|
1402
1601
|
}
|
1602
|
+
|
1603
|
+
void thread_context_collector_sample_skipped_allocation_samples(VALUE self_instance, unsigned int skipped_samples) {
|
1604
|
+
struct thread_context_collector_state *state;
|
1605
|
+
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
|
1606
|
+
|
1607
|
+
ddog_prof_Label labels[] = {
|
1608
|
+
// Providing .num = 0 should not be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
|
1609
|
+
{.key = DDOG_CHARSLICE_C("thread id"), .str = DDOG_CHARSLICE_C("SS"), .num = 0},
|
1610
|
+
{.key = DDOG_CHARSLICE_C("thread name"), .str = DDOG_CHARSLICE_C("Skipped Samples"), .num = 0},
|
1611
|
+
{.key = DDOG_CHARSLICE_C("allocation class"), .str = DDOG_CHARSLICE_C("(Skipped Samples)"), .num = 0},
|
1612
|
+
};
|
1613
|
+
ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = sizeof(labels) / sizeof(labels[0])};
|
+
+  record_placeholder_stack(
+    state->recorder_instance,
+    (sample_values) {.alloc_samples = skipped_samples},
+    (sample_labels) {
+      .labels = slice_labels,
+      .state_label = NULL,
+      .end_timestamp_ns = 0, // For now we're not collecting timestamps for allocation events
+    },
+    DDOG_CHARSLICE_C("Skipped Samples")
+  );
+}
+
+static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples) {
+  thread_context_collector_sample_skipped_allocation_samples(collector_instance, NUM2UINT(skipped_samples));
+  return Qtrue;
+}
+
+// This method differs from trace_identifiers_for/ddtrace_otel_trace_identifiers_for to support the situation where
+// the opentelemetry ruby library is being used for tracing AND the ddtrace tracing bits are not involved at all.
+//
+// Thus, in this case, we're directly reading from the opentelemetry stuff, which is different to how ddtrace tracing
+// does it.
+//
+// This is somewhat brittle: we're coupling on internal details of the opentelemetry gem to get what we need. In the
+// future maybe the otel ruby folks would be open to having a nice public way of getting this data that suits the
+// usecase of profilers.
+// Until then, the strategy below is to be extremely defensive, and if anything is out of place, we immediately return
+// and give up on getting trace data from opentelemetry. (Thus, worst case would be -- you upgrade opentelemetry and
+// profiling features relying on reading this data stop working, but you'll still get profiles and the app will be
+// otherwise undisturbed).
+//
+// Specifically, the way this works is:
+// 1. The latest entry in the opentelemetry context storage represents the current span (if any). We take the span id
+//    and trace id from this span.
+// 2. To find the local root span id, we walk the context storage backwards from the current span, and find the earliest
+//    entry in the context storage that has the same trace id as the current span; we use the found span as the local
+//    root span id.
+//    This matches the semantics of how ddtrace tracing creates a TraceOperation and assigns a local root span to it.
+static void otel_without_ddtrace_trace_identifiers_for(
+  struct thread_context_collector_state *state,
+  VALUE thread,
+  struct trace_identifiers *trace_identifiers_result
+) {
+  VALUE context_storage = rb_thread_local_aref(thread, otel_context_storage_id /* __opentelemetry_context_storage__ */);
+
+  // If it exists, context_storage is expected to be an Array[OpenTelemetry::Context]
+  if (context_storage == Qnil || !RB_TYPE_P(context_storage, T_ARRAY)) return;
+
+  VALUE otel_current_span_key = get_otel_current_span_key(state);
+  if (otel_current_span_key == Qnil) return;
+
+  int active_context_index = RARRAY_LEN(context_storage) - 1;
+  if (active_context_index < 0) return;
+
+  struct otel_span active_span = otel_span_from(rb_ary_entry(context_storage, active_context_index), otel_current_span_key);
+  if (active_span.span == Qnil) return;
+
+  struct otel_span local_root_span = active_span;
+
+  // Now find the oldest span starting from the active span that still has the same trace id as the active span
+  for (int i = active_context_index - 1; i >= 0; i--) {
+    struct otel_span checking_span = otel_span_from(rb_ary_entry(context_storage, i), otel_current_span_key);
+    if (checking_span.span == Qnil) return;
+
+    if (rb_str_equal(active_span.trace_id, checking_span.trace_id) == Qfalse) break;
+
+    local_root_span = checking_span;
+  }
+
+  // Convert the span ids into uint64_t to match what the Datadog tracer does
+  trace_identifiers_result->span_id = otel_span_id_to_uint(active_span.span_id);
+  trace_identifiers_result->local_root_span_id = otel_span_id_to_uint(local_root_span.span_id);
+
+  if (trace_identifiers_result->span_id == 0 || trace_identifiers_result->local_root_span_id == 0) return;
+
+  trace_identifiers_result->valid = true;
+
+  if (!state->endpoint_collection_enabled) return;
+
+  VALUE root_span_type = rb_ivar_get(local_root_span.span, at_kind_id /* @kind */);
+  // We filter out spans that don't have `kind: :server`
+  if (root_span_type == Qnil || !RB_TYPE_P(root_span_type, T_SYMBOL) || SYM2ID(root_span_type) != server_id) return;
+
+  VALUE trace_resource = rb_ivar_get(local_root_span.span, at_name_id /* @name */);
+  if (!RB_TYPE_P(trace_resource, T_STRING)) return;
+
+  trace_identifiers_result->trace_endpoint = trace_resource;
+}
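Aside: the selection logic above (walk the context storage backwards from the newest entry while the trace id still matches, and treat the last matching entry as the local root span) can be pictured in isolation. The following standalone sketch uses made-up trace/span ids and a plain C array in place of the Ruby Array of OpenTelemetry::Context objects; it is an illustration, not code from the gem.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Hypothetical flattened context storage: newest entry is last, mirroring the Array walked above.
typedef struct { const char *trace_id; uint64_t span_id; } example_span;

int main(void) {
  example_span context_storage[] = {
    {"trace-a", 1}, // oldest entry, different trace
    {"trace-b", 2}, // <- becomes the local root span for trace-b
    {"trace-b", 3},
    {"trace-b", 4}, // newest entry == active span
  };
  int active = (int) (sizeof(context_storage) / sizeof(context_storage[0])) - 1;

  example_span active_span = context_storage[active];
  example_span local_root_span = active_span;

  // Walk backwards while the trace id still matches the active span's trace id
  for (int i = active - 1; i >= 0; i--) {
    if (strcmp(active_span.trace_id, context_storage[i].trace_id) != 0) break;
    local_root_span = context_storage[i];
  }

  printf("span_id=%llu local_root_span_id=%llu\n",
    (unsigned long long) active_span.span_id, (unsigned long long) local_root_span.span_id); // prints 4 and 2
  return 0;
}

With this hypothetical storage, the active span is span 4 and the local root span is span 2 (the oldest contiguous entry sharing trace-b), which is the pairing the function above reports.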
+
+static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key) {
+  struct otel_span failed = {.span = Qnil, .span_id = Qnil, .trace_id = Qnil};
+
+  if (otel_context == Qnil) return failed;
+
+  VALUE context_entries = rb_ivar_get(otel_context, at_entries_id /* @entries */);
+  if (context_entries == Qnil || !RB_TYPE_P(context_entries, T_HASH)) return failed;
+
+  // If it exists, context_entries is expected to be a Hash[OpenTelemetry::Context::Key, OpenTelemetry::Trace::Span]
+  VALUE span = rb_hash_lookup(context_entries, otel_current_span_key);
+  if (span == Qnil) return failed;
+
+  // If it exists, span_context is expected to be an OpenTelemetry::Trace::SpanContext (don't confuse it with OpenTelemetry::Context)
+  VALUE span_context = rb_ivar_get(span, at_context_id /* @context */);
+  if (span_context == Qnil) return failed;
+
+  VALUE span_id = rb_ivar_get(span_context, at_span_id_id /* @span_id */);
+  VALUE trace_id = rb_ivar_get(span_context, at_trace_id_id /* @trace_id */);
+  if (span_id == Qnil || trace_id == Qnil || !RB_TYPE_P(span_id, T_STRING) || !RB_TYPE_P(trace_id, T_STRING)) return failed;
+
+  return (struct otel_span) {.span = span, .span_id = span_id, .trace_id = trace_id};
+}
+
+// Otel span ids are represented as a big-endian 8-byte string
+static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
+  if (!RB_TYPE_P(otel_span_id, T_STRING) || RSTRING_LEN(otel_span_id) != 8) { return 0; }
+
+  unsigned char *span_bytes = (unsigned char*) StringValuePtr(otel_span_id);
+
+  return \
+    ((uint64_t)span_bytes[0] << 56) |
+    ((uint64_t)span_bytes[1] << 48) |
+    ((uint64_t)span_bytes[2] << 40) |
+    ((uint64_t)span_bytes[3] << 32) |
+    ((uint64_t)span_bytes[4] << 24) |
+    ((uint64_t)span_bytes[5] << 16) |
+    ((uint64_t)span_bytes[6] << 8) |
+    ((uint64_t)span_bytes[7]);
+}
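Aside: since otel span ids arrive as big-endian 8-byte strings, the shifts above just reassemble the bytes most-significant first. A minimal standalone check with a made-up id (not code from the gem):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  // Hypothetical big-endian 8-byte span id: 0x0000000000000102, i.e. the integer 258
  unsigned char span_bytes[8] = {0, 0, 0, 0, 0, 0, 0x01, 0x02};

  // Same byte-shifting as otel_span_id_to_uint: byte 0 is the most significant
  uint64_t value =
    ((uint64_t)span_bytes[0] << 56) |
    ((uint64_t)span_bytes[1] << 48) |
    ((uint64_t)span_bytes[2] << 40) |
    ((uint64_t)span_bytes[3] << 32) |
    ((uint64_t)span_bytes[4] << 24) |
    ((uint64_t)span_bytes[5] << 16) |
    ((uint64_t)span_bytes[6] << 8) |
    ((uint64_t)span_bytes[7]);

  printf("%llu\n", (unsigned long long) value); // prints 258
  return 0;
}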
+
+#ifndef NO_GVL_INSTRUMENTATION
+// This function can get called from outside the GVL and even on non-main Ractors
+void thread_context_collector_on_gvl_waiting(gvl_profiling_thread thread) {
+  // Because this function gets called from a thread that is NOT holding the GVL, we avoid touching the
+  // per-thread context directly.
+  //
+  // Instead, we ask Ruby to hold the data we need in Ruby's own special per-thread context area
+  // that's thread-safe and built for this kind of use
+  //
+  // Also, this function can get called on the non-main Ractor. We deal with this by checking if the value in the context
+  // is non-zero, since only `initialize_context` ever sets the value from 0 to non-zero for threads it sees.
+  intptr_t thread_being_profiled = gvl_profiling_state_get(thread);
+  if (!thread_being_profiled) return;
+
+  long current_monotonic_wall_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+  if (current_monotonic_wall_time_ns <= 0 || current_monotonic_wall_time_ns > GVL_WAITING_ENABLED_EMPTY) return;
+
+  gvl_profiling_state_set(thread, current_monotonic_wall_time_ns);
+}
+
+// This function can get called from outside the GVL and even on non-main Ractors
+__attribute__((warn_unused_result))
+bool thread_context_collector_on_gvl_running_with_threshold(gvl_profiling_thread thread, uint32_t waiting_for_gvl_threshold_ns) {
+  intptr_t gvl_waiting_at = gvl_profiling_state_get(thread);
+
+  // Thread was not being profiled / not waiting on gvl
+  if (gvl_waiting_at == 0 || gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) return false;
+
+  // @ivoanjo: I'm not sure if this can happen -- It means we should've sampled already but haven't gotten the chance yet?
+  if (gvl_waiting_at < 0) return true;
+
+  long waiting_for_gvl_duration_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - gvl_waiting_at;
+
+  bool should_sample = waiting_for_gvl_duration_ns >= waiting_for_gvl_threshold_ns;
+
+  if (should_sample) {
+    // We flip the gvl_waiting_at to negative to mark that the thread is now running and no longer waiting
+    intptr_t gvl_waiting_at_is_now_running = -gvl_waiting_at;
+
+    gvl_profiling_state_set(thread, gvl_waiting_at_is_now_running);
+  } else {
+    // We decided not to sample. Let's mark the thread back to the initial "enabled but empty" state
+    gvl_profiling_state_set(thread, GVL_WAITING_ENABLED_EMPTY);
+  }
+
+  return should_sample;
+}
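Aside: the sampling decision above reduces to comparing how long the thread has been waiting against a threshold. A standalone sketch with hypothetical timestamps (in the gem the threshold comes from global_waiting_for_gvl_threshold_ns and the timestamps from monotonic_wall_time_now_ns; the numbers here are made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  int64_t  gvl_waiting_at = 1000000000;             // hypothetical: started waiting at t = 1s (in ns)
  int64_t  now_ns         = 1000000000 + 15000000;  // hypothetical: 15 ms later
  uint32_t threshold_ns   = 10000000;               // hypothetical 10 ms threshold

  int64_t waiting_for_gvl_duration_ns = now_ns - gvl_waiting_at;
  bool should_sample = waiting_for_gvl_duration_ns >= threshold_ns;

  // With these numbers the wait lasted 15 ms (>= 10 ms), so a "Waiting for GVL" sample would be taken
  // and the stored state would be flipped to -gvl_waiting_at, marking "done waiting, sample pending".
  printf("should_sample = %s\n", should_sample ? "true" : "false");
  return 0;
}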
+
+__attribute__((warn_unused_result))
+bool thread_context_collector_on_gvl_running(gvl_profiling_thread thread) {
+  return thread_context_collector_on_gvl_running_with_threshold(thread, global_waiting_for_gvl_threshold_ns);
+}
+
+// Why does this method need to exist?
+//
+// You may be surprised to see that if we never call this function (from cpu_and_wall_time_worker), Waiting for GVL
+// samples will still show up.
+// This is because regular cpu/wall-time samples also use `update_metrics_and_sample` which will do the right thing
+// and push "Waiting for GVL" samples as needed.
+//
+// The reason this method needs to exist and be called very shortly after thread_context_collector_on_gvl_running
+// returning true is to ensure accuracy of both the timing and stack for the Waiting for GVL sample.
+//
+// Timing:
+// Because we currently only record the timestamp when the Waiting for GVL started and not when the Waiting for GVL ended,
+// we rely on pushing a sample as soon as possible when the Waiting for GVL ends so that the timestamp of the sample
+// actually matches when we stopped waiting.
+//
+// Stack:
+// If the thread starts working without the end of the Waiting for GVL sample, then by the time the thread is sampled
+// via the regular cpu/wall-time samples mechanism, the stack can be inaccurate (e.g. does not correctly pinpoint
+// where the waiting happened).
+//
+// Arguably, the last sample after Waiting for GVL ended (when gvl_waiting_at < 0) should always come from this method
+// and not a regular cpu/wall-time sample BUT since all of these things are happening in parallel/concurrently I suspect
+// it's possible for a regular sample to kick in just before this one.
+//
+// ---
+//
+// NOTE: In normal use, current_thread is expected to be == rb_thread_current(); the `current_thread` parameter only
+// exists to enable testing.
+VALUE thread_context_collector_sample_after_gvl_running_with_thread(VALUE self_instance, VALUE current_thread) {
+  struct thread_context_collector_state *state;
+  TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+  if (!state->timeline_enabled) rb_raise(rb_eRuntimeError, "GVL profiling requires timeline to be enabled");
+
+  intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread);
+
+  if (gvl_waiting_at >= 0) {
+    // @ivoanjo: I'm not sure if this can ever happen. This means that we're not on the same thread
+    // that ran `thread_context_collector_on_gvl_running` and made the decision to sample OR a regular sample was
+    // triggered ahead of us.
+    // We do nothing in this case.
+    return Qfalse;
+  }
+
+  struct per_thread_context *thread_context = get_or_create_context_for(current_thread, state);
+
+  // We don't actually account for cpu-time during Waiting for GVL. BUT, we may choose to push an
+  // extra sample to represent the period prior to Waiting for GVL. To support that, we retrieve the current
+  // cpu-time of the thread and let `update_metrics_and_sample` decide what to do with it.
+  long cpu_time_for_thread = cpu_time_now_ns(thread_context);
+
+  // TODO: Should we update the dynamic sampling rate overhead tracking with this sample as well?
+
+  update_metrics_and_sample(
+    state,
+    /* thread_being_sampled: */ current_thread,
+    /* stack_from_thread: */ current_thread,
+    thread_context,
+    thread_context->sampling_buffer,
+    cpu_time_for_thread,
+    monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
+  );
+
+  return Qtrue; // To allow this to be called from rb_rescue2
+}
+
+VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance) {
+  return thread_context_collector_sample_after_gvl_running_with_thread(self_instance, rb_thread_current());
+}
+
+// This method is intended to be called from update_metrics_and_sample. It exists to handle extra sampling steps we
+// need to take when sampling cpu/wall-time for a thread that's in the "Waiting for GVL" state.
+__attribute__((warn_unused_result))
+static bool handle_gvl_waiting(
+  struct thread_context_collector_state *state,
+  VALUE thread_being_sampled,
+  VALUE stack_from_thread,
+  struct per_thread_context *thread_context,
+  sampling_buffer* sampling_buffer,
+  long current_cpu_time_ns
+) {
+  intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(thread_being_sampled);
+
+  bool is_gvl_waiting_state = gvl_waiting_at != 0 && gvl_waiting_at != GVL_WAITING_ENABLED_EMPTY;
+
+  if (!is_gvl_waiting_state) return false;
+
+  // We can be in one of 2 situations here:
+  //
+  // 1. The current sample is the first one after we entered the "Waiting for GVL" state
+  //    (wall_time_at_previous_sample_ns < abs(gvl_waiting_at))
+  //
+  //                              time ─────►
+  //    ...──────────────┬───────────────────...
+  //         Other state │ Waiting for GVL
+  //    ...──────────────┴───────────────────...
+  //                     ▲                   ▲
+  //                     └─ Previous sample  └─ Regular sample (caller)
+  //
+  // In this case, we'll want to push two samples: a) one for the current time (handled by the caller), b) an extra sample
+  // to represent the remaining cpu/wall time before the "Waiting for GVL" started:
+  //
+  //                              time ─────►
+  //    ...──────────────┬───────────────────...
+  //         Other state │ Waiting for GVL
+  //    ...──────────────┴───────────────────...
+  //            ▲        ▲                   ▲
+  //            └─ Prev...  └─ Extra sample  └─ Regular sample (caller)
+  //
+  // 2. The current sample is the n-th one after we entered the "Waiting for GVL" state
+  //    (wall_time_at_previous_sample_ns > abs(gvl_waiting_at))
+  //
+  //                              time ─────►
+  //    ...──────────────┬───────────────────────────────────────────────...
+  //         Other state │ Waiting for GVL
+  //    ...──────────────┴───────────────────────────────────────────────...
+  //                     ▲                   ▲                           ▲
+  //                     └─ Previous sample  └─ Previous sample          └─ Regular sample (caller)
+  //
+  // In this case, we just report back to the caller that the thread is in the "Waiting for GVL" state.
+  //
+  // ---
+  //
+  // Overall, gvl_waiting_at will be > 0 if still in the "Waiting for GVL" state and < 0 if we actually reached the end of
+  // the wait.
+  //
+  // It doesn't really matter if the thread is still waiting or just reached the end of the wait: each sample represents
+  // a snapshot at time ending now, so if the state finished, it just means the next sample will be a regular one.
+
+  if (gvl_waiting_at < 0) {
+    // Negative means the waiting for GVL just ended, so we clear the state, so next samples no longer represent waiting
+    gvl_profiling_state_thread_object_set(thread_being_sampled, GVL_WAITING_ENABLED_EMPTY);
+  }
+
+  long gvl_waiting_started_wall_time_ns = labs(gvl_waiting_at);
+
+  if (thread_context->wall_time_at_previous_sample_ns < gvl_waiting_started_wall_time_ns) { // situation 1 above
+    long cpu_time_elapsed_ns = update_time_since_previous_sample(
+      &thread_context->cpu_time_at_previous_sample_ns,
+      current_cpu_time_ns,
+      thread_context->gc_tracking.cpu_time_at_start_ns,
+      IS_NOT_WALL_TIME
+    );
+
+    long duration_until_start_of_gvl_waiting_ns = update_time_since_previous_sample(
+      &thread_context->wall_time_at_previous_sample_ns,
+      gvl_waiting_started_wall_time_ns,
+      INVALID_TIME,
+      IS_WALL_TIME
+    );
+
+    // Push extra sample
+    trigger_sample_for_thread(
+      state,
+      thread_being_sampled,
+      stack_from_thread,
+      thread_context,
+      sampling_buffer,
+      (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = duration_until_start_of_gvl_waiting_ns},
+      gvl_waiting_started_wall_time_ns,
+      NULL,
+      NULL,
+      false // This is the extra sample before the wait began; only the next sample will be in the gvl waiting state
+    );
+  }
+
+  return true;
+}
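Aside: the branch between situation 1 and situation 2 above hinges on whether the previous sample predates the start of the wait. A standalone sketch with hypothetical timestamps (not gem code) showing when the extra sample would get pushed:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  // Hypothetical values: a negative gvl_waiting_at means the wait just ended; its absolute value
  // is the wall-clock time (in ns) at which the thread started waiting for the GVL.
  int64_t gvl_waiting_at = -2000000000;                  // wait started at t = 2s
  int64_t wall_time_at_previous_sample_ns = 1500000000;  // last sample taken at t = 1.5s

  int64_t gvl_waiting_started_wall_time_ns = llabs(gvl_waiting_at);

  if (wall_time_at_previous_sample_ns < gvl_waiting_started_wall_time_ns) {
    // Situation 1: the previous sample happened before the wait began, so an extra sample covers the
    // 0.5s of "Other state" time between the previous sample and the start of the wait.
    printf("situation 1: push extra sample covering %lld ns\n",
      (long long) (gvl_waiting_started_wall_time_ns - wall_time_at_previous_sample_ns));
  } else {
    // Situation 2: the previous sample already fell inside the wait; just report the waiting state.
    printf("situation 2: no extra sample\n");
  }
  return 0;
}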
+
+static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread) {
+  ENFORCE_THREAD(thread);
+
+  thread_context_collector_on_gvl_waiting(thread_from_thread_object(thread));
+  return Qnil;
+}
+
+static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread) {
+  ENFORCE_THREAD(thread);
+
+  intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(thread);
+  return LONG2NUM(gvl_waiting_at);
+}
+
+static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread) {
+  ENFORCE_THREAD(thread);
+
+  return thread_context_collector_on_gvl_running(thread_from_thread_object(thread)) ? Qtrue : Qfalse;
+}
+
+static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread) {
+  ENFORCE_THREAD(thread);
+
+  return thread_context_collector_sample_after_gvl_running_with_thread(collector_instance, thread);
+}
+
+static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns) {
+  ENFORCE_THREAD(thread);
+
+  struct thread_context_collector_state *state;
+  TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+  struct per_thread_context *thread_context = get_context_for(thread, state);
+  if (thread_context == NULL) rb_raise(rb_eArgError, "Unexpected: This method cannot be used unless the per-thread context for the thread already exists");
+
+  thread_context->cpu_time_at_previous_sample_ns += NUM2LONG(delta_ns);
+
+  return Qtrue;
+}
+
+#else
+  static bool handle_gvl_waiting(
+    DDTRACE_UNUSED struct thread_context_collector_state *state,
+    DDTRACE_UNUSED VALUE thread_being_sampled,
+    DDTRACE_UNUSED VALUE stack_from_thread,
+    DDTRACE_UNUSED struct per_thread_context *thread_context,
+    DDTRACE_UNUSED sampling_buffer* sampling_buffer,
+    DDTRACE_UNUSED long current_cpu_time_ns
+  ) { return false; }
+#endif // NO_GVL_INSTRUMENTATION