datadog 2.35.0 → 2.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -1
- data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +68 -31
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +1 -1
- data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +1 -1
- data/ext/datadog_profiling_native_extension/collectors_stack.c +37 -18
- data/ext/datadog_profiling_native_extension/collectors_stack.h +8 -2
- data/ext/datadog_profiling_native_extension/collectors_thread_context.c +434 -300
- data/ext/datadog_profiling_native_extension/collectors_thread_context.h +9 -7
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +7 -8
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +0 -12
- data/ext/datadog_profiling_native_extension/extconf.rb +2 -2
- data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +4 -43
- data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +15 -47
- data/ext/datadog_profiling_native_extension/heap_recorder.c +44 -26
- data/ext/datadog_profiling_native_extension/private_vm_api_access.c +14 -35
- data/ext/datadog_profiling_native_extension/profiling.c +41 -4
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +33 -34
- data/ext/datadog_profiling_native_extension/stack_recorder.c +24 -3
- data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
- data/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h +4 -2
- data/ext/libdatadog_api/datadog_ruby_common.c +7 -8
- data/ext/libdatadog_api/datadog_ruby_common.h +0 -12
- data/ext/libdatadog_extconf_helpers.rb +1 -1
- data/lib/datadog/appsec/api_security/route_extractor.rb +6 -0
- data/lib/datadog/appsec/component.rb +1 -1
- data/lib/datadog/appsec/configuration.rb +7 -0
- data/lib/datadog/appsec/contrib/aws_lambda/waf_addresses.rb +37 -4
- data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +64 -19
- data/lib/datadog/appsec/contrib/graphql/integration.rb +1 -0
- data/lib/datadog/appsec/contrib/rack/buffered_input.rb +83 -0
- data/lib/datadog/appsec/contrib/rack/gateway/request.rb +41 -3
- data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +20 -7
- data/lib/datadog/appsec/contrib/rack/input_peeker.rb +92 -0
- data/lib/datadog/appsec/contrib/rails/gateway/request.rb +33 -0
- data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +17 -1
- data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +20 -3
- data/lib/datadog/appsec/default_header_tags.rb +10 -6
- data/lib/datadog/core/configuration/components.rb +1 -0
- data/lib/datadog/core/configuration/settings.rb +1 -2
- data/lib/datadog/core/configuration/supported_configurations.rb +2 -0
- data/lib/datadog/core/remote/component.rb +1 -1
- data/lib/datadog/core/telemetry/event/app_started.rb +0 -21
- data/lib/datadog/core/utils/at_fork_monkey_patch.rb +1 -1
- data/lib/datadog/core/utils/forking.rb +3 -1
- data/lib/datadog/core/utils/spawn_monkey_patch.rb +3 -1
- data/lib/datadog/core.rb +3 -0
- data/lib/datadog/di/base.rb +4 -1
- data/lib/datadog/di/component.rb +1 -1
- data/lib/datadog/error_tracking/collector.rb +2 -1
- data/lib/datadog/error_tracking/component.rb +2 -2
- data/lib/datadog/kit/tracing/method_tracer.rb +4 -1
- data/lib/datadog/opentelemetry/sdk/propagator.rb +9 -3
- data/lib/datadog/opentelemetry/sdk/span_processor.rb +4 -1
- data/lib/datadog/profiling/collectors/thread_context.rb +1 -0
- data/lib/datadog/profiling/component.rb +13 -15
- data/lib/datadog/profiling/ext/dir_monkey_patches.rb +3 -3
- data/lib/datadog/ruby_version.rb +25 -0
- data/lib/datadog/symbol_database/component.rb +306 -98
- data/lib/datadog/symbol_database/extractor.rb +223 -84
- data/lib/datadog/tracing/configuration/ext.rb +13 -0
- data/lib/datadog/tracing/configuration/settings.rb +17 -0
- data/lib/datadog/tracing/contrib/configuration/resolver.rb +7 -0
- data/lib/datadog/tracing/contrib/grpc/distributed/propagation.rb +2 -0
- data/lib/datadog/tracing/contrib/grpc.rb +1 -0
- data/lib/datadog/tracing/contrib/http/distributed/propagation.rb +2 -0
- data/lib/datadog/tracing/contrib/http.rb +1 -0
- data/lib/datadog/tracing/contrib/karafka/distributed/propagation.rb +2 -0
- data/lib/datadog/tracing/contrib/karafka.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +3 -1
- data/lib/datadog/tracing/contrib/rack/route_inference.rb +3 -1
- data/lib/datadog/tracing/contrib/sidekiq/distributed/propagation.rb +2 -0
- data/lib/datadog/tracing/contrib/sidekiq.rb +1 -0
- data/lib/datadog/tracing/contrib/waterdrop/distributed/propagation.rb +2 -0
- data/lib/datadog/tracing/contrib/waterdrop.rb +1 -0
- data/lib/datadog/tracing/distributed/propagation.rb +33 -1
- data/lib/datadog/tracing/distributed/trace_context.rb +11 -2
- data/lib/datadog/tracing/trace_digest.rb +7 -0
- data/lib/datadog/tracing/trace_operation.rb +4 -1
- data/lib/datadog/tracing/tracer.rb +1 -0
- data/lib/datadog/version.rb +1 -1
- data/lib/datadog.rb +4 -1
- metadata +8 -5
|
@@ -76,14 +76,15 @@
|
|
|
76
76
|
#define THREAD_ID_LIMIT_CHARS 44 // Why 44? "#{2**64} (#{2**64})".size + 1 for \0
|
|
77
77
|
#define THREAD_INVOKE_LOCATION_LIMIT_CHARS 512
|
|
78
78
|
#define IS_WALL_TIME true
|
|
79
|
-
#define
|
|
79
|
+
#define IS_CPU_TIME false
|
|
80
80
|
#define MISSING_TRACER_CONTEXT_KEY 0
|
|
81
81
|
#define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)
|
|
82
|
+
#define GVL_SUSPENDED ((uint64_t)1)
|
|
83
|
+
#define GVL_RUNNING ((uint64_t)0)
|
|
82
84
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
#define GVL_WAITING_ENABLED_EMPTY RUBY_FIXNUM_MAX
|
|
85
|
+
#define MAX(a, b) ((a) < (b) ? (b) : (a))
|
|
86
|
+
|
|
87
|
+
static ID dd_per_thread_context_id; // Hidden ivar (no @ prefix, inaccessible from Ruby)
|
|
87
88
|
|
|
88
89
|
static ID at_active_span_id; // id of :@active_span in Ruby
|
|
89
90
|
static ID at_active_trace_id; // id of :@active_trace in Ruby
|
|
@@ -107,12 +108,6 @@ static ID server_id; // id of :server in Ruby
|
|
|
107
108
|
static ID otel_context_storage_id; // id of :__opentelemetry_context_storage__ in Ruby
|
|
108
109
|
static ID otel_fiber_context_storage_id; // id of :@opentelemetry_context in Ruby
|
|
109
110
|
|
|
110
|
-
// This is used by `thread_context_collector_on_gvl_running`. Because when that method gets called we're not sure if
|
|
111
|
-
// it's safe to access the state of the thread context collector, we store this setting as a global value. This does
|
|
112
|
-
// mean this setting is shared among all thread context collectors, and thus it's "last writer wins".
|
|
113
|
-
// In production this should not be a problem: there should only be one profiler, which is the last one created,
|
|
114
|
-
// and that'll be the one that last wrote this setting.
|
|
115
|
-
static uint32_t global_waiting_for_gvl_threshold_ns = MILLIS_AS_NS(10);
|
|
116
111
|
|
|
117
112
|
typedef enum { OTEL_CONTEXT_ENABLED_FALSE, OTEL_CONTEXT_ENABLED_ONLY, OTEL_CONTEXT_ENABLED_BOTH } otel_context_enabled;
|
|
118
113
|
typedef enum { OTEL_CONTEXT_SOURCE_UNKNOWN, OTEL_CONTEXT_SOURCE_FIBER_IVAR, OTEL_CONTEXT_SOURCE_FIBER_LOCAL } otel_context_source;
|
|
@@ -122,22 +117,14 @@ typedef struct {
|
|
|
122
117
|
// Note: Places in this file that usually need to be changed when this struct is changed are tagged with
|
|
123
118
|
// "Update this when modifying state struct"
|
|
124
119
|
|
|
125
|
-
//
|
|
126
|
-
|
|
127
|
-
uint16_t max_frames;
|
|
128
|
-
// Hashmap <Thread Object, per_thread_context>
|
|
129
|
-
// Note: Be very careful when mutating this map, as it gets read e.g. in the middle of GC and signal handlers.
|
|
130
|
-
st_table *hash_map_per_thread_context;
|
|
120
|
+
// Output buffer for stack traces, passed to sample_thread()
|
|
121
|
+
sample_locations locations;
|
|
131
122
|
// Datadog::Profiling::StackRecorder instance
|
|
132
123
|
VALUE recorder_instance;
|
|
133
124
|
// If the tracer is available and enabled, this will be the fiber-local symbol for accessing its running context,
|
|
134
125
|
// to enable code hotspots and endpoint aggregation.
|
|
135
126
|
// When not available, this is set to MISSING_TRACER_CONTEXT_KEY.
|
|
136
127
|
ID tracer_context_key;
|
|
137
|
-
// Track how many regular samples we've taken. Does not include garbage collection samples.
|
|
138
|
-
// Currently **outside** of stats struct because we also use it to decide when to clean the contexts, and thus this
|
|
139
|
-
// is not (just) a stat.
|
|
140
|
-
unsigned int sample_count;
|
|
141
128
|
// Reusable array to get list of threads
|
|
142
129
|
VALUE thread_list_buffer;
|
|
143
130
|
// Used to omit endpoint names (retrieved from tracer) from collected data
|
|
@@ -158,12 +145,21 @@ typedef struct {
|
|
|
158
145
|
bool native_filenames_enabled;
|
|
159
146
|
// Used to cache native filename lookup results (Map[void *function_pointer, char *filename])
|
|
160
147
|
st_table *native_filenames_cache;
|
|
148
|
+
// Used to attribute overhead during sampling to this component
|
|
149
|
+
VALUE overhead_filename;
|
|
150
|
+
// Minimum duration of a "Waiting for GVL" period to trigger a sample
|
|
151
|
+
uint32_t waiting_for_gvl_threshold_ns;
|
|
161
152
|
|
|
162
153
|
struct stats {
|
|
154
|
+
// Track how many regular samples we've taken. Does not include garbage collection samples.
|
|
155
|
+
unsigned int sample_count;
|
|
163
156
|
// Track how many garbage collection samples we've taken.
|
|
164
157
|
unsigned int gc_samples;
|
|
165
158
|
// See thread_context_collector_on_gc_start for details
|
|
166
159
|
unsigned int gc_samples_missed_due_to_missing_context;
|
|
160
|
+
// How many per-thread samples were skipped because the thread has been continuously suspended
|
|
161
|
+
// (no GVL) since its previous sample, so its Ruby stack cannot have changed.
|
|
162
|
+
unsigned int inactive_thread_samples_skipped;
|
|
167
163
|
} stats;
|
|
168
164
|
|
|
169
165
|
struct {
|
|
@@ -176,7 +172,7 @@ typedef struct {
|
|
|
176
172
|
} thread_context_collector_state;
|
|
177
173
|
|
|
178
174
|
// Tracks per-thread state
|
|
179
|
-
|
|
175
|
+
struct per_thread_context {
|
|
180
176
|
sampling_buffer sampling_buffer;
|
|
181
177
|
char thread_id[THREAD_ID_LIMIT_CHARS];
|
|
182
178
|
ddog_CharSlice thread_id_char_slice;
|
|
@@ -186,13 +182,65 @@ typedef struct {
|
|
|
186
182
|
long cpu_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized or if getting it fails for another reason
|
|
187
183
|
long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
|
|
188
184
|
|
|
185
|
+
// There are 3 possible states for the GVL (per thread), and 3 transitions for which we receive GVL events:
|
|
186
|
+
// Thread holds the GVL
|
|
187
|
+
// on_gvl_released() the thread releases the GVL (RUBY_INTERNAL_THREAD_EVENT_SUSPENDED)
|
|
188
|
+
// Thread runs without the GVL
|
|
189
|
+
// on_gvl_waiting() the thread wants the GVL (RUBY_INTERNAL_THREAD_EVENT_READY)
|
|
190
|
+
// Thread is "Waiting for GVL"
|
|
191
|
+
// on_gvl_running() the thread now got the GVL (RUBY_INTERNAL_THREAD_EVENT_RESUMED)
|
|
192
|
+
// ... and the cycle restarts
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
// --- GVL waiting tracking state machine ---
|
|
196
|
+
//
|
|
197
|
+
// gvl_waiting_at tracks the GVL wait state for each profiled thread:
|
|
198
|
+
//
|
|
199
|
+
// ┌───────────────────────────────────┐
|
|
200
|
+
// │ on_gvl_waiting │
|
|
201
|
+
// │ ▼
|
|
202
|
+
// Not Waiting (0) ◀────────────────── Waiting (> 0)
|
|
203
|
+
// ▲ on_gvl_running │
|
|
204
|
+
// │ (below threshold) │ on_gvl_running (above threshold)
|
|
205
|
+
// │ ▼
|
|
206
|
+
// └─────────────────────────── Sample Pending (< 0)
|
|
207
|
+
// sample / sample_after_gvl_running
|
|
208
|
+
//
|
|
209
|
+
// Not Waiting (0): thread is running or not waiting for the GVL
|
|
210
|
+
// Waiting (> 0): monotonic wall time (ns) when the thread started waiting
|
|
211
|
+
// Sample Pending (< 0): negated timestamp; the wait ended and a sample is pending
|
|
212
|
+
//
|
|
213
|
+
// The field is accessed under the GVL for most functions EXCEPT on_gvl_waiting() which writes to it without the GVL.
|
|
214
|
+
// So we need to pack the above state in a single long to ensure atomicity.
|
|
215
|
+
long gvl_waiting_at;
|
|
216
|
+
|
|
217
|
+
// Per-thread "state + version" word, updated on every GVL state transition. The encoding is:
|
|
218
|
+
// - low bit: current state (1 = currently suspended, 0 = currently running)
|
|
219
|
+
// - bits 1+: monotonic event counter (incremented on every RESUMED)
|
|
220
|
+
// The hooks set the state bit explicitly rather than relying on parity, so the encoding stays
|
|
221
|
+
// correct even when events are not paired properly (as in tests).
|
|
222
|
+
//
|
|
223
|
+
// Note that SUSPENDED can happen multiple times in a row on Ruby 3.2,
|
|
224
|
+
// see https://github.com/DataDog/dd-trace-rb/pull/5777#discussion_r3388560254,
|
|
225
|
+
// the encoding is designed to naturally not change the field in such a case.
|
|
226
|
+
uint64_t gvl_state_change_count;
|
|
227
|
+
// Snapshot of the thread's gvl_state_change_count at the moment we last sampled it.
|
|
228
|
+
// Equality with this snapshot means no GVL transition since the last sample.
|
|
229
|
+
uint64_t gvl_state_change_count_at_previous_sample;
|
|
230
|
+
// True when the previous per-tick sample was skipped by the SUSPENDED-skip optimization, so the
|
|
231
|
+
// flush-before-serialize pass knows it needs to report this thread.
|
|
232
|
+
// As a result, we will accumulate all wall & CPU time as a single batch per reporting period,
|
|
233
|
+
// but this is deemed worth it for this optimization. In any case we don't know exactly
|
|
234
|
+
// at what time a thread was doing CPU work (unless it's on CPU 100% of the time).
|
|
235
|
+
bool was_skipped_at_last_sample;
|
|
236
|
+
|
|
189
237
|
struct {
|
|
190
238
|
// Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
|
|
191
239
|
// Outside of this window, they will be INVALID_TIME.
|
|
192
240
|
long cpu_time_at_start_ns;
|
|
193
241
|
long wall_time_at_start_ns;
|
|
194
242
|
} gc_tracking;
|
|
195
|
-
}
|
|
243
|
+
};
|
|
196
244
|
|
|
197
245
|
// Used to correlate profiles with traces
|
|
198
246
|
typedef struct {
|
|
@@ -210,27 +258,26 @@ typedef struct {
|
|
|
210
258
|
|
|
211
259
|
static void thread_context_collector_typed_data_mark(void *state_ptr);
|
|
212
260
|
static void thread_context_collector_typed_data_free(void *state_ptr);
|
|
213
|
-
static
|
|
214
|
-
static
|
|
261
|
+
static void per_thread_context_typed_data_mark(void *ctx_ptr);
|
|
262
|
+
static void per_thread_context_typed_data_free(void *ctx_ptr);
|
|
215
263
|
static VALUE _native_new(VALUE klass);
|
|
216
264
|
static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
|
|
217
|
-
static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE
|
|
265
|
+
static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE allow_exception);
|
|
218
266
|
static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
|
|
219
267
|
static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
|
|
220
268
|
static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE allow_exception);
|
|
221
269
|
static void update_metrics_and_sample(
|
|
222
270
|
thread_context_collector_state *state,
|
|
223
271
|
VALUE thread_being_sampled,
|
|
224
|
-
VALUE stack_from_thread,
|
|
225
272
|
per_thread_context *thread_context,
|
|
226
273
|
sampling_buffer* sampling_buffer,
|
|
227
274
|
long current_cpu_time_ns,
|
|
228
|
-
long current_monotonic_wall_time_ns
|
|
275
|
+
long current_monotonic_wall_time_ns,
|
|
276
|
+
bool force_sample
|
|
229
277
|
);
|
|
230
278
|
static void trigger_sample_for_thread(
|
|
231
279
|
thread_context_collector_state *state,
|
|
232
|
-
VALUE
|
|
233
|
-
VALUE stack_from_thread,
|
|
280
|
+
VALUE thread_being_sampled,
|
|
234
281
|
per_thread_context *thread_context,
|
|
235
282
|
sampling_buffer* sampling_buffer,
|
|
236
283
|
sample_values values,
|
|
@@ -242,16 +289,11 @@ static void trigger_sample_for_thread(
|
|
|
242
289
|
);
|
|
243
290
|
static VALUE _native_thread_list(VALUE self);
|
|
244
291
|
static per_thread_context *get_or_create_context_for(VALUE thread, thread_context_collector_state *state);
|
|
245
|
-
static per_thread_context *get_context_for(VALUE thread, thread_context_collector_state *state);
|
|
246
292
|
static void initialize_context(VALUE thread, per_thread_context *thread_context, thread_context_collector_state *state);
|
|
247
|
-
static void free_context(per_thread_context* thread_context);
|
|
248
293
|
static VALUE _native_inspect(VALUE self, VALUE collector_instance);
|
|
249
|
-
static VALUE
|
|
250
|
-
static
|
|
251
|
-
static VALUE stats_as_ruby_hash(thread_context_collector_state *state);
|
|
294
|
+
static VALUE per_thread_context_to_ruby_hash(per_thread_context *thread_context);
|
|
295
|
+
static VALUE stats_to_ruby_hash(thread_context_collector_state *state, VALUE hash);
|
|
252
296
|
static VALUE gc_tracking_as_ruby_hash(thread_context_collector_state *state);
|
|
253
|
-
static void remove_context_for_dead_threads(thread_context_collector_state *state);
|
|
254
|
-
static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
|
|
255
297
|
static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
|
|
256
298
|
static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time);
|
|
257
299
|
static long cpu_time_now_ns(per_thread_context *thread_context);
|
|
@@ -283,7 +325,6 @@ static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self
|
|
|
283
325
|
static bool handle_gvl_waiting(
|
|
284
326
|
thread_context_collector_state *state,
|
|
285
327
|
VALUE thread_being_sampled,
|
|
286
|
-
VALUE stack_from_thread,
|
|
287
328
|
per_thread_context *thread_context,
|
|
288
329
|
sampling_buffer* sampling_buffer,
|
|
289
330
|
long current_cpu_time_ns
|
|
@@ -291,9 +332,10 @@ static bool handle_gvl_waiting(
|
|
|
291
332
|
#ifndef NO_GVL_INSTRUMENTATION
|
|
292
333
|
static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread);
|
|
293
334
|
static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread);
|
|
294
|
-
static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread);
|
|
335
|
+
static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread);
|
|
336
|
+
static VALUE _native_on_gvl_released(DDTRACE_UNUSED VALUE self, VALUE thread);
|
|
295
337
|
static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE allow_exception);
|
|
296
|
-
static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE
|
|
338
|
+
static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE thread, VALUE delta_ns);
|
|
297
339
|
#endif
|
|
298
340
|
static void otel_without_ddtrace_trace_identifiers_for(
|
|
299
341
|
thread_context_collector_state *state,
|
|
@@ -305,7 +347,9 @@ static otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key)
|
|
|
305
347
|
static uint64_t otel_span_id_to_uint(VALUE otel_span_id);
|
|
306
348
|
static VALUE safely_lookup_hash_without_going_into_ruby_code(VALUE hash, VALUE key);
|
|
307
349
|
static VALUE _native_system_epoch_time_now_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
|
|
308
|
-
static VALUE _native_prepare_sample_inside_signal_handler(DDTRACE_UNUSED VALUE self
|
|
350
|
+
static VALUE _native_prepare_sample_inside_signal_handler(DDTRACE_UNUSED VALUE self);
|
|
351
|
+
static VALUE _native_clear_per_thread_context_for(DDTRACE_UNUSED VALUE self, VALUE thread);
|
|
352
|
+
static bool skip_sample(thread_context_collector_state *state, per_thread_context *thread_context, bool is_gvl_waiting_state, bool force_sample_suspended);
|
|
309
353
|
|
|
310
354
|
void collectors_thread_context_init(VALUE profiling_module) {
|
|
311
355
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
|
@@ -326,7 +370,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
|
326
370
|
rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, -1);
|
|
327
371
|
rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1);
|
|
328
372
|
rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
|
|
329
|
-
rb_define_singleton_method(testing_module, "_native_sample", _native_sample,
|
|
373
|
+
rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2);
|
|
330
374
|
rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 3);
|
|
331
375
|
rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
|
|
332
376
|
rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
|
|
@@ -338,13 +382,15 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
|
338
382
|
rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
|
|
339
383
|
rb_define_singleton_method(testing_module, "_native_sample_skipped_allocation_samples", _native_sample_skipped_allocation_samples, 2);
|
|
340
384
|
rb_define_singleton_method(testing_module, "_native_system_epoch_time_now_ns", _native_system_epoch_time_now_ns, 1);
|
|
341
|
-
rb_define_singleton_method(testing_module, "_native_prepare_sample_inside_signal_handler", _native_prepare_sample_inside_signal_handler,
|
|
385
|
+
rb_define_singleton_method(testing_module, "_native_prepare_sample_inside_signal_handler", _native_prepare_sample_inside_signal_handler, 0);
|
|
386
|
+
rb_define_singleton_method(testing_module, "_native_clear_per_thread_context_for", _native_clear_per_thread_context_for, 1);
|
|
342
387
|
#ifndef NO_GVL_INSTRUMENTATION
|
|
343
388
|
rb_define_singleton_method(testing_module, "_native_on_gvl_waiting", _native_on_gvl_waiting, 1);
|
|
344
389
|
rb_define_singleton_method(testing_module, "_native_gvl_waiting_at_for", _native_gvl_waiting_at_for, 1);
|
|
345
|
-
rb_define_singleton_method(testing_module, "_native_on_gvl_running", _native_on_gvl_running,
|
|
390
|
+
rb_define_singleton_method(testing_module, "_native_on_gvl_running", _native_on_gvl_running, 2);
|
|
391
|
+
rb_define_singleton_method(testing_module, "_native_on_gvl_released", _native_on_gvl_released, 1);
|
|
346
392
|
rb_define_singleton_method(testing_module, "_native_sample_after_gvl_running", _native_sample_after_gvl_running, 3);
|
|
347
|
-
rb_define_singleton_method(testing_module, "_native_apply_delta_to_cpu_time_at_previous_sample_ns", _native_apply_delta_to_cpu_time_at_previous_sample_ns,
|
|
393
|
+
rb_define_singleton_method(testing_module, "_native_apply_delta_to_cpu_time_at_previous_sample_ns", _native_apply_delta_to_cpu_time_at_previous_sample_ns, 2);
|
|
348
394
|
#endif
|
|
349
395
|
|
|
350
396
|
at_active_span_id = rb_intern_const("@active_span");
|
|
@@ -366,10 +412,10 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
|
366
412
|
otel_context_storage_id = rb_intern_const("__opentelemetry_context_storage__");
|
|
367
413
|
otel_fiber_context_storage_id = rb_intern_const("@opentelemetry_context");
|
|
368
414
|
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
415
|
+
dd_per_thread_context_id = rb_intern_const("dd_per_thread_context");
|
|
416
|
+
|
|
417
|
+
// This will raise if Ruby already ran out of thread-local keys
|
|
418
|
+
per_thread_context_tls_init();
|
|
373
419
|
|
|
374
420
|
gc_profiling_init();
|
|
375
421
|
}
|
|
@@ -394,10 +440,10 @@ static void thread_context_collector_typed_data_mark(void *state_ptr) {
|
|
|
394
440
|
|
|
395
441
|
// Update this when modifying state struct
|
|
396
442
|
rb_gc_mark(state->recorder_instance);
|
|
397
|
-
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
|
|
398
443
|
rb_gc_mark(state->thread_list_buffer);
|
|
399
444
|
rb_gc_mark(state->main_thread);
|
|
400
445
|
rb_gc_mark(state->otel_current_span_key);
|
|
446
|
+
rb_gc_mark(state->overhead_filename);
|
|
401
447
|
}
|
|
402
448
|
|
|
403
449
|
static void thread_context_collector_typed_data_free(void *state_ptr) {
|
|
@@ -407,36 +453,47 @@ static void thread_context_collector_typed_data_free(void *state_ptr) {
|
|
|
407
453
|
|
|
408
454
|
// Important: Remember that we're only guaranteed to see here what's been set in _native_new, aka
|
|
409
455
|
// pointers that have been set NULL there may still be NULL here.
|
|
410
|
-
if (state->locations != NULL) ruby_xfree(state->locations);
|
|
411
|
-
|
|
412
|
-
// Free each entry in the map
|
|
413
|
-
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
|
|
414
|
-
// ...and then the map
|
|
415
|
-
st_free_table(state->hash_map_per_thread_context);
|
|
456
|
+
if (state->locations.ptr != NULL) ruby_xfree(state->locations.ptr);
|
|
416
457
|
|
|
417
458
|
st_free_table(state->native_filenames_cache);
|
|
418
459
|
|
|
419
460
|
ruby_xfree(state);
|
|
420
461
|
}
|
|
421
462
|
|
|
422
|
-
//
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
463
|
+
// per_thread_context is wrapped in a TypedData Ruby object stored as an ivar on each Ruby Thread.
|
|
464
|
+
// This gives us automatic GC marking (for sampling_buffer iseq VALUEs) and lifecycle management.
|
|
465
|
+
static const rb_data_type_t per_thread_context_typed_data = {
|
|
466
|
+
.wrap_struct_name = "Datadog::Profiling::PerThreadContext",
|
|
467
|
+
.function = {
|
|
468
|
+
.dmark = per_thread_context_typed_data_mark,
|
|
469
|
+
.dfree = per_thread_context_typed_data_free,
|
|
470
|
+
.dsize = NULL,
|
|
471
|
+
},
|
|
472
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
473
|
+
};
|
|
426
474
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
475
|
+
static void per_thread_context_typed_data_mark(void *ctx_ptr) {
|
|
476
|
+
per_thread_context *ctx = (per_thread_context *) ctx_ptr;
|
|
477
|
+
if (sampling_buffer_needs_marking(&ctx->sampling_buffer)) {
|
|
478
|
+
sampling_buffer_mark(&ctx->sampling_buffer);
|
|
430
479
|
}
|
|
480
|
+
}
|
|
431
481
|
|
|
432
|
-
|
|
482
|
+
static void per_thread_context_typed_data_free(void *ctx_ptr) {
|
|
483
|
+
per_thread_context *ctx = (per_thread_context *) ctx_ptr;
|
|
484
|
+
sampling_buffer_free(&ctx->sampling_buffer);
|
|
485
|
+
free(ctx);
|
|
433
486
|
}
|
|
434
487
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
488
|
+
static VALUE _native_clear_per_thread_context_for(DDTRACE_UNUSED VALUE self, VALUE thread) {
|
|
489
|
+
per_thread_context *ctx = get_per_thread_context(thread);
|
|
490
|
+
if (ctx != NULL) {
|
|
491
|
+
set_per_thread_context(thread, NULL);
|
|
492
|
+
if (!RB_OBJ_FROZEN(thread)) {
|
|
493
|
+
rb_ivar_set(thread, dd_per_thread_context_id, Qnil);
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
return Qnil;
|
|
440
497
|
}
|
|
441
498
|
|
|
442
499
|
static VALUE _native_new(VALUE klass) {
|
|
@@ -446,11 +503,8 @@ static VALUE _native_new(VALUE klass) {
|
|
|
446
503
|
// being leaked.
|
|
447
504
|
|
|
448
505
|
// Update this when modifying state struct
|
|
449
|
-
state->locations = NULL;
|
|
450
|
-
state->
|
|
451
|
-
state->hash_map_per_thread_context =
|
|
452
|
-
// "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s.
|
|
453
|
-
st_init_numtable();
|
|
506
|
+
state->locations.ptr = NULL;
|
|
507
|
+
state->locations.len = 0;
|
|
454
508
|
state->recorder_instance = Qnil;
|
|
455
509
|
state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;
|
|
456
510
|
VALUE thread_list_buffer = rb_ary_new();
|
|
@@ -492,22 +546,25 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
|
|
|
492
546
|
VALUE waiting_for_gvl_threshold_ns = rb_hash_fetch(options, ID2SYM(rb_intern("waiting_for_gvl_threshold_ns")));
|
|
493
547
|
VALUE otel_context_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("otel_context_enabled")));
|
|
494
548
|
VALUE native_filenames_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("native_filenames_enabled")));
|
|
549
|
+
VALUE overhead_filename = rb_hash_fetch(options, ID2SYM(rb_intern("overhead_filename")));
|
|
495
550
|
|
|
496
551
|
ENFORCE_TYPE(max_frames, T_FIXNUM);
|
|
497
552
|
ENFORCE_BOOLEAN(endpoint_collection_enabled);
|
|
498
553
|
ENFORCE_TYPE(waiting_for_gvl_threshold_ns, T_FIXNUM);
|
|
499
554
|
ENFORCE_BOOLEAN(native_filenames_enabled);
|
|
555
|
+
ENFORCE_TYPE(overhead_filename, T_STRING);
|
|
500
556
|
|
|
501
557
|
thread_context_collector_state *state;
|
|
502
558
|
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
503
559
|
|
|
504
560
|
// Update this when modifying state struct
|
|
505
|
-
state->
|
|
506
|
-
state->locations = ruby_xcalloc(state->
|
|
507
|
-
// hash_map_per_thread_context is already initialized, nothing to do here
|
|
561
|
+
state->locations.len = sampling_buffer_check_max_frames(NUM2INT(max_frames));
|
|
562
|
+
state->locations.ptr = ruby_xcalloc(state->locations.len, sizeof(ddog_prof_Location));
|
|
508
563
|
state->recorder_instance = enforce_recorder_instance(recorder_instance);
|
|
564
|
+
recorder_install_on_serialize(recorder_instance, self_instance);
|
|
509
565
|
state->endpoint_collection_enabled = (endpoint_collection_enabled == Qtrue);
|
|
510
566
|
state->native_filenames_enabled = (native_filenames_enabled == Qtrue);
|
|
567
|
+
state->overhead_filename = overhead_filename;
|
|
511
568
|
if (otel_context_enabled == Qfalse || otel_context_enabled == Qnil) {
|
|
512
569
|
state->otel_context_enabled = OTEL_CONTEXT_ENABLED_FALSE;
|
|
513
570
|
} else if (otel_context_enabled == ID2SYM(rb_intern("only"))) {
|
|
@@ -518,7 +575,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
|
|
|
518
575
|
raise_error(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled);
|
|
519
576
|
}
|
|
520
577
|
|
|
521
|
-
|
|
578
|
+
state->waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns);
|
|
522
579
|
|
|
523
580
|
if (RTEST(tracer_context_key)) {
|
|
524
581
|
ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
|
|
@@ -533,14 +590,12 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
|
|
|
533
590
|
|
|
534
591
|
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
|
535
592
|
// It SHOULD NOT be used for other purposes.
|
|
536
|
-
static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE
|
|
593
|
+
static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE allow_exception) {
|
|
537
594
|
ENFORCE_BOOLEAN(allow_exception);
|
|
538
595
|
|
|
539
|
-
if (!is_thread_alive(profiler_overhead_stack_thread)) raise_error(rb_eArgError, "Unexpected: profiler_overhead_stack_thread is not alive");
|
|
540
|
-
|
|
541
596
|
if (allow_exception == Qfalse) debug_enter_unsafe_context();
|
|
542
597
|
|
|
543
|
-
thread_context_collector_sample(collector_instance, monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
|
|
598
|
+
thread_context_collector_sample(collector_instance, monotonic_wall_time_now_ns(RAISE_ON_FAILURE));
|
|
544
599
|
|
|
545
600
|
if (allow_exception == Qfalse) debug_leave_unsafe_context();
|
|
546
601
|
|
|
@@ -583,6 +638,53 @@ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_
|
|
|
583
638
|
return Qtrue;
|
|
584
639
|
}
|
|
585
640
|
|
|
641
|
+
// Record profiler sampling overhead as a placeholder stack
|
|
642
|
+
static void record_sampling_overhead(thread_context_collector_state *state, per_thread_context *current_thread_context) {
|
|
643
|
+
long wall_time_after_sampling = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
644
|
+
long cpu_time_after_sampling = cpu_time_now_ns(current_thread_context);
|
|
645
|
+
|
|
646
|
+
long overhead_cpu_time_ns = update_time_since_previous_sample(
|
|
647
|
+
&current_thread_context->cpu_time_at_previous_sample_ns,
|
|
648
|
+
cpu_time_after_sampling,
|
|
649
|
+
current_thread_context->gc_tracking.cpu_time_at_start_ns,
|
|
650
|
+
IS_CPU_TIME);
|
|
651
|
+
|
|
652
|
+
long overhead_wall_time_ns = update_time_since_previous_sample(
|
|
653
|
+
&current_thread_context->wall_time_at_previous_sample_ns,
|
|
654
|
+
wall_time_after_sampling,
|
|
655
|
+
INVALID_TIME,
|
|
656
|
+
IS_WALL_TIME);
|
|
657
|
+
|
|
658
|
+
ddog_prof_Label overhead_labels[] = {
|
|
659
|
+
{.key = DDOG_CHARSLICE_C("thread id"), .str = DDOG_CHARSLICE_C("0"), .num = 0},
|
|
660
|
+
{.key = DDOG_CHARSLICE_C("thread name"), .str = DDOG_CHARSLICE_C("Datadog::Profiling::Sampling"), .num = 0},
|
|
661
|
+
{.key = DDOG_CHARSLICE_C("state"), .str = DDOG_CHARSLICE_C("had cpu"), .num = 0},
|
|
662
|
+
{.key = DDOG_CHARSLICE_C("profiler overhead"), .num = 1},
|
|
663
|
+
};
|
|
664
|
+
|
|
665
|
+
int64_t end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, wall_time_after_sampling);
|
|
666
|
+
|
|
667
|
+
ddog_prof_Location overhead_location = {
|
|
668
|
+
.mapping = {.filename = DDOG_CHARSLICE_C(""), .build_id = DDOG_CHARSLICE_C(""), .build_id_id = {}},
|
|
669
|
+
.function = {
|
|
670
|
+
.name = DDOG_CHARSLICE_C("sampling"),
|
|
671
|
+
.filename = char_slice_from_ruby_string(state->overhead_filename),
|
|
672
|
+
},
|
|
673
|
+
.line = 0,
|
|
674
|
+
};
|
|
675
|
+
|
|
676
|
+
record_sample(
|
|
677
|
+
state->recorder_instance,
|
|
678
|
+
(ddog_prof_Slice_Location) {.ptr = &overhead_location, .len = 1},
|
|
679
|
+
(sample_values) {.cpu_time_ns = overhead_cpu_time_ns, .cpu_or_wall_samples = 1, .wall_time_ns = overhead_wall_time_ns},
|
|
680
|
+
(sample_labels) {
|
|
681
|
+
.labels = (ddog_prof_Slice_Label) {.ptr = overhead_labels, .len = sizeof(overhead_labels) / sizeof(overhead_labels[0])},
|
|
682
|
+
.state_label = NULL,
|
|
683
|
+
.end_timestamp_ns = end_timestamp_ns,
|
|
684
|
+
}
|
|
685
|
+
);
|
|
686
|
+
}
|
|
687
|
+
|
|
586
688
|
// This function gets called from the Collectors::CpuAndWallTimeWorker to trigger the actual sampling.
|
|
587
689
|
//
|
|
588
690
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
|
@@ -591,9 +693,7 @@ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_
|
|
|
591
693
|
// Assumption 4: This function IS NOT called in a reentrant way.
|
|
592
694
|
// Assumption 5: This function is called from the main Ractor (if Ruby has support for Ractors).
|
|
593
695
|
//
|
|
594
|
-
|
|
595
|
-
// (belonging to ddtrace), so that the overhead is visible in the profile rather than blamed on user code.
|
|
596
|
-
void thread_context_collector_sample(VALUE self_instance, long current_monotonic_wall_time_ns, VALUE profiler_overhead_stack_thread) {
|
|
696
|
+
void thread_context_collector_sample(VALUE self_instance, long current_monotonic_wall_time_ns) {
|
|
597
697
|
thread_context_collector_state *state;
|
|
598
698
|
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
599
699
|
|
|
@@ -608,66 +708,50 @@ void thread_context_collector_sample(VALUE self_instance, long current_monotonic
|
|
|
608
708
|
VALUE thread = RARRAY_AREF(threads, i);
|
|
609
709
|
per_thread_context *thread_context = get_or_create_context_for(thread, state);
|
|
610
710
|
|
|
611
|
-
// We account for cpu-time for the current thread in a different way
|
|
612
|
-
// blaming the time the profiler took on whatever
|
|
613
|
-
|
|
711
|
+
// We account for cpu-time for the current thread in a different way: we use the cpu-time at sampling start,
|
|
712
|
+
// to avoid blaming the time the profiler took on whatever is currently running on the thread,
|
|
713
|
+
// and instead we report that time the profiler took as sampling overhead below.
|
|
714
|
+
long current_cpu_time_ns = (thread == current_thread) ? cpu_time_at_sample_start_for_current_thread : cpu_time_now_ns(thread_context);
|
|
614
715
|
|
|
615
716
|
update_metrics_and_sample(
|
|
616
717
|
state,
|
|
617
|
-
|
|
618
|
-
/* stack_from_thread: */ thread,
|
|
718
|
+
thread,
|
|
619
719
|
thread_context,
|
|
620
720
|
&thread_context->sampling_buffer,
|
|
621
721
|
current_cpu_time_ns,
|
|
622
|
-
current_monotonic_wall_time_ns
|
|
623
|
-
|
|
722
|
+
current_monotonic_wall_time_ns,
|
|
723
|
+
false);
|
|
624
724
|
}
|
|
625
725
|
|
|
626
|
-
state->sample_count++;
|
|
627
|
-
|
|
628
|
-
// TODO: This seems somewhat overkill and inefficient to do often; right now we just do it every few samples
|
|
629
|
-
// but there's probably a better way to do this if we actually track when threads finish
|
|
630
|
-
if (state->sample_count % 100 == 0) remove_context_for_dead_threads(state);
|
|
631
|
-
|
|
632
|
-
update_metrics_and_sample(
|
|
633
|
-
state,
|
|
634
|
-
/* thread_being_sampled: */ current_thread,
|
|
635
|
-
/* stack_from_thread: */ profiler_overhead_stack_thread,
|
|
636
|
-
current_thread_context,
|
|
637
|
-
// Here we use the overhead thread's sampling buffer so as to not invalidate the cache in the buffer of the thread being sampled
|
|
638
|
-
&get_or_create_context_for(profiler_overhead_stack_thread, state)->sampling_buffer,
|
|
639
|
-
cpu_time_now_ns(current_thread_context),
|
|
640
|
-
monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
|
|
641
|
-
);
|
|
726
|
+
state->stats.sample_count++;
|
|
727
|
+
record_sampling_overhead(state, current_thread_context);
|
|
642
728
|
}
|
|
643
729
|
|
|
644
730
|
static void update_metrics_and_sample(
|
|
645
731
|
thread_context_collector_state *state,
|
|
646
732
|
VALUE thread_being_sampled,
|
|
647
|
-
VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
|
|
648
733
|
per_thread_context *thread_context,
|
|
649
734
|
sampling_buffer* sampling_buffer,
|
|
650
735
|
long current_cpu_time_ns,
|
|
651
|
-
long current_monotonic_wall_time_ns
|
|
736
|
+
long current_monotonic_wall_time_ns,
|
|
737
|
+
bool force_sample_suspended
|
|
652
738
|
) {
|
|
653
739
|
bool is_gvl_waiting_state =
|
|
654
|
-
handle_gvl_waiting(state, thread_being_sampled,
|
|
740
|
+
handle_gvl_waiting(state, thread_being_sampled, thread_context, sampling_buffer, current_cpu_time_ns);
|
|
741
|
+
|
|
742
|
+
if (skip_sample(state, thread_context, is_gvl_waiting_state, force_sample_suspended)) return;
|
|
655
743
|
|
|
656
744
|
// Don't assign/update cpu during "Waiting for GVL"
|
|
657
745
|
long cpu_time_elapsed_ns = is_gvl_waiting_state ? 0 : update_time_since_previous_sample(
|
|
658
746
|
&thread_context->cpu_time_at_previous_sample_ns,
|
|
659
747
|
current_cpu_time_ns,
|
|
660
748
|
thread_context->gc_tracking.cpu_time_at_start_ns,
|
|
661
|
-
|
|
749
|
+
IS_CPU_TIME
|
|
662
750
|
);
|
|
663
751
|
|
|
664
752
|
long wall_time_elapsed_ns = update_time_since_previous_sample(
|
|
665
753
|
&thread_context->wall_time_at_previous_sample_ns,
|
|
666
754
|
current_monotonic_wall_time_ns,
|
|
667
|
-
// We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
|
|
668
|
-
// accounting to change during GC.
|
|
669
|
-
// E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing
|
|
670
|
-
// GC or not.
|
|
671
755
|
INVALID_TIME,
|
|
672
756
|
IS_WALL_TIME
|
|
673
757
|
);
|
|
@@ -675,7 +759,7 @@ static void update_metrics_and_sample(
|
|
|
675
759
|
// A thread enters "Waiting for GVL", well, as the name implies, without the GVL.
|
|
676
760
|
//
|
|
677
761
|
// As a consequence, it's possible that a thread enters "Waiting for GVL" in parallel with the current thread working
|
|
678
|
-
// on sampling, and thus for the
|
|
762
|
+
// on sampling, and thus for the `current_monotonic_wall_time_ns` (which is recorded at the start of sampling)
|
|
679
763
|
// to be < the time at which we started Waiting for GVL.
|
|
680
764
|
//
|
|
681
765
|
// All together, this means that when `handle_gvl_waiting` creates an extra sample (see comments on that function for
|
|
@@ -690,7 +774,6 @@ static void update_metrics_and_sample(
|
|
|
690
774
|
trigger_sample_for_thread(
|
|
691
775
|
state,
|
|
692
776
|
thread_being_sampled,
|
|
693
|
-
stack_from_thread,
|
|
694
777
|
thread_context,
|
|
695
778
|
sampling_buffer,
|
|
696
779
|
(sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
|
|
@@ -702,6 +785,32 @@ static void update_metrics_and_sample(
|
|
|
702
785
|
);
|
|
703
786
|
}
|
|
704
787
|
|
|
788
|
+
static bool skip_sample(thread_context_collector_state *state, per_thread_context *thread_context, bool is_gvl_waiting_state, bool force_sample_suspended) {
|
|
789
|
+
// Racy read but harmless, can only cause an extra sample
|
|
790
|
+
uint64_t gvl_state_change_count = thread_context->gvl_state_change_count;
|
|
791
|
+
|
|
792
|
+
// Skip this per-tick sample entirely when the thread does not have the GVL and did not acquire
|
|
793
|
+
// it since the previous sample: its Ruby-level stack has not changed. The skipped wall-time will
|
|
794
|
+
// be picked up either by an extra sample when the thread acquires the GVL, or by
|
|
795
|
+
// the on-serialize flush in the stack recorder (using was_skipped_at_last_sample).
|
|
796
|
+
// The check is gated by `!is_gvl_waiting_state` so the existing "Waiting for GVL" machinery
|
|
797
|
+
// in handle_gvl_waiting (situation 1 extra sample, situation 2 regular sample) keeps running.
|
|
798
|
+
// TODO: we could probably also skip while "Waiting for GVL"
|
|
799
|
+
if (!is_gvl_waiting_state &&
|
|
800
|
+
!force_sample_suspended &&
|
|
801
|
+
(gvl_state_change_count & GVL_SUSPENDED) &&
|
|
802
|
+
gvl_state_change_count == thread_context->gvl_state_change_count_at_previous_sample) {
|
|
803
|
+
state->stats.inactive_thread_samples_skipped++;
|
|
804
|
+
thread_context->was_skipped_at_last_sample = true;
|
|
805
|
+
return true; // Do NOT update wall_time_at_previous_sample_ns or cpu_time_at_previous_sample_ns
|
|
806
|
+
} else {
|
|
807
|
+
// We are going to sample, update the state accordingly:
|
|
808
|
+
thread_context->gvl_state_change_count_at_previous_sample = gvl_state_change_count;
|
|
809
|
+
thread_context->was_skipped_at_last_sample = false;
|
|
810
|
+
return false;
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
|
|
705
814
|
// This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
|
|
706
815
|
// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
|
|
707
816
|
// create an event including the cpu/wall time spent in garbage collector work.
|
|
@@ -715,10 +824,10 @@ static void update_metrics_and_sample(
|
|
|
715
824
|
void thread_context_collector_on_gc_start(VALUE self_instance) {
|
|
716
825
|
thread_context_collector_state *state;
|
|
717
826
|
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
|
|
718
|
-
// This should never fail
|
|
827
|
+
// This should never fail when the above check passes
|
|
719
828
|
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
720
829
|
|
|
721
|
-
per_thread_context *thread_context =
|
|
830
|
+
per_thread_context *thread_context = get_per_thread_context(rb_thread_current());
|
|
722
831
|
|
|
723
832
|
// If there was no previously-existing context for this thread, we won't allocate one (see safety). For now we just drop
|
|
724
833
|
// the GC sample, under the assumption that "a thread that is so new that we never sampled it even once before it triggers
|
|
@@ -748,10 +857,10 @@ __attribute__((warn_unused_result))
|
|
|
748
857
|
bool thread_context_collector_on_gc_finish(VALUE self_instance) {
|
|
749
858
|
thread_context_collector_state *state;
|
|
750
859
|
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
|
|
751
|
-
// This should never fail
|
|
860
|
+
// This should never fail when the above check passes
|
|
752
861
|
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
753
862
|
|
|
754
|
-
per_thread_context *thread_context =
|
|
863
|
+
per_thread_context *thread_context = get_per_thread_context(rb_thread_current());
|
|
755
864
|
|
|
756
865
|
// If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
|
|
757
866
|
// how often this happens -- see on_gc_start.
|
|
@@ -871,8 +980,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
|
|
|
871
980
|
|
|
872
981
|
static void trigger_sample_for_thread(
|
|
873
982
|
thread_context_collector_state *state,
|
|
874
|
-
VALUE
|
|
875
|
-
VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
|
|
983
|
+
VALUE thread_being_sampled,
|
|
876
984
|
per_thread_context *thread_context,
|
|
877
985
|
sampling_buffer* sampling_buffer,
|
|
878
986
|
sample_values values,
|
|
@@ -888,7 +996,6 @@ static void trigger_sample_for_thread(
|
|
|
888
996
|
int max_label_count =
|
|
889
997
|
1 + // thread id
|
|
890
998
|
1 + // thread name
|
|
891
|
-
1 + // profiler overhead
|
|
892
999
|
2 + // ruby vm type and allocation class
|
|
893
1000
|
1 + // state (only set for cpu/wall-time samples)
|
|
894
1001
|
2; // local root span id and span id
|
|
@@ -900,13 +1007,13 @@ static void trigger_sample_for_thread(
|
|
|
900
1007
|
.str = thread_context->thread_id_char_slice
|
|
901
1008
|
};
|
|
902
1009
|
|
|
903
|
-
VALUE thread_name = thread_name_for(
|
|
1010
|
+
VALUE thread_name = thread_name_for(thread_being_sampled);
|
|
904
1011
|
if (thread_name != Qnil) {
|
|
905
1012
|
labels[label_pos++] = (ddog_prof_Label) {
|
|
906
1013
|
.key = DDOG_CHARSLICE_C("thread name"),
|
|
907
1014
|
.str = char_slice_from_ruby_string(thread_name)
|
|
908
1015
|
};
|
|
909
|
-
} else if (
|
|
1016
|
+
} else if (thread_being_sampled == state->main_thread) { // Threads are often not named, but we can have a nice fallback for this special thread
|
|
910
1017
|
ddog_CharSlice main_thread_name = DDOG_CHARSLICE_C("main");
|
|
911
1018
|
labels[label_pos++] = (ddog_prof_Label) {
|
|
912
1019
|
.key = DDOG_CHARSLICE_C("thread name"),
|
|
@@ -922,11 +1029,11 @@ static void trigger_sample_for_thread(
|
|
|
922
1029
|
}
|
|
923
1030
|
|
|
924
1031
|
trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil};
|
|
925
|
-
trace_identifiers_for(state,
|
|
1032
|
+
trace_identifiers_for(state, thread_being_sampled, &trace_identifiers_result, is_safe_to_allocate_objects);
|
|
926
1033
|
|
|
927
1034
|
if (!trace_identifiers_result.valid && state->otel_context_enabled != OTEL_CONTEXT_ENABLED_FALSE) {
|
|
928
1035
|
// If we couldn't get something with ddtrace, let's see if we can get some trace identifiers from opentelemetry directly
|
|
929
|
-
otel_without_ddtrace_trace_identifiers_for(state,
|
|
1036
|
+
otel_without_ddtrace_trace_identifiers_for(state, thread_being_sampled, &trace_identifiers_result, is_safe_to_allocate_objects);
|
|
930
1037
|
}
|
|
931
1038
|
|
|
932
1039
|
if (trace_identifiers_result.valid) {
|
|
@@ -951,13 +1058,6 @@ static void trigger_sample_for_thread(
|
|
|
951
1058
|
}
|
|
952
1059
|
}
|
|
953
1060
|
|
|
954
|
-
if (thread != stack_from_thread) {
|
|
955
|
-
labels[label_pos++] = (ddog_prof_Label) {
|
|
956
|
-
.key = DDOG_CHARSLICE_C("profiler overhead"),
|
|
957
|
-
.num = 1
|
|
958
|
-
};
|
|
959
|
-
}
|
|
960
|
-
|
|
961
1061
|
if (ruby_vm_type != NULL) {
|
|
962
1062
|
labels[label_pos++] = (ddog_prof_Label) {
|
|
963
1063
|
.key = DDOG_CHARSLICE_C("ruby vm type"),
|
|
@@ -1003,8 +1103,9 @@ static void trigger_sample_for_thread(
|
|
|
1003
1103
|
}
|
|
1004
1104
|
|
|
1005
1105
|
sample_thread(
|
|
1006
|
-
|
|
1106
|
+
thread_being_sampled,
|
|
1007
1107
|
sampling_buffer,
|
|
1108
|
+
state->locations,
|
|
1008
1109
|
state->recorder_instance,
|
|
1009
1110
|
values,
|
|
1010
1111
|
(sample_labels) {
|
|
@@ -1032,29 +1133,22 @@ static VALUE _native_thread_list(DDTRACE_UNUSED VALUE _self) {
|
|
|
1032
1133
|
return result;
|
|
1033
1134
|
}
|
|
1034
1135
|
|
|
1136
|
+
// This allocates a Ruby object and therefore needs the GVL and is not safe to call from RUBY_INTERNAL_EVENT_* hooks.
|
|
1035
1137
|
static per_thread_context *get_or_create_context_for(VALUE thread, thread_context_collector_state *state) {
|
|
1036
|
-
per_thread_context*
|
|
1037
|
-
|
|
1138
|
+
per_thread_context *thread_context = get_per_thread_context(thread);
|
|
1139
|
+
if (thread_context != NULL) return thread_context;
|
|
1038
1140
|
|
|
1039
|
-
if (
|
|
1040
|
-
|
|
1041
|
-
} else {
|
|
1042
|
-
thread_context = calloc(1, sizeof(per_thread_context)); // See "note on calloc vs ruby_xcalloc use" in heap_recorder.c
|
|
1043
|
-
initialize_context(thread, thread_context, state);
|
|
1044
|
-
st_insert(state->hash_map_per_thread_context, (st_data_t) thread, (st_data_t) thread_context);
|
|
1141
|
+
if (RB_OBJ_FROZEN(thread)) {
|
|
1142
|
+
raise_error(rb_eFrozenError, "Cannot setup profiler state for Thread %"PRIsVALUE" because it is frozen. Please avoid freezing Thread instances and/or report the issue to dd-trace-rb", thread);
|
|
1045
1143
|
}
|
|
1046
1144
|
|
|
1047
|
-
|
|
1048
|
-
|
|
1145
|
+
thread_context = calloc(1, sizeof(per_thread_context)); // See "note on calloc vs ruby_xcalloc use" in heap_recorder.c
|
|
1146
|
+
initialize_context(thread, thread_context, state);
|
|
1049
1147
|
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
st_data_t value_context = 0;
|
|
1053
|
-
|
|
1054
|
-
if (st_lookup(state->hash_map_per_thread_context, (st_data_t) thread, &value_context)) {
|
|
1055
|
-
thread_context = (per_thread_context*) value_context;
|
|
1056
|
-
}
|
|
1148
|
+
VALUE wrapper = TypedData_Wrap_Struct(rb_cObject, &per_thread_context_typed_data, thread_context);
|
|
1149
|
+
rb_ivar_set(thread, dd_per_thread_context_id, wrapper);
|
|
1057
1150
|
|
|
1151
|
+
set_per_thread_context(thread, thread_context);
|
|
1058
1152
|
return thread_context;
|
|
1059
1153
|
}
|
|
1060
1154
|
|
|
@@ -1080,7 +1174,7 @@ static bool is_logging_gem_monkey_patch(VALUE invoke_file_location) {
|
|
|
1080
1174
|
}
|
|
1081
1175
|
|
|
1082
1176
|
static void initialize_context(VALUE thread, per_thread_context *thread_context, thread_context_collector_state *state) {
|
|
1083
|
-
sampling_buffer_initialize(&thread_context->sampling_buffer, state->
|
|
1177
|
+
sampling_buffer_initialize(&thread_context->sampling_buffer, state->locations.len);
|
|
1084
1178
|
|
|
1085
1179
|
snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%"PRIu64" (%lu)", native_thread_id_for(thread), (unsigned long) thread_id_for(thread));
|
|
1086
1180
|
thread_context->thread_id_char_slice = (ddog_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};
|
|
@@ -1121,24 +1215,8 @@ static void initialize_context(VALUE thread, per_thread_context *thread_context,
|
|
|
1121
1215
|
thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
|
|
1122
1216
|
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
|
|
1123
1217
|
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
// kind of synchronization (e.g. by threads without the GVL).
|
|
1127
|
-
//
|
|
1128
|
-
// We set this marker here for two purposes:
|
|
1129
|
-
// * To make sure there's no stale data from a previous execution of the profiler.
|
|
1130
|
-
// * To mark threads that are actually being profiled
|
|
1131
|
-
//
|
|
1132
|
-
// (Setting this is potentially a race, but what we want is to avoid _stale_ data, so
|
|
1133
|
-
// if this gets set concurrently with context initialization, then such a value will belong
|
|
1134
|
-
// to the current profiler instance, so that's OK)
|
|
1135
|
-
gvl_profiling_state_thread_object_set(thread, GVL_WAITING_ENABLED_EMPTY);
|
|
1136
|
-
#endif
|
|
1137
|
-
}
|
|
1138
|
-
|
|
1139
|
-
static void free_context(per_thread_context* thread_context) {
|
|
1140
|
-
sampling_buffer_free(&thread_context->sampling_buffer);
|
|
1141
|
-
free(thread_context); // See "note on calloc vs ruby_xcalloc use" in heap_recorder.c
|
|
1218
|
+
thread_context->gvl_waiting_at = 0;
|
|
1219
|
+
thread_context->gvl_state_change_count = 0;
|
|
1142
1220
|
}
|
|
1143
1221
|
|
|
1144
1222
|
static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
|
@@ -1148,13 +1226,11 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
|
|
|
1148
1226
|
VALUE result = rb_str_new2(" (native state)");
|
|
1149
1227
|
|
|
1150
1228
|
// Update this when modifying state struct
|
|
1151
|
-
rb_str_concat(result, rb_sprintf(" max_frames=%d", state->
|
|
1152
|
-
rb_str_concat(result, rb_sprintf(" hash_map_per_thread_context=%"PRIsVALUE, per_thread_context_st_table_as_ruby_hash(state)));
|
|
1229
|
+
rb_str_concat(result, rb_sprintf(" max_frames=%d", state->locations.len));
|
|
1153
1230
|
rb_str_concat(result, rb_sprintf(" recorder_instance=%"PRIsVALUE, state->recorder_instance));
|
|
1154
1231
|
VALUE tracer_context_key = state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY ? Qnil : ID2SYM(state->tracer_context_key);
|
|
1155
1232
|
rb_str_concat(result, rb_sprintf(" tracer_context_key=%+"PRIsVALUE, tracer_context_key));
|
|
1156
|
-
rb_str_concat(result, rb_sprintf("
|
|
1157
|
-
rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state)));
|
|
1233
|
+
rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_to_ruby_hash(state, rb_hash_new())));
|
|
1158
1234
|
rb_str_concat(result, rb_sprintf(" endpoint_collection_enabled=%"PRIsVALUE, state->endpoint_collection_enabled ? Qtrue : Qfalse));
|
|
1159
1235
|
rb_str_concat(result, rb_sprintf(" native_filenames_enabled=%"PRIsVALUE, state->native_filenames_enabled ? Qtrue : Qfalse));
|
|
1160
1236
|
// Note: `st_table_size()` is available from Ruby 3.2+ but not before
|
|
@@ -1168,23 +1244,13 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
|
|
|
1168
1244
|
rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
|
|
1169
1245
|
rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
|
|
1170
1246
|
rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
|
|
1171
|
-
rb_str_concat(result, rb_sprintf("
|
|
1172
|
-
|
|
1173
|
-
return result;
|
|
1174
|
-
}
|
|
1247
|
+
rb_str_concat(result, rb_sprintf(" waiting_for_gvl_threshold_ns=%u", state->waiting_for_gvl_threshold_ns));
|
|
1175
1248
|
|
|
1176
|
-
static VALUE per_thread_context_st_table_as_ruby_hash(thread_context_collector_state *state) {
|
|
1177
|
-
VALUE result = rb_hash_new();
|
|
1178
|
-
st_foreach(state->hash_map_per_thread_context, per_thread_context_as_ruby_hash, result);
|
|
1179
1249
|
return result;
|
|
1180
1250
|
}
|
|
1181
1251
|
|
|
1182
|
-
static
|
|
1183
|
-
VALUE thread = (VALUE) key_thread;
|
|
1184
|
-
per_thread_context *thread_context = (per_thread_context*) value_context;
|
|
1185
|
-
VALUE result = (VALUE) result_hash;
|
|
1252
|
+
static VALUE per_thread_context_to_ruby_hash(per_thread_context *thread_context) {
|
|
1186
1253
|
VALUE context_as_hash = rb_hash_new();
|
|
1187
|
-
rb_hash_aset(result, thread, context_as_hash);
|
|
1188
1254
|
|
|
1189
1255
|
VALUE arguments[] = {
|
|
1190
1256
|
ID2SYM(rb_intern("thread_id")), /* => */ rb_str_new2(thread_context->thread_id),
|
|
@@ -1201,24 +1267,26 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
|
|
|
1201
1267
|
ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
|
|
1202
1268
|
ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
|
|
1203
1269
|
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1270
|
+
ID2SYM(rb_intern("gvl_waiting_at")), /* => */ LONG2NUM(thread_context->gvl_waiting_at),
|
|
1271
|
+
ID2SYM(rb_intern("gvl_state_change_count")), /* => */ ULL2NUM(thread_context->gvl_state_change_count),
|
|
1272
|
+
ID2SYM(rb_intern("gvl_state_change_count_at_previous_sample")), /* => */ ULL2NUM(thread_context->gvl_state_change_count_at_previous_sample),
|
|
1273
|
+
ID2SYM(rb_intern("was_skipped_at_last_sample")), /* => */ thread_context->was_skipped_at_last_sample ? Qtrue : Qfalse,
|
|
1207
1274
|
};
|
|
1208
1275
|
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);
|
|
1209
1276
|
|
|
1210
|
-
return
|
|
1277
|
+
return context_as_hash;
|
|
1211
1278
|
}
|
|
1212
1279
|
|
|
1213
|
-
static VALUE
|
|
1280
|
+
static VALUE stats_to_ruby_hash(thread_context_collector_state *state, VALUE hash) {
|
|
1214
1281
|
// Update this when modifying state struct (stats inner struct)
|
|
1215
|
-
VALUE stats_as_hash = rb_hash_new();
|
|
1216
1282
|
VALUE arguments[] = {
|
|
1283
|
+
ID2SYM(rb_intern("sample_count")), /* => */ UINT2NUM(state->stats.sample_count),
|
|
1217
1284
|
ID2SYM(rb_intern("gc_samples")), /* => */ UINT2NUM(state->stats.gc_samples),
|
|
1218
1285
|
ID2SYM(rb_intern("gc_samples_missed_due_to_missing_context")), /* => */ UINT2NUM(state->stats.gc_samples_missed_due_to_missing_context),
|
|
1286
|
+
ID2SYM(rb_intern("inactive_thread_samples_skipped")), /* => */ UINT2NUM(state->stats.inactive_thread_samples_skipped),
|
|
1219
1287
|
};
|
|
1220
|
-
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(
|
|
1221
|
-
return
|
|
1288
|
+
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(hash, arguments[i], arguments[i+1]);
|
|
1289
|
+
return hash;
|
|
1222
1290
|
}
|
|
1223
1291
|
|
|
1224
1292
|
static VALUE gc_tracking_as_ruby_hash(thread_context_collector_state *state) {
|
|
@@ -1234,36 +1302,35 @@ static VALUE gc_tracking_as_ruby_hash(thread_context_collector_state *state) {
|
|
|
1234
1302
|
return result;
|
|
1235
1303
|
}
|
|
1236
1304
|
|
|
1237
|
-
static void remove_context_for_dead_threads(thread_context_collector_state *state) {
|
|
1238
|
-
st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
|
|
1239
|
-
}
|
|
1240
|
-
|
|
1241
|
-
static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, DDTRACE_UNUSED st_data_t _argument) {
|
|
1242
|
-
VALUE thread = (VALUE) key_thread;
|
|
1243
|
-
per_thread_context* thread_context = (per_thread_context*) value_context;
|
|
1244
|
-
|
|
1245
|
-
if (is_thread_alive(thread)) return ST_CONTINUE;
|
|
1246
|
-
|
|
1247
|
-
free_context(thread_context);
|
|
1248
|
-
return ST_DELETE;
|
|
1249
|
-
}
|
|
1250
|
-
|
|
1251
1305
|
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
|
1252
1306
|
// It SHOULD NOT be used for other purposes.
|
|
1253
1307
|
//
|
|
1254
|
-
// Returns the whole contents of the per_thread_context structs being tracked.
|
|
1308
|
+
// Returns the whole contents of the per_thread_context structs being tracked, by iterating all live threads.
|
|
1255
1309
|
static VALUE _native_per_thread_context(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
|
1256
1310
|
thread_context_collector_state *state;
|
|
1257
1311
|
TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1258
1312
|
|
|
1259
|
-
|
|
1313
|
+
VALUE result = rb_hash_new();
|
|
1314
|
+
VALUE threads = thread_list(state);
|
|
1315
|
+
const long thread_count = RARRAY_LEN(threads);
|
|
1316
|
+
for (long i = 0; i < thread_count; i++) {
|
|
1317
|
+
VALUE thread = RARRAY_AREF(threads, i);
|
|
1318
|
+
per_thread_context *thread_context = get_per_thread_context(thread);
|
|
1319
|
+
if (thread_context != NULL) {
|
|
1320
|
+
rb_hash_aset(result, thread, per_thread_context_to_ruby_hash(thread_context));
|
|
1321
|
+
}
|
|
1322
|
+
}
|
|
1323
|
+
return result;
|
|
1260
1324
|
}
|
|
1261
1325
|
|
|
1326
|
+
// gc_start_time_ns should only be passed if IS_CPU_TIME
|
|
1262
1327
|
static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time) {
|
|
1263
1328
|
// If we didn't have a time for the previous sample, we use the current one
|
|
1264
1329
|
if (*time_at_previous_sample_ns == INVALID_TIME) *time_at_previous_sample_ns = current_time_ns;
|
|
1265
1330
|
|
|
1266
|
-
|
|
1331
|
+
// We don't want wall-time accounting to change during GC.
|
|
1332
|
+
// E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing GC or not.
|
|
1333
|
+
bool is_thread_doing_gc = !is_wall_time && gc_start_time_ns != INVALID_TIME;
|
|
1267
1334
|
long elapsed_time_ns = -1;
|
|
1268
1335
|
|
|
1269
1336
|
if (is_thread_doing_gc) {
|
|
@@ -1350,7 +1417,7 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance)
|
|
|
1350
1417
|
thread_context_collector_state *state;
|
|
1351
1418
|
TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1352
1419
|
|
|
1353
|
-
return
|
|
1420
|
+
return stats_to_ruby_hash(state, rb_hash_new());
|
|
1354
1421
|
}
|
|
1355
1422
|
|
|
1356
1423
|
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
|
@@ -1445,17 +1512,18 @@ static bool should_collect_resource(VALUE root_span) {
|
|
|
1445
1512
|
//
|
|
1446
1513
|
// Assumption: This method gets called BEFORE restarting profiling -- e.g. there are no components attempting to
|
|
1447
1514
|
// trigger samples at the same time.
|
|
1515
|
+
//
|
|
1516
|
+
// Note that tests call this method directly in the same process without forking,
|
|
1517
|
+
// and in such a case non-current Threads keep running.
|
|
1448
1518
|
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
|
|
1449
1519
|
thread_context_collector_state *state;
|
|
1450
1520
|
TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1451
1521
|
|
|
1452
|
-
// Release all context memory before clearing the existing context
|
|
1453
|
-
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
|
|
1454
|
-
|
|
1455
|
-
st_clear(state->hash_map_per_thread_context);
|
|
1456
|
-
|
|
1457
1522
|
state->stats = (struct stats) {}; // Resets all stats back to zero
|
|
1458
1523
|
|
|
1524
|
+
// Clear any leftover state from parent process in the current thread; all other threads are assumed dead
|
|
1525
|
+
_native_clear_per_thread_context_for(Qnil, rb_thread_current());
|
|
1526
|
+
|
|
1459
1527
|
rb_funcall(state->recorder_instance, rb_intern("reset_after_fork"), 0);
|
|
1460
1528
|
|
|
1461
1529
|
return Qtrue;
|
|
@@ -1475,14 +1543,9 @@ static VALUE thread_list(thread_context_collector_state *state) {
|
|
|
1475
1543
|
// expected to be called from a signal handler and to be async-signal-safe.
|
|
1476
1544
|
//
|
|
1477
1545
|
// Also, no allocation (Ruby or malloc) can happen.
|
|
1478
|
-
bool thread_context_collector_prepare_sample_inside_signal_handler(
|
|
1479
|
-
thread_context_collector_state *state;
|
|
1480
|
-
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
|
|
1481
|
-
// This should never fail if the above check passes
|
|
1482
|
-
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1483
|
-
|
|
1546
|
+
bool thread_context_collector_prepare_sample_inside_signal_handler(void) {
|
|
1484
1547
|
VALUE current_thread = rb_thread_current();
|
|
1485
|
-
per_thread_context *thread_context =
|
|
1548
|
+
per_thread_context *thread_context = get_per_thread_context(current_thread);
|
|
1486
1549
|
if (thread_context == NULL) return false;
|
|
1487
1550
|
|
|
1488
1551
|
return prepare_sample_thread(current_thread, &thread_context->sampling_buffer);
|
|
@@ -1493,12 +1556,12 @@ bool thread_context_collector_prepare_sample_inside_signal_handler(VALUE self_in
|
|
|
1493
1556
|
//
|
|
1494
1557
|
// Returns true if the after_allocation needs to be called (to do work that can't be done from inside the
|
|
1495
1558
|
// tracepoint, such as allocate new objects), and false if it doesn't
|
|
1496
|
-
|
|
1559
|
+
//
|
|
1560
|
+
// The callers must ensure thread_context is non-NULL.
|
|
1561
|
+
bool thread_context_collector_sample_allocation(VALUE self_instance, per_thread_context *thread_context, unsigned int sample_weight, VALUE new_object) {
|
|
1497
1562
|
thread_context_collector_state *state;
|
|
1498
1563
|
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1499
1564
|
|
|
1500
|
-
VALUE current_thread = rb_thread_current();
|
|
1501
|
-
|
|
1502
1565
|
enum ruby_value_type type = rb_type(new_object);
|
|
1503
1566
|
|
|
1504
1567
|
// Tag samples with the VM internal types
|
|
@@ -1565,12 +1628,11 @@ bool thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
|
|
|
1565
1628
|
|
|
1566
1629
|
bool needs_after_allocation = track_object(state->recorder_instance, new_object, sample_weight, class_name);
|
|
1567
1630
|
|
|
1568
|
-
|
|
1631
|
+
VALUE current_thread = rb_thread_current();
|
|
1569
1632
|
|
|
1570
1633
|
trigger_sample_for_thread(
|
|
1571
1634
|
state,
|
|
1572
|
-
|
|
1573
|
-
/* stack_from_thread: */ current_thread,
|
|
1635
|
+
current_thread,
|
|
1574
1636
|
thread_context,
|
|
1575
1637
|
&thread_context->sampling_buffer,
|
|
1576
1638
|
(sample_values) {.alloc_samples = sample_weight, .alloc_samples_unscaled = 1, .heap_sample = true},
|
|
@@ -1587,9 +1649,13 @@ bool thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
|
|
|
1587
1649
|
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
|
1588
1650
|
// It SHOULD NOT be used for other purposes.
|
|
1589
1651
|
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object) {
|
|
1652
|
+
thread_context_collector_state *state;
|
|
1653
|
+
TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1654
|
+
per_thread_context *thread_context = get_or_create_context_for(rb_thread_current(), state);
|
|
1655
|
+
|
|
1590
1656
|
debug_enter_unsafe_context();
|
|
1591
1657
|
|
|
1592
|
-
bool needs_after_allocation = thread_context_collector_sample_allocation(collector_instance, NUM2UINT(sample_weight), new_object);
|
|
1658
|
+
bool needs_after_allocation = thread_context_collector_sample_allocation(collector_instance, thread_context, NUM2UINT(sample_weight), new_object);
|
|
1593
1659
|
|
|
1594
1660
|
debug_leave_unsafe_context();
|
|
1595
1661
|
|
|
@@ -1598,7 +1664,10 @@ static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collecto
|
|
|
1598
1664
|
return needs_after_allocation ? Qtrue : Qfalse;
|
|
1599
1665
|
}
|
|
1600
1666
|
|
|
1601
|
-
static VALUE new_empty_thread_inner(DDTRACE_UNUSED void *arg) {
|
|
1667
|
+
static VALUE new_empty_thread_inner(DDTRACE_UNUSED void *arg) {
|
|
1668
|
+
rb_thread_sleep(INT_MAX);
|
|
1669
|
+
return Qnil;
|
|
1670
|
+
}
|
|
1602
1671
|
|
|
1603
1672
|
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
|
|
1604
1673
|
// It SHOULD NOT be used for other purposes.
|
|
@@ -1891,36 +1960,76 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
1891
1960
|
((uint64_t)span_bytes[7]);
|
|
1892
1961
|
}
|
|
1893
1962
|
|
|
1963
|
+
void thread_context_collector_stats(VALUE self_instance, VALUE stats_hash) {
|
|
1964
|
+
thread_context_collector_state *state;
|
|
1965
|
+
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1966
|
+
stats_to_ruby_hash(state, stats_hash);
|
|
1967
|
+
}
|
|
1968
|
+
|
|
1969
|
+
void thread_context_collector_stats_reset_not_thread_safe(VALUE self_instance) {
|
|
1970
|
+
thread_context_collector_state *state;
|
|
1971
|
+
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1972
|
+
state->stats = (struct stats) {};
|
|
1973
|
+
}
|
|
1974
|
+
|
|
1894
1975
|
#ifndef NO_GVL_INSTRUMENTATION
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
// per-thread context directly.
|
|
1899
|
-
//
|
|
1900
|
-
// Instead, we ask Ruby to hold the data we need in Ruby's own special per-thread context area
|
|
1901
|
-
// that's thread-safe and built for this kind of use
|
|
1902
|
-
//
|
|
1903
|
-
// Also, this function can get called on the non-main Ractor. We deal with this by checking if the value in the context
|
|
1904
|
-
// is non-zero, since only `initialize_context` ever sets the value from 0 to non-zero for threads it sees.
|
|
1905
|
-
intptr_t thread_being_profiled = gvl_profiling_state_get(thread);
|
|
1906
|
-
if (!thread_being_profiled) return;
|
|
1976
|
+
void thread_context_collector_on_gvl_released(per_thread_context *thread_context) {
|
|
1977
|
+
thread_context->gvl_state_change_count |= GVL_SUSPENDED;
|
|
1978
|
+
}
|
|
1907
1979
|
|
|
1980
|
+
// Called by the stack recorder at the start of _native_serialize, so that threads whose last
|
|
1981
|
+
// per-tick sample was skipped by the SUSPENDED-skip optimization still get their accumulated
|
|
1982
|
+
// time recorded in this reporting period. Without this, a thread that sleeps across the whole
|
|
1983
|
+
// period would not be reported at all.
|
|
1984
|
+
void thread_context_collector_on_serialize(VALUE self_instance) {
|
|
1985
|
+
thread_context_collector_state *state;
|
|
1986
|
+
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1987
|
+
|
|
1988
|
+
long current_monotonic_wall_time_ns = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
1989
|
+
VALUE threads = thread_list(state);
|
|
1990
|
+
const long thread_count = RARRAY_LEN(threads);
|
|
1991
|
+
|
|
1992
|
+
for (long i = 0; i < thread_count; i++) {
|
|
1993
|
+
VALUE thread = RARRAY_AREF(threads, i);
|
|
1994
|
+
per_thread_context *thread_context = get_per_thread_context(thread);
|
|
1995
|
+
|
|
1996
|
+
if (thread_context != NULL && thread_context->was_skipped_at_last_sample) {
|
|
1997
|
+
long current_cpu_time_ns = cpu_time_now_ns(thread_context);
|
|
1998
|
+
// We need to force_sample_suspended=true otherwise this sample would be skipped too
|
|
1999
|
+
update_metrics_and_sample(
|
|
2000
|
+
state,
|
|
2001
|
+
thread,
|
|
2002
|
+
thread_context,
|
|
2003
|
+
&thread_context->sampling_buffer,
|
|
2004
|
+
current_cpu_time_ns,
|
|
2005
|
+
current_monotonic_wall_time_ns,
|
|
2006
|
+
true);
|
|
2007
|
+
}
|
|
2008
|
+
}
|
|
2009
|
+
}
|
|
2010
|
+
|
|
2011
|
+
void thread_context_collector_on_gvl_waiting(per_thread_context *thread_context) {
|
|
1908
2012
|
long current_monotonic_wall_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
1909
|
-
if (current_monotonic_wall_time_ns <= 0
|
|
2013
|
+
if (current_monotonic_wall_time_ns <= 0) return;
|
|
1910
2014
|
|
|
1911
|
-
|
|
2015
|
+
thread_context->gvl_waiting_at = current_monotonic_wall_time_ns;
|
|
1912
2016
|
}
|
|
1913
2017
|
|
|
1914
|
-
// This function
|
|
2018
|
+
// This function runs on the passed thread and has the GVL because it gets called just after the Ruby thread acquired the GVL
|
|
1915
2019
|
__attribute__((warn_unused_result))
|
|
1916
|
-
on_gvl_running_result
|
|
1917
|
-
|
|
2020
|
+
on_gvl_running_result thread_context_collector_on_gvl_running(VALUE self_instance, VALUE thread, per_thread_context *thread_context) {
|
|
2021
|
+
thread_context_collector_state *state;
|
|
2022
|
+
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
2023
|
+
|
|
2024
|
+
// Bump the event counter and clears the state bit to "running"
|
|
2025
|
+
uint64_t counter_portion = thread_context->gvl_state_change_count >> 1;
|
|
2026
|
+
thread_context->gvl_state_change_count = ((counter_portion + 1) << 1) | GVL_RUNNING;
|
|
1918
2027
|
|
|
1919
|
-
|
|
1920
|
-
|
|
2028
|
+
long gvl_waiting_at = thread_context->gvl_waiting_at;
|
|
2029
|
+
// Thread was not waiting on gvl
|
|
2030
|
+
if (gvl_waiting_at == 0) {
|
|
1921
2031
|
return (on_gvl_running_result) {.action = ON_GVL_RUNNING_UNKNOWN, .waiting_for_gvl_duration_ns = 0};
|
|
1922
2032
|
}
|
|
1923
|
-
|
|
1924
2033
|
// @ivoanjo: I'm not sure if this can happen -- It means we should've sampled already but haven't gotten the chance yet?
|
|
1925
2034
|
if (gvl_waiting_at < 0) {
|
|
1926
2035
|
return (on_gvl_running_result) {.action = ON_GVL_RUNNING_SAMPLE, .waiting_for_gvl_duration_ns = 0};
|
|
@@ -1928,16 +2037,30 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
1928
2037
|
|
|
1929
2038
|
long waiting_for_gvl_duration_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - gvl_waiting_at;
|
|
1930
2039
|
|
|
1931
|
-
bool should_sample = waiting_for_gvl_duration_ns >= waiting_for_gvl_threshold_ns;
|
|
2040
|
+
bool should_sample = waiting_for_gvl_duration_ns >= state->waiting_for_gvl_threshold_ns;
|
|
1932
2041
|
|
|
1933
2042
|
if (should_sample) {
|
|
1934
2043
|
// We flip the gvl_waiting_at to negative to mark that the thread is now running and no longer waiting
|
|
1935
|
-
|
|
2044
|
+
long gvl_waiting_at_is_now_running = -gvl_waiting_at;
|
|
1936
2045
|
|
|
1937
|
-
|
|
2046
|
+
thread_context->gvl_waiting_at = gvl_waiting_at_is_now_running;
|
|
1938
2047
|
} else {
|
|
1939
|
-
|
|
1940
|
-
|
|
2048
|
+
thread_context->gvl_waiting_at = 0;
|
|
2049
|
+
|
|
2050
|
+
// Even though the GVL wait itself was below threshold, if the thread had skipped samples
|
|
2051
|
+
// (was suspended for a long time without the GVL), we still need to force a sample now.
|
|
2052
|
+
// Otherwise, the accumulated idle wall-time would be reported against whatever stack the
|
|
2053
|
+
// thread runs next, misrepresenting the time spent idle.
|
|
2054
|
+
if (thread_context->was_skipped_at_last_sample) {
|
|
2055
|
+
should_sample = true;
|
|
2056
|
+
}
|
|
2057
|
+
}
|
|
2058
|
+
|
|
2059
|
+
if (should_sample) {
|
|
2060
|
+
// We prepare the sample here because the postponed job might be called some time later,
|
|
2061
|
+
// possibly after some Ruby calls which change the Ruby stack,
|
|
2062
|
+
// and we want to attribute the time acquiring or without the GVL to the correct Ruby stack.
|
|
2063
|
+
prepare_sample_thread(thread, &thread_context->sampling_buffer);
|
|
1941
2064
|
}
|
|
1942
2065
|
|
|
1943
2066
|
return (on_gvl_running_result) {
|
|
@@ -1946,11 +2069,6 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
1946
2069
|
};
|
|
1947
2070
|
}
|
|
1948
2071
|
|
|
1949
|
-
__attribute__((warn_unused_result))
|
|
1950
|
-
on_gvl_running_result thread_context_collector_on_gvl_running(gvl_profiling_thread thread) {
|
|
1951
|
-
return thread_context_collector_on_gvl_running_with_threshold(thread, global_waiting_for_gvl_threshold_ns);
|
|
1952
|
-
}
|
|
1953
|
-
|
|
1954
2072
|
// Why does this method need to exist?
|
|
1955
2073
|
//
|
|
1956
2074
|
// You may be surprised to see that if we never call this function (from cpu_and_wall_time_worker), Waiting for GVL
|
|
@@ -1968,7 +2086,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
1968
2086
|
//
|
|
1969
2087
|
// Stack:
|
|
1970
2088
|
// If the thread starts working without the end of the Waiting for GVL sample, then by the time the thread is sampled
|
|
1971
|
-
// via the regular cpu/wall-time samples mechanism, the stack can be
|
|
2089
|
+
// via the regular cpu/wall-time samples mechanism, the stack can be inaccurate (e.g. does not correctly pinpoint
|
|
1972
2090
|
// where the waiting happened).
|
|
1973
2091
|
//
|
|
1974
2092
|
// Arguably, the last sample after Waiting for GVL ended (when gvl_waiting_at < 0) should always come from this method
|
|
@@ -1977,15 +2095,19 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
1977
2095
|
//
|
|
1978
2096
|
// ---
|
|
1979
2097
|
//
|
|
2098
|
+
// Always called with the GVL, either from a postponed_job or from tests.
|
|
2099
|
+
//
|
|
1980
2100
|
// NOTE: In normal use, current_thread is expected to be == rb_thread_current(); the `current_thread` parameter only
|
|
1981
2101
|
// exists to enable testing.
|
|
1982
2102
|
VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance, VALUE current_thread, long current_monotonic_wall_time_ns) {
|
|
1983
2103
|
thread_context_collector_state *state;
|
|
1984
2104
|
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
1985
2105
|
|
|
1986
|
-
|
|
2106
|
+
per_thread_context *thread_context = get_or_create_context_for(current_thread, state);
|
|
1987
2107
|
|
|
1988
|
-
|
|
2108
|
+
long gvl_waiting_at = thread_context->gvl_waiting_at;
|
|
2109
|
+
|
|
2110
|
+
if (gvl_waiting_at >= 0 && !thread_context->was_skipped_at_last_sample) {
|
|
1989
2111
|
// @ivoanjo: I'm not sure if this can ever happen. This means that we're not on the same thread
|
|
1990
2112
|
// that ran `thread_context_collector_on_gvl_running` and made the decision to sample OR a regular sample was
|
|
1991
2113
|
// triggered ahead of us.
|
|
@@ -1993,9 +2115,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
1993
2115
|
return Qfalse;
|
|
1994
2116
|
}
|
|
1995
2117
|
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
// We don't actually account for cpu-time during Waiting for GVL. BUT, we may chose to push an
|
|
2118
|
+
// We don't actually account for cpu-time during Waiting for GVL. BUT, we may choose to push an
|
|
1999
2119
|
// extra sample to represent the period prior to Waiting for GVL. To support that, we retrieve the current
|
|
2000
2120
|
// cpu-time of the thread and let `update_metrics_and_sample` decide what to do with it.
|
|
2001
2121
|
long cpu_time_for_thread = cpu_time_now_ns(thread_context);
|
|
@@ -2004,13 +2124,12 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2004
2124
|
|
|
2005
2125
|
update_metrics_and_sample(
|
|
2006
2126
|
state,
|
|
2007
|
-
|
|
2008
|
-
/* stack_from_thread: */ current_thread,
|
|
2127
|
+
current_thread,
|
|
2009
2128
|
thread_context,
|
|
2010
2129
|
&thread_context->sampling_buffer,
|
|
2011
2130
|
cpu_time_for_thread,
|
|
2012
|
-
current_monotonic_wall_time_ns
|
|
2013
|
-
|
|
2131
|
+
current_monotonic_wall_time_ns,
|
|
2132
|
+
false);
|
|
2014
2133
|
|
|
2015
2134
|
return Qtrue;
|
|
2016
2135
|
}
|
|
@@ -2021,14 +2140,13 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2021
2140
|
static bool handle_gvl_waiting(
|
|
2022
2141
|
thread_context_collector_state *state,
|
|
2023
2142
|
VALUE thread_being_sampled,
|
|
2024
|
-
VALUE stack_from_thread,
|
|
2025
2143
|
per_thread_context *thread_context,
|
|
2026
2144
|
sampling_buffer* sampling_buffer,
|
|
2027
2145
|
long current_cpu_time_ns
|
|
2028
2146
|
) {
|
|
2029
|
-
|
|
2147
|
+
long gvl_waiting_at = thread_context->gvl_waiting_at;
|
|
2030
2148
|
|
|
2031
|
-
bool is_gvl_waiting_state = gvl_waiting_at != 0
|
|
2149
|
+
bool is_gvl_waiting_state = gvl_waiting_at != 0;
|
|
2032
2150
|
|
|
2033
2151
|
if (!is_gvl_waiting_state) return false;
|
|
2034
2152
|
|
|
@@ -2041,17 +2159,17 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2041
2159
|
// ...──────────────┬───────────────────...
|
|
2042
2160
|
// Other state │ Waiting for GVL
|
|
2043
2161
|
// ...──────────────┴───────────────────...
|
|
2044
|
-
// ▲
|
|
2162
|
+
// ▲ ▲
|
|
2045
2163
|
// └─ Previous sample └─ Regular sample (caller)
|
|
2046
2164
|
//
|
|
2047
2165
|
// In this case, we'll want to push two samples: a) one for the current time (handled by the caller), b) an extra sample
|
|
2048
|
-
// to represent the remaining cpu/wall time before the "Waiting for GVL" started:
|
|
2166
|
+
// to represent the remaining cpu/wall time before the "Waiting for GVL" started (for timeline purposes):
|
|
2049
2167
|
//
|
|
2050
2168
|
// time ─────►
|
|
2051
2169
|
// ...──────────────┬───────────────────...
|
|
2052
2170
|
// Other state │ Waiting for GVL
|
|
2053
2171
|
// ...──────────────┴───────────────────...
|
|
2054
|
-
// ▲
|
|
2172
|
+
// ▲ ▲ ▲
|
|
2055
2173
|
// └─ Prev... └─ Extra sample └─ Regular sample (caller)
|
|
2056
2174
|
//
|
|
2057
2175
|
// 2. The current sample is the n-th one after we entered the "Waiting for GVL" state
|
|
@@ -2061,7 +2179,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2061
2179
|
// ...──────────────┬───────────────────────────────────────────────...
|
|
2062
2180
|
// Other state │ Waiting for GVL
|
|
2063
2181
|
// ...──────────────┴───────────────────────────────────────────────...
|
|
2064
|
-
// ▲
|
|
2182
|
+
// ▲ ▲ ▲
|
|
2065
2183
|
// └─ Previous sample └─ Previous sample └─ Regular sample (caller)
|
|
2066
2184
|
//
|
|
2067
2185
|
// In this case, we just report back to the caller that the thread is in the "Waiting for GVL" state.
|
|
@@ -2076,7 +2194,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2076
2194
|
|
|
2077
2195
|
if (gvl_waiting_at < 0) {
|
|
2078
2196
|
// Negative means the waiting for GVL just ended, so we clear the state, so next samples no longer represent waiting
|
|
2079
|
-
|
|
2197
|
+
thread_context->gvl_waiting_at = 0;
|
|
2080
2198
|
}
|
|
2081
2199
|
|
|
2082
2200
|
long gvl_waiting_started_wall_time_ns = labs(gvl_waiting_at);
|
|
@@ -2086,7 +2204,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2086
2204
|
&thread_context->cpu_time_at_previous_sample_ns,
|
|
2087
2205
|
current_cpu_time_ns,
|
|
2088
2206
|
thread_context->gc_tracking.cpu_time_at_start_ns,
|
|
2089
|
-
|
|
2207
|
+
IS_CPU_TIME
|
|
2090
2208
|
);
|
|
2091
2209
|
|
|
2092
2210
|
long duration_until_start_of_gvl_waiting_ns = update_time_since_previous_sample(
|
|
@@ -2100,7 +2218,6 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2100
2218
|
trigger_sample_for_thread(
|
|
2101
2219
|
state,
|
|
2102
2220
|
thread_being_sampled,
|
|
2103
|
-
stack_from_thread,
|
|
2104
2221
|
thread_context,
|
|
2105
2222
|
sampling_buffer,
|
|
2106
2223
|
(sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = duration_until_start_of_gvl_waiting_ns},
|
|
@@ -2120,7 +2237,8 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2120
2237
|
|
|
2121
2238
|
debug_enter_unsafe_context();
|
|
2122
2239
|
|
|
2123
|
-
|
|
2240
|
+
per_thread_context *thread_context = get_per_thread_context(thread);
|
|
2241
|
+
if (thread_context) thread_context_collector_on_gvl_waiting(thread_context);
|
|
2124
2242
|
|
|
2125
2243
|
debug_leave_unsafe_context();
|
|
2126
2244
|
|
|
@@ -2132,30 +2250,48 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2132
2250
|
|
|
2133
2251
|
debug_enter_unsafe_context();
|
|
2134
2252
|
|
|
2135
|
-
|
|
2253
|
+
per_thread_context *thread_context = get_per_thread_context(thread);
|
|
2254
|
+
VALUE result = thread_context ? LONG2NUM(thread_context->gvl_waiting_at) : Qnil;
|
|
2136
2255
|
|
|
2137
2256
|
debug_leave_unsafe_context();
|
|
2138
2257
|
|
|
2139
|
-
return
|
|
2258
|
+
return result;
|
|
2140
2259
|
}
|
|
2141
2260
|
|
|
2142
|
-
static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread) {
|
|
2261
|
+
static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread) {
|
|
2143
2262
|
ENFORCE_THREAD(thread);
|
|
2144
2263
|
|
|
2145
2264
|
debug_enter_unsafe_context();
|
|
2146
2265
|
|
|
2147
|
-
|
|
2266
|
+
per_thread_context *thread_context = get_per_thread_context(thread);
|
|
2267
|
+
VALUE result;
|
|
2268
|
+
if (thread_context) {
|
|
2269
|
+
result = thread_context_collector_on_gvl_running(collector_instance, thread, thread_context).action == ON_GVL_RUNNING_SAMPLE ? Qtrue : Qfalse;
|
|
2270
|
+
} else {
|
|
2271
|
+
result = Qfalse;
|
|
2272
|
+
}
|
|
2148
2273
|
|
|
2149
2274
|
debug_leave_unsafe_context();
|
|
2150
2275
|
|
|
2151
2276
|
return result;
|
|
2152
2277
|
}
|
|
2153
2278
|
|
|
2154
|
-
static VALUE
|
|
2279
|
+
static VALUE _native_on_gvl_released(DDTRACE_UNUSED VALUE self, VALUE thread) {
|
|
2155
2280
|
ENFORCE_THREAD(thread);
|
|
2156
|
-
ENFORCE_BOOLEAN(allow_exception);
|
|
2157
2281
|
|
|
2282
|
+
debug_enter_unsafe_context();
|
|
2283
|
+
|
|
2284
|
+
per_thread_context *thread_context = get_per_thread_context(thread);
|
|
2285
|
+
if (thread_context) thread_context_collector_on_gvl_released(thread_context);
|
|
2286
|
+
|
|
2287
|
+
debug_leave_unsafe_context();
|
|
2158
2288
|
|
|
2289
|
+
return Qnil;
|
|
2290
|
+
}
|
|
2291
|
+
|
|
2292
|
+
static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE allow_exception) {
|
|
2293
|
+
ENFORCE_THREAD(thread);
|
|
2294
|
+
ENFORCE_BOOLEAN(allow_exception);
|
|
2159
2295
|
|
|
2160
2296
|
if (allow_exception == Qfalse) debug_enter_unsafe_context();
|
|
2161
2297
|
|
|
@@ -2170,13 +2306,10 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2170
2306
|
return result;
|
|
2171
2307
|
}
|
|
2172
2308
|
|
|
2173
|
-
static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE
|
|
2309
|
+
static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE thread, VALUE delta_ns) {
|
|
2174
2310
|
ENFORCE_THREAD(thread);
|
|
2175
2311
|
|
|
2176
|
-
|
|
2177
|
-
TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
|
|
2178
|
-
|
|
2179
|
-
per_thread_context *thread_context = get_context_for(thread, state);
|
|
2312
|
+
per_thread_context *thread_context = get_per_thread_context(thread);
|
|
2180
2313
|
if (thread_context == NULL) raise_error(rb_eArgError, "Unexpected: This method cannot be used unless the per-thread context for the thread already exists");
|
|
2181
2314
|
|
|
2182
2315
|
thread_context->cpu_time_at_previous_sample_ns += NUM2LONG(delta_ns);
|
|
@@ -2188,11 +2321,12 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
|
|
|
2188
2321
|
static bool handle_gvl_waiting(
|
|
2189
2322
|
DDTRACE_UNUSED thread_context_collector_state *state,
|
|
2190
2323
|
DDTRACE_UNUSED VALUE thread_being_sampled,
|
|
2191
|
-
DDTRACE_UNUSED VALUE stack_from_thread,
|
|
2192
2324
|
DDTRACE_UNUSED per_thread_context *thread_context,
|
|
2193
2325
|
DDTRACE_UNUSED sampling_buffer* sampling_buffer,
|
|
2194
2326
|
DDTRACE_UNUSED long current_cpu_time_ns
|
|
2195
2327
|
) { return false; }
|
|
2328
|
+
|
|
2329
|
+
void thread_context_collector_on_serialize(DDTRACE_UNUSED VALUE self_instance) { }
|
|
2196
2330
|
#endif // NO_GVL_INSTRUMENTATION
|
|
2197
2331
|
|
|
2198
2332
|
#define MAX_SAFE_LOOKUP_SIZE 16
|
|
@@ -2239,6 +2373,6 @@ static VALUE _native_system_epoch_time_now_ns(DDTRACE_UNUSED VALUE self, VALUE c
|
|
|
2239
2373
|
return LONG2NUM(system_epoch_time_ns);
|
|
2240
2374
|
}
|
|
2241
2375
|
|
|
2242
|
-
static VALUE _native_prepare_sample_inside_signal_handler(DDTRACE_UNUSED VALUE self
|
|
2243
|
-
return thread_context_collector_prepare_sample_inside_signal_handler(
|
|
2376
|
+
static VALUE _native_prepare_sample_inside_signal_handler(DDTRACE_UNUSED VALUE self) {
|
|
2377
|
+
return thread_context_collector_prepare_sample_inside_signal_handler() ? Qtrue : Qfalse;
|
|
2244
2378
|
}
|