ddtrace 1.20.0 → 1.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +115 -1
- data/LICENSE-3rdparty.csv +1 -1
- data/bin/ddprofrb +15 -0
- data/bin/ddtracerb +3 -1
- data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
- data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +238 -61
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.c +145 -72
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.h +17 -5
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +97 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +2 -2
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.c +45 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.h +7 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +4 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +14 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +1 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.c +10 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +2 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +7 -9
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +43 -13
- data/lib/datadog/appsec/event.rb +1 -1
- data/lib/datadog/auto_instrument.rb +3 -0
- data/lib/datadog/core/configuration/components.rb +7 -6
- data/lib/datadog/core/configuration/option.rb +8 -6
- data/lib/datadog/core/configuration/settings.rb +130 -63
- data/lib/datadog/core/configuration.rb +20 -4
- data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
- data/lib/datadog/core/environment/git.rb +25 -0
- data/lib/datadog/core/environment/identity.rb +18 -48
- data/lib/datadog/core/environment/platform.rb +7 -1
- data/lib/datadog/core/git/ext.rb +2 -23
- data/lib/datadog/core/remote/client/capabilities.rb +1 -1
- data/lib/datadog/core/remote/negotiation.rb +2 -2
- data/lib/datadog/core/remote/transport/http/config.rb +1 -1
- data/lib/datadog/core/remote/worker.rb +7 -4
- data/lib/datadog/core/telemetry/client.rb +18 -10
- data/lib/datadog/core/telemetry/emitter.rb +9 -13
- data/lib/datadog/core/telemetry/event.rb +247 -57
- data/lib/datadog/core/telemetry/ext.rb +1 -0
- data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
- data/lib/datadog/core/telemetry/http/ext.rb +4 -1
- data/lib/datadog/core/telemetry/http/transport.rb +9 -4
- data/lib/datadog/core/telemetry/request.rb +59 -0
- data/lib/datadog/core/transport/ext.rb +2 -0
- data/lib/datadog/core/utils/url.rb +25 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +31 -0
- data/lib/datadog/profiling/collectors/info.rb +101 -0
- data/lib/datadog/profiling/component.rb +34 -28
- data/lib/datadog/profiling/exporter.rb +19 -5
- data/lib/datadog/profiling/ext.rb +2 -0
- data/lib/datadog/profiling/flush.rb +6 -3
- data/lib/datadog/profiling/http_transport.rb +5 -1
- data/lib/datadog/profiling/load_native_extension.rb +19 -6
- data/lib/datadog/profiling/native_extension.rb +1 -1
- data/lib/datadog/profiling/tag_builder.rb +5 -0
- data/lib/datadog/profiling/tasks/exec.rb +3 -3
- data/lib/datadog/profiling/tasks/help.rb +3 -3
- data/lib/datadog/profiling.rb +13 -2
- data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
- data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
- data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
- data/lib/datadog/tracing/contrib/configurable.rb +1 -1
- data/lib/datadog/tracing/contrib/extensions.rb +6 -2
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
- data/lib/datadog/tracing/sampling/matcher.rb +23 -3
- data/lib/datadog/tracing/sampling/rule.rb +7 -2
- data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
- data/lib/datadog/tracing/trace_operation.rb +1 -2
- data/lib/datadog/tracing/transport/http.rb +1 -0
- data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
- data/lib/ddtrace/version.rb +1 -1
- metadata +55 -62
- data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
- data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
- data/lib/datadog/core/telemetry/collector.rb +0 -250
- data/lib/datadog/core/telemetry/v1/app_event.rb +0 -59
- data/lib/datadog/core/telemetry/v1/application.rb +0 -92
- data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
- data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
- data/lib/datadog/core/telemetry/v1/host.rb +0 -59
- data/lib/datadog/core/telemetry/v1/install_signature.rb +0 -38
- data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
- data/lib/datadog/core/telemetry/v1/product.rb +0 -36
- data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
- data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
- data/lib/datadog/core/telemetry/v2/request.rb +0 -29
- data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +0 -0
|
@@ -17,6 +17,8 @@
|
|
|
17
17
|
#include "setup_signal_handler.h"
|
|
18
18
|
#include "time_helpers.h"
|
|
19
19
|
|
|
20
|
+
#define ERR_CLOCK_FAIL "failed to get clock time"
|
|
21
|
+
|
|
20
22
|
// Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
|
|
21
23
|
unsigned int MAX_ALLOC_WEIGHT = 65535;
|
|
22
24
|
|
|
@@ -94,6 +96,7 @@ struct cpu_and_wall_time_worker_state {
|
|
|
94
96
|
bool no_signals_workaround_enabled;
|
|
95
97
|
bool dynamic_sampling_rate_enabled;
|
|
96
98
|
bool allocation_profiling_enabled;
|
|
99
|
+
bool skip_idle_samples_for_testing;
|
|
97
100
|
VALUE self_instance;
|
|
98
101
|
VALUE thread_context_collector_instance;
|
|
99
102
|
VALUE idle_sampling_helper_instance;
|
|
@@ -119,6 +122,7 @@ struct cpu_and_wall_time_worker_state {
|
|
|
119
122
|
bool during_sample;
|
|
120
123
|
|
|
121
124
|
struct stats {
|
|
125
|
+
// # Generic stats
|
|
122
126
|
// How many times we tried to trigger a sample
|
|
123
127
|
unsigned int trigger_sample_attempts;
|
|
124
128
|
// How many times we tried to simulate signal delivery
|
|
@@ -129,25 +133,38 @@ struct cpu_and_wall_time_worker_state {
|
|
|
129
133
|
unsigned int signal_handler_enqueued_sample;
|
|
130
134
|
// How many times the signal handler was called from the wrong thread
|
|
131
135
|
unsigned int signal_handler_wrong_thread;
|
|
132
|
-
// How many times we actually
|
|
133
|
-
unsigned int
|
|
134
|
-
// How many times we skipped a sample because of the dynamic sampling rate mechanism
|
|
135
|
-
unsigned int skipped_sample_because_of_dynamic_sampling_rate;
|
|
136
|
+
// How many times we actually tried to interrupt a thread for sampling
|
|
137
|
+
unsigned int interrupt_thread_attempts;
|
|
136
138
|
|
|
137
|
-
// Stats for the results of calling rb_postponed_job_register_one
|
|
138
|
-
|
|
139
|
+
// # Stats for the results of calling rb_postponed_job_register_one
|
|
140
|
+
// The same function was already waiting to be executed
|
|
139
141
|
unsigned int postponed_job_skipped_already_existed;
|
|
140
|
-
|
|
142
|
+
// The function was added to the queue successfully
|
|
141
143
|
unsigned int postponed_job_success;
|
|
142
|
-
|
|
144
|
+
// The queue was full
|
|
143
145
|
unsigned int postponed_job_full;
|
|
144
|
-
|
|
146
|
+
// The function returned an unknown result code
|
|
145
147
|
unsigned int postponed_job_unknown_result;
|
|
146
148
|
|
|
147
|
-
//
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
149
|
+
// # CPU/Walltime sampling stats
|
|
150
|
+
// How many times we actually CPU/wall sampled
|
|
151
|
+
unsigned int cpu_sampled;
|
|
152
|
+
// How many times we skipped a CPU/wall sample because of the dynamic sampling rate mechanism
|
|
153
|
+
unsigned int cpu_skipped;
|
|
154
|
+
// Min/max/total wall-time spent on CPU/wall sampling
|
|
155
|
+
uint64_t cpu_sampling_time_ns_min;
|
|
156
|
+
uint64_t cpu_sampling_time_ns_max;
|
|
157
|
+
uint64_t cpu_sampling_time_ns_total;
|
|
158
|
+
|
|
159
|
+
// # Allocation sampling stats
|
|
160
|
+
// How many times we actually allocation sampled
|
|
161
|
+
uint64_t allocation_sampled;
|
|
162
|
+
// How many times we skipped an allocation sample because of the dynamic sampling rate mechanism
|
|
163
|
+
uint64_t allocation_skipped;
|
|
164
|
+
// Min/max/total wall-time spent on allocation sampling
|
|
165
|
+
uint64_t allocation_sampling_time_ns_min;
|
|
166
|
+
uint64_t allocation_sampling_time_ns_max;
|
|
167
|
+
uint64_t allocation_sampling_time_ns_total;
|
|
151
168
|
// How many times we saw allocations being done inside a sample
|
|
152
169
|
unsigned int allocations_during_sample;
|
|
153
170
|
} stats;
|
|
@@ -163,12 +180,14 @@ static VALUE _native_initialize(
|
|
|
163
180
|
VALUE no_signals_workaround_enabled,
|
|
164
181
|
VALUE dynamic_sampling_rate_enabled,
|
|
165
182
|
VALUE dynamic_sampling_rate_overhead_target_percentage,
|
|
166
|
-
VALUE allocation_profiling_enabled
|
|
183
|
+
VALUE allocation_profiling_enabled,
|
|
184
|
+
VALUE skip_idle_samples_for_testing
|
|
167
185
|
);
|
|
168
186
|
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
|
|
169
187
|
static VALUE _native_sampling_loop(VALUE self, VALUE instance);
|
|
170
188
|
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE worker_thread);
|
|
171
189
|
static VALUE stop(VALUE self_instance, VALUE optional_exception);
|
|
190
|
+
static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception);
|
|
172
191
|
static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
|
|
173
192
|
static void *run_sampling_trigger_loop(void *state_ptr);
|
|
174
193
|
static void interrupt_sampling_trigger_loop(void *state_ptr);
|
|
@@ -191,15 +210,18 @@ static VALUE _native_simulate_sample_from_postponed_job(DDTRACE_UNUSED VALUE sel
|
|
|
191
210
|
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
192
211
|
static VALUE _native_is_sigprof_blocked_in_current_thread(DDTRACE_UNUSED VALUE self);
|
|
193
212
|
static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
213
|
+
static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
194
214
|
void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused);
|
|
195
215
|
static void grab_gvl_and_sample(void);
|
|
196
|
-
static void
|
|
216
|
+
static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state);
|
|
197
217
|
static void sleep_for(uint64_t time_ns);
|
|
198
218
|
static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self);
|
|
199
219
|
static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
|
|
200
220
|
static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state);
|
|
201
221
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
|
|
202
222
|
static VALUE rescued_sample_allocation(VALUE tracepoint_data);
|
|
223
|
+
static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error);
|
|
224
|
+
static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg);
|
|
203
225
|
|
|
204
226
|
// Note on sampler global state safety:
|
|
205
227
|
//
|
|
@@ -212,6 +234,11 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data);
|
|
|
212
234
|
static VALUE active_sampler_instance = Qnil;
|
|
213
235
|
static struct cpu_and_wall_time_worker_state *active_sampler_instance_state = NULL;
|
|
214
236
|
|
|
237
|
+
// See handle_sampling_signal for details on what this does
|
|
238
|
+
#ifdef NO_POSTPONED_TRIGGER
|
|
239
|
+
static void *gc_finalize_deferred_workaround;
|
|
240
|
+
#endif
|
|
241
|
+
|
|
215
242
|
// Used to implement CpuAndWallTimeWorker._native_allocation_count . To be able to use cheap thread-local variables
|
|
216
243
|
// (here with `__thread`, see https://gcc.gnu.org/onlinedocs/gcc/Thread-Local.html), this needs to be global.
|
|
217
244
|
//
|
|
@@ -230,6 +257,8 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
|
230
257
|
if (sample_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
|
|
231
258
|
rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
|
|
232
259
|
}
|
|
260
|
+
#else
|
|
261
|
+
gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround();
|
|
233
262
|
#endif
|
|
234
263
|
|
|
235
264
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
|
@@ -247,13 +276,16 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
|
247
276
|
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
|
|
248
277
|
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
|
|
249
278
|
|
|
250
|
-
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize,
|
|
279
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
|
|
251
280
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
|
|
252
281
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
|
|
253
282
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
|
|
254
283
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats", _native_stats, 1);
|
|
284
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats_reset_not_thread_safe", _native_stats_reset_not_thread_safe, 1);
|
|
255
285
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0);
|
|
286
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_is_running?", _native_is_running, 1);
|
|
256
287
|
rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
|
|
288
|
+
// TODO: Remove `_native_is_running` from `testing_module` once `prof-correctness` has been updated to not need it
|
|
257
289
|
rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
|
|
258
290
|
rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
|
|
259
291
|
rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
|
|
@@ -263,6 +295,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
|
263
295
|
rb_define_singleton_method(testing_module, "_native_simulate_sample_from_postponed_job", _native_simulate_sample_from_postponed_job, 0);
|
|
264
296
|
rb_define_singleton_method(testing_module, "_native_is_sigprof_blocked_in_current_thread", _native_is_sigprof_blocked_in_current_thread, 0);
|
|
265
297
|
rb_define_singleton_method(testing_module, "_native_with_blocked_sigprof", _native_with_blocked_sigprof, 0);
|
|
298
|
+
rb_define_singleton_method(testing_module, "_native_delayed_error", _native_delayed_error, 2);
|
|
266
299
|
}
|
|
267
300
|
|
|
268
301
|
// This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_worker_state
|
|
@@ -288,11 +321,11 @@ static VALUE _native_new(VALUE klass) {
|
|
|
288
321
|
state->no_signals_workaround_enabled = false;
|
|
289
322
|
state->dynamic_sampling_rate_enabled = true;
|
|
290
323
|
state->allocation_profiling_enabled = false;
|
|
324
|
+
state->skip_idle_samples_for_testing = false;
|
|
291
325
|
state->thread_context_collector_instance = Qnil;
|
|
292
326
|
state->idle_sampling_helper_instance = Qnil;
|
|
293
327
|
state->owner_thread = Qnil;
|
|
294
328
|
dynamic_sampling_rate_init(&state->cpu_dynamic_sampling_rate);
|
|
295
|
-
discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation");
|
|
296
329
|
state->gc_tracepoint = Qnil;
|
|
297
330
|
state->object_allocation_tracepoint = Qnil;
|
|
298
331
|
|
|
@@ -302,7 +335,15 @@ static VALUE _native_new(VALUE klass) {
|
|
|
302
335
|
|
|
303
336
|
state->during_sample = false;
|
|
304
337
|
|
|
305
|
-
|
|
338
|
+
reset_stats_not_thread_safe(state);
|
|
339
|
+
|
|
340
|
+
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
341
|
+
if (now == 0) {
|
|
342
|
+
ruby_xfree(state);
|
|
343
|
+
rb_raise(rb_eRuntimeError, ERR_CLOCK_FAIL);
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation", now);
|
|
306
347
|
|
|
307
348
|
return state->self_instance = TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
|
|
308
349
|
}
|
|
@@ -316,13 +357,15 @@ static VALUE _native_initialize(
|
|
|
316
357
|
VALUE no_signals_workaround_enabled,
|
|
317
358
|
VALUE dynamic_sampling_rate_enabled,
|
|
318
359
|
VALUE dynamic_sampling_rate_overhead_target_percentage,
|
|
319
|
-
VALUE allocation_profiling_enabled
|
|
360
|
+
VALUE allocation_profiling_enabled,
|
|
361
|
+
VALUE skip_idle_samples_for_testing
|
|
320
362
|
) {
|
|
321
363
|
ENFORCE_BOOLEAN(gc_profiling_enabled);
|
|
322
364
|
ENFORCE_BOOLEAN(no_signals_workaround_enabled);
|
|
323
365
|
ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
|
|
324
366
|
ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
|
|
325
367
|
ENFORCE_BOOLEAN(allocation_profiling_enabled);
|
|
368
|
+
ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
|
|
326
369
|
|
|
327
370
|
struct cpu_and_wall_time_worker_state *state;
|
|
328
371
|
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
@@ -331,6 +374,7 @@ static VALUE _native_initialize(
|
|
|
331
374
|
state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
|
|
332
375
|
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
|
|
333
376
|
state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
|
|
377
|
+
state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
|
|
334
378
|
|
|
335
379
|
double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
|
|
336
380
|
if (!state->allocation_profiling_enabled) {
|
|
@@ -339,7 +383,8 @@ static VALUE _native_initialize(
|
|
|
339
383
|
// TODO: May be nice to offer customization here? Distribute available "overhead" margin with a bias towards one or the other
|
|
340
384
|
// sampler.
|
|
341
385
|
dynamic_sampling_rate_set_overhead_target_percentage(&state->cpu_dynamic_sampling_rate, total_overhead_target_percentage / 2);
|
|
342
|
-
|
|
386
|
+
long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
387
|
+
discrete_dynamic_sampler_set_overhead_target_percentage(&state->allocation_sampler, total_overhead_target_percentage / 2, now);
|
|
343
388
|
}
|
|
344
389
|
|
|
345
390
|
state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
|
|
@@ -368,6 +413,12 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
368
413
|
struct cpu_and_wall_time_worker_state *state;
|
|
369
414
|
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
370
415
|
|
|
416
|
+
// If we already got a delayed exception registered even before starting, raise before starting
|
|
417
|
+
if (state->failure_exception != Qnil) {
|
|
418
|
+
disable_tracepoints(state);
|
|
419
|
+
rb_exc_raise(state->failure_exception);
|
|
420
|
+
}
|
|
421
|
+
|
|
371
422
|
struct cpu_and_wall_time_worker_state *old_state = active_sampler_instance_state;
|
|
372
423
|
if (old_state != NULL) {
|
|
373
424
|
if (is_thread_alive(old_state->owner_thread)) {
|
|
@@ -393,7 +444,8 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
393
444
|
|
|
394
445
|
// Reset the dynamic sampling rate state, if any (reminder: the monotonic clock reference may change after a fork)
|
|
395
446
|
dynamic_sampling_rate_reset(&state->cpu_dynamic_sampling_rate);
|
|
396
|
-
|
|
447
|
+
long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
448
|
+
discrete_dynamic_sampler_reset(&state->allocation_sampler, now);
|
|
397
449
|
|
|
398
450
|
// This write to a global is thread-safe BECAUSE we're still holding on to the global VM lock at this point
|
|
399
451
|
active_sampler_instance_state = state;
|
|
@@ -455,15 +507,19 @@ static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE
|
|
|
455
507
|
return stop(self_instance, /* optional_exception: */ Qnil);
|
|
456
508
|
}
|
|
457
509
|
|
|
458
|
-
static
|
|
459
|
-
struct cpu_and_wall_time_worker_state *state;
|
|
460
|
-
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
461
|
-
|
|
510
|
+
static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception) {
|
|
462
511
|
atomic_store(&state->should_run, false);
|
|
463
512
|
state->failure_exception = optional_exception;
|
|
464
513
|
|
|
465
514
|
// Disable the tracepoints as soon as possible, so the VM doesn't keep on calling them
|
|
466
515
|
disable_tracepoints(state);
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
static VALUE stop(VALUE self_instance, VALUE optional_exception) {
|
|
519
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
520
|
+
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
521
|
+
|
|
522
|
+
stop_state(state, optional_exception);
|
|
467
523
|
|
|
468
524
|
return Qtrue;
|
|
469
525
|
}
|
|
@@ -503,7 +559,32 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
|
|
|
503
559
|
rb_postponed_job_trigger(sample_from_postponed_job_handle);
|
|
504
560
|
state->stats.postponed_job_success++; // Always succeeds
|
|
505
561
|
#else
|
|
506
|
-
|
|
562
|
+
|
|
563
|
+
// This is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
|
|
564
|
+
//
|
|
565
|
+
// TL;DR the `rb_postponed_job_register_one` API is not atomic (which is why it got replaced by `rb_postponed_job_trigger`)
|
|
566
|
+
// and in rare cases can cause VM crashes.
|
|
567
|
+
//
|
|
568
|
+
// Specifically, if we're interrupting `rb_postponed_job_flush` (the function that processes postponed jobs), the way
|
|
569
|
+
// that this function reads the jobs is not atomic, and can cause our call to
|
|
570
|
+
// `rb_postponed_job_register(function, arg)` to clobber an existing job that is getting dequeued.
|
|
571
|
+
// Clobbering an existing job is somewhat annoying, but the worst part is that it can happen that we clobber only
|
|
572
|
+
// the existing job's arguments.
|
|
573
|
+
// As surveyed in https://github.com/ruby/ruby/pull/8949#issuecomment-1821441370 clobbering the arguments turns out
|
|
574
|
+
// to not matter in many cases as usually `rb_postponed_job_register` calls in the VM and ecosystem ignore the argument.
|
|
575
|
+
//
|
|
576
|
+
// https://bugs.ruby-lang.org/issues/19991 is the exception: inside Ruby's `gc.c`, when dealing with object
|
|
577
|
+
// finalizers, Ruby calls `gc_finalize_deferred_register` which internally calls
|
|
578
|
+
// `rb_postponed_job_register_one(gc_finalize_deferred, objspace)`.
|
|
579
|
+
// Clobbering this call means that `gc_finalize_deferred` would get called with `NULL`, causing a segmentation fault.
|
|
580
|
+
//
|
|
581
|
+
// Note that this is quite rare: our signal needs to land at exactly the point where the VM has read the function
|
|
582
|
+
// to execute, but has yet to read the arguments. @ivoanjo: I could only reproduce it by manually changing the VM
|
|
583
|
+
// code to simulate this happening.
|
|
584
|
+
//
|
|
585
|
+
// Thus, our workaround is simple: we pass in objspace as our argument, just in case the clobbering happens.
|
|
586
|
+
// In the happy path, we never use this argument so it makes no difference. In the buggy path, we avoid crashing the VM.
|
|
587
|
+
int result = rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
|
|
507
588
|
|
|
508
589
|
// Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
|
|
509
590
|
// seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
|
|
@@ -545,17 +626,23 @@ static void *run_sampling_trigger_loop(void *state_ptr) {
|
|
|
545
626
|
// Note that reading the GVL owner and sending them a signal is a race -- the Ruby VM keeps on executing while
|
|
546
627
|
// we're doing this, so we may still not signal the correct thread from time to time, but our signal handler
|
|
547
628
|
// includes a check to see if it got called in the right thread
|
|
629
|
+
state->stats.interrupt_thread_attempts++;
|
|
548
630
|
pthread_kill(owner.owner, SIGPROF);
|
|
549
631
|
} else {
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
632
|
+
if (state->skip_idle_samples_for_testing) {
|
|
633
|
+
// This was added to make sure our tests don't accidentally pass due to idle samples. Specifically, if we
|
|
634
|
+
// comment out the thread interruption code inside `if (owner.valid)` above, our tests should not pass!
|
|
635
|
+
} else {
|
|
636
|
+
// If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
|
|
637
|
+
// so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
|
|
638
|
+
//
|
|
639
|
+
// In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
|
|
640
|
+
// Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
|
|
641
|
+
// for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
|
|
642
|
+
// CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
|
|
643
|
+
state->stats.trigger_simulated_signal_delivery_attempts++;
|
|
644
|
+
idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
|
|
645
|
+
}
|
|
559
646
|
}
|
|
560
647
|
}
|
|
561
648
|
|
|
@@ -607,11 +694,11 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
|
|
|
607
694
|
long wall_time_ns_before_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
608
695
|
|
|
609
696
|
if (state->dynamic_sampling_rate_enabled && !dynamic_sampling_rate_should_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_before_sample)) {
|
|
610
|
-
state->stats.
|
|
697
|
+
state->stats.cpu_skipped++;
|
|
611
698
|
return Qnil;
|
|
612
699
|
}
|
|
613
700
|
|
|
614
|
-
state->stats.
|
|
701
|
+
state->stats.cpu_sampled++;
|
|
615
702
|
|
|
616
703
|
VALUE profiler_overhead_stack_thread = state->owner_thread; // Used to attribute profiler overhead to a different stack
|
|
617
704
|
thread_context_collector_sample(state->thread_context_collector_instance, wall_time_ns_before_sample, profiler_overhead_stack_thread);
|
|
@@ -622,9 +709,9 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
|
|
|
622
709
|
// Guard against wall-time going backwards, see https://github.com/DataDog/dd-trace-rb/pull/2336 for discussion.
|
|
623
710
|
uint64_t sampling_time_ns = delta_ns < 0 ? 0 : delta_ns;
|
|
624
711
|
|
|
625
|
-
state->stats.
|
|
626
|
-
state->stats.
|
|
627
|
-
state->stats.
|
|
712
|
+
state->stats.cpu_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_min);
|
|
713
|
+
state->stats.cpu_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_max);
|
|
714
|
+
state->stats.cpu_sampling_time_ns_total += sampling_time_ns;
|
|
628
715
|
|
|
629
716
|
dynamic_sampling_rate_after_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_after_sample, sampling_time_ns);
|
|
630
717
|
|
|
@@ -666,6 +753,9 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
|
|
|
666
753
|
if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
|
|
667
754
|
if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
|
|
668
755
|
|
|
756
|
+
// Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
|
|
757
|
+
rb_funcall(instance, rb_intern("signal_running"), 0);
|
|
758
|
+
|
|
669
759
|
rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);
|
|
670
760
|
|
|
671
761
|
// If we stopped sampling due to an exception, re-raise it (now in the worker thread)
|
|
@@ -823,7 +913,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance)
|
|
|
823
913
|
// Disable all tracepoints, so that there are no more attempts to mutate the profile
|
|
824
914
|
disable_tracepoints(state);
|
|
825
915
|
|
|
826
|
-
|
|
916
|
+
reset_stats_not_thread_safe(state);
|
|
827
917
|
|
|
828
918
|
// Remove all state from the `Collectors::ThreadState` and connected downstream components
|
|
829
919
|
rb_funcall(state->thread_context_collector_instance, rb_intern("reset_after_fork"), 0);
|
|
@@ -839,11 +929,27 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
|
839
929
|
struct cpu_and_wall_time_worker_state *state;
|
|
840
930
|
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
841
931
|
|
|
842
|
-
VALUE
|
|
843
|
-
VALUE
|
|
844
|
-
VALUE
|
|
845
|
-
VALUE
|
|
846
|
-
state->stats.
|
|
932
|
+
VALUE pretty_cpu_sampling_time_ns_min = state->stats.cpu_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_min);
|
|
933
|
+
VALUE pretty_cpu_sampling_time_ns_max = state->stats.cpu_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_max);
|
|
934
|
+
VALUE pretty_cpu_sampling_time_ns_total = state->stats.cpu_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.cpu_sampling_time_ns_total);
|
|
935
|
+
VALUE pretty_cpu_sampling_time_ns_avg =
|
|
936
|
+
state->stats.cpu_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampling_time_ns_total) / state->stats.cpu_sampled);
|
|
937
|
+
|
|
938
|
+
VALUE pretty_allocation_sampling_time_ns_min = state->stats.allocation_sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_min);
|
|
939
|
+
VALUE pretty_allocation_sampling_time_ns_max = state->stats.allocation_sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_max);
|
|
940
|
+
VALUE pretty_allocation_sampling_time_ns_total = state->stats.allocation_sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.allocation_sampling_time_ns_total);
|
|
941
|
+
VALUE pretty_allocation_sampling_time_ns_avg =
|
|
942
|
+
state->stats.allocation_sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampling_time_ns_total) / state->stats.allocation_sampled);
|
|
943
|
+
|
|
944
|
+
unsigned long total_cpu_samples_attempted = state->stats.cpu_sampled + state->stats.cpu_skipped;
|
|
945
|
+
VALUE effective_cpu_sample_rate =
|
|
946
|
+
total_cpu_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampled) / total_cpu_samples_attempted);
|
|
947
|
+
unsigned long total_allocation_samples_attempted = state->stats.allocation_sampled + state->stats.allocation_skipped;
|
|
948
|
+
VALUE effective_allocation_sample_rate =
|
|
949
|
+
total_allocation_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampled) / total_allocation_samples_attempted);
|
|
950
|
+
|
|
951
|
+
VALUE allocation_sampler_snapshot = state->allocation_profiling_enabled && state->dynamic_sampling_rate_enabled ?
|
|
952
|
+
discrete_dynamic_sampler_state_snapshot(&state->allocation_sampler) : Qnil;
|
|
847
953
|
|
|
848
954
|
VALUE stats_as_hash = rb_hash_new();
|
|
849
955
|
VALUE arguments[] = {
|
|
@@ -852,22 +958,43 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
|
852
958
|
ID2SYM(rb_intern("simulated_signal_delivery")), /* => */ UINT2NUM(state->stats.simulated_signal_delivery),
|
|
853
959
|
ID2SYM(rb_intern("signal_handler_enqueued_sample")), /* => */ UINT2NUM(state->stats.signal_handler_enqueued_sample),
|
|
854
960
|
ID2SYM(rb_intern("signal_handler_wrong_thread")), /* => */ UINT2NUM(state->stats.signal_handler_wrong_thread),
|
|
855
|
-
ID2SYM(rb_intern("sampled")), /* => */ UINT2NUM(state->stats.sampled),
|
|
856
|
-
ID2SYM(rb_intern("skipped_sample_because_of_dynamic_sampling_rate")), /* => */ UINT2NUM(state->stats.skipped_sample_because_of_dynamic_sampling_rate),
|
|
857
961
|
ID2SYM(rb_intern("postponed_job_skipped_already_existed")), /* => */ UINT2NUM(state->stats.postponed_job_skipped_already_existed),
|
|
858
962
|
ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
|
|
859
963
|
ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
|
|
860
964
|
ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
|
|
861
|
-
ID2SYM(rb_intern("
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
ID2SYM(rb_intern("
|
|
865
|
-
ID2SYM(rb_intern("
|
|
965
|
+
ID2SYM(rb_intern("interrupt_thread_attempts")), /* => */ UINT2NUM(state->stats.interrupt_thread_attempts),
|
|
966
|
+
|
|
967
|
+
// CPU Stats
|
|
968
|
+
ID2SYM(rb_intern("cpu_sampled")), /* => */ UINT2NUM(state->stats.cpu_sampled),
|
|
969
|
+
ID2SYM(rb_intern("cpu_skipped")), /* => */ UINT2NUM(state->stats.cpu_skipped),
|
|
970
|
+
ID2SYM(rb_intern("cpu_effective_sample_rate")), /* => */ effective_cpu_sample_rate,
|
|
971
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_min")), /* => */ pretty_cpu_sampling_time_ns_min,
|
|
972
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_max")), /* => */ pretty_cpu_sampling_time_ns_max,
|
|
973
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_total")), /* => */ pretty_cpu_sampling_time_ns_total,
|
|
974
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_avg")), /* => */ pretty_cpu_sampling_time_ns_avg,
|
|
975
|
+
|
|
976
|
+
// Allocation stats
|
|
977
|
+
ID2SYM(rb_intern("allocation_sampled")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_sampled) : Qnil,
|
|
978
|
+
ID2SYM(rb_intern("allocation_skipped")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_skipped) : Qnil,
|
|
979
|
+
ID2SYM(rb_intern("allocation_effective_sample_rate")), /* => */ effective_allocation_sample_rate,
|
|
980
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_min")), /* => */ pretty_allocation_sampling_time_ns_min,
|
|
981
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_max")), /* => */ pretty_allocation_sampling_time_ns_max,
|
|
982
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_total")), /* => */ pretty_allocation_sampling_time_ns_total,
|
|
983
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ pretty_allocation_sampling_time_ns_avg,
|
|
984
|
+
ID2SYM(rb_intern("allocation_sampler_snapshot")), /* => */ allocation_sampler_snapshot,
|
|
985
|
+
ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
|
|
866
986
|
};
|
|
867
987
|
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
|
|
868
988
|
return stats_as_hash;
|
|
869
989
|
}
|
|
870
990
|
|
|
991
|
+
static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
992
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
993
|
+
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
994
|
+
reset_stats_not_thread_safe(state);
|
|
995
|
+
return Qnil;
|
|
996
|
+
}
|
|
997
|
+
|
|
871
998
|
void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
|
|
872
999
|
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
|
873
1000
|
|
|
@@ -885,9 +1012,17 @@ void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
|
|
|
885
1012
|
|
|
886
1013
|
static void grab_gvl_and_sample(void) { rb_thread_call_with_gvl(simulate_sampling_signal_delivery, NULL); }
|
|
887
1014
|
|
|
888
|
-
static void
|
|
889
|
-
|
|
890
|
-
|
|
1015
|
+
static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state) {
|
|
1016
|
+
// NOTE: This is not really thread safe so ongoing sampling operations that are concurrent with a reset can have their stats:
|
|
1017
|
+
// * Lost (writes after stats retrieval but before reset).
|
|
1018
|
+
// * Included in the previous stats window (writes before stats retrieval and reset).
|
|
1019
|
+
// * Included in the following stats window (writes after stats retrieval and reset).
|
|
1020
|
+
// Given the expected infrequency of resetting (~once per 60s profile) and the auxiliary/non-critical nature of these stats
|
|
1021
|
+
// this momentary loss of accuracy is deemed acceptable to keep overhead to a minimum.
|
|
1022
|
+
state->stats = (struct stats) {
|
|
1023
|
+
.cpu_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
|
|
1024
|
+
.allocation_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
|
|
1025
|
+
};
|
|
891
1026
|
}
|
|
892
1027
|
|
|
893
1028
|
static void sleep_for(uint64_t time_ns) {
|
|
@@ -937,8 +1072,16 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
|
|
|
937
1072
|
return;
|
|
938
1073
|
}
|
|
939
1074
|
|
|
940
|
-
if (state->dynamic_sampling_rate_enabled
|
|
941
|
-
|
|
1075
|
+
if (state->dynamic_sampling_rate_enabled) {
|
|
1076
|
+
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
1077
|
+
if (now == 0) {
|
|
1078
|
+
delayed_error(state, ERR_CLOCK_FAIL);
|
|
1079
|
+
return;
|
|
1080
|
+
}
|
|
1081
|
+
if (!discrete_dynamic_sampler_should_sample(&state->allocation_sampler, now)) {
|
|
1082
|
+
state->stats.allocation_skipped++;
|
|
1083
|
+
return;
|
|
1084
|
+
}
|
|
942
1085
|
}
|
|
943
1086
|
|
|
944
1087
|
// @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
|
|
@@ -950,14 +1093,32 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
|
|
|
950
1093
|
// Rescue against any exceptions that happen during sampling
|
|
951
1094
|
safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
|
|
952
1095
|
|
|
953
|
-
|
|
1096
|
+
if (state->dynamic_sampling_rate_enabled) {
|
|
1097
|
+
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
1098
|
+
if (now == 0) {
|
|
1099
|
+
delayed_error(state, ERR_CLOCK_FAIL);
|
|
1100
|
+
// NOTE: Not short-circuiting here to make sure cleanup happens
|
|
1101
|
+
}
|
|
1102
|
+
uint64_t sampling_time_ns = discrete_dynamic_sampler_after_sample(&state->allocation_sampler, now);
|
|
1103
|
+
// NOTE: To keep things lean when dynamic sampling rate is disabled we skip clock interactions which is
|
|
1104
|
+
// why we're fine with having this inside this conditional.
|
|
1105
|
+
state->stats.allocation_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_min);
|
|
1106
|
+
state->stats.allocation_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_max);
|
|
1107
|
+
state->stats.allocation_sampling_time_ns_total += sampling_time_ns;
|
|
1108
|
+
}
|
|
1109
|
+
|
|
1110
|
+
state->stats.allocation_sampled++;
|
|
954
1111
|
|
|
955
1112
|
state->during_sample = false;
|
|
956
1113
|
}
|
|
957
1114
|
|
|
958
1115
|
static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) {
|
|
959
|
-
|
|
960
|
-
|
|
1116
|
+
if (state->gc_tracepoint != Qnil) {
|
|
1117
|
+
rb_tracepoint_disable(state->gc_tracepoint);
|
|
1118
|
+
}
|
|
1119
|
+
if (state->object_allocation_tracepoint != Qnil) {
|
|
1120
|
+
rb_tracepoint_disable(state->object_allocation_tracepoint);
|
|
1121
|
+
}
|
|
961
1122
|
}
|
|
962
1123
|
|
|
963
1124
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
|
|
@@ -994,3 +1155,19 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
|
|
|
994
1155
|
// Return a dummy VALUE because we're called from rb_rescue2 which requires it
|
|
995
1156
|
return Qnil;
|
|
996
1157
|
}
|
|
1158
|
+
|
|
1159
|
+
static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error) {
|
|
1160
|
+
// If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop.
|
|
1161
|
+
stop_state(state, rb_exc_new_cstr(rb_eRuntimeError, error));
|
|
1162
|
+
}
|
|
1163
|
+
|
|
1164
|
+
static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) {
|
|
1165
|
+
ENFORCE_TYPE(error_msg, T_STRING);
|
|
1166
|
+
|
|
1167
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
1168
|
+
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
1169
|
+
|
|
1170
|
+
delayed_error(state, rb_string_value_cstr(&error_msg));
|
|
1171
|
+
|
|
1172
|
+
return Qnil;
|
|
1173
|
+
}
|