ddtrace 1.5.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +44 -1
- data/ext/ddtrace_profiling_loader/ddtrace_profiling_loader.c +9 -2
- data/ext/ddtrace_profiling_loader/extconf.rb +17 -0
- data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +38 -2
- data/ext/ddtrace_profiling_native_extension/clock_id.h +1 -0
- data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +1 -0
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +517 -42
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.h +3 -0
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +208 -30
- data/ext/ddtrace_profiling_native_extension/collectors_stack.c +156 -46
- data/ext/ddtrace_profiling_native_extension/collectors_stack.h +11 -2
- data/ext/ddtrace_profiling_native_extension/extconf.rb +11 -1
- data/ext/ddtrace_profiling_native_extension/http_transport.c +83 -64
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +4 -4
- data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +3 -2
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +59 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +3 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +10 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -1
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +4 -2
- data/ext/ddtrace_profiling_native_extension/stack_recorder.c +45 -29
- data/ext/ddtrace_profiling_native_extension/stack_recorder.h +7 -7
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +4 -0
- data/lib/datadog/appsec/event.rb +6 -0
- data/lib/datadog/core/configuration/components.rb +20 -14
- data/lib/datadog/core/configuration/settings.rb +42 -4
- data/lib/datadog/core/diagnostics/environment_logger.rb +5 -1
- data/lib/datadog/core/utils/compression.rb +5 -1
- data/lib/datadog/core.rb +0 -54
- data/lib/datadog/profiling/collectors/cpu_and_wall_time.rb +12 -2
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +5 -3
- data/lib/datadog/profiling/exporter.rb +2 -4
- data/lib/datadog/profiling/http_transport.rb +1 -1
- data/lib/datadog/tracing/configuration/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/aws/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/dalli/instrumentation.rb +4 -0
- data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +3 -0
- data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +2 -0
- data/lib/datadog/tracing/contrib/ethon/multi_patch.rb +2 -0
- data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -0
- data/lib/datadog/tracing/contrib/ext.rb +6 -0
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -0
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +5 -0
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +7 -1
- data/lib/datadog/tracing/contrib/grpc/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/hanami/action_tracer.rb +47 -0
- data/lib/datadog/tracing/contrib/hanami/configuration/settings.rb +22 -0
- data/lib/datadog/tracing/contrib/hanami/ext.rb +24 -0
- data/lib/datadog/tracing/contrib/hanami/integration.rb +44 -0
- data/lib/datadog/tracing/contrib/hanami/patcher.rb +33 -0
- data/lib/datadog/tracing/contrib/hanami/plugin.rb +23 -0
- data/lib/datadog/tracing/contrib/hanami/renderer_policy_tracing.rb +41 -0
- data/lib/datadog/tracing/contrib/hanami/router_tracing.rb +44 -0
- data/lib/datadog/tracing/contrib/http/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/mongodb/ext.rb +7 -0
- data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +4 -0
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +12 -0
- data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +16 -0
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +12 -0
- data/lib/datadog/tracing/contrib/pg/ext.rb +2 -1
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +34 -18
- data/lib/datadog/tracing/contrib/propagation/sql_comment/comment.rb +43 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +32 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +28 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +49 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +11 -5
- data/lib/datadog/tracing/contrib/redis/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/redis/instrumentation.rb +4 -2
- data/lib/datadog/tracing/contrib/redis/patcher.rb +41 -0
- data/lib/datadog/tracing/contrib/redis/tags.rb +5 -0
- data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +2 -0
- data/lib/datadog/tracing/contrib/sinatra/env.rb +12 -23
- data/lib/datadog/tracing/contrib/sinatra/ext.rb +7 -3
- data/lib/datadog/tracing/contrib/sinatra/patcher.rb +2 -2
- data/lib/datadog/tracing/contrib/sinatra/tracer.rb +8 -80
- data/lib/datadog/tracing/contrib/sinatra/tracer_middleware.rb +14 -9
- data/lib/datadog/tracing/contrib.rb +1 -0
- data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +84 -0
- data/lib/datadog/tracing/distributed/headers/datadog.rb +122 -30
- data/lib/datadog/tracing/distributed/headers/ext.rb +2 -0
- data/lib/datadog/tracing/flush.rb +1 -1
- data/lib/datadog/tracing/metadata/ext.rb +8 -0
- data/lib/datadog/tracing/propagation/http.rb +9 -1
- data/lib/datadog/tracing/sampling/ext.rb +31 -0
- data/lib/datadog/tracing/sampling/priority_sampler.rb +46 -4
- data/lib/datadog/tracing/sampling/rate_by_key_sampler.rb +8 -9
- data/lib/datadog/tracing/sampling/rate_by_service_sampler.rb +29 -5
- data/lib/datadog/tracing/sampling/rate_sampler.rb +10 -3
- data/lib/datadog/tracing/sampling/rule_sampler.rb +4 -3
- data/lib/datadog/tracing/sampling/span/ext.rb +0 -4
- data/lib/datadog/tracing/sampling/span/rule.rb +1 -1
- data/lib/datadog/tracing/sampling/span/sampler.rb +14 -3
- data/lib/datadog/tracing/trace_digest.rb +3 -0
- data/lib/datadog/tracing/trace_operation.rb +10 -0
- data/lib/datadog/tracing/trace_segment.rb +6 -0
- data/lib/datadog/tracing/tracer.rb +3 -1
- data/lib/datadog/tracing/writer.rb +7 -0
- data/lib/ddtrace/transport/trace_formatter.rb +7 -0
- data/lib/ddtrace/transport/traces.rb +1 -1
- data/lib/ddtrace/version.rb +2 -2
- metadata +18 -14
- data/lib/datadog/profiling/old_ext.rb +0 -42
- data/lib/datadog/profiling/transport/http/api/endpoint.rb +0 -85
- data/lib/datadog/profiling/transport/http/api/instance.rb +0 -38
- data/lib/datadog/profiling/transport/http/api/spec.rb +0 -42
- data/lib/datadog/profiling/transport/http/api.rb +0 -45
- data/lib/datadog/profiling/transport/http/builder.rb +0 -30
- data/lib/datadog/profiling/transport/http/client.rb +0 -37
- data/lib/datadog/profiling/transport/http/response.rb +0 -21
- data/lib/datadog/profiling/transport/http.rb +0 -118
|
@@ -3,4 +3,7 @@
|
|
|
3
3
|
#include <ruby.h>
|
|
4
4
|
|
|
5
5
|
VALUE cpu_and_wall_time_collector_sample(VALUE self_instance);
|
|
6
|
+
VALUE cpu_and_wall_time_collector_sample_after_gc(VALUE self_instance);
|
|
7
|
+
void cpu_and_wall_time_collector_on_gc_start(VALUE self_instance);
|
|
8
|
+
void cpu_and_wall_time_collector_on_gc_finish(VALUE self_instance);
|
|
6
9
|
VALUE enforce_cpu_and_wall_time_collector_instance(VALUE object);
|
|
@@ -64,18 +64,28 @@ struct cpu_and_wall_time_worker_state {
|
|
|
64
64
|
// telling the sampling trigger loop to stop, but if we ever need to communicate more, we should move to actual
|
|
65
65
|
// atomic operations. stdatomic.h seems a nice thing to reach out for.
|
|
66
66
|
volatile bool should_run;
|
|
67
|
-
|
|
67
|
+
bool gc_profiling_enabled;
|
|
68
68
|
VALUE cpu_and_wall_time_collector_instance;
|
|
69
|
+
|
|
69
70
|
// When something goes wrong during sampling, we record the Ruby exception here, so that it can be "re-raised" on
|
|
70
71
|
// the CpuAndWallTimeWorker thread
|
|
71
72
|
VALUE failure_exception;
|
|
73
|
+
|
|
74
|
+
// Used to get gc start/finish information
|
|
75
|
+
VALUE gc_tracepoint;
|
|
72
76
|
};
|
|
73
77
|
|
|
74
78
|
static VALUE _native_new(VALUE klass);
|
|
75
|
-
static VALUE _native_initialize(
|
|
79
|
+
static VALUE _native_initialize(
|
|
80
|
+
DDTRACE_UNUSED VALUE _self,
|
|
81
|
+
VALUE self_instance,
|
|
82
|
+
VALUE cpu_and_wall_time_collector_instance,
|
|
83
|
+
VALUE gc_profiling_enabled
|
|
84
|
+
);
|
|
76
85
|
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
|
|
77
86
|
static VALUE _native_sampling_loop(VALUE self, VALUE instance);
|
|
78
87
|
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance);
|
|
88
|
+
static VALUE stop(VALUE self_instance, VALUE optional_exception);
|
|
79
89
|
static void install_sigprof_signal_handler(void (*signal_handler_function)(int, siginfo_t *, void *));
|
|
80
90
|
static void remove_sigprof_signal_handler(void);
|
|
81
91
|
static void block_sigprof_signal_handler_from_running_in_current_thread(void);
|
|
@@ -90,6 +100,13 @@ static VALUE _native_is_running(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
|
90
100
|
static void testing_signal_handler(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
|
|
91
101
|
static VALUE _native_install_testing_signal_handler(DDTRACE_UNUSED VALUE self);
|
|
92
102
|
static VALUE _native_remove_testing_signal_handler(DDTRACE_UNUSED VALUE self);
|
|
103
|
+
static VALUE _native_trigger_sample(DDTRACE_UNUSED VALUE self);
|
|
104
|
+
static VALUE _native_gc_tracepoint(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
105
|
+
static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
|
|
106
|
+
static void after_gc_from_postponed_job(DDTRACE_UNUSED void *_unused);
|
|
107
|
+
static void safely_call(VALUE (*function_to_call_safely)(VALUE), VALUE function_to_call_safely_arg, VALUE instance);
|
|
108
|
+
static VALUE _native_simulate_handle_sampling_signal(DDTRACE_UNUSED VALUE self);
|
|
109
|
+
static VALUE _native_simulate_sample_from_postponed_job(DDTRACE_UNUSED VALUE self);
|
|
93
110
|
|
|
94
111
|
// Global state -- be very careful when accessing or modifying it
|
|
95
112
|
|
|
@@ -119,13 +136,17 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
|
119
136
|
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
|
|
120
137
|
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
|
|
121
138
|
|
|
122
|
-
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize,
|
|
139
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 3);
|
|
123
140
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
|
|
124
141
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 1);
|
|
125
142
|
rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
|
|
126
143
|
rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
|
|
127
144
|
rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
|
|
128
145
|
rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
|
|
146
|
+
rb_define_singleton_method(testing_module, "_native_trigger_sample", _native_trigger_sample, 0);
|
|
147
|
+
rb_define_singleton_method(testing_module, "_native_gc_tracepoint", _native_gc_tracepoint, 1);
|
|
148
|
+
rb_define_singleton_method(testing_module, "_native_simulate_handle_sampling_signal", _native_simulate_handle_sampling_signal, 0);
|
|
149
|
+
rb_define_singleton_method(testing_module, "_native_simulate_sample_from_postponed_job", _native_simulate_sample_from_postponed_job, 0);
|
|
129
150
|
}
|
|
130
151
|
|
|
131
152
|
// This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_worker_state
|
|
@@ -145,17 +166,28 @@ static VALUE _native_new(VALUE klass) {
|
|
|
145
166
|
struct cpu_and_wall_time_worker_state *state = ruby_xcalloc(1, sizeof(struct cpu_and_wall_time_worker_state));
|
|
146
167
|
|
|
147
168
|
state->should_run = false;
|
|
169
|
+
state->gc_profiling_enabled = false;
|
|
148
170
|
state->cpu_and_wall_time_collector_instance = Qnil;
|
|
149
171
|
state->failure_exception = Qnil;
|
|
172
|
+
state->gc_tracepoint = Qnil;
|
|
150
173
|
|
|
151
174
|
return TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
|
|
152
175
|
}
|
|
153
176
|
|
|
154
|
-
static VALUE _native_initialize(
|
|
177
|
+
static VALUE _native_initialize(
|
|
178
|
+
DDTRACE_UNUSED VALUE _self,
|
|
179
|
+
VALUE self_instance,
|
|
180
|
+
VALUE cpu_and_wall_time_collector_instance,
|
|
181
|
+
VALUE gc_profiling_enabled
|
|
182
|
+
) {
|
|
183
|
+
ENFORCE_BOOLEAN(gc_profiling_enabled);
|
|
184
|
+
|
|
155
185
|
struct cpu_and_wall_time_worker_state *state;
|
|
156
186
|
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
157
187
|
|
|
188
|
+
state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue);
|
|
158
189
|
state->cpu_and_wall_time_collector_instance = enforce_cpu_and_wall_time_collector_instance(cpu_and_wall_time_collector_instance);
|
|
190
|
+
state->gc_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_GC_ENTER | RUBY_INTERNAL_EVENT_GC_EXIT, on_gc_event, NULL /* unused */);
|
|
159
191
|
|
|
160
192
|
return Qtrue;
|
|
161
193
|
}
|
|
@@ -166,6 +198,7 @@ static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr) {
|
|
|
166
198
|
|
|
167
199
|
rb_gc_mark(state->cpu_and_wall_time_collector_instance);
|
|
168
200
|
rb_gc_mark(state->failure_exception);
|
|
201
|
+
rb_gc_mark(state->gc_tracepoint);
|
|
169
202
|
}
|
|
170
203
|
|
|
171
204
|
// Called in a background thread created in CpuAndWallTimeWorker#start
|
|
@@ -173,11 +206,25 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
173
206
|
struct cpu_and_wall_time_worker_state *state;
|
|
174
207
|
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
175
208
|
|
|
176
|
-
if (active_sampler_owner_thread != Qnil
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
209
|
+
if (active_sampler_owner_thread != Qnil) {
|
|
210
|
+
if (is_thread_alive(active_sampler_owner_thread)) {
|
|
211
|
+
rb_raise(
|
|
212
|
+
rb_eRuntimeError,
|
|
213
|
+
"Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread"
|
|
214
|
+
);
|
|
215
|
+
} else {
|
|
216
|
+
// The previously active thread seems to have died without cleaning up after itself.
|
|
217
|
+
// In this case, we can still go ahead and start the profiler BUT we make sure to disable any existing GC tracepoint
|
|
218
|
+
// first as:
|
|
219
|
+
// a) If this is a new instance of the CpuAndWallTimeWorker, we don't want the tracepoint from the old instance
|
|
220
|
+
// being kept around
|
|
221
|
+
// b) If this is the same instance of the CpuAndWallTimeWorker, and we call enable on a tracepoint that is already
|
|
222
|
+
// enabled, it will start firing more than once, see https://bugs.ruby-lang.org/issues/19114 for details.
|
|
223
|
+
|
|
224
|
+
struct cpu_and_wall_time_worker_state *old_state;
|
|
225
|
+
TypedData_Get_Struct(active_sampler_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, old_state);
|
|
226
|
+
rb_tracepoint_disable(old_state->gc_tracepoint);
|
|
227
|
+
}
|
|
181
228
|
}
|
|
182
229
|
|
|
183
230
|
// This write to a global is thread-safe BECAUSE we're still holding on to the global VM lock at this point
|
|
@@ -189,6 +236,7 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
189
236
|
block_sigprof_signal_handler_from_running_in_current_thread(); // We want to interrupt the thread with the global VM lock, never this one
|
|
190
237
|
|
|
191
238
|
install_sigprof_signal_handler(handle_sampling_signal);
|
|
239
|
+
if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
|
|
192
240
|
|
|
193
241
|
// Release GVL, get to the actual work!
|
|
194
242
|
int exception_state;
|
|
@@ -196,6 +244,7 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
196
244
|
|
|
197
245
|
// The sample trigger loop finished (either cleanly or with an error); let's clean up
|
|
198
246
|
|
|
247
|
+
rb_tracepoint_disable(state->gc_tracepoint);
|
|
199
248
|
remove_sigprof_signal_handler();
|
|
200
249
|
active_sampler_instance = Qnil;
|
|
201
250
|
active_sampler_owner_thread = Qnil;
|
|
@@ -209,10 +258,18 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
209
258
|
}
|
|
210
259
|
|
|
211
260
|
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance) {
|
|
261
|
+
return stop(self_instance, /* optional_exception: */ Qnil);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
static VALUE stop(VALUE self_instance, VALUE optional_exception) {
|
|
212
265
|
struct cpu_and_wall_time_worker_state *state;
|
|
213
266
|
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
214
267
|
|
|
215
268
|
state->should_run = false;
|
|
269
|
+
state->failure_exception = optional_exception;
|
|
270
|
+
|
|
271
|
+
// Disable the GC tracepoint as soon as possible, so the VM doesn't keep on calling it
|
|
272
|
+
rb_tracepoint_disable(state->gc_tracepoint);
|
|
216
273
|
|
|
217
274
|
return Qtrue;
|
|
218
275
|
}
|
|
@@ -264,10 +321,16 @@ static void block_sigprof_signal_handler_from_running_in_current_thread(void) {
|
|
|
264
321
|
pthread_sigmask(SIG_BLOCK, &signals_to_block, NULL);
|
|
265
322
|
}
|
|
266
323
|
|
|
324
|
+
// NOTE: Remember that this will run in the thread and within the scope of user code, including user C code.
|
|
325
|
+
// We need to be careful not to change any state that may be observed OR to restore it if we do. For instance, if anything
|
|
326
|
+
// we do here can set `errno`, then we must be careful to restore the old `errno` after the fact.
|
|
267
327
|
static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext) {
|
|
268
328
|
if (!ruby_thread_has_gvl_p()) {
|
|
269
329
|
return; // Not safe to enqueue a sample from this thread
|
|
270
330
|
}
|
|
331
|
+
if (!ddtrace_rb_ractor_main_p()) {
|
|
332
|
+
return; // We're not on the main Ractor; we currently don't support profiling non-main Ractors
|
|
333
|
+
}
|
|
271
334
|
|
|
272
335
|
// We implicitly assume there can be no concurrent nor nested calls to handle_sampling_signal because
|
|
273
336
|
// a) we get triggered using SIGPROF, and the docs state a second SIGPROF will not interrupt an existing one
|
|
@@ -315,34 +378,23 @@ static void sample_from_postponed_job(DDTRACE_UNUSED void *_unused) {
|
|
|
315
378
|
// This can potentially happen if the CpuAndWallTimeWorker was stopped while the postponed job was waiting to be executed; nothing to do
|
|
316
379
|
if (instance == Qnil) return;
|
|
317
380
|
|
|
381
|
+
// @ivoanjo: I'm not sure this can ever happen because `handle_sampling_signal` only enqueues this callback if
|
|
382
|
+
// it's running on the main Ractor, but just in case...
|
|
383
|
+
if (!ddtrace_rb_ractor_main_p()) {
|
|
384
|
+
return; // We're not on the main Ractor; we currently don't support profiling non-main Ractors
|
|
385
|
+
}
|
|
386
|
+
|
|
318
387
|
struct cpu_and_wall_time_worker_state *state;
|
|
319
388
|
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
320
389
|
|
|
321
390
|
// Trigger sampling using the Collectors::CpuAndWallTime; rescue against any exceptions that happen during sampling
|
|
322
|
-
|
|
323
|
-
VALUE function_to_call_safely_arg = state->cpu_and_wall_time_collector_instance;
|
|
324
|
-
VALUE (*exception_handler_function)(VALUE, VALUE) = handle_sampling_failure;
|
|
325
|
-
VALUE exception_handler_function_arg = instance;
|
|
326
|
-
rb_rescue2(
|
|
327
|
-
function_to_call_safely,
|
|
328
|
-
function_to_call_safely_arg,
|
|
329
|
-
exception_handler_function,
|
|
330
|
-
exception_handler_function_arg,
|
|
331
|
-
rb_eException, // rb_eException is the base class of all Ruby exceptions
|
|
332
|
-
0 // Required by API to be the last argument
|
|
333
|
-
);
|
|
391
|
+
safely_call(cpu_and_wall_time_collector_sample, state->cpu_and_wall_time_collector_instance, instance);
|
|
334
392
|
}
|
|
335
393
|
|
|
336
|
-
static VALUE handle_sampling_failure(VALUE self_instance, VALUE exception) {
|
|
337
|
-
struct cpu_and_wall_time_worker_state *state;
|
|
338
|
-
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
339
|
-
|
|
340
|
-
state->should_run = false;
|
|
341
|
-
state->failure_exception = exception;
|
|
342
|
-
|
|
343
|
-
return Qnil;
|
|
344
|
-
}
|
|
394
|
+
static VALUE handle_sampling_failure(VALUE self_instance, VALUE exception) { return stop(self_instance, exception); }
|
|
345
395
|
|
|
396
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTimeWorker behavior using RSpec.
|
|
397
|
+
// It SHOULD NOT be used for other purposes.
|
|
346
398
|
static VALUE _native_current_sigprof_signal_handler(DDTRACE_UNUSED VALUE self) {
|
|
347
399
|
struct sigaction existing_signal_handler_config = {.sa_sigaction = NULL};
|
|
348
400
|
if (sigaction(SIGPROF, NULL, &existing_signal_handler_config) != 0) {
|
|
@@ -370,6 +422,8 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
|
|
|
370
422
|
return Qnil;
|
|
371
423
|
}
|
|
372
424
|
|
|
425
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTimeWorker behavior using RSpec.
|
|
426
|
+
// It SHOULD NOT be used for other purposes.
|
|
373
427
|
static VALUE _native_is_running(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
374
428
|
return \
|
|
375
429
|
(active_sampler_owner_thread != Qnil && is_thread_alive(active_sampler_owner_thread) && active_sampler_instance == instance) ?
|
|
@@ -380,12 +434,136 @@ static void testing_signal_handler(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
|
|
|
380
434
|
/* Does nothing on purpose */
|
|
381
435
|
}
|
|
382
436
|
|
|
437
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTimeWorker behavior using RSpec.
|
|
438
|
+
// It SHOULD NOT be used for other purposes.
|
|
383
439
|
static VALUE _native_install_testing_signal_handler(DDTRACE_UNUSED VALUE self) {
|
|
384
440
|
install_sigprof_signal_handler(testing_signal_handler);
|
|
385
441
|
return Qtrue;
|
|
386
442
|
}
|
|
387
443
|
|
|
444
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTimeWorker behavior using RSpec.
|
|
445
|
+
// It SHOULD NOT be used for other purposes.
|
|
388
446
|
static VALUE _native_remove_testing_signal_handler(DDTRACE_UNUSED VALUE self) {
|
|
389
447
|
remove_sigprof_signal_handler();
|
|
390
448
|
return Qtrue;
|
|
391
449
|
}
|
|
450
|
+
|
|
451
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTimeWorker behavior using RSpec.
|
|
452
|
+
// It SHOULD NOT be used for other purposes.
|
|
453
|
+
static VALUE _native_trigger_sample(DDTRACE_UNUSED VALUE self) {
|
|
454
|
+
sample_from_postponed_job(NULL);
|
|
455
|
+
return Qtrue;
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTimeWorker behavior using RSpec.
|
|
459
|
+
// It SHOULD NOT be used for other purposes.
|
|
460
|
+
static VALUE _native_gc_tracepoint(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
461
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
462
|
+
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
463
|
+
|
|
464
|
+
return state->gc_tracepoint;
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
// Implements tracking of cpu-time and wall-time spent doing GC. This function is called by Ruby from the `gc_tracepoint`
|
|
468
|
+
// when the RUBY_INTERNAL_EVENT_GC_ENTER and RUBY_INTERNAL_EVENT_GC_EXIT events are triggered.
|
|
469
|
+
//
|
|
470
|
+
// See the comments on
|
|
471
|
+
// * cpu_and_wall_time_collector_on_gc_start
|
|
472
|
+
// * cpu_and_wall_time_collector_on_gc_finish
|
|
473
|
+
// * cpu_and_wall_time_collector_sample_after_gc
|
|
474
|
+
//
|
|
475
|
+
// For the expected times in which to call them, and their assumptions.
|
|
476
|
+
//
|
|
477
|
+
// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
|
|
478
|
+
// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
|
|
479
|
+
// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
|
|
480
|
+
static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
|
|
481
|
+
if (!ddtrace_rb_ractor_main_p()) {
|
|
482
|
+
return; // We're not on the main Ractor; we currently don't support profiling non-main Ractors
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
int event = rb_tracearg_event_flag(rb_tracearg_from_tracepoint(tracepoint_data));
|
|
486
|
+
if (event != RUBY_INTERNAL_EVENT_GC_ENTER && event != RUBY_INTERNAL_EVENT_GC_EXIT) return; // Unknown event
|
|
487
|
+
|
|
488
|
+
VALUE instance = active_sampler_instance; // Read from global variable
|
|
489
|
+
|
|
490
|
+
// This should not happen in a normal situation because the tracepoint is always enabled after the instance is set
|
|
491
|
+
// and disabled before it is cleared, but just in case...
|
|
492
|
+
if (instance == Qnil) return;
|
|
493
|
+
|
|
494
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
495
|
+
if (!rb_typeddata_is_kind_of(instance, &cpu_and_wall_time_worker_typed_data)) return;
|
|
496
|
+
// This should never fail if the above check passes
|
|
497
|
+
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
498
|
+
|
|
499
|
+
if (event == RUBY_INTERNAL_EVENT_GC_ENTER) {
|
|
500
|
+
cpu_and_wall_time_collector_on_gc_start(state->cpu_and_wall_time_collector_instance);
|
|
501
|
+
} else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) {
|
|
502
|
+
// Design: In an earlier iteration of this feature (see https://github.com/DataDog/dd-trace-rb/pull/2308) we
|
|
503
|
+
// actually had a single method to implement the behavior of both cpu_and_wall_time_collector_on_gc_finish
|
|
504
|
+
// and cpu_and_wall_time_collector_sample_after_gc (the latter is called via after_gc_from_postponed_job).
|
|
505
|
+
//
|
|
506
|
+
// Unfortunately, then we discovered the safety issue around no allocations, and thus decided to separate them -- so that
|
|
507
|
+
// the sampling could run outside the tight safety constraints of the garbage collection process.
|
|
508
|
+
//
|
|
509
|
+
// There is a downside: The sample is now taken very shortly after the GC finishes, and not immediately
|
|
510
|
+
// as the GC finishes, which means the stack captured may be affected by "skid", e.g. point slightly after where
|
|
511
|
+
// it should be pointing at.
|
|
512
|
+
// Alternatives to solve this would be to capture no stack for garbage collection (as we do for Java and .net);
|
|
513
|
+
// making the sampling process allocation-safe (very hard); or separate stack sampling from sample recording,
|
|
514
|
+
// e.g. enabling us to capture the stack in cpu_and_wall_time_collector_on_gc_finish and do the rest later
|
|
515
|
+
// (medium hard).
|
|
516
|
+
|
|
517
|
+
cpu_and_wall_time_collector_on_gc_finish(state->cpu_and_wall_time_collector_instance);
|
|
518
|
+
// We use rb_postponed_job_register_one to ask Ruby to run cpu_and_wall_time_collector_sample_after_gc after it
|
|
519
|
+
// fully finishes the garbage collection, so that one is allowed to do allocations and throw exceptions as usual.
|
|
520
|
+
rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
static void after_gc_from_postponed_job(DDTRACE_UNUSED void *_unused) {
|
|
525
|
+
VALUE instance = active_sampler_instance; // Read from global variable
|
|
526
|
+
|
|
527
|
+
// This can potentially happen if the CpuAndWallTimeWorker was stopped while the postponed job was waiting to be executed; nothing to do
|
|
528
|
+
if (instance == Qnil) return;
|
|
529
|
+
|
|
530
|
+
// @ivoanjo: I'm not sure this can ever happen because `on_gc_event` only enqueues this callback if
|
|
531
|
+
// it's running on the main Ractor, but just in case...
|
|
532
|
+
if (!ddtrace_rb_ractor_main_p()) {
|
|
533
|
+
return; // We're not on the main Ractor; we currently don't support profiling non-main Ractors
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
537
|
+
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
538
|
+
|
|
539
|
+
// Trigger sampling using the Collectors::CpuAndWallTime; rescue against any exceptions that happen during sampling
|
|
540
|
+
safely_call(cpu_and_wall_time_collector_sample_after_gc, state->cpu_and_wall_time_collector_instance, instance);
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
// Equivalent to Ruby begin/rescue call, where we call a C function and jump to the exception handler if an
|
|
544
|
+
// exception gets raised within
|
|
545
|
+
static void safely_call(VALUE (*function_to_call_safely)(VALUE), VALUE function_to_call_safely_arg, VALUE instance) {
|
|
546
|
+
VALUE exception_handler_function_arg = instance;
|
|
547
|
+
rb_rescue2(
|
|
548
|
+
function_to_call_safely,
|
|
549
|
+
function_to_call_safely_arg,
|
|
550
|
+
handle_sampling_failure,
|
|
551
|
+
exception_handler_function_arg,
|
|
552
|
+
rb_eException, // rb_eException is the base class of all Ruby exceptions
|
|
553
|
+
0 // Required by API to be the last argument
|
|
554
|
+
);
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTimeWorker behavior using RSpec.
|
|
558
|
+
// It SHOULD NOT be used for other purposes.
|
|
559
|
+
static VALUE _native_simulate_handle_sampling_signal(DDTRACE_UNUSED VALUE self) {
|
|
560
|
+
handle_sampling_signal(0, NULL, NULL);
|
|
561
|
+
return Qtrue;
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTimeWorker behavior using RSpec.
|
|
565
|
+
// It SHOULD NOT be used for other purposes.
|
|
566
|
+
static VALUE _native_simulate_sample_from_postponed_job(DDTRACE_UNUSED VALUE self) {
|
|
567
|
+
sample_from_postponed_job(NULL);
|
|
568
|
+
return Qtrue;
|
|
569
|
+
}
|