ddtrace 1.5.1 → 1.6.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +63 -1
- data/ext/ddtrace_profiling_loader/ddtrace_profiling_loader.c +9 -2
- data/ext/ddtrace_profiling_loader/extconf.rb +17 -0
- data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +38 -2
- data/ext/ddtrace_profiling_native_extension/clock_id.h +1 -0
- data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +1 -0
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +517 -42
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.h +3 -0
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +208 -30
- data/ext/ddtrace_profiling_native_extension/collectors_stack.c +156 -46
- data/ext/ddtrace_profiling_native_extension/collectors_stack.h +11 -2
- data/ext/ddtrace_profiling_native_extension/extconf.rb +11 -1
- data/ext/ddtrace_profiling_native_extension/http_transport.c +83 -64
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +4 -4
- data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +3 -2
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +59 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +3 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +10 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -1
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +4 -2
- data/ext/ddtrace_profiling_native_extension/stack_recorder.c +45 -29
- data/ext/ddtrace_profiling_native_extension/stack_recorder.h +7 -7
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +44 -18
- data/lib/datadog/appsec/event.rb +8 -4
- data/lib/datadog/core/configuration/components.rb +20 -14
- data/lib/datadog/core/configuration/settings.rb +59 -7
- data/lib/datadog/core/diagnostics/environment_logger.rb +5 -1
- data/lib/datadog/core/utils/compression.rb +5 -1
- data/lib/datadog/core.rb +0 -54
- data/lib/datadog/profiling/collectors/cpu_and_wall_time.rb +12 -2
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +5 -3
- data/lib/datadog/profiling/exporter.rb +2 -4
- data/lib/datadog/profiling/http_transport.rb +1 -1
- data/lib/datadog/tracing/client_ip.rb +11 -0
- data/lib/datadog/tracing/configuration/ext.rb +3 -1
- data/lib/datadog/tracing/contrib/aws/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/dalli/instrumentation.rb +4 -0
- data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +3 -0
- data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +2 -0
- data/lib/datadog/tracing/contrib/ethon/multi_patch.rb +2 -0
- data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -0
- data/lib/datadog/tracing/contrib/ext.rb +6 -0
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -0
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +5 -0
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +7 -1
- data/lib/datadog/tracing/contrib/grpc/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/hanami/action_tracer.rb +47 -0
- data/lib/datadog/tracing/contrib/hanami/configuration/settings.rb +22 -0
- data/lib/datadog/tracing/contrib/hanami/ext.rb +24 -0
- data/lib/datadog/tracing/contrib/hanami/integration.rb +44 -0
- data/lib/datadog/tracing/contrib/hanami/patcher.rb +33 -0
- data/lib/datadog/tracing/contrib/hanami/plugin.rb +23 -0
- data/lib/datadog/tracing/contrib/hanami/renderer_policy_tracing.rb +41 -0
- data/lib/datadog/tracing/contrib/hanami/router_tracing.rb +44 -0
- data/lib/datadog/tracing/contrib/http/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/mongodb/ext.rb +7 -0
- data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +4 -0
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +12 -0
- data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +16 -0
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +12 -0
- data/lib/datadog/tracing/contrib/pg/ext.rb +2 -1
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +34 -18
- data/lib/datadog/tracing/contrib/propagation/sql_comment/comment.rb +43 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +32 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +28 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +49 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +15 -7
- data/lib/datadog/tracing/contrib/redis/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/redis/instrumentation.rb +4 -2
- data/lib/datadog/tracing/contrib/redis/patcher.rb +41 -0
- data/lib/datadog/tracing/contrib/redis/tags.rb +5 -0
- data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +2 -0
- data/lib/datadog/tracing/contrib/sinatra/env.rb +12 -23
- data/lib/datadog/tracing/contrib/sinatra/ext.rb +7 -3
- data/lib/datadog/tracing/contrib/sinatra/patcher.rb +2 -2
- data/lib/datadog/tracing/contrib/sinatra/tracer.rb +8 -80
- data/lib/datadog/tracing/contrib/sinatra/tracer_middleware.rb +14 -9
- data/lib/datadog/tracing/contrib.rb +1 -0
- data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +84 -0
- data/lib/datadog/tracing/distributed/headers/datadog.rb +122 -30
- data/lib/datadog/tracing/distributed/headers/ext.rb +2 -0
- data/lib/datadog/tracing/flush.rb +1 -1
- data/lib/datadog/tracing/metadata/ext.rb +8 -0
- data/lib/datadog/tracing/propagation/http.rb +9 -1
- data/lib/datadog/tracing/sampling/ext.rb +31 -0
- data/lib/datadog/tracing/sampling/priority_sampler.rb +46 -4
- data/lib/datadog/tracing/sampling/rate_by_key_sampler.rb +8 -9
- data/lib/datadog/tracing/sampling/rate_by_service_sampler.rb +29 -5
- data/lib/datadog/tracing/sampling/rate_sampler.rb +10 -3
- data/lib/datadog/tracing/sampling/rule_sampler.rb +4 -3
- data/lib/datadog/tracing/sampling/span/ext.rb +0 -4
- data/lib/datadog/tracing/sampling/span/rule.rb +1 -1
- data/lib/datadog/tracing/sampling/span/sampler.rb +14 -3
- data/lib/datadog/tracing/trace_digest.rb +3 -0
- data/lib/datadog/tracing/trace_operation.rb +10 -0
- data/lib/datadog/tracing/trace_segment.rb +6 -0
- data/lib/datadog/tracing/tracer.rb +3 -1
- data/lib/datadog/tracing/writer.rb +7 -0
- data/lib/ddtrace/transport/trace_formatter.rb +7 -0
- data/lib/ddtrace/transport/traces.rb +1 -1
- data/lib/ddtrace/version.rb +2 -2
- metadata +18 -14
- data/lib/datadog/profiling/old_ext.rb +0 -42
- data/lib/datadog/profiling/transport/http/api/endpoint.rb +0 -85
- data/lib/datadog/profiling/transport/http/api/instance.rb +0 -38
- data/lib/datadog/profiling/transport/http/api/spec.rb +0 -42
- data/lib/datadog/profiling/transport/http/api.rb +0 -45
- data/lib/datadog/profiling/transport/http/builder.rb +0 -30
- data/lib/datadog/profiling/transport/http/client.rb +0 -37
- data/lib/datadog/profiling/transport/http/response.rb +0 -21
- data/lib/datadog/profiling/transport/http.rb +0 -118
@@ -13,8 +13,63 @@
 //
 // Triggering of this component (e.g. deciding when to take a sample) is implemented in Collectors::CpuAndWallTimeWorker.

+// ---
+// ## Tracking of cpu-time and wall-time spent during garbage collection
+//
+// This feature works by having an implicit state that a thread can be in: doing garbage collection. This state is
+// tracked inside the thread's `per_thread_context.gc_tracking` data, and three functions, listed below. The functions
+// will get called by the `Collectors::CpuAndWallTimeWorker` at very specific times in the VM lifetime.
+//
+// * `cpu_and_wall_time_collector_on_gc_start`: Called at the very beginning of the garbage collection process.
+//   The internal VM `during_gc` flag is set to `true`, but Ruby has not done any work yet.
+// * `cpu_and_wall_time_collector_on_gc_finish`: Called at the very end of the garbage collection process.
+//   The internal VM `during_gc` flag is still set to `true`, but all the work has been done.
+// * `cpu_and_wall_time_collector_sample_after_gc`: Called shortly after the garbage collection process.
+//   The internal VM `during_gc` flag is set to `false`.
+//
+// Inside this component, here's what happens inside those three functions:
+//
+// When `cpu_and_wall_time_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
+// context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
+//
+// While these fields are set, regular samples (if any) do not account for any time that passes after these two
+// timestamps.
+//
+// (Regular samples can still account for the time between the previous sample and the start of GC.)
+//
+// When `cpu_and_wall_time_collector_on_gc_finish` gets called, the current cpu and wall-time again get recorded to the
+// thread context: `cpu_time_at_gc_finish_ns` and `wall_time_at_gc_finish_ns`.
+//
+// Finally, when `cpu_and_wall_time_collector_sample_after_gc` gets called, the following happens:
+//
+// 1. A sample gets taken, using the special `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder
+// `Garbage Collection` frame as the latest frame. This sample gets assigned the cpu-time and wall-time period that was
+// recorded between calls to `on_gc_start` and `on_gc_finish`.
+//
+// 2. The thread is no longer marked as being in gc (all gc tracking fields get reset back to `INVALID_TIME`).
+//
+// 3. The `cpu_time_at_previous_sample_ns` and `wall_time_at_previous_sample_ns` get updated with the elapsed time in
+// GC, so that all time is accounted for -- e.g. the next sample will not get "blamed" by time spent in GC.
+//
+// In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
+// discovered that we needed to factor the sampling work away from `cpu_and_wall_time_collector_on_gc_finish` and into a
+// separate `cpu_and_wall_time_collector_sample_after_gc` because (as documented in more detail below),
+// `sample_after_gc` could trigger memory allocation in rare occasions (usually exceptions), which is actually not
+// allowed to happen during Ruby's garbage collection start/finish hooks.
+// ---
+
 #define INVALID_TIME -1
-#define THREAD_ID_LIMIT_CHARS
+#define THREAD_ID_LIMIT_CHARS 44 // Why 44? "#{2**64} (#{2**64})".size + 1 for \0
+#define RAISE_ON_FAILURE true
+#define DO_NOT_RAISE_ON_FAILURE false
+#define IS_WALL_TIME true
+#define IS_NOT_WALL_TIME false
+#define MISSING_TRACER_CONTEXT_KEY 0
+
+static ID at_active_trace_id; // id of :@active_trace in Ruby
+static ID at_root_span_id; // id of :@root_span in Ruby
+static ID at_active_span_id; // id of :@active_span in Ruby
+static ID at_id_id; // id of :@id in Ruby

 // Contains state for a single CpuAndWallTime instance
 struct cpu_and_wall_time_collector_state {
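The time-accounting rules described in the comment block above can be made concrete with a small standalone sketch (not part of the diff). The helper name `split_elapsed_at_gc_start` and the timestamps are made up for illustration; it only mirrors the rule that a regular sample stops accruing time at the start of GC, with the remainder attributed to the GC sample.

#include <stdio.h>

#define INVALID_TIME -1

// Hypothetical helper mirroring the rule above: a regular sample only accounts for
// time up to the start of GC; the rest is attributed to the GC sample later on.
static long split_elapsed_at_gc_start(long *previous_sample_ns, long now_ns, long gc_start_ns) {
  if (gc_start_ns == INVALID_TIME) {   // no GC in progress: account for everything
    long elapsed = now_ns - *previous_sample_ns;
    *previous_sample_ns = now_ns;
    return elapsed;
  }
  long elapsed = gc_start_ns > *previous_sample_ns ? gc_start_ns - *previous_sample_ns : 0;
  *previous_sample_ns = gc_start_ns;   // remaining time gets claimed by the GC sample path
  return elapsed;
}

int main(void) {
  long previous_sample_ns = 100;               // last regular sample at t=100
  long gc_start_ns = 130, gc_finish_ns = 170;  // GC ran from t=130 to t=170
  long now_ns = 200;                           // regular sample triggered at t=200

  long regular_ns = split_elapsed_at_gc_start(&previous_sample_ns, now_ns, gc_start_ns);
  long gc_ns = gc_finish_ns - gc_start_ns;

  // Prints 30 and 40: the regular sample only covers 100..130, the GC sample covers 130..170,
  // and the remaining 170..200 gets picked up by the next regular sample.
  printf("regular sample gets %ld ns, GC sample gets %ld ns\n", regular_ns, gc_ns);
  return 0;
}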
@@ -27,17 +82,53 @@ struct cpu_and_wall_time_collector_state {
   st_table *hash_map_per_thread_context;
   // Datadog::Profiling::StackRecorder instance
   VALUE recorder_instance;
-  //
+  // If the tracer is available and enabled, this will be the fiber-local symbol for accessing its running context,
+  // to enable code hotspots and endpoint aggregation.
+  // When not available, this is set to MISSING_TRACER_CONTEXT_KEY.
+  ID tracer_context_key;
+  // Track how many regular samples we've taken. Does not include garbage collection samples.
+  // Currently **outside** of stats struct because we also use it to decide when to clean the contexts, and thus this
+  // is not (just) a stat.
   unsigned int sample_count;
+
+  struct {
+    // Track how many garbage collection samples we've taken.
+    unsigned int gc_samples;
+    // See cpu_and_wall_time_collector_on_gc_start for details
+    unsigned int gc_samples_missed_due_to_missing_context;
+  } stats;
 };

 // Tracks per-thread state
 struct per_thread_context {
   char thread_id[THREAD_ID_LIMIT_CHARS];
-
+  ddog_CharSlice thread_id_char_slice;
   thread_cpu_time_id thread_cpu_time_id;
   long cpu_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized or if getting it fails for another reason
   long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
+
+  struct {
+    // Both of these fields are set by on_gc_start and kept until sample_after_gc is called.
+    // Outside of this window, they will be INVALID_TIME.
+    long cpu_time_at_start_ns;
+    long wall_time_at_start_ns;
+
+    // Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
+    // Outside of this window, they will be INVALID_TIME.
+    long cpu_time_at_finish_ns;
+    long wall_time_at_finish_ns;
+  } gc_tracking;
+};
+
+// Used to correlate profiles with traces
+struct trace_identifiers {
+  #define MAXIMUM_LENGTH_64_BIT_IDENTIFIER 21 // Why 21? 2^64 => 20 digits + 1 for \0
+
+  bool valid;
+  ddog_CharSlice local_root_span_id;
+  ddog_CharSlice span_id;
+  char local_root_span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER];
+  char span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER];
 };

 static void cpu_and_wall_time_collector_typed_data_mark(void *state_ptr);
@@ -45,21 +136,35 @@ static void cpu_and_wall_time_collector_typed_data_free(void *state_ptr);
 static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument);
 static int hash_map_per_thread_context_free_values(st_data_t _thread, st_data_t value_per_thread_context, st_data_t _argument);
 static VALUE _native_new(VALUE klass);
-static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames);
+static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key);
 static VALUE _native_sample(VALUE self, VALUE collector_instance);
+static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
+static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
+static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
+static void trigger_sample_for_thread(
+  struct cpu_and_wall_time_collector_state *state,
+  VALUE thread,
+  struct per_thread_context *thread_context,
+  ddog_Slice_i64 metric_values_slice,
+  sample_type type
+);
 static VALUE _native_thread_list(VALUE self);
 static struct per_thread_context *get_or_create_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state);
+static struct per_thread_context *get_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state);
 static void initialize_context(VALUE thread, struct per_thread_context *thread_context);
 static VALUE _native_inspect(VALUE self, VALUE collector_instance);
 static VALUE per_thread_context_st_table_as_ruby_hash(struct cpu_and_wall_time_collector_state *state);
 static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
+static VALUE stats_as_ruby_hash(struct cpu_and_wall_time_collector_state *state);
 static void remove_context_for_dead_threads(struct cpu_and_wall_time_collector_state *state);
 static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
 static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
-static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns);
+static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time);
 static long cpu_time_now_ns(struct per_thread_context *thread_context);
-static long wall_time_now_ns();
+static long wall_time_now_ns(bool raise_on_failure);
 static long thread_id_for(VALUE thread);
+static VALUE _native_stats(VALUE self, VALUE collector_instance);
+static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);

 void collectors_cpu_and_wall_time_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -77,11 +182,20 @@ void collectors_cpu_and_wall_time_init(VALUE profiling_module) {
   // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
   rb_define_alloc_func(collectors_cpu_and_wall_time_class, _native_new);

-  rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize,
+  rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize, 4);
   rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_inspect", _native_inspect, 1);
   rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 1);
+  rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
+  rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
+  rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 1);
   rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
   rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
+  rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
+
+  at_active_trace_id = rb_intern_const("@active_trace");
+  at_root_span_id = rb_intern_const("@root_span");
+  at_active_span_id = rb_intern_const("@active_span");
+  at_id_id = rb_intern_const("@id");
 }

 // This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_collector_state
@@ -147,12 +261,12 @@ static VALUE _native_new(VALUE klass) {
   // "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s.
   st_init_numtable();
   state->recorder_instance = Qnil;
-  state->
+  state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;

   return TypedData_Wrap_Struct(klass, &cpu_and_wall_time_collector_typed_data, state);
 }

-static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames) {
+static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key) {
   struct cpu_and_wall_time_collector_state *state;
   TypedData_Get_Struct(collector_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);

@@ -164,6 +278,14 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_inst
   // hash_map_per_thread_context is already initialized, nothing to do here
   state->recorder_instance = enforce_recorder_instance(recorder_instance);

+  if (RTEST(tracer_context_key)) {
+    ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
+    // Note about rb_to_id and dynamic symbols: calling `rb_to_id` prevents symbols from ever being garbage collected.
+    // In this case, we can't really escape this because as of this writing, ruby master still calls `rb_to_id` inside
+    // the implementation of Thread#[]= so any symbol that gets used as a key there will already be prevented from GC.
+    state->tracer_context_key = rb_to_id(tracer_context_key);
+  }
+
   return Qtrue;
 }

@@ -174,18 +296,40 @@ static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance
   return Qtrue;
 }

+// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
+// It SHOULD NOT be used for other purposes.
+static VALUE _native_on_gc_start(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
+  cpu_and_wall_time_collector_on_gc_start(collector_instance);
+  return Qtrue;
+}
+
+// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
+// It SHOULD NOT be used for other purposes.
+static VALUE _native_on_gc_finish(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
+  cpu_and_wall_time_collector_on_gc_finish(collector_instance);
+  return Qtrue;
+}
+
+// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
+// It SHOULD NOT be used for other purposes.
+static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
+  cpu_and_wall_time_collector_sample_after_gc(collector_instance);
+  return Qtrue;
+}
+
 // This function gets called from the Collectors::CpuAndWallTimeWorker to trigger the actual sampling.
 //
 // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
 // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
 // Assumption 3: This function IS NOT called from a signal handler. This function is not async-signal-safe.
 // Assumption 4: This function IS NOT called in a reentrant way.
+// Assumption 5: This function is called from the main Ractor (if Ruby has support for Ractors).
 VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
   struct cpu_and_wall_time_collector_state *state;
   TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);

   VALUE threads = ddtrace_thread_list();
-  long current_wall_time_ns = wall_time_now_ns();
+  long current_wall_time_ns = wall_time_now_ns(RAISE_ON_FAILURE);

   const long thread_count = RARRAY_LEN(threads);
   for (long i = 0; i < thread_count; i++) {
@@ -194,10 +338,18 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {

     long current_cpu_time_ns = cpu_time_now_ns(thread_context);

-    long cpu_time_elapsed_ns =
-
-
-
+    long cpu_time_elapsed_ns = update_time_since_previous_sample(
+      &thread_context->cpu_time_at_previous_sample_ns,
+      current_cpu_time_ns,
+      thread_context->gc_tracking.cpu_time_at_start_ns,
+      IS_NOT_WALL_TIME
+    );
+    long wall_time_elapsed_ns = update_time_since_previous_sample(
+      &thread_context->wall_time_at_previous_sample_ns,
+      current_wall_time_ns,
+      thread_context->gc_tracking.wall_time_at_start_ns,
+      IS_WALL_TIME
+    );

     int64_t metric_values[ENABLED_VALUE_TYPES_COUNT] = {0};

@@ -205,26 +357,12 @@
     metric_values[CPU_SAMPLES_VALUE_POS] = 1;
     metric_values[WALL_TIME_VALUE_POS] = wall_time_elapsed_ns;

-
-
-
-    int label_count = 1 + (have_thread_name ? 1 : 0);
-    ddprof_ffi_Label labels[label_count];
-
-    labels[0] = (ddprof_ffi_Label) {.key = DDPROF_FFI_CHARSLICE_C("thread id"), .str = thread_context->thread_id_char_slice};
-    if (have_thread_name) {
-      labels[1] = (ddprof_ffi_Label) {
-        .key = DDPROF_FFI_CHARSLICE_C("thread name"),
-        .str = char_slice_from_ruby_string(thread_name)
-      };
-    }
-
-    sample_thread(
+    trigger_sample_for_thread(
+      state,
       thread,
-
-
-
-      (ddprof_ffi_Slice_label) {.ptr = labels, .len = label_count}
+      thread_context,
+      (ddog_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
+      SAMPLE_REGULAR
     );
   }

@@ -238,6 +376,226 @@
   return Qnil;
 }

+// This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
+// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
+// create a stack sample that blames the cpu/wall time spent from now until the end of the garbage collector work.
+//
+// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
+// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
+// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
+//
+// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
+// Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
+void cpu_and_wall_time_collector_on_gc_start(VALUE self_instance) {
+  struct cpu_and_wall_time_collector_state *state;
+  if (!rb_typeddata_is_kind_of(self_instance, &cpu_and_wall_time_collector_typed_data)) return;
+  // This should never fail the the above check passes
+  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
+
+  struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
+
+  // If there was no previously-existing context for this thread, we won't allocate one (see safety). For now we just drop
+  // the GC sample, under the assumption that "a thread that is so new that we never sampled it even once before it triggers
+  // GC" is a rare enough case that we can just ignore it.
+  // We can always improve this later if we find that this happens often (and we have the counter to help us figure that out)!
+  if (thread_context == NULL) {
+    state->stats.gc_samples_missed_due_to_missing_context++;
+    return;
+  }
+
+  // If these fields are set, there's an existing GC sample that still needs to be written out by `sample_after_gc`.
+  //
+  // When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
+  // called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
+  // before we can actually run that method.
+  //
+  // We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
+  // `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
+  // then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
+  // there was a single, longer GC period.
+  if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
+    thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
+
+  // Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
+  thread_context->gc_tracking.wall_time_at_start_ns = wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+  thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
+}
+
+// This function gets called when Ruby has finished running the Garbage Collector on the current thread.
+// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
+// create a stack sample that blames the cpu/wall time spent from the start of garbage collector work until now.
+//
+// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
+// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
+// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
+//
+// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
+// Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
+void cpu_and_wall_time_collector_on_gc_finish(VALUE self_instance) {
+  struct cpu_and_wall_time_collector_state *state;
+  if (!rb_typeddata_is_kind_of(self_instance, &cpu_and_wall_time_collector_typed_data)) return;
+  // This should never fail the the above check passes
+  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
+
+  struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
+
+  // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
+  // how often this happens -- see on_gc_start.
+  if (thread_context == NULL) return;
+
+  if (thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME &&
+    thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) {
+    // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
+    // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
+    // do nothing.
+    return;
+  }
+
+  // Here we record the wall-time second and in on_gc_start we record it first to avoid having wall-time be slightly < cpu-time
+  thread_context->gc_tracking.cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
+  thread_context->gc_tracking.wall_time_at_finish_ns = wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+}
+
+// This function gets called shortly after Ruby has finished running the Garbage Collector.
+// It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
+// GC-related tracking.
+//
+// Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
+// and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
+// set on their context.
+//
+// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
+// Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
+// Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
+// Assumption 4: This function is called from the main Ractor (if Ruby has support for Ractors).
+VALUE cpu_and_wall_time_collector_sample_after_gc(VALUE self_instance) {
+  struct cpu_and_wall_time_collector_state *state;
+  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
+
+  VALUE threads = ddtrace_thread_list();
+  bool sampled_any_thread = false;
+
+  const long thread_count = RARRAY_LEN(threads);
+  for (long i = 0; i < thread_count; i++) {
+    VALUE thread = RARRAY_AREF(threads, i);
+    struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
+
+    if (
+      thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME ||
+      thread_context->gc_tracking.cpu_time_at_finish_ns == INVALID_TIME ||
+      thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
+      thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
+    ) continue; // Ignore threads with no/incomplete garbage collection data
+
+    sampled_any_thread = true;
+
+    long gc_cpu_time_elapsed_ns =
+      thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
+    long gc_wall_time_elapsed_ns =
+      thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
+
+    // We don't expect non-wall time to go backwards, so let's flag this as a bug
+    if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
+    // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
+    // was a bug.
+    // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
+    // https://github.com/DataDog/dd-trace-rb/pull/2336.
+    if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
+
+    if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
+      // Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
+      // come up with a crazy value for the frame
+      rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
+    }
+
+    int64_t metric_values[ENABLED_VALUE_TYPES_COUNT] = {0};
+
+    metric_values[CPU_TIME_VALUE_POS] = gc_cpu_time_elapsed_ns;
+    metric_values[CPU_SAMPLES_VALUE_POS] = 1;
+    metric_values[WALL_TIME_VALUE_POS] = gc_wall_time_elapsed_ns;
+
+    trigger_sample_for_thread(
+      state,
+      thread,
+      thread_context,
+      (ddog_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
+      SAMPLE_IN_GC
+    );
+
+    // Mark thread as no longer in GC
+    thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
+    thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
+    thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+    thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
+
+    // Update counters so that they won't include the time in GC during the next sample
+    if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
+      thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
+    }
+    if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
+      thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
+    }
+  }
+
+  if (sampled_any_thread) state->stats.gc_samples++;
+
+  // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
+  return Qnil;
+}
+
+static void trigger_sample_for_thread(
+  struct cpu_and_wall_time_collector_state *state,
+  VALUE thread,
+  struct per_thread_context *thread_context,
+  ddog_Slice_i64 metric_values_slice,
+  sample_type type
+) {
+  int max_label_count =
+    1 + // thread id
+    1 + // thread name
+    2; // local root span id and span id
+  ddog_Label labels[max_label_count];
+  int label_pos = 0;
+
+  labels[label_pos++] = (ddog_Label) {
+    .key = DDOG_CHARSLICE_C("thread id"),
+    .str = thread_context->thread_id_char_slice
+  };
+
+  VALUE thread_name = thread_name_for(thread);
+  if (thread_name != Qnil) {
+    labels[label_pos++] = (ddog_Label) {
+      .key = DDOG_CHARSLICE_C("thread name"),
+      .str = char_slice_from_ruby_string(thread_name)
+    };
+  }
+
+  struct trace_identifiers trace_identifiers_result = {.valid = false};
+  trace_identifiers_for(state, thread, &trace_identifiers_result);
+
+  if (trace_identifiers_result.valid) {
+    labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .str = trace_identifiers_result.local_root_span_id};
+    labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("span id"), .str = trace_identifiers_result.span_id};
+  }
+
+  // The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I
+  // forgot to update max_label_count" in the future, we've also added this validation.
+  // @ivoanjo: I wonder if C compilers are smart enough to statically prove when this check never triggers happens and
+  // remove it entirely.
+  if (label_pos > max_label_count) {
+    rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
+  }
+
+  sample_thread(
+    thread,
+    state->sampling_buffer,
+    state->recorder_instance,
+    metric_values_slice,
+    (ddog_Slice_label) {.ptr = labels, .len = label_pos},
+    type
+  );
+}
+
 // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
 // It SHOULD NOT be used for other purposes.
 static VALUE _native_thread_list(DDTRACE_UNUSED VALUE _self) {
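The "collapse multiple GC cycles into one" behaviour documented inside `cpu_and_wall_time_collector_on_gc_start` above can be illustrated with a minimal standalone sketch (not part of the diff); the `gc_window` struct and function names are hypothetical stand-ins for the real `gc_tracking` bookkeeping.

#include <stdio.h>

#define INVALID_TIME -1

// Hypothetical, simplified mirror of the gc_tracking fields described above.
struct gc_window { long start_ns; long finish_ns; };

static void on_gc_start(struct gc_window *w, long now_ns) {
  // A finished-but-not-yet-flushed window already exists: keep its start, i.e. collapse the cycles.
  if (w->finish_ns != INVALID_TIME) return;
  w->start_ns = now_ns;
}

static void on_gc_finish(struct gc_window *w, long now_ns) {
  if (w->start_ns == INVALID_TIME) return; // on_gc_start never ran for this window
  w->finish_ns = now_ns;                   // later finishes overwrite earlier ones
}

int main(void) {
  struct gc_window w = {INVALID_TIME, INVALID_TIME};

  // Two GC cycles happen before sample_after_gc gets a chance to run...
  on_gc_start(&w, 0); on_gc_finish(&w, 1);
  on_gc_start(&w, 2); on_gc_finish(&w, 3);

  // ...and they get reported as a single, longer window: start=0 finish=3.
  printf("collapsed GC window: start=%ld finish=%ld\n", w.start_ns, w.finish_ns);
  return 0;
}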
@@ -259,15 +617,32 @@ static struct per_thread_context *get_or_create_context_for(VALUE thread, struct
   return thread_context;
 }

+static struct per_thread_context *get_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state) {
+  struct per_thread_context* thread_context = NULL;
+  st_data_t value_context = 0;
+
+  if (st_lookup(state->hash_map_per_thread_context, (st_data_t) thread, &value_context)) {
+    thread_context = (struct per_thread_context*) value_context;
+  }
+
+  return thread_context;
+}
+
 static void initialize_context(VALUE thread, struct per_thread_context *thread_context) {
-  snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%
-  thread_context->thread_id_char_slice = (
+  snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%"PRIu64" (%lu)", native_thread_id_for(thread), (unsigned long) thread_id_for(thread));
+  thread_context->thread_id_char_slice = (ddog_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};

   thread_context->thread_cpu_time_id = thread_cpu_time_id_for(thread);

   // These will get initialized during actual sampling
   thread_context->cpu_time_at_previous_sample_ns = INVALID_TIME;
   thread_context->wall_time_at_previous_sample_ns = INVALID_TIME;
+
+  // These will only be used during a GC operation
+  thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
+  thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
+  thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+  thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
 }

 static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -279,7 +654,10 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
   // Update this when modifying state struct
   rb_str_concat(result, rb_sprintf(" hash_map_per_thread_context=%"PRIsVALUE, per_thread_context_st_table_as_ruby_hash(state)));
   rb_str_concat(result, rb_sprintf(" recorder_instance=%"PRIsVALUE, state->recorder_instance));
+  VALUE tracer_context_key = state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY ? Qnil : ID2SYM(state->tracer_context_key);
+  rb_str_concat(result, rb_sprintf(" tracer_context_key=%+"PRIsVALUE, tracer_context_key));
   rb_str_concat(result, rb_sprintf(" sample_count=%u", state->sample_count));
+  rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state)));

   return result;
 }
@@ -304,13 +682,29 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
     ID2SYM(rb_intern("thread_cpu_time_id_valid?")), /* => */ thread_context->thread_cpu_time_id.valid ? Qtrue : Qfalse,
     ID2SYM(rb_intern("thread_cpu_time_id")), /* => */ CLOCKID2NUM(thread_context->thread_cpu_time_id.clock_id),
     ID2SYM(rb_intern("cpu_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->cpu_time_at_previous_sample_ns),
-    ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns)
+    ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),
+
+    ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
+    ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
+    ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
+    ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
   };
   for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);

   return ST_CONTINUE;
 }

+static VALUE stats_as_ruby_hash(struct cpu_and_wall_time_collector_state *state) {
+  // Update this when modifying state struct (stats inner struct)
+  VALUE stats_as_hash = rb_hash_new();
+  VALUE arguments[] = {
+    ID2SYM(rb_intern("gc_samples")), /* => */ INT2NUM(state->stats.gc_samples),
+    ID2SYM(rb_intern("gc_samples_missed_due_to_missing_context")), /* => */ INT2NUM(state->stats.gc_samples_missed_due_to_missing_context),
+  };
+  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
+  return stats_as_hash;
+}
+
 static void remove_context_for_dead_threads(struct cpu_and_wall_time_collector_state *state) {
   st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
 }
@@ -336,24 +730,58 @@ static VALUE _native_per_thread_context(DDTRACE_UNUSED VALUE _self, VALUE collec
   return per_thread_context_st_table_as_ruby_hash(state);
 }

-static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns) {
+static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time) {
   // If we didn't have a time for the previous sample, we use the current one
   if (*time_at_previous_sample_ns == INVALID_TIME) *time_at_previous_sample_ns = current_time_ns;

-
-
+  bool is_thread_doing_gc = gc_start_time_ns != INVALID_TIME;
+  long elapsed_time_ns = -1;
+
+  if (is_thread_doing_gc) {
+    bool previous_sample_was_during_gc = gc_start_time_ns <= *time_at_previous_sample_ns;
+
+    if (previous_sample_was_during_gc) {
+      elapsed_time_ns = 0; // No time to account for -- any time since the last sample is going to get assigned to GC separately
+    } else {
+      elapsed_time_ns = gc_start_time_ns - *time_at_previous_sample_ns; // Capture time between previous sample and start of GC only
+    }
+
+    // Remaining time (from gc_start_time to current_time_ns) will be accounted for inside `sample_after_gc`
+    *time_at_previous_sample_ns = gc_start_time_ns;
+  } else {
+    elapsed_time_ns = current_time_ns - *time_at_previous_sample_ns; // Capture all time since previous sample
+    *time_at_previous_sample_ns = current_time_ns;
+  }
+
+  if (elapsed_time_ns < 0) {
+    if (is_wall_time) {
+      // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
+      // was a bug.
+      // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
+      // https://github.com/DataDog/dd-trace-rb/pull/2336.
+      elapsed_time_ns = 0;
+    } else {
+      // We don't expect non-wall time to go backwards, so let's flag this as a bug
+      rb_raise(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples");
+    }
+  }

-  return elapsed_time_ns
+  return elapsed_time_ns;
 }

-
+// Safety: This function is assumed never to raise exceptions by callers when raise_on_failure == false
+static long wall_time_now_ns(bool raise_on_failure) {
   struct timespec current_monotonic;

-  if (clock_gettime(CLOCK_MONOTONIC, &current_monotonic) != 0)
+  if (clock_gettime(CLOCK_MONOTONIC, &current_monotonic) != 0) {
+    if (raise_on_failure) rb_sys_fail("Failed to read CLOCK_MONOTONIC");
+    else return 0;
+  }

   return current_monotonic.tv_nsec + (current_monotonic.tv_sec * 1000 * 1000 * 1000);
 }

+// Safety: This function is assumed never to raise exceptions by callers
 static long cpu_time_now_ns(struct per_thread_context *thread_context) {
   thread_cpu_time cpu_time = thread_cpu_time_for(thread_context->thread_cpu_time_id);

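For reference, a minimal standalone sketch (not part of the diff) of the same `clock_gettime(CLOCK_MONOTONIC, ...)` pattern used by the new `wall_time_now_ns`, where the caller picks between reporting the failure and silently returning 0 (the no-raise path needed by the GC hooks). The function name `monotonic_wall_time_ns` is made up, and plain `perror` stands in for `rb_sys_fail`.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

// Mirrors the pattern above: on clock_gettime failure either report the error
// (here via perror instead of rb_sys_fail) or silently return 0, so callers that
// must never raise -- such as the GC hooks -- can keep going.
static long monotonic_wall_time_ns(bool report_on_failure) {
  struct timespec current_monotonic;
  if (clock_gettime(CLOCK_MONOTONIC, &current_monotonic) != 0) {
    if (report_on_failure) perror("Failed to read CLOCK_MONOTONIC");
    return 0;
  }
  return current_monotonic.tv_nsec + (current_monotonic.tv_sec * 1000L * 1000L * 1000L);
}

int main(void) {
  printf("monotonic now: %ld ns\n", monotonic_wall_time_ns(true));
  return 0;
}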
@@ -388,3 +816,50 @@ VALUE enforce_cpu_and_wall_time_collector_instance(VALUE object) {
   Check_TypedStruct(object, &cpu_and_wall_time_collector_typed_data);
   return object;
 }
+
+// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
+// It SHOULD NOT be used for other purposes.
+//
+// Returns the whole contents of the per_thread_context structs being tracked.
+static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
+  struct cpu_and_wall_time_collector_state *state;
+  TypedData_Get_Struct(collector_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
+
+  return stats_as_ruby_hash(state);
+}
+
+// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
+static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
+  if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
+
+  VALUE current_context = rb_thread_local_aref(thread, state->tracer_context_key);
+  if (current_context == Qnil) return;
+
+  VALUE active_trace = rb_ivar_get(current_context, at_active_trace_id /* @active_trace */);
+  if (active_trace == Qnil) return;
+
+  VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
+  VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
+  if (root_span == Qnil || active_span == Qnil) return;
+
+  VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
+  VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
+  if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
+
+  unsigned long long local_root_span_id = NUM2ULL(numeric_local_root_span_id);
+  unsigned long long span_id = NUM2ULL(numeric_span_id);
+
+  snprintf(trace_identifiers_result->local_root_span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", local_root_span_id);
+  snprintf(trace_identifiers_result->span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", span_id);
+
+  trace_identifiers_result->local_root_span_id = (ddog_CharSlice) {
+    .ptr = trace_identifiers_result->local_root_span_id_buffer,
+    .len = strlen(trace_identifiers_result->local_root_span_id_buffer)
+  };
+  trace_identifiers_result->span_id = (ddog_CharSlice) {
+    .ptr = trace_identifiers_result->span_id_buffer,
+    .len = strlen(trace_identifiers_result->span_id_buffer)
+  };
+
+  trace_identifiers_result->valid = true;
+}