ddtrace 1.5.2 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +56 -2
- data/ext/ddtrace_profiling_loader/ddtrace_profiling_loader.c +9 -2
- data/ext/ddtrace_profiling_loader/extconf.rb +17 -0
- data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +38 -2
- data/ext/ddtrace_profiling_native_extension/clock_id.h +1 -0
- data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +1 -0
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +517 -42
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.h +3 -0
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +208 -30
- data/ext/ddtrace_profiling_native_extension/collectors_stack.c +156 -46
- data/ext/ddtrace_profiling_native_extension/collectors_stack.h +11 -2
- data/ext/ddtrace_profiling_native_extension/extconf.rb +11 -1
- data/ext/ddtrace_profiling_native_extension/http_transport.c +83 -64
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +4 -4
- data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +3 -2
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +59 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +3 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +10 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -1
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +4 -2
- data/ext/ddtrace_profiling_native_extension/stack_recorder.c +45 -29
- data/ext/ddtrace_profiling_native_extension/stack_recorder.h +7 -7
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +4 -0
- data/lib/datadog/appsec/event.rb +6 -0
- data/lib/datadog/core/configuration/components.rb +20 -14
- data/lib/datadog/core/configuration/settings.rb +42 -4
- data/lib/datadog/core/diagnostics/environment_logger.rb +5 -1
- data/lib/datadog/core/utils/compression.rb +5 -1
- data/lib/datadog/core.rb +0 -54
- data/lib/datadog/profiling/collectors/cpu_and_wall_time.rb +12 -2
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +5 -3
- data/lib/datadog/profiling/exporter.rb +2 -4
- data/lib/datadog/profiling/http_transport.rb +1 -1
- data/lib/datadog/tracing/configuration/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/aws/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/dalli/instrumentation.rb +4 -0
- data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +3 -0
- data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +2 -0
- data/lib/datadog/tracing/contrib/ethon/multi_patch.rb +2 -0
- data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -0
- data/lib/datadog/tracing/contrib/ext.rb +6 -0
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -0
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +5 -0
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +7 -1
- data/lib/datadog/tracing/contrib/grpc/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/hanami/action_tracer.rb +47 -0
- data/lib/datadog/tracing/contrib/hanami/configuration/settings.rb +22 -0
- data/lib/datadog/tracing/contrib/hanami/ext.rb +24 -0
- data/lib/datadog/tracing/contrib/hanami/integration.rb +44 -0
- data/lib/datadog/tracing/contrib/hanami/patcher.rb +33 -0
- data/lib/datadog/tracing/contrib/hanami/plugin.rb +23 -0
- data/lib/datadog/tracing/contrib/hanami/renderer_policy_tracing.rb +41 -0
- data/lib/datadog/tracing/contrib/hanami/router_tracing.rb +44 -0
- data/lib/datadog/tracing/contrib/http/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +2 -0
- data/lib/datadog/tracing/contrib/mongodb/ext.rb +7 -0
- data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +4 -0
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +12 -0
- data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +16 -0
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +12 -0
- data/lib/datadog/tracing/contrib/pg/ext.rb +2 -1
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +34 -18
- data/lib/datadog/tracing/contrib/propagation/sql_comment/comment.rb +43 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +32 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +28 -0
- data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +49 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +11 -5
- data/lib/datadog/tracing/contrib/redis/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/redis/instrumentation.rb +4 -2
- data/lib/datadog/tracing/contrib/redis/integration.rb +2 -1
- data/lib/datadog/tracing/contrib/redis/patcher.rb +40 -0
- data/lib/datadog/tracing/contrib/redis/tags.rb +5 -0
- data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +2 -0
- data/lib/datadog/tracing/contrib/sinatra/env.rb +12 -23
- data/lib/datadog/tracing/contrib/sinatra/ext.rb +7 -3
- data/lib/datadog/tracing/contrib/sinatra/patcher.rb +2 -2
- data/lib/datadog/tracing/contrib/sinatra/tracer.rb +8 -80
- data/lib/datadog/tracing/contrib/sinatra/tracer_middleware.rb +14 -9
- data/lib/datadog/tracing/contrib.rb +1 -0
- data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +84 -0
- data/lib/datadog/tracing/distributed/headers/datadog.rb +122 -30
- data/lib/datadog/tracing/distributed/headers/ext.rb +2 -0
- data/lib/datadog/tracing/flush.rb +1 -1
- data/lib/datadog/tracing/metadata/ext.rb +8 -0
- data/lib/datadog/tracing/propagation/http.rb +9 -1
- data/lib/datadog/tracing/sampling/ext.rb +31 -0
- data/lib/datadog/tracing/sampling/priority_sampler.rb +46 -4
- data/lib/datadog/tracing/sampling/rate_by_key_sampler.rb +8 -9
- data/lib/datadog/tracing/sampling/rate_by_service_sampler.rb +29 -5
- data/lib/datadog/tracing/sampling/rate_sampler.rb +10 -3
- data/lib/datadog/tracing/sampling/rule_sampler.rb +4 -3
- data/lib/datadog/tracing/sampling/span/ext.rb +0 -4
- data/lib/datadog/tracing/sampling/span/rule.rb +1 -1
- data/lib/datadog/tracing/sampling/span/sampler.rb +14 -3
- data/lib/datadog/tracing/trace_digest.rb +3 -0
- data/lib/datadog/tracing/trace_operation.rb +10 -0
- data/lib/datadog/tracing/trace_segment.rb +6 -0
- data/lib/datadog/tracing/tracer.rb +3 -1
- data/lib/datadog/tracing/writer.rb +7 -0
- data/lib/ddtrace/transport/trace_formatter.rb +7 -0
- data/lib/ddtrace/transport/traces.rb +1 -1
- data/lib/ddtrace/version.rb +2 -2
- metadata +18 -14
- data/lib/datadog/profiling/old_ext.rb +0 -42
- data/lib/datadog/profiling/transport/http/api/endpoint.rb +0 -85
- data/lib/datadog/profiling/transport/http/api/instance.rb +0 -38
- data/lib/datadog/profiling/transport/http/api/spec.rb +0 -42
- data/lib/datadog/profiling/transport/http/api.rb +0 -45
- data/lib/datadog/profiling/transport/http/builder.rb +0 -30
- data/lib/datadog/profiling/transport/http/client.rb +0 -37
- data/lib/datadog/profiling/transport/http/response.rb +0 -21
- data/lib/datadog/profiling/transport/http.rb +0 -118
|
@@ -13,8 +13,63 @@
|
|
|
13
13
|
//
|
|
14
14
|
// Triggering of this component (e.g. deciding when to take a sample) is implemented in Collectors::CpuAndWallTimeWorker.
|
|
15
15
|
|
|
16
|
+
// ---
|
|
17
|
+
// ## Tracking of cpu-time and wall-time spent during garbage collection
|
|
18
|
+
//
|
|
19
|
+
// This feature works by having an implicit state that a thread can be in: doing garbage collection. This state is
|
|
20
|
+
// tracked inside the thread's `per_thread_context.gc_tracking` data, and three functions, listed below. The functions
|
|
21
|
+
// will get called by the `Collectors::CpuAndWallTimeWorker` at very specific times in the VM lifetime.
|
|
22
|
+
//
|
|
23
|
+
// * `cpu_and_wall_time_collector_on_gc_start`: Called at the very beginning of the garbage collection process.
|
|
24
|
+
// The internal VM `during_gc` flag is set to `true`, but Ruby has not done any work yet.
|
|
25
|
+
// * `cpu_and_wall_time_collector_on_gc_finish`: Called at the very end of the garbage collection process.
|
|
26
|
+
// The internal VM `during_gc` flag is still set to `true`, but all the work has been done.
|
|
27
|
+
// * `cpu_and_wall_time_collector_sample_after_gc`: Called shortly after the garbage collection process.
|
|
28
|
+
// The internal VM `during_gc` flag is set to `false`.
|
|
29
|
+
//
|
|
30
|
+
// Inside this component, here's what happens inside those three functions:
|
|
31
|
+
//
|
|
32
|
+
// When `cpu_and_wall_time_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
|
|
33
|
+
// context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
|
|
34
|
+
//
|
|
35
|
+
// While these fields are set, regular samples (if any) do not account for any time that passes after these two
|
|
36
|
+
// timestamps.
|
|
37
|
+
//
|
|
38
|
+
// (Regular samples can still account for the time between the previous sample and the start of GC.)
|
|
39
|
+
//
|
|
40
|
+
// When `cpu_and_wall_time_collector_on_gc_finish` gets called, the current cpu and wall-time again get recorded to the
|
|
41
|
+
// thread context: `cpu_time_at_gc_finish_ns` and `wall_time_at_gc_finish_ns`.
|
|
42
|
+
//
|
|
43
|
+
// Finally, when `cpu_and_wall_time_collector_sample_after_gc` gets called, the following happens:
|
|
44
|
+
//
|
|
45
|
+
// 1. A sample gets taken, using the special `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder
|
|
46
|
+
// `Garbage Collection` frame as the latest frame. This sample gets assigned the cpu-time and wall-time period that was
|
|
47
|
+
// recorded between calls to `on_gc_start` and `on_gc_finish`.
|
|
48
|
+
//
|
|
49
|
+
// 2. The thread is no longer marked as being in gc (all gc tracking fields get reset back to `INVALID_TIME`).
|
|
50
|
+
//
|
|
51
|
+
// 3. The `cpu_time_at_previous_sample_ns` and `wall_time_at_previous_sample_ns` get updated with the elapsed time in
|
|
52
|
+
// GC, so that all time is accounted for -- e.g. the next sample will not get "blamed" by time spent in GC.
|
|
53
|
+
//
|
|
54
|
+
// In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
|
|
55
|
+
// discovered that we needed to factor the sampling work away from `cpu_and_wall_time_collector_on_gc_finish` and into a
|
|
56
|
+
// separate `cpu_and_wall_time_collector_sample_after_gc` because (as documented in more detail below),
|
|
57
|
+
// `sample_after_gc` could trigger memory allocation in rare occasions (usually exceptions), which is actually not
|
|
58
|
+
// allowed to happen during Ruby's garbage collection start/finish hooks.
|
|
59
|
+
// ---
|
|
60
|
+
|
|
16
61
|
#define INVALID_TIME -1
|
|
17
|
-
#define THREAD_ID_LIMIT_CHARS
|
|
62
|
+
#define THREAD_ID_LIMIT_CHARS 44 // Why 44? "#{2**64} (#{2**64})".size + 1 for \0
|
|
63
|
+
#define RAISE_ON_FAILURE true
|
|
64
|
+
#define DO_NOT_RAISE_ON_FAILURE false
|
|
65
|
+
#define IS_WALL_TIME true
|
|
66
|
+
#define IS_NOT_WALL_TIME false
|
|
67
|
+
#define MISSING_TRACER_CONTEXT_KEY 0
|
|
68
|
+
|
|
69
|
+
static ID at_active_trace_id; // id of :@active_trace in Ruby
|
|
70
|
+
static ID at_root_span_id; // id of :@root_span in Ruby
|
|
71
|
+
static ID at_active_span_id; // id of :@active_span in Ruby
|
|
72
|
+
static ID at_id_id; // id of :@id in Ruby
|
|
18
73
|
|
|
19
74
|
// Contains state for a single CpuAndWallTime instance
|
|
20
75
|
struct cpu_and_wall_time_collector_state {
|
|
@@ -27,17 +82,53 @@ struct cpu_and_wall_time_collector_state {
|
|
|
27
82
|
st_table *hash_map_per_thread_context;
|
|
28
83
|
// Datadog::Profiling::StackRecorder instance
|
|
29
84
|
VALUE recorder_instance;
|
|
30
|
-
//
|
|
85
|
+
// If the tracer is available and enabled, this will be the fiber-local symbol for accessing its running context,
|
|
86
|
+
// to enable code hotspots and endpoint aggregation.
|
|
87
|
+
// When not available, this is set to MISSING_TRACER_CONTEXT_KEY.
|
|
88
|
+
ID tracer_context_key;
|
|
89
|
+
// Track how many regular samples we've taken. Does not include garbage collection samples.
|
|
90
|
+
// Currently **outside** of stats struct because we also use it to decide when to clean the contexts, and thus this
|
|
91
|
+
// is not (just) a stat.
|
|
31
92
|
unsigned int sample_count;
|
|
93
|
+
|
|
94
|
+
struct {
|
|
95
|
+
// Track how many garbage collection samples we've taken.
|
|
96
|
+
unsigned int gc_samples;
|
|
97
|
+
// See cpu_and_wall_time_collector_on_gc_start for details
|
|
98
|
+
unsigned int gc_samples_missed_due_to_missing_context;
|
|
99
|
+
} stats;
|
|
32
100
|
};
|
|
33
101
|
|
|
34
102
|
// Tracks per-thread state
|
|
35
103
|
struct per_thread_context {
|
|
36
104
|
char thread_id[THREAD_ID_LIMIT_CHARS];
|
|
37
|
-
|
|
105
|
+
ddog_CharSlice thread_id_char_slice;
|
|
38
106
|
thread_cpu_time_id thread_cpu_time_id;
|
|
39
107
|
long cpu_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized or if getting it fails for another reason
|
|
40
108
|
long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
|
|
109
|
+
|
|
110
|
+
struct {
|
|
111
|
+
// Both of these fields are set by on_gc_start and kept until sample_after_gc is called.
|
|
112
|
+
// Outside of this window, they will be INVALID_TIME.
|
|
113
|
+
long cpu_time_at_start_ns;
|
|
114
|
+
long wall_time_at_start_ns;
|
|
115
|
+
|
|
116
|
+
// Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
|
|
117
|
+
// Outside of this window, they will be INVALID_TIME.
|
|
118
|
+
long cpu_time_at_finish_ns;
|
|
119
|
+
long wall_time_at_finish_ns;
|
|
120
|
+
} gc_tracking;
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
// Used to correlate profiles with traces
|
|
124
|
+
struct trace_identifiers {
|
|
125
|
+
#define MAXIMUM_LENGTH_64_BIT_IDENTIFIER 21 // Why 21? 2^64 => 20 digits + 1 for \0
|
|
126
|
+
|
|
127
|
+
bool valid;
|
|
128
|
+
ddog_CharSlice local_root_span_id;
|
|
129
|
+
ddog_CharSlice span_id;
|
|
130
|
+
char local_root_span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER];
|
|
131
|
+
char span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER];
|
|
41
132
|
};
|
|
42
133
|
|
|
43
134
|
static void cpu_and_wall_time_collector_typed_data_mark(void *state_ptr);
|
|
@@ -45,21 +136,35 @@ static void cpu_and_wall_time_collector_typed_data_free(void *state_ptr);
|
|
|
45
136
|
static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument);
|
|
46
137
|
static int hash_map_per_thread_context_free_values(st_data_t _thread, st_data_t value_per_thread_context, st_data_t _argument);
|
|
47
138
|
static VALUE _native_new(VALUE klass);
|
|
48
|
-
static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames);
|
|
139
|
+
static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key);
|
|
49
140
|
static VALUE _native_sample(VALUE self, VALUE collector_instance);
|
|
141
|
+
static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
|
|
142
|
+
static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
|
|
143
|
+
static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
|
|
144
|
+
static void trigger_sample_for_thread(
|
|
145
|
+
struct cpu_and_wall_time_collector_state *state,
|
|
146
|
+
VALUE thread,
|
|
147
|
+
struct per_thread_context *thread_context,
|
|
148
|
+
ddog_Slice_i64 metric_values_slice,
|
|
149
|
+
sample_type type
|
|
150
|
+
);
|
|
50
151
|
static VALUE _native_thread_list(VALUE self);
|
|
51
152
|
static struct per_thread_context *get_or_create_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state);
|
|
153
|
+
static struct per_thread_context *get_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state);
|
|
52
154
|
static void initialize_context(VALUE thread, struct per_thread_context *thread_context);
|
|
53
155
|
static VALUE _native_inspect(VALUE self, VALUE collector_instance);
|
|
54
156
|
static VALUE per_thread_context_st_table_as_ruby_hash(struct cpu_and_wall_time_collector_state *state);
|
|
55
157
|
static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
|
|
158
|
+
static VALUE stats_as_ruby_hash(struct cpu_and_wall_time_collector_state *state);
|
|
56
159
|
static void remove_context_for_dead_threads(struct cpu_and_wall_time_collector_state *state);
|
|
57
160
|
static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
|
|
58
161
|
static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
|
|
59
|
-
static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns);
|
|
162
|
+
static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time);
|
|
60
163
|
static long cpu_time_now_ns(struct per_thread_context *thread_context);
|
|
61
|
-
static long wall_time_now_ns();
|
|
164
|
+
static long wall_time_now_ns(bool raise_on_failure);
|
|
62
165
|
static long thread_id_for(VALUE thread);
|
|
166
|
+
static VALUE _native_stats(VALUE self, VALUE collector_instance);
|
|
167
|
+
static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
|
|
63
168
|
|
|
64
169
|
void collectors_cpu_and_wall_time_init(VALUE profiling_module) {
|
|
65
170
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
|
@@ -77,11 +182,20 @@ void collectors_cpu_and_wall_time_init(VALUE profiling_module) {
|
|
|
77
182
|
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
|
|
78
183
|
rb_define_alloc_func(collectors_cpu_and_wall_time_class, _native_new);
|
|
79
184
|
|
|
80
|
-
rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize,
|
|
185
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize, 4);
|
|
81
186
|
rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_inspect", _native_inspect, 1);
|
|
82
187
|
rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 1);
|
|
188
|
+
rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
|
|
189
|
+
rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
|
|
190
|
+
rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 1);
|
|
83
191
|
rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
|
|
84
192
|
rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
|
|
193
|
+
rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
|
|
194
|
+
|
|
195
|
+
at_active_trace_id = rb_intern_const("@active_trace");
|
|
196
|
+
at_root_span_id = rb_intern_const("@root_span");
|
|
197
|
+
at_active_span_id = rb_intern_const("@active_span");
|
|
198
|
+
at_id_id = rb_intern_const("@id");
|
|
85
199
|
}
|
|
86
200
|
|
|
87
201
|
// This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_collector_state
|
|
@@ -147,12 +261,12 @@ static VALUE _native_new(VALUE klass) {
|
|
|
147
261
|
// "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s.
|
|
148
262
|
st_init_numtable();
|
|
149
263
|
state->recorder_instance = Qnil;
|
|
150
|
-
state->
|
|
264
|
+
state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;
|
|
151
265
|
|
|
152
266
|
return TypedData_Wrap_Struct(klass, &cpu_and_wall_time_collector_typed_data, state);
|
|
153
267
|
}
|
|
154
268
|
|
|
155
|
-
static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames) {
|
|
269
|
+
static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key) {
|
|
156
270
|
struct cpu_and_wall_time_collector_state *state;
|
|
157
271
|
TypedData_Get_Struct(collector_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
|
|
158
272
|
|
|
@@ -164,6 +278,14 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_inst
|
|
|
164
278
|
// hash_map_per_thread_context is already initialized, nothing to do here
|
|
165
279
|
state->recorder_instance = enforce_recorder_instance(recorder_instance);
|
|
166
280
|
|
|
281
|
+
if (RTEST(tracer_context_key)) {
|
|
282
|
+
ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
|
|
283
|
+
// Note about rb_to_id and dynamic symbols: calling `rb_to_id` prevents symbols from ever being garbage collected.
|
|
284
|
+
// In this case, we can't really escape this because as of this writing, ruby master still calls `rb_to_id` inside
|
|
285
|
+
// the implementation of Thread#[]= so any symbol that gets used as a key there will already be prevented from GC.
|
|
286
|
+
state->tracer_context_key = rb_to_id(tracer_context_key);
|
|
287
|
+
}
|
|
288
|
+
|
|
167
289
|
return Qtrue;
|
|
168
290
|
}
|
|
169
291
|
|
|
@@ -174,18 +296,40 @@ static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance
|
|
|
174
296
|
return Qtrue;
|
|
175
297
|
}
|
|
176
298
|
|
|
299
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
|
|
300
|
+
// It SHOULD NOT be used for other purposes.
|
|
301
|
+
static VALUE _native_on_gc_start(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
|
|
302
|
+
cpu_and_wall_time_collector_on_gc_start(collector_instance);
|
|
303
|
+
return Qtrue;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
|
|
307
|
+
// It SHOULD NOT be used for other purposes.
|
|
308
|
+
static VALUE _native_on_gc_finish(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
|
|
309
|
+
cpu_and_wall_time_collector_on_gc_finish(collector_instance);
|
|
310
|
+
return Qtrue;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
|
|
314
|
+
// It SHOULD NOT be used for other purposes.
|
|
315
|
+
static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
|
|
316
|
+
cpu_and_wall_time_collector_sample_after_gc(collector_instance);
|
|
317
|
+
return Qtrue;
|
|
318
|
+
}
|
|
319
|
+
|
|
177
320
|
// This function gets called from the Collectors::CpuAndWallTimeWorker to trigger the actual sampling.
|
|
178
321
|
//
|
|
179
322
|
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
|
180
323
|
// Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
|
|
181
324
|
// Assumption 3: This function IS NOT called from a signal handler. This function is not async-signal-safe.
|
|
182
325
|
// Assumption 4: This function IS NOT called in a reentrant way.
|
|
326
|
+
// Assumption 5: This function is called from the main Ractor (if Ruby has support for Ractors).
|
|
183
327
|
VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
|
|
184
328
|
struct cpu_and_wall_time_collector_state *state;
|
|
185
329
|
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
|
|
186
330
|
|
|
187
331
|
VALUE threads = ddtrace_thread_list();
|
|
188
|
-
long current_wall_time_ns = wall_time_now_ns();
|
|
332
|
+
long current_wall_time_ns = wall_time_now_ns(RAISE_ON_FAILURE);
|
|
189
333
|
|
|
190
334
|
const long thread_count = RARRAY_LEN(threads);
|
|
191
335
|
for (long i = 0; i < thread_count; i++) {
|
|
@@ -194,10 +338,18 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
|
|
|
194
338
|
|
|
195
339
|
long current_cpu_time_ns = cpu_time_now_ns(thread_context);
|
|
196
340
|
|
|
197
|
-
long cpu_time_elapsed_ns =
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
341
|
+
long cpu_time_elapsed_ns = update_time_since_previous_sample(
|
|
342
|
+
&thread_context->cpu_time_at_previous_sample_ns,
|
|
343
|
+
current_cpu_time_ns,
|
|
344
|
+
thread_context->gc_tracking.cpu_time_at_start_ns,
|
|
345
|
+
IS_NOT_WALL_TIME
|
|
346
|
+
);
|
|
347
|
+
long wall_time_elapsed_ns = update_time_since_previous_sample(
|
|
348
|
+
&thread_context->wall_time_at_previous_sample_ns,
|
|
349
|
+
current_wall_time_ns,
|
|
350
|
+
thread_context->gc_tracking.wall_time_at_start_ns,
|
|
351
|
+
IS_WALL_TIME
|
|
352
|
+
);
|
|
201
353
|
|
|
202
354
|
int64_t metric_values[ENABLED_VALUE_TYPES_COUNT] = {0};
|
|
203
355
|
|
|
@@ -205,26 +357,12 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
|
|
|
205
357
|
metric_values[CPU_SAMPLES_VALUE_POS] = 1;
|
|
206
358
|
metric_values[WALL_TIME_VALUE_POS] = wall_time_elapsed_ns;
|
|
207
359
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
int label_count = 1 + (have_thread_name ? 1 : 0);
|
|
212
|
-
ddprof_ffi_Label labels[label_count];
|
|
213
|
-
|
|
214
|
-
labels[0] = (ddprof_ffi_Label) {.key = DDPROF_FFI_CHARSLICE_C("thread id"), .str = thread_context->thread_id_char_slice};
|
|
215
|
-
if (have_thread_name) {
|
|
216
|
-
labels[1] = (ddprof_ffi_Label) {
|
|
217
|
-
.key = DDPROF_FFI_CHARSLICE_C("thread name"),
|
|
218
|
-
.str = char_slice_from_ruby_string(thread_name)
|
|
219
|
-
};
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
sample_thread(
|
|
360
|
+
trigger_sample_for_thread(
|
|
361
|
+
state,
|
|
223
362
|
thread,
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
(ddprof_ffi_Slice_label) {.ptr = labels, .len = label_count}
|
|
363
|
+
thread_context,
|
|
364
|
+
(ddog_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
|
|
365
|
+
SAMPLE_REGULAR
|
|
228
366
|
);
|
|
229
367
|
}
|
|
230
368
|
|
|
@@ -238,6 +376,226 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
|
|
|
238
376
|
return Qnil;
|
|
239
377
|
}
|
|
240
378
|
|
|
379
|
+
// This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
|
|
380
|
+
// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
|
|
381
|
+
// create a stack sample that blames the cpu/wall time spent from now until the end of the garbage collector work.
|
|
382
|
+
//
|
|
383
|
+
// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
|
|
384
|
+
// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
|
|
385
|
+
// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
|
|
386
|
+
//
|
|
387
|
+
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
|
388
|
+
// Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
|
|
389
|
+
void cpu_and_wall_time_collector_on_gc_start(VALUE self_instance) {
|
|
390
|
+
struct cpu_and_wall_time_collector_state *state;
|
|
391
|
+
if (!rb_typeddata_is_kind_of(self_instance, &cpu_and_wall_time_collector_typed_data)) return;
|
|
392
|
+
// This should never fail if the above check passes
|
|
393
|
+
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
|
|
394
|
+
|
|
395
|
+
struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
|
|
396
|
+
|
|
397
|
+
// If there was no previously-existing context for this thread, we won't allocate one (see safety). For now we just drop
|
|
398
|
+
// the GC sample, under the assumption that "a thread that is so new that we never sampled it even once before it triggers
|
|
399
|
+
// GC" is a rare enough case that we can just ignore it.
|
|
400
|
+
// We can always improve this later if we find that this happens often (and we have the counter to help us figure that out)!
|
|
401
|
+
if (thread_context == NULL) {
|
|
402
|
+
state->stats.gc_samples_missed_due_to_missing_context++;
|
|
403
|
+
return;
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
// If these fields are set, there's an existing GC sample that still needs to be written out by `sample_after_gc`.
|
|
407
|
+
//
|
|
408
|
+
// When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
|
|
409
|
+
// called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
|
|
410
|
+
// before we can actually run that method.
|
|
411
|
+
//
|
|
412
|
+
// We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
|
|
413
|
+
// `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
|
|
414
|
+
// then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
|
|
415
|
+
// there was a single, longer GC period.
|
|
416
|
+
if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
|
|
417
|
+
thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
|
|
418
|
+
|
|
419
|
+
// Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
|
|
420
|
+
thread_context->gc_tracking.wall_time_at_start_ns = wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
421
|
+
thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
// This function gets called when Ruby has finished running the Garbage Collector on the current thread.
|
|
425
|
+
// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
|
|
426
|
+
// create a stack sample that blames the cpu/wall time spent from the start of garbage collector work until now.
|
|
427
|
+
//
|
|
428
|
+
// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
|
|
429
|
+
// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
|
|
430
|
+
// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
|
|
431
|
+
//
|
|
432
|
+
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
|
433
|
+
// Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
|
|
434
|
+
void cpu_and_wall_time_collector_on_gc_finish(VALUE self_instance) {
|
|
435
|
+
struct cpu_and_wall_time_collector_state *state;
|
|
436
|
+
if (!rb_typeddata_is_kind_of(self_instance, &cpu_and_wall_time_collector_typed_data)) return;
|
|
437
|
+
// This should never fail if the above check passes
|
|
438
|
+
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
|
|
439
|
+
|
|
440
|
+
struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
|
|
441
|
+
|
|
442
|
+
// If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
|
|
443
|
+
// how often this happens -- see on_gc_start.
|
|
444
|
+
if (thread_context == NULL) return;
|
|
445
|
+
|
|
446
|
+
if (thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME &&
|
|
447
|
+
thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) {
|
|
448
|
+
// If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
|
|
449
|
+
// context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
|
|
450
|
+
// do nothing.
|
|
451
|
+
return;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
// Here we record the wall-time second and in on_gc_start we record it first to avoid having wall-time be slightly < cpu-time
|
|
455
|
+
thread_context->gc_tracking.cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
|
|
456
|
+
thread_context->gc_tracking.wall_time_at_finish_ns = wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
// This function gets called shortly after Ruby has finished running the Garbage Collector.
// It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
// GC-related tracking.
//
// Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
// and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
// set on their context.
//
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
// Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
// Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
// Assumption 4: This function is called from the main Ractor (if Ruby has support for Ractors).
VALUE cpu_and_wall_time_collector_sample_after_gc(VALUE self_instance) {
  struct cpu_and_wall_time_collector_state *state;
  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);

  VALUE threads = ddtrace_thread_list();
  bool sampled_any_thread = false;

  const long thread_count = RARRAY_LEN(threads);
  for (long i = 0; i < thread_count; i++) {
    VALUE thread = RARRAY_AREF(threads, i);
    // Allocation is allowed here (see Assumption 3), so unlike on_gc_finish we can create missing contexts
    struct per_thread_context *thread_context = get_or_create_context_for(thread, state);

    if (
      thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME ||
      thread_context->gc_tracking.cpu_time_at_finish_ns == INVALID_TIME ||
      thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
      thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
    ) continue; // Ignore threads with no/incomplete garbage collection data

    sampled_any_thread = true;

    long gc_cpu_time_elapsed_ns =
      thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
    long gc_wall_time_elapsed_ns =
      thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;

    // We don't expect non-wall time to go backwards, so let's flag this as a bug
    // NOTE(review): rb_raise here aborts the loop, leaving later threads' gc_tracking values set; they would only be
    // picked up on a subsequent call -- confirm this is the intended behavior for this "should never happen" path.
    if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
    // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
    // was a bug.
    // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
    // https://github.com/DataDog/dd-trace-rb/pull/2336.
    if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;

    if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
      // Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
      // come up with a crazy value for the frame
      rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
    }

    // One slot per enabled value type; slots not set below stay 0
    int64_t metric_values[ENABLED_VALUE_TYPES_COUNT] = {0};

    metric_values[CPU_TIME_VALUE_POS] = gc_cpu_time_elapsed_ns;
    metric_values[CPU_SAMPLES_VALUE_POS] = 1;
    metric_values[WALL_TIME_VALUE_POS] = gc_wall_time_elapsed_ns;

    trigger_sample_for_thread(
      state,
      thread,
      thread_context,
      (ddog_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
      SAMPLE_IN_GC
    );

    // Mark thread as no longer in GC
    thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
    thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
    thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
    thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;

    // Update counters so that they won't include the time in GC during the next sample
    if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
      thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
    }
    if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
      thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
    }
  }

  if (sampled_any_thread) state->stats.gc_samples++;

  // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
  return Qnil;
}
|
|
545
|
+
|
|
546
|
+
static void trigger_sample_for_thread(
|
|
547
|
+
struct cpu_and_wall_time_collector_state *state,
|
|
548
|
+
VALUE thread,
|
|
549
|
+
struct per_thread_context *thread_context,
|
|
550
|
+
ddog_Slice_i64 metric_values_slice,
|
|
551
|
+
sample_type type
|
|
552
|
+
) {
|
|
553
|
+
int max_label_count =
|
|
554
|
+
1 + // thread id
|
|
555
|
+
1 + // thread name
|
|
556
|
+
2; // local root span id and span id
|
|
557
|
+
ddog_Label labels[max_label_count];
|
|
558
|
+
int label_pos = 0;
|
|
559
|
+
|
|
560
|
+
labels[label_pos++] = (ddog_Label) {
|
|
561
|
+
.key = DDOG_CHARSLICE_C("thread id"),
|
|
562
|
+
.str = thread_context->thread_id_char_slice
|
|
563
|
+
};
|
|
564
|
+
|
|
565
|
+
VALUE thread_name = thread_name_for(thread);
|
|
566
|
+
if (thread_name != Qnil) {
|
|
567
|
+
labels[label_pos++] = (ddog_Label) {
|
|
568
|
+
.key = DDOG_CHARSLICE_C("thread name"),
|
|
569
|
+
.str = char_slice_from_ruby_string(thread_name)
|
|
570
|
+
};
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
struct trace_identifiers trace_identifiers_result = {.valid = false};
|
|
574
|
+
trace_identifiers_for(state, thread, &trace_identifiers_result);
|
|
575
|
+
|
|
576
|
+
if (trace_identifiers_result.valid) {
|
|
577
|
+
labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .str = trace_identifiers_result.local_root_span_id};
|
|
578
|
+
labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("span id"), .str = trace_identifiers_result.span_id};
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
// The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I
|
|
582
|
+
// forgot to update max_label_count" in the future, we've also added this validation.
|
|
583
|
+
// @ivoanjo: I wonder if C compilers are smart enough to statically prove when this check never triggers happens and
|
|
584
|
+
// remove it entirely.
|
|
585
|
+
if (label_pos > max_label_count) {
|
|
586
|
+
rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
sample_thread(
|
|
590
|
+
thread,
|
|
591
|
+
state->sampling_buffer,
|
|
592
|
+
state->recorder_instance,
|
|
593
|
+
metric_values_slice,
|
|
594
|
+
(ddog_Slice_label) {.ptr = labels, .len = label_pos},
|
|
595
|
+
type
|
|
596
|
+
);
|
|
597
|
+
}
|
|
598
|
+
|
|
241
599
|
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
|
|
242
600
|
// It SHOULD NOT be used for other purposes.
|
|
243
601
|
static VALUE _native_thread_list(DDTRACE_UNUSED VALUE _self) {
|
|
@@ -259,15 +617,32 @@ static struct per_thread_context *get_or_create_context_for(VALUE thread, struct
|
|
|
259
617
|
return thread_context;
|
|
260
618
|
}
|
|
261
619
|
|
|
620
|
+
// Lookup-only counterpart to get_or_create_context_for: returns the existing context for `thread`,
// or NULL if none is tracked. Never allocates, so it is safe to call from allocation-forbidden paths.
static struct per_thread_context *get_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state) {
  st_data_t stored_context = 0;

  if (!st_lookup(state->hash_map_per_thread_context, (st_data_t) thread, &stored_context)) return NULL;

  return (struct per_thread_context *) stored_context;
}
|
|
630
|
+
|
|
262
631
|
// Initializes a freshly-allocated per_thread_context entry for `thread`. All time fields start as INVALID_TIME
// and are filled in later by the sampling and GC-tracking code paths.
static void initialize_context(VALUE thread, struct per_thread_context *thread_context) {
  // Render the "<native thread id> (<ruby thread object id>)" string once at creation, so later sampling can reuse it
  snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%"PRIu64" (%lu)", native_thread_id_for(thread), (unsigned long) thread_id_for(thread));
  // The slice points into the buffer above, which lives as long as the context itself
  thread_context->thread_id_char_slice = (ddog_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};

  thread_context->thread_cpu_time_id = thread_cpu_time_id_for(thread);

  // These will get initialized during actual sampling
  thread_context->cpu_time_at_previous_sample_ns = INVALID_TIME;
  thread_context->wall_time_at_previous_sample_ns = INVALID_TIME;

  // These will only be used during a GC operation
  thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
  thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
  thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
  thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
}
|
|
272
647
|
|
|
273
648
|
static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
|
|
@@ -279,7 +654,10 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
|
|
|
279
654
|
// Update this when modifying state struct
|
|
280
655
|
rb_str_concat(result, rb_sprintf(" hash_map_per_thread_context=%"PRIsVALUE, per_thread_context_st_table_as_ruby_hash(state)));
|
|
281
656
|
rb_str_concat(result, rb_sprintf(" recorder_instance=%"PRIsVALUE, state->recorder_instance));
|
|
657
|
+
VALUE tracer_context_key = state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY ? Qnil : ID2SYM(state->tracer_context_key);
|
|
658
|
+
rb_str_concat(result, rb_sprintf(" tracer_context_key=%+"PRIsVALUE, tracer_context_key));
|
|
282
659
|
rb_str_concat(result, rb_sprintf(" sample_count=%u", state->sample_count));
|
|
660
|
+
rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state)));
|
|
283
661
|
|
|
284
662
|
return result;
|
|
285
663
|
}
|
|
@@ -304,13 +682,29 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
|
|
|
304
682
|
ID2SYM(rb_intern("thread_cpu_time_id_valid?")), /* => */ thread_context->thread_cpu_time_id.valid ? Qtrue : Qfalse,
|
|
305
683
|
ID2SYM(rb_intern("thread_cpu_time_id")), /* => */ CLOCKID2NUM(thread_context->thread_cpu_time_id.clock_id),
|
|
306
684
|
ID2SYM(rb_intern("cpu_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->cpu_time_at_previous_sample_ns),
|
|
307
|
-
ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns)
|
|
685
|
+
ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),
|
|
686
|
+
|
|
687
|
+
ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
|
|
688
|
+
ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
|
|
689
|
+
ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
|
|
690
|
+
ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
|
|
308
691
|
};
|
|
309
692
|
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);
|
|
310
693
|
|
|
311
694
|
return ST_CONTINUE;
|
|
312
695
|
}
|
|
313
696
|
|
|
697
|
+
static VALUE stats_as_ruby_hash(struct cpu_and_wall_time_collector_state *state) {
|
|
698
|
+
// Update this when modifying state struct (stats inner struct)
|
|
699
|
+
VALUE stats_as_hash = rb_hash_new();
|
|
700
|
+
VALUE arguments[] = {
|
|
701
|
+
ID2SYM(rb_intern("gc_samples")), /* => */ INT2NUM(state->stats.gc_samples),
|
|
702
|
+
ID2SYM(rb_intern("gc_samples_missed_due_to_missing_context")), /* => */ INT2NUM(state->stats.gc_samples_missed_due_to_missing_context),
|
|
703
|
+
};
|
|
704
|
+
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
|
|
705
|
+
return stats_as_hash;
|
|
706
|
+
}
|
|
707
|
+
|
|
314
708
|
// Walks the per-thread context map, delegating each entry to the `remove_if_dead_thread` st_foreach callback
// (which, per its name, is expected to drop entries for threads that are no longer alive).
static void remove_context_for_dead_threads(struct cpu_and_wall_time_collector_state *state) {
  st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
}
|
|
@@ -336,24 +730,58 @@ static VALUE _native_per_thread_context(DDTRACE_UNUSED VALUE _self, VALUE collec
|
|
|
336
730
|
return per_thread_context_st_table_as_ruby_hash(state);
|
|
337
731
|
}
|
|
338
732
|
|
|
339
|
-
static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns) {
|
|
733
|
+
// Computes how much cpu/wall time elapsed since the previous sample of a thread, and advances
// `*time_at_previous_sample_ns` accordingly. When the thread is inside a GC cycle
// (`gc_start_time_ns != INVALID_TIME`), only the time *before* GC started is attributed here -- the
// GC portion is attributed separately by `sample_after_gc`.
static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time) {
  // A thread with no previous sample starts "now", so its first delta comes out as zero
  if (*time_at_previous_sample_ns == INVALID_TIME) *time_at_previous_sample_ns = current_time_ns;

  long elapsed_time_ns;

  if (gc_start_time_ns != INVALID_TIME) { // thread is currently doing GC
    // If the previous sample already happened at-or-after the start of GC, there's nothing left to attribute here;
    // otherwise, attribute only the stretch between the previous sample and the start of GC.
    elapsed_time_ns =
      (gc_start_time_ns <= *time_at_previous_sample_ns) ? 0 : (gc_start_time_ns - *time_at_previous_sample_ns);
    // Remaining time (from gc_start_time_ns to current_time_ns) will be accounted for inside `sample_after_gc`
    *time_at_previous_sample_ns = gc_start_time_ns;
  } else {
    // No GC in progress: attribute everything since the previous sample
    elapsed_time_ns = current_time_ns - *time_at_previous_sample_ns;
    *time_at_previous_sample_ns = current_time_ns;
  }

  if (elapsed_time_ns < 0) {
    if (!is_wall_time) {
      // We don't expect non-wall time to go backwards, so let's flag this as a bug
      rb_raise(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples");
    }
    // Wall-time can legitimately go backwards (e.g. when the system clock gets set), and has also been observed
    // going backwards spuriously on macOS -- see https://github.com/DataDog/dd-trace-rb/pull/2336 -- so clamp to 0
    // instead of raising.
    elapsed_time_ns = 0;
  }

  return elapsed_time_ns;
}
|
|
348
771
|
|
|
349
|
-
|
|
772
|
+
// Safety: This function is assumed never to raise exceptions by callers when raise_on_failure == false
|
|
773
|
+
static long wall_time_now_ns(bool raise_on_failure) {
|
|
350
774
|
struct timespec current_monotonic;
|
|
351
775
|
|
|
352
|
-
if (clock_gettime(CLOCK_MONOTONIC, ¤t_monotonic) != 0)
|
|
776
|
+
if (clock_gettime(CLOCK_MONOTONIC, ¤t_monotonic) != 0) {
|
|
777
|
+
if (raise_on_failure) rb_sys_fail("Failed to read CLOCK_MONOTONIC");
|
|
778
|
+
else return 0;
|
|
779
|
+
}
|
|
353
780
|
|
|
354
781
|
return current_monotonic.tv_nsec + (current_monotonic.tv_sec * 1000 * 1000 * 1000);
|
|
355
782
|
}
|
|
356
783
|
|
|
784
|
+
// Safety: This function is assumed never to raise exceptions by callers
|
|
357
785
|
static long cpu_time_now_ns(struct per_thread_context *thread_context) {
|
|
358
786
|
thread_cpu_time cpu_time = thread_cpu_time_for(thread_context->thread_cpu_time_id);
|
|
359
787
|
|
|
@@ -388,3 +816,50 @@ VALUE enforce_cpu_and_wall_time_collector_instance(VALUE object) {
|
|
|
388
816
|
Check_TypedStruct(object, &cpu_and_wall_time_collector_typed_data);
|
|
389
817
|
return object;
|
|
390
818
|
}
|
|
819
|
+
|
|
820
|
+
// This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
// It SHOULD NOT be used for other purposes.
//
// Returns the collector's stats counters as a Ruby hash (see `stats_as_ruby_hash`).
static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
  struct cpu_and_wall_time_collector_state *state;
  TypedData_Get_Struct(collector_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);

  return stats_as_ruby_hash(state);
}
|
|
830
|
+
|
|
831
|
+
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
|
|
832
|
+
static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
|
|
833
|
+
if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
|
|
834
|
+
|
|
835
|
+
VALUE current_context = rb_thread_local_aref(thread, state->tracer_context_key);
|
|
836
|
+
if (current_context == Qnil) return;
|
|
837
|
+
|
|
838
|
+
VALUE active_trace = rb_ivar_get(current_context, at_active_trace_id /* @active_trace */);
|
|
839
|
+
if (active_trace == Qnil) return;
|
|
840
|
+
|
|
841
|
+
VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
|
|
842
|
+
VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
|
|
843
|
+
if (root_span == Qnil || active_span == Qnil) return;
|
|
844
|
+
|
|
845
|
+
VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
|
|
846
|
+
VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
|
|
847
|
+
if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
|
|
848
|
+
|
|
849
|
+
unsigned long long local_root_span_id = NUM2ULL(numeric_local_root_span_id);
|
|
850
|
+
unsigned long long span_id = NUM2ULL(numeric_span_id);
|
|
851
|
+
|
|
852
|
+
snprintf(trace_identifiers_result->local_root_span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", local_root_span_id);
|
|
853
|
+
snprintf(trace_identifiers_result->span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", span_id);
|
|
854
|
+
|
|
855
|
+
trace_identifiers_result->local_root_span_id = (ddog_CharSlice) {
|
|
856
|
+
.ptr = trace_identifiers_result->local_root_span_id_buffer,
|
|
857
|
+
.len = strlen(trace_identifiers_result->local_root_span_id_buffer)
|
|
858
|
+
};
|
|
859
|
+
trace_identifiers_result->span_id = (ddog_CharSlice) {
|
|
860
|
+
.ptr = trace_identifiers_result->span_id_buffer,
|
|
861
|
+
.len = strlen(trace_identifiers_result->span_id_buffer)
|
|
862
|
+
};
|
|
863
|
+
|
|
864
|
+
trace_identifiers_result->valid = true;
|
|
865
|
+
}
|