ddtrace 1.20.0 → 1.22.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +115 -1
- data/LICENSE-3rdparty.csv +1 -1
- data/bin/ddprofrb +15 -0
- data/bin/ddtracerb +3 -1
- data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
- data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +238 -61
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.c +145 -72
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.h +17 -5
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +97 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +2 -2
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.c +45 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.h +7 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +4 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +14 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +1 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.c +10 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +2 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +7 -9
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +43 -13
- data/lib/datadog/appsec/event.rb +1 -1
- data/lib/datadog/auto_instrument.rb +3 -0
- data/lib/datadog/core/configuration/components.rb +7 -6
- data/lib/datadog/core/configuration/option.rb +8 -6
- data/lib/datadog/core/configuration/settings.rb +130 -63
- data/lib/datadog/core/configuration.rb +20 -4
- data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
- data/lib/datadog/core/environment/git.rb +25 -0
- data/lib/datadog/core/environment/identity.rb +18 -48
- data/lib/datadog/core/environment/platform.rb +7 -1
- data/lib/datadog/core/git/ext.rb +2 -23
- data/lib/datadog/core/remote/client/capabilities.rb +1 -1
- data/lib/datadog/core/remote/negotiation.rb +2 -2
- data/lib/datadog/core/remote/transport/http/config.rb +1 -1
- data/lib/datadog/core/remote/worker.rb +7 -4
- data/lib/datadog/core/telemetry/client.rb +18 -10
- data/lib/datadog/core/telemetry/emitter.rb +9 -13
- data/lib/datadog/core/telemetry/event.rb +247 -57
- data/lib/datadog/core/telemetry/ext.rb +1 -0
- data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
- data/lib/datadog/core/telemetry/http/ext.rb +4 -1
- data/lib/datadog/core/telemetry/http/transport.rb +9 -4
- data/lib/datadog/core/telemetry/request.rb +59 -0
- data/lib/datadog/core/transport/ext.rb +2 -0
- data/lib/datadog/core/utils/url.rb +25 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +31 -0
- data/lib/datadog/profiling/collectors/info.rb +101 -0
- data/lib/datadog/profiling/component.rb +34 -28
- data/lib/datadog/profiling/exporter.rb +19 -5
- data/lib/datadog/profiling/ext.rb +2 -0
- data/lib/datadog/profiling/flush.rb +6 -3
- data/lib/datadog/profiling/http_transport.rb +5 -1
- data/lib/datadog/profiling/load_native_extension.rb +19 -6
- data/lib/datadog/profiling/native_extension.rb +1 -1
- data/lib/datadog/profiling/tag_builder.rb +5 -0
- data/lib/datadog/profiling/tasks/exec.rb +3 -3
- data/lib/datadog/profiling/tasks/help.rb +3 -3
- data/lib/datadog/profiling.rb +13 -2
- data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
- data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
- data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
- data/lib/datadog/tracing/contrib/configurable.rb +1 -1
- data/lib/datadog/tracing/contrib/extensions.rb +6 -2
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
- data/lib/datadog/tracing/sampling/matcher.rb +23 -3
- data/lib/datadog/tracing/sampling/rule.rb +7 -2
- data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
- data/lib/datadog/tracing/trace_operation.rb +1 -2
- data/lib/datadog/tracing/transport/http.rb +1 -0
- data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
- data/lib/ddtrace/version.rb +1 -1
- metadata +55 -62
- data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
- data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
- data/lib/datadog/core/telemetry/collector.rb +0 -250
- data/lib/datadog/core/telemetry/v1/app_event.rb +0 -59
- data/lib/datadog/core/telemetry/v1/application.rb +0 -92
- data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
- data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
- data/lib/datadog/core/telemetry/v1/host.rb +0 -59
- data/lib/datadog/core/telemetry/v1/install_signature.rb +0 -38
- data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
- data/lib/datadog/core/telemetry/v1/product.rb +0 -36
- data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
- data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
- data/lib/datadog/core/telemetry/v2/request.rb +0 -29
- data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +0 -0
data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.c

@@ -9,16 +9,23 @@
 #define BASE_SAMPLING_INTERVAL 50
 
 #define ADJUSTMENT_WINDOW_NS SECONDS_AS_NS(1)
+#define ADJUSTMENT_WINDOW_SAMPLES 100
+// Any average sampling times above this value will be clamped to this value.
+// In practice, this limits the budget consumption of a single sample to that of an adjustment window,
+// thus aiming for a minimum sample rate of once per adjustment window (dependent on actual event rate).
+// NOTE: This is our main strategy to deal with timing hiccups such as those that can be caused by
+// suspensions, system overloads and other things that could lead to arbitrarily big sampling
+// time measurements.
+#define MAX_ALLOWED_SAMPLING_NS(target_overhead) (long) (ADJUSTMENT_WINDOW_NS * target_overhead / 100.)
 
 #define EMA_SMOOTHING_FACTOR 0.6
-#define EXP_MOVING_AVERAGE(last, avg, first) first ? last : (1-EMA_SMOOTHING_FACTOR) * avg + EMA_SMOOTHING_FACTOR * last
 
-void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name) {
+void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns) {
   sampler->debug_name = debug_name;
-  discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT);
+  discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT, now_ns);
 }
 
-static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
+void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
   const char *debug_name = sampler->debug_name;
   double target_overhead = sampler->target_overhead;
   (*sampler) = (discrete_dynamic_sampler) {
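For reference, here is the clamp macro from this hunk applied to concrete numbers, as a standalone sketch. `SECONDS_AS_NS(1)` is expanded by hand to `1000000000`, and the 2% overhead target is an assumed example value, not something taken from the diff:

```c
#include <stdio.h>

// Same macro as in the hunk above, with ADJUSTMENT_WINDOW_NS expanded to 1 second in nanoseconds.
#define ADJUSTMENT_WINDOW_NS 1000000000L
#define MAX_ALLOWED_SAMPLING_NS(target_overhead) (long) (ADJUSTMENT_WINDOW_NS * target_overhead / 100.)

int main(void) {
  // With a 2% overhead target, no single sample may register more than 20ms of sampling time,
  // i.e. at most one full adjustment window's worth of sampling budget.
  printf("%ld\n", MAX_ALLOWED_SAMPLING_NS(2.0)); // => 20000000
  return 0;
}
```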
@@ -31,6 +38,7 @@ static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, l
     // This fake readjustment will use a hardcoded sampling interval
     .sampling_interval = BASE_SAMPLING_INTERVAL,
     .sampling_probability = 1.0 / BASE_SAMPLING_INTERVAL,
+    .max_sampling_time_ns = MAX_ALLOWED_SAMPLING_NS(target_overhead),
     // But we want to make sure we sample at least once in the next window so that our first
     // real readjustment has some notion of how heavy sampling is. Therefore, we'll make it so that
     // the next event is automatically sampled by artificially locating it in the interval threshold.
@@ -38,27 +46,17 @@ static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, l
   };
 }
 
-void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler) {
-  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-  _discrete_dynamic_sampler_reset(sampler, now);
-}
-
-static void _discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
+void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
   if (target_overhead <= 0 || target_overhead > 100) {
     rb_raise(rb_eArgError, "Target overhead must be a double between ]0,100] was %f", target_overhead);
   }
   sampler->target_overhead = target_overhead;
-  _discrete_dynamic_sampler_reset(sampler, now_ns);
-}
-
-void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead) {
-  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-  _discrete_dynamic_sampler_set_overhead_target_percentage(sampler, target_overhead, now);
+  return discrete_dynamic_sampler_reset(sampler, now_ns);
 }
 
 static void maybe_readjust(discrete_dynamic_sampler *sampler, long now);
 
-static bool _discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
+bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
   // For efficiency reasons we don't do true random sampling but rather systematic
   // sampling following a sample interval/skip. This can be biased and hide patterns
   // but the dynamic interval and rather indeterministic pattern of allocations in
@@ -77,12 +75,7 @@ static bool _discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sa
   return should_sample;
 }
 
-bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler) {
-  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-  return _discrete_dynamic_sampler_should_sample(sampler, now);
-}
-
-static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
+long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
   long last_sampling_time_ns = sampler->sample_start_time_ns == 0 ? 0 : long_max_of(0, now_ns - sampler->sample_start_time_ns);
   sampler->samples_since_last_readjustment++;
   sampler->sampling_time_since_last_readjustment_ns += last_sampling_time_ns;
@@ -94,11 +87,6 @@ static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sam
   return last_sampling_time_ns;
 }
 
-long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler) {
-  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-  return _discrete_dynamic_sampler_after_sample(sampler, now);
-}
-
 double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler) {
   return sampler->sampling_probability * 100.;
 }
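The pattern across these hunks is consistent: the internal `monotonic_wall_time_now_ns` wrappers are removed and every public entry point now takes `now_ns` from the caller. A minimal sketch of what that enables, assuming only the signatures declared in the updated header; the per-event and per-sample timings are made-up numbers:

```c
#include "collectors_discrete_dynamic_sampler.h"

// Drive the sampler with a synthetic clock instead of real monotonic time. This is
// essentially what the updated RSpec testing bindings do via their now_ns arguments.
static void simulate_one_thousand_events(discrete_dynamic_sampler *sampler) {
  long fake_now_ns = 1;
  discrete_dynamic_sampler_init(sampler, "example", fake_now_ns);

  for (int event = 0; event < 1000; event++) {
    fake_now_ns += 1000 * 1000; // pretend one event arrives per millisecond
    if (discrete_dynamic_sampler_should_sample(sampler, fake_now_ns)) {
      fake_now_ns += 50 * 1000; // pretend the sampling operation itself took 50us
      discrete_dynamic_sampler_after_sample(sampler, fake_now_ns);
    }
  }
}
```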
@@ -107,35 +95,66 @@ size_t discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sample
   return sampler->events_since_last_sample;
 }
 
+static double ewma_adj_window(double latest_value, double avg, long current_window_time_ns, bool is_first) {
+  if (is_first) {
+    return latest_value;
+  }
+
+  // We don't want samples coming from partial adjustment windows (e.g. preempted due to number of samples)
+  // to lead to quick "forgetting" of the past. Thus, we'll tweak the weight of this new value based on the
+  // size of the time window from which we gathered it in relation to our standard adjustment window time.
+  double fraction_of_full_window = double_min_of((double) current_window_time_ns / ADJUSTMENT_WINDOW_NS, 1);
+  double alpha = EMA_SMOOTHING_FACTOR * fraction_of_full_window;
+
+  return (1-alpha) * avg + alpha * latest_value;
+}
+
 static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
-  long window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
+  long this_window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
+
+  bool should_readjust_based_on_time = this_window_time_ns >= ADJUSTMENT_WINDOW_NS;
+  bool should_readjust_based_on_samples = sampler->samples_since_last_readjustment >= ADJUSTMENT_WINDOW_SAMPLES;
+
+  if (!should_readjust_based_on_time && !should_readjust_based_on_samples) {
+    // not enough time or samples have passed to perform a readjustment
+    return;
+  }
 
-  if (window_time_ns < ADJUSTMENT_WINDOW_NS) {
-    // not enough time has passed to perform a readjustment
+  if (this_window_time_ns == 0) {
+    // should not be possible given previous condition but lets protect against div by 0 below.
     return;
   }
 
   // If we got this far, lets recalculate our sampling params based on new observations
   bool first_readjustment = !sampler->has_completed_full_adjustment_window;
 
-  // Update our running average of events/sec with latest observation
-  sampler->events_per_ns = EXP_MOVING_AVERAGE(
-    (double) sampler->events_since_last_readjustment / window_time_ns,
+  // Update our running average of events/sec with latest observation.
+  sampler->events_per_ns = ewma_adj_window(
+    (double) sampler->events_since_last_readjustment / this_window_time_ns,
     sampler->events_per_ns,
+    this_window_time_ns,
     first_readjustment
   );
 
   // Update our running average of sampling time for a specific event
-  long sampling_window_time_ns = sampler->sampling_time_since_last_readjustment_ns;
-  long sampling_overshoot_time_ns = -1;
   if (sampler->samples_since_last_readjustment > 0) {
     // We can only update sampling-related stats if we actually sampled on the last window...
 
     // Lets update our average sampling time per event
-    long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampling_window_time_ns / sampler->samples_since_last_readjustment;
-    sampler->sampling_time_ns = EXP_MOVING_AVERAGE(
+    long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampler->sampling_time_since_last_readjustment_ns / sampler->samples_since_last_readjustment;
+    if (avg_sampling_time_in_window_ns > sampler->max_sampling_time_ns) {
+      // If the average sampling time in the previous window was deemed unnacceptable, clamp it to the
+      // maximum acceptable value and register this operation in our counter.
+      // NOTE: This is important so that events like suspensions or system overloads do not lead us to
+      // learn arbitrarily big sampling times which may then result in us not sampling anything
+      // for very long periods of time.
+      avg_sampling_time_in_window_ns = sampler->max_sampling_time_ns;
+      sampler->sampling_time_clamps++;
+    }
+    sampler->sampling_time_ns = ewma_adj_window(
      avg_sampling_time_in_window_ns,
      sampler->sampling_time_ns,
+      this_window_time_ns,
      first_readjustment
    );
  }
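To make the partial-window weighting concrete, here is `ewma_adj_window` copied out into a standalone snippet with illustrative numbers (`double_min_of` is inlined so it compiles on its own):

```c
#include <stdbool.h>
#include <stdio.h>

#define ADJUSTMENT_WINDOW_NS 1000000000L
#define EMA_SMOOTHING_FACTOR 0.6

// Same logic as ewma_adj_window in the hunk above, with double_min_of inlined.
static double ewma_adj_window(double latest_value, double avg, long current_window_time_ns, bool is_first) {
  if (is_first) return latest_value;
  double fraction_of_full_window = (double) current_window_time_ns / ADJUSTMENT_WINDOW_NS;
  if (fraction_of_full_window > 1) fraction_of_full_window = 1;
  double alpha = EMA_SMOOTHING_FACTOR * fraction_of_full_window;
  return (1 - alpha) * avg + alpha * latest_value;
}

int main(void) {
  // Full 1s window: alpha = 0.6, so a spike from avg=100 to latest=1000 pulls the average to 640.
  printf("%f\n", ewma_adj_window(1000, 100, 1000000000L, false)); // => 640.000000
  // Partial 100ms window (readjustment forced early by ADJUSTMENT_WINDOW_SAMPLES):
  // alpha = 0.6 * 0.1 = 0.06, so the same spike only pulls the average to 154.
  printf("%f\n", ewma_adj_window(1000, 100, 100000000L, false)); // => 154.000000
  return 0;
}
```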
@@ -145,21 +164,21 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   // NOTE: Updating this even when no samples occur is a conscious choice which enables us to cooldown extreme adjustments over time.
   // If we didn't do this, whenever a big spike caused target_overhead_adjustment to equal target_overhead, we'd get stuck
   // in a "probability = 0" state.
-  long reference_target_sampling_time_ns = window_time_ns * (sampler->target_overhead / 100.);
+  long this_window_sampling_target_time_ns = this_window_time_ns * (sampler->target_overhead / 100.);
   // Overshoot by definition is always >= 0. < 0 would be undershooting!
-  sampling_overshoot_time_ns = long_max_of(0, sampling_window_time_ns - reference_target_sampling_time_ns);
+  long this_window_sampling_overshoot_time_ns = long_max_of(0, sampler->sampling_time_since_last_readjustment_ns - this_window_sampling_target_time_ns);
   // Our overhead adjustment should always be between [-target_overhead, 0]. Higher adjustments would lead to negative overhead targets
   // which don't make much sense.
-  double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead,
-  sampler->target_overhead_adjustment = EXP_MOVING_AVERAGE(
+  double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead, this_window_sampling_overshoot_time_ns * 100. / this_window_time_ns);
+  sampler->target_overhead_adjustment = ewma_adj_window(
     last_target_overhead_adjustment,
     sampler->target_overhead_adjustment,
+    this_window_time_ns,
     first_readjustment
   );
 
   // Apply our overhead adjustment to figure out our real targets for this readjustment.
   double target_overhead = double_max_of(0, sampler->target_overhead + sampler->target_overhead_adjustment);
-  long target_sampling_time_ns = window_time_ns * (target_overhead / 100.);
 
   // Recalculate target sampling probability so that the following 2 hold:
   // * window_time_ns = working_window_time_ns + sampling_window_time_ns
@@ -175,11 +194,13 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   //                        ┌─ assuming no events will be emitted during sampling
   //                        │
   //   = events_per_ns * working_window_time_ns * sampling_probability * sampling_time_ns
+  //   = events_per_ns * (window_time_ns - sampling_window_time_ns) * sampling_probability * sampling_time_ns
   //
   // Re-ordering for sampling_probability and solving for the upper-bound of sampling_window_time_ns:
   //
   //   sampling_window_time_ns = window_time_ns * target_overhead / 100
-  //   sampling_probability =
+  //   sampling_probability = (sampling_window_time_ns) / (events_per_ns * sampling_time_ns * (window_time_ns - sampling_window_time_ns))
+  //                        = (window_time_ns * target_overhead / 100) / (events_per_ns * sampling_time_ns * window_time_ns * (1 - target_overhead / 100))
   //
   // Which you can intuitively understand as:
   //
@@ -190,16 +211,22 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   //   then probability will be > 1 (but we should clamp to 1 since probabilities higher than 1 don't make sense).
   // * If app is eventing a lot or our sampling overhead is big, then as time_to_sample_all_events_ns grows, sampling_probability will
   //   tend to 0.
-
-
-
-
-
+  //
+  // In fact, we can simplify the equation further since the `window_time_ns` components cancel each other out:
+  //
+  //   sampling_probability = (target_overhead / 100) / (events_per_ns * sampling_time_ns * (1 - target_overhead / 100))
+  //                        = max_sampling_overhead / avg_sampling_overhead
+
+  double max_sampling_overhead = target_overhead / 100.;
+  double avg_sampling_overhead = sampler->events_per_ns * sampler->sampling_time_ns * (1 - max_sampling_overhead);
+
+  if (max_sampling_overhead == 0) {
+    // if we aren't allowed any sampling overhead at all, probability has to be 0
     sampler->sampling_probability = 0;
   } else {
     // otherwise apply the formula described above (protecting against div by 0)
-    sampler->sampling_probability =
-      double_min_of(1.,
+    sampler->sampling_probability = avg_sampling_overhead == 0 ? 1. :
+      double_min_of(1., max_sampling_overhead / avg_sampling_overhead);
   }
 
   // Doing true random selection would involve "tossing a coin" on every allocation. Lets do systematic sampling instead so that our
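Plugging illustrative numbers into the simplified formula from this hunk (all values assumed, not taken from the diff):

```c
#include <stdio.h>

int main(void) {
  // Assume 10000 events/sec, ~1ms of sampling time per sample, and a 2% overhead target.
  double target_overhead = 2.0;
  double events_per_ns = 10000 / 1e9; // events per nanosecond
  double sampling_time_ns = 1e6;      // 1ms per sample

  double max_sampling_overhead = target_overhead / 100.;                                         // 0.02
  double avg_sampling_overhead = events_per_ns * sampling_time_ns * (1 - max_sampling_overhead); // 9.8: sampling everything would cost ~9.8x the working time

  double sampling_probability = max_sampling_overhead / avg_sampling_overhead; // ~0.00204
  printf("%f\n", sampling_probability); // => sample roughly 1 in every 490 events
  return 0;
}
```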
@@ -225,26 +252,34 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   double samples_in_60s = allocs_in_60s * sampler->sampling_probability;
   double expected_total_sampling_time_in_60s =
     samples_in_60s * sampler->sampling_time_ns / 1e9;
-  double
+  double num_this_windows_in_60s = 60 * 1e9 / this_window_time_ns;
+  double real_total_sampling_time_in_60s = sampler->sampling_time_since_last_readjustment_ns * num_this_windows_in_60s / 1e9;
 
-
+  const char* readjustment_reason = should_readjust_based_on_time ? "time" : "samples";
+
+  fprintf(stderr, "[dds.%s] readjusting due to %s...\n", sampler->debug_name, readjustment_reason);
+  fprintf(stderr, "events_since_last_readjustment=%ld\n", sampler->events_since_last_readjustment);
   fprintf(stderr, "samples_since_last_readjustment=%ld\n", sampler->samples_since_last_readjustment);
-  fprintf(stderr, "
+  fprintf(stderr, "this_window_time=%ld\n", this_window_time_ns);
+  fprintf(stderr, "this_window_sampling_time=%ld\n", sampler->sampling_time_since_last_readjustment_ns);
+  fprintf(stderr, "this_working_window_time=%ld\n", this_window_time_ns - sampler->sampling_time_since_last_readjustment_ns);
+  fprintf(stderr, "this_window_sampling_target_time=%ld\n", this_window_sampling_target_time_ns);
+  fprintf(stderr, "this_window_sampling_overshoot_time=%ld\n", this_window_sampling_overshoot_time_ns);
+  fprintf(stderr, "\n");
+  fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
+  fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
   fprintf(stderr, "events_per_sec=%f\n", sampler->events_per_ns * 1e9);
   fprintf(stderr, "sampling_time=%ld\n", sampler->sampling_time_ns);
-  fprintf(stderr, "
-  fprintf(stderr, "sampling_target_time=%ld\n", reference_target_sampling_time_ns);
-  fprintf(stderr, "sampling_overshoot_time=%ld\n", sampling_overshoot_time_ns);
-  fprintf(stderr, "working_window_time=%ld\n", working_window_time_ns);
+  fprintf(stderr, "avg_sampling_overhead=%f\n", avg_sampling_overhead * 100);
   fprintf(stderr, "sampling_interval=%zu\n", sampler->sampling_interval);
-  fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability);
+  fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability * 100);
+  fprintf(stderr, "\n");
   fprintf(stderr, "expected allocs in 60s=%f\n", allocs_in_60s);
   fprintf(stderr, "expected samples in 60s=%f\n", samples_in_60s);
   fprintf(stderr, "expected sampling time in 60s=%f (previous real=%f)\n", expected_total_sampling_time_in_60s, real_total_sampling_time_in_60s);
-  fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
-  fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
-  fprintf(stderr, "target_sampling_time=%ld\n", target_sampling_time_ns);
   fprintf(stderr, "expected max overhead in 60s=%f\n", target_overhead / 100.0 * 60);
+  fprintf(stderr, "\n");
+  fprintf(stderr, "sampling_time_clamps=%zu\n", sampler->sampling_time_clamps);
   fprintf(stderr, "-------\n");
 #endif
 
@@ -255,15 +290,34 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   sampler->has_completed_full_adjustment_window = true;
 }
 
+VALUE discrete_dynamic_sampler_state_snapshot(discrete_dynamic_sampler *sampler) {
+  VALUE arguments[] = {
+    ID2SYM(rb_intern("target_overhead")), /* => */ DBL2NUM(sampler->target_overhead),
+    ID2SYM(rb_intern("target_overhead_adjustment")), /* => */ DBL2NUM(sampler->target_overhead_adjustment),
+    ID2SYM(rb_intern("events_per_sec")), /* => */ DBL2NUM(sampler->events_per_ns * 1e9),
+    ID2SYM(rb_intern("sampling_time_ns")), /* => */ LONG2NUM(sampler->sampling_time_ns),
+    ID2SYM(rb_intern("sampling_interval")), /* => */ ULONG2NUM(sampler->sampling_interval),
+    ID2SYM(rb_intern("sampling_probability")), /* => */ DBL2NUM(sampler->sampling_probability * 100),
+    ID2SYM(rb_intern("events_since_last_readjustment")), /* => */ ULONG2NUM(sampler->events_since_last_readjustment),
+    ID2SYM(rb_intern("samples_since_last_readjustment")), /* => */ ULONG2NUM(sampler->samples_since_last_readjustment),
+    ID2SYM(rb_intern("max_sampling_time_ns")), /* => */ LONG2NUM(sampler->max_sampling_time_ns),
+    ID2SYM(rb_intern("sampling_time_clamps")), /* => */ ULONG2NUM(sampler->sampling_time_clamps),
+  };
+  VALUE hash = rb_hash_new();
+  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(hash, arguments[i], arguments[i+1]);
+  return hash;
+}
+
 // ---
 // Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.
 
 static VALUE _native_new(VALUE klass);
+static VALUE _native_initialize(VALUE self, VALUE now);
 static VALUE _native_reset(VALUE self, VALUE now);
 static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now);
 static VALUE _native_should_sample(VALUE self, VALUE now);
 static VALUE _native_after_sample(VALUE self, VALUE now);
-static VALUE
+static VALUE _native_state_snapshot(VALUE self);
 
 typedef struct sampler_state {
   discrete_dynamic_sampler sampler;
@@ -276,12 +330,15 @@ void collectors_discrete_dynamic_sampler_init(VALUE profiling_module) {
   VALUE sampler_class = rb_define_class_under(testing_module, "Sampler", rb_cObject);
 
   rb_define_alloc_func(sampler_class, _native_new);
+  // NOTE: Despite being native, we're using the normal ruby keyword to prevent having to write a whole
+  // new ruby file to simply proxy the initialization call.
+  rb_define_method(sampler_class, "initialize", _native_initialize, 1);
 
   rb_define_method(sampler_class, "_native_reset", _native_reset, 1);
   rb_define_method(sampler_class, "_native_set_overhead_target_percentage", _native_set_overhead_target_percentage, 2);
   rb_define_method(sampler_class, "_native_should_sample", _native_should_sample, 1);
   rb_define_method(sampler_class, "_native_after_sample", _native_after_sample, 1);
-  rb_define_method(sampler_class, "
+  rb_define_method(sampler_class, "_native_state_snapshot", _native_state_snapshot, 0);
 }
@@ -296,21 +353,37 @@ static const rb_data_type_t sampler_typed_data = {
 static VALUE _native_new(VALUE klass) {
   sampler_state *state = ruby_xcalloc(sizeof(sampler_state), 1);
 
-  discrete_dynamic_sampler_init(&state->sampler, "test sampler");
+  long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+  if (now_ns == 0) {
+    rb_raise(rb_eRuntimeError, "failed to get clock time");
+  }
+  discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns);
 
   return TypedData_Wrap_Struct(klass, &sampler_typed_data, state);
 }
 
-static VALUE _native_reset(VALUE self, VALUE now_ns) {
+static VALUE _native_initialize(VALUE self, VALUE now_ns) {
   ENFORCE_TYPE(now_ns, T_FIXNUM);
 
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  _discrete_dynamic_sampler_reset(&state->sampler, NUM2LONG(now_ns));
+  discrete_dynamic_sampler_init(&state->sampler, "test sampler", NUM2LONG(now_ns));
+
   return Qtrue;
 }
 
+static VALUE _native_reset(VALUE self, VALUE now_ns) {
+  ENFORCE_TYPE(now_ns, T_FIXNUM);
+
+  sampler_state *state;
+  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
+
+  discrete_dynamic_sampler_reset(&state->sampler, NUM2LONG(now_ns));
+
+  return Qnil;
+}
+
 static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now_ns) {
   ENFORCE_TYPE(target_overhead, T_FLOAT);
   ENFORCE_TYPE(now_ns, T_FIXNUM);
@@ -318,7 +391,7 @@ static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_ove
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  _discrete_dynamic_sampler_set_overhead_target_percentage(&state->sampler, NUM2DBL(target_overhead), NUM2LONG(now_ns));
+  discrete_dynamic_sampler_set_overhead_target_percentage(&state->sampler, NUM2DBL(target_overhead), NUM2LONG(now_ns));
 
   return Qnil;
 }
@@ -329,7 +402,7 @@ VALUE _native_should_sample(VALUE self, VALUE now_ns) {
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  return _discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
+  return discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
 }
 
 VALUE _native_after_sample(VALUE self, VALUE now_ns) {
@@ -338,12 +411,12 @@ VALUE _native_after_sample(VALUE self, VALUE now_ns) {
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  return LONG2NUM(_discrete_dynamic_sampler_after_sample(&state->sampler, NUM2LONG(now_ns)));
+  return LONG2NUM(discrete_dynamic_sampler_after_sample(&state->sampler, NUM2LONG(now_ns)));
 }
 
-VALUE
+VALUE _native_state_snapshot(VALUE self) {
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  return
+  return discrete_dynamic_sampler_state_snapshot(&state->sampler);
 }
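One small pattern worth highlighting from the boilerplate above: `rb_define_method(sampler_class, "initialize", _native_initialize, 1)` defines Ruby's `initialize` directly from C, so no Ruby shim file is needed just to forward constructor arguments. A self-contained sketch of that pattern, with hypothetical names:

```c
#include <ruby.h>

// Hypothetical example class: stores its constructor argument in an instance variable.
static VALUE example_initialize(VALUE self, VALUE value) {
  rb_iv_set(self, "@value", value);
  return Qnil; // Ruby's .new ignores initialize's return value
}

void Init_native_example(void) {
  VALUE klass = rb_define_class("NativeExample", rb_cObject);
  // Defining "initialize" from C means NativeExample.new(123) works with no Ruby wrapper file.
  rb_define_method(klass, "initialize", example_initialize, 1);
}
```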
data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.h

@@ -3,6 +3,8 @@
 #include <stdbool.h>
 #include <stddef.h>
 
+#include <ruby.h>
+
 // A sampler that will sample discrete events based on the overhead of their
 // sampling.
 //
@@ -31,6 +33,8 @@ typedef struct discrete_dynamic_sampler {
   // NOTE: This is an inverted view of the probability.
   // NOTE: A value of 0 works as +inf, effectively disabling sampling (to align with probability=0)
   unsigned long sampling_interval;
+  // Max allowed value for an individual sampling time measurement.
+  long max_sampling_time_ns;
 
   // -- Sampling State --
   // How many events have we seen since we last decided to sample.
@@ -53,20 +57,23 @@ typedef struct discrete_dynamic_sampler {
   // A negative number that we add to target_overhead to serve as extra padding to
   // try and mitigate observed overshooting of max sampling time.
   double target_overhead_adjustment;
+
+  // -- Interesting stats --
+  unsigned long sampling_time_clamps;
 } discrete_dynamic_sampler;
 
 
 // Init a new sampler with sane defaults.
-void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name);
+void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns);
 
 // Reset a sampler, clearing all stored state.
-void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler);
+void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns);
 
 // Sets a new target_overhead for the provided sampler, resetting it in the process.
 // @param target_overhead A double representing the percentage of total time we are
 //        willing to use as overhead for the resulting sampling. Values are expected
 //        to be in the range ]0.0, 100.0].
-void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead);
+void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns);
 
 // Make a sampling decision.
 //
@@ -75,15 +82,20 @@ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sa
 //
 // NOTE: If true is returned we implicitly assume the start of a sampling operation
 //       and it is expected that a follow-up after_sample call is issued.
-bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler);
+bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns);
 
 // Signal the end of a sampling operation.
 //
 // @return Sampling time in nanoseconds for the sample operation we just finished.
-long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler);
+long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns);
 
 // Retrieve the current sampling probability ([0.0, 100.0]) being applied by this sampler.
 double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler);
 
 // Retrieve the current number of events seen since last sample.
 unsigned long discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler);
+
+// Return a Ruby hash containing a snapshot of this sampler's interesting state at calling time.
+// WARN: This allocates in the Ruby VM and therefore should not be called without the
+//       VM lock or during GC.
+VALUE discrete_dynamic_sampler_state_snapshot(discrete_dynamic_sampler *sampler);
data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c

@@ -82,6 +82,9 @@ static ID at_id_id; // id of :@id in Ruby
 static ID at_resource_id; // id of :@resource in Ruby
 static ID at_root_span_id; // id of :@root_span in Ruby
 static ID at_type_id; // id of :@type in Ruby
+static ID at_otel_values_id; // id of :@otel_values in Ruby
+static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
+static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby
 
 // Contains state for a single ThreadContext instance
 struct thread_context_collector_state {
@@ -114,6 +117,8 @@ struct thread_context_collector_state {
   monotonic_to_system_epoch_state time_converter_state;
   // Used to identify the main thread, to give it a fallback name
   VALUE main_thread;
+  // Used when extracting trace identifiers from otel spans. Lazily initialized.
+  VALUE otel_current_span_key;
 
   struct stats {
     // Track how many garbage collection samples we've taken.
@@ -218,6 +223,14 @@ static VALUE thread_list(struct thread_context_collector_state *state);
 static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
 static VALUE _native_new_empty_thread(VALUE self);
 static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
+static void ddtrace_otel_trace_identifiers_for(
+  struct thread_context_collector_state *state,
+  VALUE *active_trace,
+  VALUE *root_span,
+  VALUE *numeric_span_id,
+  VALUE active_span,
+  VALUE otel_values
+);
 
 void collectors_thread_context_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -255,6 +268,9 @@ void collectors_thread_context_init(VALUE profiling_module) {
   at_resource_id = rb_intern_const("@resource");
   at_root_span_id = rb_intern_const("@root_span");
   at_type_id = rb_intern_const("@type");
+  at_otel_values_id = rb_intern_const("@otel_values");
+  at_parent_span_id_id = rb_intern_const("@parent_span_id");
+  at_datadog_trace_id = rb_intern_const("@datadog_trace");
 
   gc_profiling_init();
 }
@@ -282,6 +298,7 @@ static void thread_context_collector_typed_data_mark(void *state_ptr) {
   st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
   rb_gc_mark(state->thread_list_buffer);
   rb_gc_mark(state->main_thread);
+  rb_gc_mark(state->otel_current_span_key);
 }
 
 static void thread_context_collector_typed_data_free(void *state_ptr) {
@@ -334,6 +351,7 @@ static VALUE _native_new(VALUE klass) {
   state->allocation_type_enabled = true;
   state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
   state->main_thread = rb_thread_main();
+  state->otel_current_span_key = Qnil;
   state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
   state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
 
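The `rb_gc_mark(state->otel_current_span_key)` addition above follows the standard TypedData rule: any `VALUE` cached inside a native struct must be marked in the type's mark callback, or the GC may collect (or move) it while the struct still points at it. A minimal sketch of the rule, with a hypothetical struct:

```c
#include <ruby.h>

struct my_state {
  VALUE cached; // initialized to Qnil, lazily filled in later
};

static void my_state_mark(void *ptr) {
  struct my_state *state = (struct my_state *) ptr;
  // Marking Qnil is a safe no-op, so lazy initialization needs no special casing here.
  rb_gc_mark(state->cached);
}
```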
@@ -603,11 +621,14 @@ bool thread_context_collector_on_gc_finish(VALUE self_instance) {
   // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
   // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
   // samples first.
-  bool finished_major_gc = gc_profiling_has_major_gc_finished();
   bool over_flush_time_treshold =
     (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
 
-  return finished_major_gc || over_flush_time_treshold;
+  if (over_flush_time_treshold) {
+    return true;
+  } else {
+    return gc_profiling_has_major_gc_finished();
+  }
 }
 
 // This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
@@ -917,6 +938,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
   ));
   rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
   rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
+  rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
 
   return result;
 }
@@ -1104,10 +1126,19 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
 
   VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
   VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
-  if (root_span == Qnil || active_span == Qnil) return;
+  // Note: On Ruby 3.x `rb_attr_get` is exactly the same as `rb_ivar_get`. For Ruby 2.x, the difference is that
+  // `rb_ivar_get` can trigger "warning: instance variable @otel_values not initialized" if warnings are enabled and
+  // opentelemetry is not in use, whereas `rb_attr_get` does the lookup without generating the warning.
+  VALUE otel_values = rb_attr_get(active_trace, at_otel_values_id /* @otel_values */);
+
+  VALUE numeric_span_id = Qnil;
+
+  if (otel_values != Qnil) ddtrace_otel_trace_identifiers_for(state, &active_trace, &root_span, &numeric_span_id, active_span, otel_values);
+
+  if (root_span == Qnil || (active_span == Qnil && numeric_span_id == Qnil)) return;
 
   VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
-  VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
+  if (active_span != Qnil && numeric_span_id == Qnil) numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
   if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
 
   trace_identifiers_result->local_root_span_id = NUM2ULL(numeric_local_root_span_id);
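The final hunk below adds the otel lookup helpers this call relies on. The data shape they walk is a linked list: each ddtrace trace's `@otel_values` points at an otel span, whose `@datadog_trace` points at the next trace, and the last trace in the chain owns the local root span. A simplified, hypothetical C sketch of that traversal (the real code below walks Ruby objects via `rb_ivar_get`/`rb_hash_lookup` instead of plain pointers):

```c
#include <stddef.h>

// Hypothetical simplified types standing in for the Ruby object graph.
typedef struct trace trace;
typedef struct otel_span { trace *datadog_trace; } otel_span; // stands in for @datadog_trace
struct trace { otel_span *current_otel_span; };               // stands in for @otel_values[CURRENT_SPAN_KEY]

// Walk to the end of the trace -> otel span -> trace chain; the result holds the local root span.
static trace *find_last_trace(trace *current) {
  while (current->current_otel_span != NULL && current->current_otel_span->datadog_trace != NULL) {
    current = current->current_otel_span->datadog_trace;
  }
  return current;
}
```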
@@ -1299,3 +1330,65 @@ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
     default: return DDOG_CHARSLICE_C("(VM Internal, Missing class)");
   }
 }
+
+static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
+  if (state->otel_current_span_key == Qnil) {
+    VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog"));
+    VALUE opentelemetry_module = rb_const_get(datadog_module, rb_intern("OpenTelemetry"));
+    VALUE api_module = rb_const_get(opentelemetry_module, rb_intern("API"));
+    VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
+    VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
+
+    if (current_span_key == Qnil) {
+      rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
+    }
+
+    state->otel_current_span_key = current_span_key;
+  }
+
+  return state->otel_current_span_key;
+}
+
+// This method gets used when ddtrace is being used indirectly via the otel APIs. Information gets stored slightly
+// differently, and this codepath handles it.
+static void ddtrace_otel_trace_identifiers_for(
+  struct thread_context_collector_state *state,
+  VALUE *active_trace,
+  VALUE *root_span,
+  VALUE *numeric_span_id,
+  VALUE active_span,
+  VALUE otel_values
+) {
+  VALUE resolved_numeric_span_id =
+    active_span == Qnil ?
+      // For traces started from otel spans, the span id will be empty, and the @parent_span_id has the right value
+      rb_ivar_get(*active_trace, at_parent_span_id_id /* @parent_span_id */) :
+      // Regular span created by ddtrace
+      rb_ivar_get(active_span, at_id_id /* @id */);
+
+  if (resolved_numeric_span_id == Qnil) return;
+
+  VALUE otel_current_span_key = get_otel_current_span_key(state);
+  VALUE current_trace = *active_trace;
+
+  // ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
+  // trace and span representing it. Each ddtrace trace is then connected to the previous otel span, forming a linked
+  // list. The local root span is going to be the trace/span we find at the end of this linked list.
+  while (otel_values != Qnil) {
+    VALUE otel_span = rb_hash_lookup(otel_values, otel_current_span_key);
+    if (otel_span == Qnil) break;
+    VALUE next_trace = rb_ivar_get(otel_span, at_datadog_trace_id);
+    if (next_trace == Qnil) break;
+
+    current_trace = next_trace;
+    otel_values = rb_ivar_get(current_trace, at_otel_values_id /* @otel_values */);
+  }
+
+  // We found the last trace in the linked list. This contains the local root span
+  VALUE resolved_root_span = rb_ivar_get(current_trace, at_root_span_id /* @root_span */);
+  if (resolved_root_span == Qnil) return;
+
+  *root_span = resolved_root_span;
+  *active_trace = current_trace;
+  *numeric_span_id = resolved_numeric_span_id;
+}