ddtrace 1.20.0 → 1.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +115 -1
- data/LICENSE-3rdparty.csv +1 -1
- data/bin/ddprofrb +15 -0
- data/bin/ddtracerb +3 -1
- data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
- data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +238 -61
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.c +145 -72
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.h +17 -5
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +97 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +2 -2
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.c +45 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.h +7 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +4 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +14 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +1 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.c +10 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +2 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +7 -9
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +43 -13
- data/lib/datadog/appsec/event.rb +1 -1
- data/lib/datadog/auto_instrument.rb +3 -0
- data/lib/datadog/core/configuration/components.rb +7 -6
- data/lib/datadog/core/configuration/option.rb +8 -6
- data/lib/datadog/core/configuration/settings.rb +130 -63
- data/lib/datadog/core/configuration.rb +20 -4
- data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
- data/lib/datadog/core/environment/git.rb +25 -0
- data/lib/datadog/core/environment/identity.rb +18 -48
- data/lib/datadog/core/environment/platform.rb +7 -1
- data/lib/datadog/core/git/ext.rb +2 -23
- data/lib/datadog/core/remote/client/capabilities.rb +1 -1
- data/lib/datadog/core/remote/negotiation.rb +2 -2
- data/lib/datadog/core/remote/transport/http/config.rb +1 -1
- data/lib/datadog/core/remote/worker.rb +7 -4
- data/lib/datadog/core/telemetry/client.rb +18 -10
- data/lib/datadog/core/telemetry/emitter.rb +9 -13
- data/lib/datadog/core/telemetry/event.rb +247 -57
- data/lib/datadog/core/telemetry/ext.rb +1 -0
- data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
- data/lib/datadog/core/telemetry/http/ext.rb +4 -1
- data/lib/datadog/core/telemetry/http/transport.rb +9 -4
- data/lib/datadog/core/telemetry/request.rb +59 -0
- data/lib/datadog/core/transport/ext.rb +2 -0
- data/lib/datadog/core/utils/url.rb +25 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +31 -0
- data/lib/datadog/profiling/collectors/info.rb +101 -0
- data/lib/datadog/profiling/component.rb +34 -28
- data/lib/datadog/profiling/exporter.rb +19 -5
- data/lib/datadog/profiling/ext.rb +2 -0
- data/lib/datadog/profiling/flush.rb +6 -3
- data/lib/datadog/profiling/http_transport.rb +5 -1
- data/lib/datadog/profiling/load_native_extension.rb +19 -6
- data/lib/datadog/profiling/native_extension.rb +1 -1
- data/lib/datadog/profiling/tag_builder.rb +5 -0
- data/lib/datadog/profiling/tasks/exec.rb +3 -3
- data/lib/datadog/profiling/tasks/help.rb +3 -3
- data/lib/datadog/profiling.rb +13 -2
- data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
- data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
- data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
- data/lib/datadog/tracing/contrib/configurable.rb +1 -1
- data/lib/datadog/tracing/contrib/extensions.rb +6 -2
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
- data/lib/datadog/tracing/sampling/matcher.rb +23 -3
- data/lib/datadog/tracing/sampling/rule.rb +7 -2
- data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
- data/lib/datadog/tracing/trace_operation.rb +1 -2
- data/lib/datadog/tracing/transport/http.rb +1 -0
- data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
- data/lib/ddtrace/version.rb +1 -1
- metadata +55 -62
- data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
- data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
- data/lib/datadog/core/telemetry/collector.rb +0 -250
- data/lib/datadog/core/telemetry/v1/app_event.rb +0 -59
- data/lib/datadog/core/telemetry/v1/application.rb +0 -92
- data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
- data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
- data/lib/datadog/core/telemetry/v1/host.rb +0 -59
- data/lib/datadog/core/telemetry/v1/install_signature.rb +0 -38
- data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
- data/lib/datadog/core/telemetry/v1/product.rb +0 -36
- data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
- data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
- data/lib/datadog/core/telemetry/v2/request.rb +0 -29
- data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +0 -0
|
@@ -9,16 +9,23 @@
|
|
|
9
9
|
#define BASE_SAMPLING_INTERVAL 50
|
|
10
10
|
|
|
11
11
|
#define ADJUSTMENT_WINDOW_NS SECONDS_AS_NS(1)
|
|
12
|
+
#define ADJUSTMENT_WINDOW_SAMPLES 100
|
|
13
|
+
// Any average sampling times above this value will be clamped to this value.
|
|
14
|
+
// In practice, this limits the budget consumption of a single sample to that of an adjustment window,
|
|
15
|
+
// thus aiming for a minimum sample rate of once per adjustment window (dependent on actual event rate).
|
|
16
|
+
// NOTE: This is our main strategy to deal with timing hiccups such as those that can be caused by
|
|
17
|
+
// suspensions, system overloads and other things that could lead to arbitrarily big sampling
|
|
18
|
+
// time measurements.
|
|
19
|
+
#define MAX_ALLOWED_SAMPLING_NS(target_overhead) (long) (ADJUSTMENT_WINDOW_NS * target_overhead / 100.)
|
|
12
20
|
|
|
13
21
|
#define EMA_SMOOTHING_FACTOR 0.6
|
|
14
|
-
#define EXP_MOVING_AVERAGE(last, avg, first) first ? last : (1-EMA_SMOOTHING_FACTOR) * avg + EMA_SMOOTHING_FACTOR * last
|
|
15
22
|
|
|
16
|
-
void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name) {
|
|
23
|
+
void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns) {
|
|
17
24
|
sampler->debug_name = debug_name;
|
|
18
|
-
discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT);
|
|
25
|
+
discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT, now_ns);
|
|
19
26
|
}
|
|
20
27
|
|
|
21
|
-
|
|
28
|
+
void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
|
|
22
29
|
const char *debug_name = sampler->debug_name;
|
|
23
30
|
double target_overhead = sampler->target_overhead;
|
|
24
31
|
(*sampler) = (discrete_dynamic_sampler) {
|
|
@@ -31,6 +38,7 @@ static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, l
|
|
|
31
38
|
// This fake readjustment will use a hardcoded sampling interval
|
|
32
39
|
.sampling_interval = BASE_SAMPLING_INTERVAL,
|
|
33
40
|
.sampling_probability = 1.0 / BASE_SAMPLING_INTERVAL,
|
|
41
|
+
.max_sampling_time_ns = MAX_ALLOWED_SAMPLING_NS(target_overhead),
|
|
34
42
|
// But we want to make sure we sample at least once in the next window so that our first
|
|
35
43
|
// real readjustment has some notion of how heavy sampling is. Therefore, we'll make it so that
|
|
36
44
|
// the next event is automatically sampled by artificially locating it in the interval threshold.
|
|
@@ -38,27 +46,17 @@ static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, l
|
|
|
38
46
|
};
|
|
39
47
|
}
|
|
40
48
|
|
|
41
|
-
void
|
|
42
|
-
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
43
|
-
_discrete_dynamic_sampler_reset(sampler, now);
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
static void _discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
|
|
49
|
+
void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
|
|
47
50
|
if (target_overhead <= 0 || target_overhead > 100) {
|
|
48
51
|
rb_raise(rb_eArgError, "Target overhead must be a double between ]0,100] was %f", target_overhead);
|
|
49
52
|
}
|
|
50
53
|
sampler->target_overhead = target_overhead;
|
|
51
|
-
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead) {
|
|
55
|
-
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
56
|
-
_discrete_dynamic_sampler_set_overhead_target_percentage(sampler, target_overhead, now);
|
|
54
|
+
return discrete_dynamic_sampler_reset(sampler, now_ns);
|
|
57
55
|
}
|
|
58
56
|
|
|
59
57
|
static void maybe_readjust(discrete_dynamic_sampler *sampler, long now);
|
|
60
58
|
|
|
61
|
-
|
|
59
|
+
bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
|
|
62
60
|
// For efficiency reasons we don't do true random sampling but rather systematic
|
|
63
61
|
// sampling following a sample interval/skip. This can be biased and hide patterns
|
|
64
62
|
// but the dynamic interval and rather indeterministic pattern of allocations in
|
|
@@ -77,12 +75,7 @@ static bool _discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sa
|
|
|
77
75
|
return should_sample;
|
|
78
76
|
}
|
|
79
77
|
|
|
80
|
-
|
|
81
|
-
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
82
|
-
return _discrete_dynamic_sampler_should_sample(sampler, now);
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
|
|
78
|
+
long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
|
|
86
79
|
long last_sampling_time_ns = sampler->sample_start_time_ns == 0 ? 0 : long_max_of(0, now_ns - sampler->sample_start_time_ns);
|
|
87
80
|
sampler->samples_since_last_readjustment++;
|
|
88
81
|
sampler->sampling_time_since_last_readjustment_ns += last_sampling_time_ns;
|
|
@@ -94,11 +87,6 @@ static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sam
|
|
|
94
87
|
return last_sampling_time_ns;
|
|
95
88
|
}
|
|
96
89
|
|
|
97
|
-
long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler) {
|
|
98
|
-
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
99
|
-
return _discrete_dynamic_sampler_after_sample(sampler, now);
|
|
100
|
-
}
|
|
101
|
-
|
|
102
90
|
double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler) {
|
|
103
91
|
return sampler->sampling_probability * 100.;
|
|
104
92
|
}
|
|
@@ -107,35 +95,66 @@ size_t discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sample
|
|
|
107
95
|
return sampler->events_since_last_sample;
|
|
108
96
|
}
|
|
109
97
|
|
|
98
|
+
static double ewma_adj_window(double latest_value, double avg, long current_window_time_ns, bool is_first) {
|
|
99
|
+
if (is_first) {
|
|
100
|
+
return latest_value;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// We don't want samples coming from partial adjustment windows (e.g. preempted due to number of samples)
|
|
104
|
+
// to lead to quick "forgetting" of the past. Thus, we'll tweak the weight of this new value based on the
|
|
105
|
+
// size of the time window from which we gathered it in relation to our standard adjustment window time.
|
|
106
|
+
double fraction_of_full_window = double_min_of((double) current_window_time_ns / ADJUSTMENT_WINDOW_NS, 1);
|
|
107
|
+
double alpha = EMA_SMOOTHING_FACTOR * fraction_of_full_window;
|
|
108
|
+
|
|
109
|
+
return (1-alpha) * avg + alpha * latest_value;
|
|
110
|
+
}
|
|
111
|
+
|
|
110
112
|
static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
|
|
111
|
-
long
|
|
113
|
+
long this_window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
|
|
114
|
+
|
|
115
|
+
bool should_readjust_based_on_time = this_window_time_ns >= ADJUSTMENT_WINDOW_NS;
|
|
116
|
+
bool should_readjust_based_on_samples = sampler->samples_since_last_readjustment >= ADJUSTMENT_WINDOW_SAMPLES;
|
|
117
|
+
|
|
118
|
+
if (!should_readjust_based_on_time && !should_readjust_based_on_samples) {
|
|
119
|
+
// not enough time or samples have passed to perform a readjustment
|
|
120
|
+
return;
|
|
121
|
+
}
|
|
112
122
|
|
|
113
|
-
if (
|
|
114
|
-
// not
|
|
123
|
+
if (this_window_time_ns == 0) {
|
|
124
|
+
// should not be possible given previous condition but let's protect against div by 0 below.
|
|
115
125
|
return;
|
|
116
126
|
}
|
|
117
127
|
|
|
118
128
|
// If we got this far, let's recalculate our sampling params based on new observations
|
|
119
129
|
bool first_readjustment = !sampler->has_completed_full_adjustment_window;
|
|
120
130
|
|
|
121
|
-
// Update our running average of events/sec with latest observation
|
|
122
|
-
sampler->events_per_ns =
|
|
123
|
-
(double) sampler->events_since_last_readjustment /
|
|
131
|
+
// Update our running average of events/sec with latest observation.
|
|
132
|
+
sampler->events_per_ns = ewma_adj_window(
|
|
133
|
+
(double) sampler->events_since_last_readjustment / this_window_time_ns,
|
|
124
134
|
sampler->events_per_ns,
|
|
135
|
+
this_window_time_ns,
|
|
125
136
|
first_readjustment
|
|
126
137
|
);
|
|
127
138
|
|
|
128
139
|
// Update our running average of sampling time for a specific event
|
|
129
|
-
long sampling_window_time_ns = sampler->sampling_time_since_last_readjustment_ns;
|
|
130
|
-
long sampling_overshoot_time_ns = -1;
|
|
131
140
|
if (sampler->samples_since_last_readjustment > 0) {
|
|
132
141
|
// We can only update sampling-related stats if we actually sampled on the last window...
|
|
133
142
|
|
|
134
143
|
// Let's update our average sampling time per event
|
|
135
|
-
long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 :
|
|
136
|
-
sampler->
|
|
144
|
+
long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampler->sampling_time_since_last_readjustment_ns / sampler->samples_since_last_readjustment;
|
|
145
|
+
if (avg_sampling_time_in_window_ns > sampler->max_sampling_time_ns) {
|
|
146
|
+
// If the average sampling time in the previous window was deemed unacceptable, clamp it to the
|
|
147
|
+
// maximum acceptable value and register this operation in our counter.
|
|
148
|
+
// NOTE: This is important so that events like suspensions or system overloads do not lead us to
|
|
149
|
+
// learn arbitrarily big sampling times which may then result in us not sampling anything
|
|
150
|
+
// for very long periods of time.
|
|
151
|
+
avg_sampling_time_in_window_ns = sampler->max_sampling_time_ns;
|
|
152
|
+
sampler->sampling_time_clamps++;
|
|
153
|
+
}
|
|
154
|
+
sampler->sampling_time_ns = ewma_adj_window(
|
|
137
155
|
avg_sampling_time_in_window_ns,
|
|
138
156
|
sampler->sampling_time_ns,
|
|
157
|
+
this_window_time_ns,
|
|
139
158
|
first_readjustment
|
|
140
159
|
);
|
|
141
160
|
}
|
|
@@ -145,21 +164,21 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
|
|
|
145
164
|
// NOTE: Updating this even when no samples occur is a conscious choice which enables us to cool down extreme adjustments over time.
|
|
146
165
|
// If we didn't do this, whenever a big spike caused target_overhead_adjustment to equal target_overhead, we'd get stuck
|
|
147
166
|
// in a "probability = 0" state.
|
|
148
|
-
long
|
|
167
|
+
long this_window_sampling_target_time_ns = this_window_time_ns * (sampler->target_overhead / 100.);
|
|
149
168
|
// Overshoot by definition is always >= 0. < 0 would be undershooting!
|
|
150
|
-
|
|
169
|
+
long this_window_sampling_overshoot_time_ns = long_max_of(0, sampler->sampling_time_since_last_readjustment_ns - this_window_sampling_target_time_ns);
|
|
151
170
|
// Our overhead adjustment should always be between [-target_overhead, 0]. Higher adjustments would lead to negative overhead targets
|
|
152
171
|
// which don't make much sense.
|
|
153
|
-
double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead,
|
|
154
|
-
sampler->target_overhead_adjustment =
|
|
172
|
+
double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead, this_window_sampling_overshoot_time_ns * 100. / this_window_time_ns);
|
|
173
|
+
sampler->target_overhead_adjustment = ewma_adj_window(
|
|
155
174
|
last_target_overhead_adjustment,
|
|
156
175
|
sampler->target_overhead_adjustment,
|
|
176
|
+
this_window_time_ns,
|
|
157
177
|
first_readjustment
|
|
158
178
|
);
|
|
159
179
|
|
|
160
180
|
// Apply our overhead adjustment to figure out our real targets for this readjustment.
|
|
161
181
|
double target_overhead = double_max_of(0, sampler->target_overhead + sampler->target_overhead_adjustment);
|
|
162
|
-
long target_sampling_time_ns = window_time_ns * (target_overhead / 100.);
|
|
163
182
|
|
|
164
183
|
// Recalculate target sampling probability so that the following 2 hold:
|
|
165
184
|
// * window_time_ns = working_window_time_ns + sampling_window_time_ns
|
|
@@ -175,11 +194,13 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
|
|
|
175
194
|
// ┌─ assuming no events will be emitted during sampling
|
|
176
195
|
// │
|
|
177
196
|
// = events_per_ns * working_window_time_ns * sampling_probability * sampling_time_ns
|
|
197
|
+
// = events_per_ns * (window_time_ns - sampling_window_time_ns) * sampling_probability * sampling_time_ns
|
|
178
198
|
//
|
|
179
199
|
// Re-ordering for sampling_probability and solving for the upper-bound of sampling_window_time_ns:
|
|
180
200
|
//
|
|
181
201
|
// sampling_window_time_ns = window_time_ns * target_overhead / 100
|
|
182
|
-
// sampling_probability =
|
|
202
|
+
// sampling_probability = (sampling_window_time_ns) / (events_per_ns * sampling_time_ns * (window_time_ns - sampling_window_time_ns))
|
|
203
|
+
// = (window_time_ns * target_overhead / 100) / (events_per_ns * sampling_time_ns * window_time_ns * (1 - target_overhead / 100))
|
|
183
204
|
//
|
|
184
205
|
// Which you can intuitively understand as:
|
|
185
206
|
//
|
|
@@ -190,16 +211,22 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
|
|
|
190
211
|
// then probability will be > 1 (but we should clamp to 1 since probabilities higher than 1 don't make sense).
|
|
191
212
|
// * If app is eventing a lot or our sampling overhead is big, then as time_to_sample_all_events_ns grows, sampling_probability will
|
|
192
213
|
// tend to 0.
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
214
|
+
//
|
|
215
|
+
// In fact, we can simplify the equation further since the `window_time_ns` components cancel each other out:
|
|
216
|
+
//
|
|
217
|
+
// sampling_probability = (target_overhead / 100) / (events_per_ns * sampling_time_ns * (1 - target_overhead / 100))
|
|
218
|
+
// = max_sampling_overhead / avg_sampling_overhead
|
|
219
|
+
|
|
220
|
+
double max_sampling_overhead = target_overhead / 100.;
|
|
221
|
+
double avg_sampling_overhead = sampler->events_per_ns * sampler->sampling_time_ns * (1 - max_sampling_overhead);
|
|
222
|
+
|
|
223
|
+
if (max_sampling_overhead == 0) {
|
|
224
|
+
// if we aren't allowed any sampling overhead at all, probability has to be 0
|
|
198
225
|
sampler->sampling_probability = 0;
|
|
199
226
|
} else {
|
|
200
227
|
// otherwise apply the formula described above (protecting against div by 0)
|
|
201
|
-
sampler->sampling_probability =
|
|
202
|
-
double_min_of(1.,
|
|
228
|
+
sampler->sampling_probability = avg_sampling_overhead == 0 ? 1. :
|
|
229
|
+
double_min_of(1., max_sampling_overhead / avg_sampling_overhead);
|
|
203
230
|
}
|
|
204
231
|
|
|
205
232
|
// Doing true random selection would involve "tossing a coin" on every allocation. Let's do systematic sampling instead so that our
|
|
@@ -225,26 +252,34 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
|
|
|
225
252
|
double samples_in_60s = allocs_in_60s * sampler->sampling_probability;
|
|
226
253
|
double expected_total_sampling_time_in_60s =
|
|
227
254
|
samples_in_60s * sampler->sampling_time_ns / 1e9;
|
|
228
|
-
double
|
|
255
|
+
double num_this_windows_in_60s = 60 * 1e9 / this_window_time_ns;
|
|
256
|
+
double real_total_sampling_time_in_60s = sampler->sampling_time_since_last_readjustment_ns * num_this_windows_in_60s / 1e9;
|
|
229
257
|
|
|
230
|
-
|
|
258
|
+
const char* readjustment_reason = should_readjust_based_on_time ? "time" : "samples";
|
|
259
|
+
|
|
260
|
+
fprintf(stderr, "[dds.%s] readjusting due to %s...\n", sampler->debug_name, readjustment_reason);
|
|
261
|
+
fprintf(stderr, "events_since_last_readjustment=%ld\n", sampler->events_since_last_readjustment);
|
|
231
262
|
fprintf(stderr, "samples_since_last_readjustment=%ld\n", sampler->samples_since_last_readjustment);
|
|
232
|
-
fprintf(stderr, "
|
|
263
|
+
fprintf(stderr, "this_window_time=%ld\n", this_window_time_ns);
|
|
264
|
+
fprintf(stderr, "this_window_sampling_time=%ld\n", sampler->sampling_time_since_last_readjustment_ns);
|
|
265
|
+
fprintf(stderr, "this_working_window_time=%ld\n", this_window_time_ns - sampler->sampling_time_since_last_readjustment_ns);
|
|
266
|
+
fprintf(stderr, "this_window_sampling_target_time=%ld\n", this_window_sampling_target_time_ns);
|
|
267
|
+
fprintf(stderr, "this_window_sampling_overshoot_time=%ld\n", this_window_sampling_overshoot_time_ns);
|
|
268
|
+
fprintf(stderr, "\n");
|
|
269
|
+
fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
|
|
270
|
+
fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
|
|
233
271
|
fprintf(stderr, "events_per_sec=%f\n", sampler->events_per_ns * 1e9);
|
|
234
272
|
fprintf(stderr, "sampling_time=%ld\n", sampler->sampling_time_ns);
|
|
235
|
-
fprintf(stderr, "
|
|
236
|
-
fprintf(stderr, "sampling_target_time=%ld\n", reference_target_sampling_time_ns);
|
|
237
|
-
fprintf(stderr, "sampling_overshoot_time=%ld\n", sampling_overshoot_time_ns);
|
|
238
|
-
fprintf(stderr, "working_window_time=%ld\n", working_window_time_ns);
|
|
273
|
+
fprintf(stderr, "avg_sampling_overhead=%f\n", avg_sampling_overhead * 100);
|
|
239
274
|
fprintf(stderr, "sampling_interval=%zu\n", sampler->sampling_interval);
|
|
240
|
-
fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability);
|
|
275
|
+
fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability * 100);
|
|
276
|
+
fprintf(stderr, "\n");
|
|
241
277
|
fprintf(stderr, "expected allocs in 60s=%f\n", allocs_in_60s);
|
|
242
278
|
fprintf(stderr, "expected samples in 60s=%f\n", samples_in_60s);
|
|
243
279
|
fprintf(stderr, "expected sampling time in 60s=%f (previous real=%f)\n", expected_total_sampling_time_in_60s, real_total_sampling_time_in_60s);
|
|
244
|
-
fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
|
|
245
|
-
fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
|
|
246
|
-
fprintf(stderr, "target_sampling_time=%ld\n", target_sampling_time_ns);
|
|
247
280
|
fprintf(stderr, "expected max overhead in 60s=%f\n", target_overhead / 100.0 * 60);
|
|
281
|
+
fprintf(stderr, "\n");
|
|
282
|
+
fprintf(stderr, "sampling_time_clamps=%zu\n", sampler->sampling_time_clamps);
|
|
248
283
|
fprintf(stderr, "-------\n");
|
|
249
284
|
#endif
|
|
250
285
|
|
|
@@ -255,15 +290,34 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
|
|
|
255
290
|
sampler->has_completed_full_adjustment_window = true;
|
|
256
291
|
}
|
|
257
292
|
|
|
293
|
+
VALUE discrete_dynamic_sampler_state_snapshot(discrete_dynamic_sampler *sampler) {
|
|
294
|
+
VALUE arguments[] = {
|
|
295
|
+
ID2SYM(rb_intern("target_overhead")), /* => */ DBL2NUM(sampler->target_overhead),
|
|
296
|
+
ID2SYM(rb_intern("target_overhead_adjustment")), /* => */ DBL2NUM(sampler->target_overhead_adjustment),
|
|
297
|
+
ID2SYM(rb_intern("events_per_sec")), /* => */ DBL2NUM(sampler->events_per_ns * 1e9),
|
|
298
|
+
ID2SYM(rb_intern("sampling_time_ns")), /* => */ LONG2NUM(sampler->sampling_time_ns),
|
|
299
|
+
ID2SYM(rb_intern("sampling_interval")), /* => */ ULONG2NUM(sampler->sampling_interval),
|
|
300
|
+
ID2SYM(rb_intern("sampling_probability")), /* => */ DBL2NUM(sampler->sampling_probability * 100),
|
|
301
|
+
ID2SYM(rb_intern("events_since_last_readjustment")), /* => */ ULONG2NUM(sampler->events_since_last_readjustment),
|
|
302
|
+
ID2SYM(rb_intern("samples_since_last_readjustment")), /* => */ ULONG2NUM(sampler->samples_since_last_readjustment),
|
|
303
|
+
ID2SYM(rb_intern("max_sampling_time_ns")), /* => */ LONG2NUM(sampler->max_sampling_time_ns),
|
|
304
|
+
ID2SYM(rb_intern("sampling_time_clamps")), /* => */ ULONG2NUM(sampler->sampling_time_clamps),
|
|
305
|
+
};
|
|
306
|
+
VALUE hash = rb_hash_new();
|
|
307
|
+
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(hash, arguments[i], arguments[i+1]);
|
|
308
|
+
return hash;
|
|
309
|
+
}
|
|
310
|
+
|
|
258
311
|
// ---
|
|
259
312
|
// Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.
|
|
260
313
|
|
|
261
314
|
static VALUE _native_new(VALUE klass);
|
|
315
|
+
static VALUE _native_initialize(VALUE self, VALUE now);
|
|
262
316
|
static VALUE _native_reset(VALUE self, VALUE now);
|
|
263
317
|
static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now);
|
|
264
318
|
static VALUE _native_should_sample(VALUE self, VALUE now);
|
|
265
319
|
static VALUE _native_after_sample(VALUE self, VALUE now);
|
|
266
|
-
static VALUE
|
|
320
|
+
static VALUE _native_state_snapshot(VALUE self);
|
|
267
321
|
|
|
268
322
|
typedef struct sampler_state {
|
|
269
323
|
discrete_dynamic_sampler sampler;
|
|
@@ -276,12 +330,15 @@ void collectors_discrete_dynamic_sampler_init(VALUE profiling_module) {
|
|
|
276
330
|
VALUE sampler_class = rb_define_class_under(testing_module, "Sampler", rb_cObject);
|
|
277
331
|
|
|
278
332
|
rb_define_alloc_func(sampler_class, _native_new);
|
|
333
|
+
// NOTE: Despite being native, we're using the normal ruby keyword to prevent having to write a whole
|
|
334
|
+
// new ruby file to simply proxy the initialization call.
|
|
335
|
+
rb_define_method(sampler_class, "initialize", _native_initialize, 1);
|
|
279
336
|
|
|
280
337
|
rb_define_method(sampler_class, "_native_reset", _native_reset, 1);
|
|
281
338
|
rb_define_method(sampler_class, "_native_set_overhead_target_percentage", _native_set_overhead_target_percentage, 2);
|
|
282
339
|
rb_define_method(sampler_class, "_native_should_sample", _native_should_sample, 1);
|
|
283
340
|
rb_define_method(sampler_class, "_native_after_sample", _native_after_sample, 1);
|
|
284
|
-
rb_define_method(sampler_class, "
|
|
341
|
+
rb_define_method(sampler_class, "_native_state_snapshot", _native_state_snapshot, 0);
|
|
285
342
|
}
|
|
286
343
|
|
|
287
344
|
static const rb_data_type_t sampler_typed_data = {
|
|
@@ -296,21 +353,37 @@ static const rb_data_type_t sampler_typed_data = {
|
|
|
296
353
|
static VALUE _native_new(VALUE klass) {
|
|
297
354
|
sampler_state *state = ruby_xcalloc(sizeof(sampler_state), 1);
|
|
298
355
|
|
|
299
|
-
|
|
356
|
+
long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
357
|
+
if (now_ns == 0) {
|
|
358
|
+
rb_raise(rb_eRuntimeError, "failed to get clock time");
|
|
359
|
+
}
|
|
360
|
+
discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns);
|
|
300
361
|
|
|
301
362
|
return TypedData_Wrap_Struct(klass, &sampler_typed_data, state);
|
|
302
363
|
}
|
|
303
364
|
|
|
304
|
-
static VALUE
|
|
365
|
+
static VALUE _native_initialize(VALUE self, VALUE now_ns) {
|
|
305
366
|
ENFORCE_TYPE(now_ns, T_FIXNUM);
|
|
306
367
|
|
|
307
368
|
sampler_state *state;
|
|
308
369
|
TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
|
|
309
370
|
|
|
310
|
-
|
|
371
|
+
discrete_dynamic_sampler_init(&state->sampler, "test sampler", NUM2LONG(now_ns));
|
|
372
|
+
|
|
311
373
|
return Qtrue;
|
|
312
374
|
}
|
|
313
375
|
|
|
376
|
+
static VALUE _native_reset(VALUE self, VALUE now_ns) {
|
|
377
|
+
ENFORCE_TYPE(now_ns, T_FIXNUM);
|
|
378
|
+
|
|
379
|
+
sampler_state *state;
|
|
380
|
+
TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
|
|
381
|
+
|
|
382
|
+
discrete_dynamic_sampler_reset(&state->sampler, NUM2LONG(now_ns));
|
|
383
|
+
|
|
384
|
+
return Qnil;
|
|
385
|
+
}
|
|
386
|
+
|
|
314
387
|
static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now_ns) {
|
|
315
388
|
ENFORCE_TYPE(target_overhead, T_FLOAT);
|
|
316
389
|
ENFORCE_TYPE(now_ns, T_FIXNUM);
|
|
@@ -318,7 +391,7 @@ static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_ove
|
|
|
318
391
|
sampler_state *state;
|
|
319
392
|
TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
|
|
320
393
|
|
|
321
|
-
|
|
394
|
+
discrete_dynamic_sampler_set_overhead_target_percentage(&state->sampler, NUM2DBL(target_overhead), NUM2LONG(now_ns));
|
|
322
395
|
|
|
323
396
|
return Qnil;
|
|
324
397
|
}
|
|
@@ -329,7 +402,7 @@ VALUE _native_should_sample(VALUE self, VALUE now_ns) {
|
|
|
329
402
|
sampler_state *state;
|
|
330
403
|
TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
|
|
331
404
|
|
|
332
|
-
return
|
|
405
|
+
return discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
|
|
333
406
|
}
|
|
334
407
|
|
|
335
408
|
VALUE _native_after_sample(VALUE self, VALUE now_ns) {
|
|
@@ -338,12 +411,12 @@ VALUE _native_after_sample(VALUE self, VALUE now_ns) {
|
|
|
338
411
|
sampler_state *state;
|
|
339
412
|
TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
|
|
340
413
|
|
|
341
|
-
return LONG2NUM(
|
|
414
|
+
return LONG2NUM(discrete_dynamic_sampler_after_sample(&state->sampler, NUM2LONG(now_ns)));
|
|
342
415
|
}
|
|
343
416
|
|
|
344
|
-
VALUE
|
|
417
|
+
VALUE _native_state_snapshot(VALUE self) {
|
|
345
418
|
sampler_state *state;
|
|
346
419
|
TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
|
|
347
420
|
|
|
348
|
-
return
|
|
421
|
+
return discrete_dynamic_sampler_state_snapshot(&state->sampler);
|
|
349
422
|
}
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
#include <stdbool.h>
|
|
4
4
|
#include <stddef.h>
|
|
5
5
|
|
|
6
|
+
#include <ruby.h>
|
|
7
|
+
|
|
6
8
|
// A sampler that will sample discrete events based on the overhead of their
|
|
7
9
|
// sampling.
|
|
8
10
|
//
|
|
@@ -31,6 +33,8 @@ typedef struct discrete_dynamic_sampler {
|
|
|
31
33
|
// NOTE: This is an inverted view of the probability.
|
|
32
34
|
// NOTE: A value of 0 works as +inf, effectively disabling sampling (to align with probability=0)
|
|
33
35
|
unsigned long sampling_interval;
|
|
36
|
+
// Max allowed value for an individual sampling time measurement.
|
|
37
|
+
long max_sampling_time_ns;
|
|
34
38
|
|
|
35
39
|
// -- Sampling State --
|
|
36
40
|
// How many events have we seen since we last decided to sample.
|
|
@@ -53,20 +57,23 @@ typedef struct discrete_dynamic_sampler {
|
|
|
53
57
|
// A negative number that we add to target_overhead to serve as extra padding to
|
|
54
58
|
// try and mitigate observed overshooting of max sampling time.
|
|
55
59
|
double target_overhead_adjustment;
|
|
60
|
+
|
|
61
|
+
// -- Interesting stats --
|
|
62
|
+
unsigned long sampling_time_clamps;
|
|
56
63
|
} discrete_dynamic_sampler;
|
|
57
64
|
|
|
58
65
|
|
|
59
66
|
// Init a new sampler with sane defaults.
|
|
60
|
-
void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name);
|
|
67
|
+
void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns);
|
|
61
68
|
|
|
62
69
|
// Reset a sampler, clearing all stored state.
|
|
63
|
-
void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler);
|
|
70
|
+
void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns);
|
|
64
71
|
|
|
65
72
|
// Sets a new target_overhead for the provided sampler, resetting it in the process.
|
|
66
73
|
// @param target_overhead A double representing the percentage of total time we are
|
|
67
74
|
// willing to use as overhead for the resulting sampling. Values are expected
|
|
68
75
|
// to be in the range ]0.0, 100.0].
|
|
69
|
-
void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead);
|
|
76
|
+
void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns);
|
|
70
77
|
|
|
71
78
|
// Make a sampling decision.
|
|
72
79
|
//
|
|
@@ -75,15 +82,20 @@ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sa
|
|
|
75
82
|
//
|
|
76
83
|
// NOTE: If true is returned we implicitly assume the start of a sampling operation
|
|
77
84
|
// and it is expected that a follow-up after_sample call is issued.
|
|
78
|
-
bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler);
|
|
85
|
+
bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns);
|
|
79
86
|
|
|
80
87
|
// Signal the end of a sampling operation.
|
|
81
88
|
//
|
|
82
89
|
// @return Sampling time in nanoseconds for the sample operation we just finished.
|
|
83
|
-
long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler);
|
|
90
|
+
long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns);
|
|
84
91
|
|
|
85
92
|
// Retrieve the current sampling probability ([0.0, 100.0]) being applied by this sampler.
|
|
86
93
|
double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler);
|
|
87
94
|
|
|
88
95
|
// Retrieve the current number of events seen since last sample.
|
|
89
96
|
unsigned long discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler);
|
|
97
|
+
|
|
98
|
+
// Return a Ruby hash containing a snapshot of this sampler's interesting state at calling time.
|
|
99
|
+
// WARN: This allocates in the Ruby VM and therefore should not be called without the
|
|
100
|
+
// VM lock or during GC.
|
|
101
|
+
VALUE discrete_dynamic_sampler_state_snapshot(discrete_dynamic_sampler *sampler);
|
|
@@ -82,6 +82,9 @@ static ID at_id_id; // id of :@id in Ruby
|
|
|
82
82
|
static ID at_resource_id; // id of :@resource in Ruby
|
|
83
83
|
static ID at_root_span_id; // id of :@root_span in Ruby
|
|
84
84
|
static ID at_type_id; // id of :@type in Ruby
|
|
85
|
+
static ID at_otel_values_id; // id of :@otel_values in Ruby
|
|
86
|
+
static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
|
|
87
|
+
static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby
|
|
85
88
|
|
|
86
89
|
// Contains state for a single ThreadContext instance
|
|
87
90
|
struct thread_context_collector_state {
|
|
@@ -114,6 +117,8 @@ struct thread_context_collector_state {
|
|
|
114
117
|
monotonic_to_system_epoch_state time_converter_state;
|
|
115
118
|
// Used to identify the main thread, to give it a fallback name
|
|
116
119
|
VALUE main_thread;
|
|
120
|
+
// Used when extracting trace identifiers from otel spans. Lazily initialized.
|
|
121
|
+
VALUE otel_current_span_key;
|
|
117
122
|
|
|
118
123
|
struct stats {
|
|
119
124
|
// Track how many garbage collection samples we've taken.
|
|
@@ -218,6 +223,14 @@ static VALUE thread_list(struct thread_context_collector_state *state);
|
|
|
218
223
|
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
|
|
219
224
|
static VALUE _native_new_empty_thread(VALUE self);
|
|
220
225
|
static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
|
|
226
|
+
static void ddtrace_otel_trace_identifiers_for(
|
|
227
|
+
struct thread_context_collector_state *state,
|
|
228
|
+
VALUE *active_trace,
|
|
229
|
+
VALUE *root_span,
|
|
230
|
+
VALUE *numeric_span_id,
|
|
231
|
+
VALUE active_span,
|
|
232
|
+
VALUE otel_values
|
|
233
|
+
);
|
|
221
234
|
|
|
222
235
|
void collectors_thread_context_init(VALUE profiling_module) {
|
|
223
236
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
|
@@ -255,6 +268,9 @@ void collectors_thread_context_init(VALUE profiling_module) {
|
|
|
255
268
|
at_resource_id = rb_intern_const("@resource");
|
|
256
269
|
at_root_span_id = rb_intern_const("@root_span");
|
|
257
270
|
at_type_id = rb_intern_const("@type");
|
|
271
|
+
at_otel_values_id = rb_intern_const("@otel_values");
|
|
272
|
+
at_parent_span_id_id = rb_intern_const("@parent_span_id");
|
|
273
|
+
at_datadog_trace_id = rb_intern_const("@datadog_trace");
|
|
258
274
|
|
|
259
275
|
gc_profiling_init();
|
|
260
276
|
}
|
|
@@ -282,6 +298,7 @@ static void thread_context_collector_typed_data_mark(void *state_ptr) {
|
|
|
282
298
|
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
|
|
283
299
|
rb_gc_mark(state->thread_list_buffer);
|
|
284
300
|
rb_gc_mark(state->main_thread);
|
|
301
|
+
rb_gc_mark(state->otel_current_span_key);
|
|
285
302
|
}
|
|
286
303
|
|
|
287
304
|
static void thread_context_collector_typed_data_free(void *state_ptr) {
|
|
@@ -334,6 +351,7 @@ static VALUE _native_new(VALUE klass) {
|
|
|
334
351
|
state->allocation_type_enabled = true;
|
|
335
352
|
state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
|
|
336
353
|
state->main_thread = rb_thread_main();
|
|
354
|
+
state->otel_current_span_key = Qnil;
|
|
337
355
|
state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
|
|
338
356
|
state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
|
|
339
357
|
|
|
@@ -603,11 +621,14 @@ bool thread_context_collector_on_gc_finish(VALUE self_instance) {
|
|
|
603
621
|
// Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
|
|
604
622
|
// on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
|
|
605
623
|
// samples first.
|
|
606
|
-
bool finished_major_gc = gc_profiling_has_major_gc_finished();
|
|
607
624
|
bool over_flush_time_treshold =
|
|
608
625
|
(wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
|
|
609
626
|
|
|
610
|
-
|
|
627
|
+
if (over_flush_time_treshold) {
|
|
628
|
+
return true;
|
|
629
|
+
} else {
|
|
630
|
+
return gc_profiling_has_major_gc_finished();
|
|
631
|
+
}
|
|
611
632
|
}
|
|
612
633
|
|
|
613
634
|
// This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
|
|
@@ -917,6 +938,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
|
|
|
917
938
|
));
|
|
918
939
|
rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
|
|
919
940
|
rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
|
|
941
|
+
rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
|
|
920
942
|
|
|
921
943
|
return result;
|
|
922
944
|
}
|
|
@@ -1104,10 +1126,19 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
|
|
|
1104
1126
|
|
|
1105
1127
|
VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
|
|
1106
1128
|
VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
|
|
1107
|
-
|
|
1129
|
+
// Note: On Ruby 3.x `rb_attr_get` is exactly the same as `rb_ivar_get`. For Ruby 2.x, the difference is that
|
|
1130
|
+
// `rb_ivar_get` can trigger "warning: instance variable @otel_values not initialized" if warnings are enabled and
|
|
1131
|
+
// opentelemetry is not in use, whereas `rb_attr_get` does the lookup without generating the warning.
|
|
1132
|
+
VALUE otel_values = rb_attr_get(active_trace, at_otel_values_id /* @otel_values */);
|
|
1133
|
+
|
|
1134
|
+
VALUE numeric_span_id = Qnil;
|
|
1135
|
+
|
|
1136
|
+
if (otel_values != Qnil) ddtrace_otel_trace_identifiers_for(state, &active_trace, &root_span, &numeric_span_id, active_span, otel_values);
|
|
1137
|
+
|
|
1138
|
+
if (root_span == Qnil || (active_span == Qnil && numeric_span_id == Qnil)) return;
|
|
1108
1139
|
|
|
1109
1140
|
VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
|
|
1110
|
-
|
|
1141
|
+
if (active_span != Qnil && numeric_span_id == Qnil) numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
|
|
1111
1142
|
if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
|
|
1112
1143
|
|
|
1113
1144
|
trace_identifiers_result->local_root_span_id = NUM2ULL(numeric_local_root_span_id);
|
|
@@ -1299,3 +1330,65 @@ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
|
|
|
1299
1330
|
default: return DDOG_CHARSLICE_C("(VM Internal, Missing class)");
|
|
1300
1331
|
}
|
|
1301
1332
|
}
|
|
1333
|
+
|
|
1334
|
+
static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
|
|
1335
|
+
if (state->otel_current_span_key == Qnil) {
|
|
1336
|
+
VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog"));
|
|
1337
|
+
VALUE opentelemetry_module = rb_const_get(datadog_module, rb_intern("OpenTelemetry"));
|
|
1338
|
+
VALUE api_module = rb_const_get(opentelemetry_module, rb_intern("API"));
|
|
1339
|
+
VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
|
|
1340
|
+
VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
|
|
1341
|
+
|
|
1342
|
+
if (current_span_key == Qnil) {
|
|
1343
|
+
rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
|
|
1344
|
+
}
|
|
1345
|
+
|
|
1346
|
+
state->otel_current_span_key = current_span_key;
|
|
1347
|
+
}
|
|
1348
|
+
|
|
1349
|
+
return state->otel_current_span_key;
|
|
1350
|
+
}
|
|
1351
|
+
|
|
1352
|
+
// This method gets used when ddtrace is being used indirectly via the otel APIs. Information gets stored slightly
|
|
1353
|
+
// differently, and this codepath handles it.
|
|
1354
|
+
static void ddtrace_otel_trace_identifiers_for(
|
|
1355
|
+
struct thread_context_collector_state *state,
|
|
1356
|
+
VALUE *active_trace,
|
|
1357
|
+
VALUE *root_span,
|
|
1358
|
+
VALUE *numeric_span_id,
|
|
1359
|
+
VALUE active_span,
|
|
1360
|
+
VALUE otel_values
|
|
1361
|
+
) {
|
|
1362
|
+
VALUE resolved_numeric_span_id =
|
|
1363
|
+
active_span == Qnil ?
|
|
1364
|
+
// For traces started from otel spans, the span id will be empty, and the @parent_span_id has the right value
|
|
1365
|
+
rb_ivar_get(*active_trace, at_parent_span_id_id /* @parent_span_id */) :
|
|
1366
|
+
// Regular span created by ddtrace
|
|
1367
|
+
rb_ivar_get(active_span, at_id_id /* @id */);
|
|
1368
|
+
|
|
1369
|
+
if (resolved_numeric_span_id == Qnil) return;
|
|
1370
|
+
|
|
1371
|
+
VALUE otel_current_span_key = get_otel_current_span_key(state);
|
|
1372
|
+
VALUE current_trace = *active_trace;
|
|
1373
|
+
|
|
1374
|
+
// ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
|
|
1375
|
+
// trace and span representing it. Each ddtrace trace is then connected to the previous otel span, forming a linked
|
|
1376
|
+
// list. The local root span is going to be the trace/span we find at the end of this linked list.
|
|
1377
|
+
while (otel_values != Qnil) {
|
|
1378
|
+
VALUE otel_span = rb_hash_lookup(otel_values, otel_current_span_key);
|
|
1379
|
+
if (otel_span == Qnil) break;
|
|
1380
|
+
VALUE next_trace = rb_ivar_get(otel_span, at_datadog_trace_id);
|
|
1381
|
+
if (next_trace == Qnil) break;
|
|
1382
|
+
|
|
1383
|
+
current_trace = next_trace;
|
|
1384
|
+
otel_values = rb_ivar_get(current_trace, at_otel_values_id /* @otel_values */);
|
|
1385
|
+
}
|
|
1386
|
+
|
|
1387
|
+
// We found the last trace in the linked list. This contains the local root span
|
|
1388
|
+
VALUE resolved_root_span = rb_ivar_get(current_trace, at_root_span_id /* @root_span */);
|
|
1389
|
+
if (resolved_root_span == Qnil) return;
|
|
1390
|
+
|
|
1391
|
+
*root_span = resolved_root_span;
|
|
1392
|
+
*active_trace = current_trace;
|
|
1393
|
+
*numeric_span_id = resolved_numeric_span_id;
|
|
1394
|
+
}
|