ddtrace 1.18.0 → 1.20.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +82 -1
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +96 -66
- data/ext/ddtrace_profiling_native_extension/collectors_discrete_dynamic_sampler.c +349 -0
- data/ext/ddtrace_profiling_native_extension/collectors_discrete_dynamic_sampler.h +89 -0
- data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
- data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
- data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
- data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
- data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
- data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
- data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +159 -124
- data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
- data/ext/ddtrace_profiling_native_extension/extconf.rb +19 -0
- data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
- data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
- data/ext/ddtrace_profiling_native_extension/helpers.h +6 -0
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +5 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +17 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
- data/ext/ddtrace_profiling_native_extension/stack_recorder.c +329 -10
- data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
- data/ext/ddtrace_profiling_native_extension/time_helpers.h +2 -0
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +2 -1
- data/lib/datadog/core/configuration/settings.rb +153 -21
- data/lib/datadog/core/environment/class_count.rb +6 -6
- data/lib/datadog/core/remote/component.rb +25 -12
- data/lib/datadog/core/remote/ext.rb +1 -0
- data/lib/datadog/core/remote/tie/tracing.rb +39 -0
- data/lib/datadog/core/remote/tie.rb +27 -0
- data/lib/datadog/core/telemetry/collector.rb +10 -0
- data/lib/datadog/core/telemetry/event.rb +2 -1
- data/lib/datadog/core/telemetry/ext.rb +3 -0
- data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
- data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
- data/lib/datadog/opentelemetry/sdk/propagator.rb +3 -2
- data/lib/datadog/opentelemetry.rb +3 -0
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +5 -12
- data/lib/datadog/profiling/component.rb +183 -13
- data/lib/datadog/profiling/scheduler.rb +4 -6
- data/lib/datadog/profiling/stack_recorder.rb +13 -2
- data/lib/datadog/tracing/configuration/ext.rb +0 -1
- data/lib/datadog/tracing/configuration/settings.rb +2 -1
- data/lib/datadog/tracing/contrib/action_cable/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_cable/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/action_mailer/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_mailer/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/action_pack/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_pack/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/action_view/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/action_view/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_job/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_job/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_model_serializers/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_model_serializers/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_record/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_record/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/active_support/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/analytics.rb +0 -1
- data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/aws/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/delayed_job/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/delayed_job/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +7 -0
- data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +1 -1
- data/lib/datadog/tracing/contrib/grape/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/grape/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/graphql/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/http/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/http/distributed/fetcher.rb +2 -2
- data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/kafka/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/kafka/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
- data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
- data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/opensearch/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/pg/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/presto/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/qless/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/qless/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/que/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/racecar/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/racecar/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +9 -2
- data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
- data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rails/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rake/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rake/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/redis/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/redis/instrumentation.rb +2 -2
- data/lib/datadog/tracing/contrib/redis/patcher.rb +34 -21
- data/lib/datadog/tracing/contrib/resque/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/resque/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/roda/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/roda/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sequel/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sequel/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/shoryuken/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/shoryuken/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sinatra/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sneakers/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sneakers/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/stripe/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/stripe/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sucker_punch/configuration/settings.rb +1 -0
- data/lib/datadog/tracing/contrib/sucker_punch/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +58 -0
- data/lib/datadog/tracing/contrib/trilogy/ext.rb +27 -0
- data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +94 -0
- data/lib/datadog/tracing/contrib/trilogy/integration.rb +43 -0
- data/lib/datadog/tracing/contrib/trilogy/patcher.rb +31 -0
- data/lib/datadog/tracing/contrib.rb +1 -0
- data/lib/datadog/tracing.rb +8 -2
- data/lib/ddtrace/version.rb +1 -1
- metadata +20 -6
@@ -0,0 +1,349 @@
|
|
1
|
+
#include "collectors_discrete_dynamic_sampler.h"

#include <ruby.h>
#include <math.h>   // ceil, used by maybe_readjust
#include <limits.h> // ULONG_MAX, used by maybe_readjust

#include "helpers.h"
#include "time_helpers.h"
#include "ruby_helpers.h"

// Baseline configuration used until the first real readjustment happens.
#define BASE_OVERHEAD_PCT 1.0
#define BASE_SAMPLING_INTERVAL 50

// How much wall time must elapse between recomputations of the sampling parameters.
#define ADJUSTMENT_WINDOW_NS SECONDS_AS_NS(1)

#define EMA_SMOOTHING_FACTOR 0.6
// Exponential moving average: returns `last` verbatim on the first observation,
// otherwise blends `last` into `avg` with weight EMA_SMOOTHING_FACTOR.
// NOTE: All arguments are parenthesized so arbitrary expressions can be passed safely;
//       only pass side-effect-free expressions (arguments may appear more than once).
#define EXP_MOVING_AVERAGE(last, avg, first) ((first) ? (last) : (1 - EMA_SMOOTHING_FACTOR) * (avg) + EMA_SMOOTHING_FACTOR * (last))
|
15
|
+
|
16
|
+
// Initializes a sampler with the default overhead target, tagging it with a name
// that is only used in debug output.
void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name) {
  sampler->debug_name = debug_name;
  // Setting the overhead target also performs a full reset of the sampler state.
  discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT);
}
|
20
|
+
|
21
|
+
// Resets all sampler state to defaults, keeping only its configuration
// (debug name and configured overhead target).
static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
  const char *kept_debug_name = sampler->debug_name;
  double kept_target_overhead = sampler->target_overhead;

  // The compound literal zero-initializes every field not listed below.
  *sampler = (discrete_dynamic_sampler) {
    .debug_name = kept_debug_name,
    .target_overhead = kept_target_overhead,
    // Treat the reset itself as a readjustment (it kinda is!) and wait out a full
    // adjustment window before recomputing stats; readjusting on the very next
    // event would mean acting on almost no information.
    .last_readjust_time_ns = now_ns,
    // Until that first real readjustment, fall back to a hardcoded interval...
    .sampling_interval = BASE_SAMPLING_INTERVAL,
    .sampling_probability = 1.0 / BASE_SAMPLING_INTERVAL,
    // ...and park the event counter right at the threshold so the very next event
    // is guaranteed to be sampled, giving the first real readjustment at least one
    // measurement of how expensive sampling actually is.
    .events_since_last_sample = BASE_SAMPLING_INTERVAL - 1,
  };
}
|
40
|
+
|
41
|
+
// Public entry point: resets the sampler using the current monotonic wall time.
void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler) {
  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
  _discrete_dynamic_sampler_reset(sampler, now);
}
|
45
|
+
|
46
|
+
// Validates and stores a new overhead target, then resets the sampler since all
// previously-accumulated state was gathered under the old target.
static void _discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
  // Valid targets live in ]0, 100]: a zero or negative budget makes no sense, and
  // we cannot spend more than all of our time sampling.
  bool out_of_range = !(target_overhead > 0 && target_overhead <= 100);
  if (out_of_range) {
    rb_raise(rb_eArgError, "Target overhead must be a double between ]0,100] was %f", target_overhead);
  }
  sampler->target_overhead = target_overhead;
  _discrete_dynamic_sampler_reset(sampler, now_ns);
}
|
53
|
+
|
54
|
+
// Public entry point: sets a new overhead target using the current monotonic wall time.
// Raises ArgumentError (via the internal helper) if target_overhead is outside ]0, 100].
void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead) {
  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
  _discrete_dynamic_sampler_set_overhead_target_percentage(sampler, target_overhead, now);
}
|
58
|
+
|
59
|
+
static void maybe_readjust(discrete_dynamic_sampler *sampler, long now);
|
60
|
+
|
61
|
+
// Decides whether the current event should be sampled.
//
// For efficiency reasons we don't do true random sampling but rather systematic
// sampling following a sampling interval/skip. This can be biased and hide patterns
// but the dynamic interval and rather indeterministic pattern of allocations in
// most real applications should help reduce the bias impact.
static bool _discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
  sampler->events_since_last_sample++;
  sampler->events_since_last_readjustment++;

  // An interval of 0 means "sampling disabled until the next readjustment".
  bool skip_this_event =
    sampler->sampling_interval == 0 ||
    sampler->events_since_last_sample < sampler->sampling_interval;

  if (skip_this_event) {
    // Even unsampled events may mark the moment where the sampler should readjust.
    maybe_readjust(sampler, now_ns);
    return false;
  }

  // Record when this sample began so after_sample can measure its duration.
  sampler->sample_start_time_ns = now_ns;
  return true;
}
|
79
|
+
|
80
|
+
// Public entry point: makes a sampling decision using the current monotonic wall time.
// If true is returned, a follow-up call to discrete_dynamic_sampler_after_sample is expected.
bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler) {
  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
  return _discrete_dynamic_sampler_should_sample(sampler, now);
}
|
84
|
+
|
85
|
+
// Records the end of a sampling operation and returns its duration in nanoseconds.
static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
  // If should_sample never recorded a start time there is nothing to measure;
  // clamp to >= 0 to guard against timestamps that appear to go backwards.
  long sample_duration_ns = 0;
  if (sampler->sample_start_time_ns != 0) {
    sample_duration_ns = long_max_of(0, now_ns - sampler->sample_start_time_ns);
  }

  sampler->samples_since_last_readjustment++;
  sampler->sampling_time_since_last_readjustment_ns += sample_duration_ns;
  sampler->events_since_last_sample = 0;

  // A finished sample is also a chance to readjust our sampling parameters.
  maybe_readjust(sampler, now_ns);

  return sample_duration_ns;
}
|
96
|
+
|
97
|
+
// Public entry point: signals the end of a sampling operation using the current
// monotonic wall time. Returns the duration of the sample, in nanoseconds.
long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler) {
  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
  return _discrete_dynamic_sampler_after_sample(sampler, now);
}
|
101
|
+
|
102
|
+
// Returns the current sampling probability as a percentage in [0, 100]
// (the internal field stores it as a [0, 1] fraction).
double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler) {
  return sampler->sampling_probability * 100.;
}
|
105
|
+
|
106
|
+
// Returns how many events have been seen since the last sampled one.
// NOTE: The field is an unsigned long; the implicit conversion to size_t is lossless
//       on platforms where size_t is at least as wide (true everywhere we build).
size_t discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler) {
  return sampler->events_since_last_sample;
}
|
109
|
+
|
110
|
+
// Recomputes the sampler's probability/interval from the data gathered during the
// last adjustment window. Called on every event and sample; returns early (cheaply)
// until at least ADJUSTMENT_WINDOW_NS has elapsed since the previous readjustment.
static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
  long window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;

  if (window_time_ns < ADJUSTMENT_WINDOW_NS) {
    // not enough time has passed to perform a readjustment
    return;
  }

  // If we got this far, lets recalculate our sampling params based on new observations
  bool first_readjustment = !sampler->has_completed_full_adjustment_window;

  // Update our running average of events/sec with latest observation
  sampler->events_per_ns = EXP_MOVING_AVERAGE(
    (double) sampler->events_since_last_readjustment / window_time_ns,
    sampler->events_per_ns,
    first_readjustment
  );

  // Update our running average of sampling time for a specific event
  long sampling_window_time_ns = sampler->sampling_time_since_last_readjustment_ns;
  long sampling_overshoot_time_ns = -1;
  if (sampler->samples_since_last_readjustment > 0) {
    // We can only update sampling-related stats if we actually sampled on the last window...

    // Lets update our average sampling time per event.
    // (The `> 0` guard above makes this division safe; the cast keeps it in signed arithmetic.)
    long avg_sampling_time_in_window_ns = sampling_window_time_ns / (long) sampler->samples_since_last_readjustment;
    sampler->sampling_time_ns = EXP_MOVING_AVERAGE(
      avg_sampling_time_in_window_ns,
      sampler->sampling_time_ns,
      first_readjustment
    );
  }

  // Are we meeting our target in practice? If we're consistently overshooting our estimate due to
  // non-uniform allocation patterns lets adjust our overhead target.
  // NOTE: Updating this even when no samples occur is a conscious choice which enables us to cooldown
  //       extreme adjustments over time. If we didn't do this, whenever a big spike caused
  //       target_overhead_adjustment to equal target_overhead, we'd get stuck in a "probability = 0" state.
  long reference_target_sampling_time_ns = window_time_ns * (sampler->target_overhead / 100.);
  // Overshoot by definition is always >= 0. < 0 would be undershooting!
  // NOTE: The explicit cast keeps this subtraction in signed arithmetic; without it, undershooting would
  //       wrap the unsigned counter around before long_max_of could clamp it.
  sampling_overshoot_time_ns = long_max_of(0, (long) sampler->sampling_time_since_last_readjustment_ns - reference_target_sampling_time_ns);
  // Our overhead adjustment should always be between [-target_overhead, 0]. Higher adjustments would lead
  // to negative overhead targets which don't make much sense.
  double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead, sampling_overshoot_time_ns * 100. / window_time_ns);
  sampler->target_overhead_adjustment = EXP_MOVING_AVERAGE(
    last_target_overhead_adjustment,
    sampler->target_overhead_adjustment,
    first_readjustment
  );

  // Apply our overhead adjustment to figure out our real targets for this readjustment.
  double target_overhead = double_max_of(0, sampler->target_overhead + sampler->target_overhead_adjustment);
  long target_sampling_time_ns = window_time_ns * (target_overhead / 100.);

  // Recalculate target sampling probability so that the following 2 hold:
  // * window_time_ns = working_window_time_ns + sampling_window_time_ns
  //   (total wall time in this window = time doing actual app stuff + time spent sampling)
  // * sampling_window_time_ns <= window_time_ns * target_overhead / 100
  //
  // Note that, assuming no events are emitted during sampling itself,
  //
  //   sampling_window_time_ns = samples_in_window * sampling_time_ns
  //                           = events_per_ns * working_window_time_ns * sampling_probability * sampling_time_ns
  //
  // Re-ordering for sampling_probability and solving for the upper-bound of sampling_window_time_ns:
  //
  //   sampling_window_time_ns = window_time_ns * target_overhead / 100
  //   sampling_probability = window_time_ns * target_overhead / 100 / (events_per_ns * working_window_time_ns * sampling_time_ns)
  //
  // Which you can intuitively understand as:
  //
  //   sampling_probability = max_allowed_time_for_sampling_ns / time_to_sample_all_events_ns
  //
  // As a quick sanity check:
  // * If app is eventing very little or we're sampling very fast, so that
  //   time_to_sample_all_events_ns < max_allowed_time_for_sampling_ns, then probability will be > 1
  //   (but we should clamp to 1 since probabilities higher than 1 don't make sense).
  // * If app is eventing a lot or our sampling overhead is big, then as time_to_sample_all_events_ns
  //   grows, sampling_probability will tend to 0.
  long working_window_time_ns = long_max_of(0, window_time_ns - sampling_window_time_ns);
  double max_allowed_time_for_sampling_ns = target_sampling_time_ns;
  long time_to_sample_all_events_ns = sampler->events_per_ns * working_window_time_ns * sampler->sampling_time_ns;
  if (max_allowed_time_for_sampling_ns == 0) {
    // if we aren't allowed any sampling time at all, probability has to be 0
    sampler->sampling_probability = 0;
  } else {
    // otherwise apply the formula described above (protecting against div by 0)
    sampler->sampling_probability = time_to_sample_all_events_ns == 0 ? 1. :
      double_min_of(1., max_allowed_time_for_sampling_ns / time_to_sample_all_events_ns);
  }

  // Doing true random selection would involve "tossing a coin" on every allocation. Lets do systematic
  // sampling instead so that our sampling decision can rely solely on a sampling skip/interval
  // (i.e. more efficient).
  //
  //   sampling_interval = events / samples
  //                     = event_rate * working_window_time_ns / (event_rate * working_window_time_ns * sampling_probability)
  //                     = 1 / sampling_probability
  //
  // NOTE: The sampling interval has to be an integer since we're dealing with discrete events here. This
  //       means that there'll be a loss of precision (and thus control) when adjusting between
  //       probabilities that lead to non-integer granularity changes (e.g. probabilities in the range of
  //       ]50%, 100%[ which map to intervals in the range of ]1, 2[). Our approach when the sampling
  //       interval is a non-integer is to ceil it (i.e. we'll always choose to sample less often).
  // NOTE: Overhead target adjustments or very big sampling times can in theory bring probability so close
  //       to 0 as to effectively round down to full 0. This means we have to be careful to handle div-by-0
  //       as well as resulting double intervals that are so big they don't fit into the sampling_interval.
  //       In both cases lets just disable sampling until next readjustment by setting interval to 0.
  double sampling_interval = sampler->sampling_probability == 0 ? 0 : ceil(1.0 / sampler->sampling_probability);
  sampler->sampling_interval = sampling_interval > ULONG_MAX ? 0 : sampling_interval;

  #ifdef DD_DEBUG
    double allocs_in_60s = sampler->events_per_ns * 1e9 * 60;
    double samples_in_60s = allocs_in_60s * sampler->sampling_probability;
    double expected_total_sampling_time_in_60s =
      samples_in_60s * sampler->sampling_time_ns / 1e9;
    double real_total_sampling_time_in_60s = sampling_window_time_ns / 1e9 * 60 / (window_time_ns / 1e9);

    fprintf(stderr, "[dds.%s] readjusting...\n", sampler->debug_name);
    // NOTE: These counters are unsigned long, so they must be printed with %lu -- a
    //       mismatched conversion specifier (%ld/%zu) is undefined behavior per the C standard.
    fprintf(stderr, "samples_since_last_readjustment=%lu\n", sampler->samples_since_last_readjustment);
    fprintf(stderr, "window_time=%ld\n", window_time_ns);
    fprintf(stderr, "events_per_sec=%f\n", sampler->events_per_ns * 1e9);
    fprintf(stderr, "sampling_time=%ld\n", sampler->sampling_time_ns);
    fprintf(stderr, "sampling_window_time=%ld\n", sampling_window_time_ns);
    fprintf(stderr, "sampling_target_time=%ld\n", reference_target_sampling_time_ns);
    fprintf(stderr, "sampling_overshoot_time=%ld\n", sampling_overshoot_time_ns);
    fprintf(stderr, "working_window_time=%ld\n", working_window_time_ns);
    fprintf(stderr, "sampling_interval=%lu\n", sampler->sampling_interval);
    fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability);
    fprintf(stderr, "expected allocs in 60s=%f\n", allocs_in_60s);
    fprintf(stderr, "expected samples in 60s=%f\n", samples_in_60s);
    fprintf(stderr, "expected sampling time in 60s=%f (previous real=%f)\n", expected_total_sampling_time_in_60s, real_total_sampling_time_in_60s);
    fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
    fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
    fprintf(stderr, "target_sampling_time=%ld\n", target_sampling_time_ns);
    fprintf(stderr, "expected max overhead in 60s=%f\n", target_overhead / 100.0 * 60);
    fprintf(stderr, "-------\n");
  #endif

  sampler->events_since_last_readjustment = 0;
  sampler->samples_since_last_readjustment = 0;
  sampler->sampling_time_since_last_readjustment_ns = 0;
  sampler->last_readjust_time_ns = now;
  sampler->has_completed_full_adjustment_window = true;
}
|
257
|
+
|
258
|
+
// ---
|
259
|
+
// Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.
|
260
|
+
|
261
|
+
// Ruby-facing test shims; registered on a Testing::Sampler class in
// collectors_discrete_dynamic_sampler_init below so the sampler logic can be
// exercised from RSpec with caller-controlled timestamps.
static VALUE _native_new(VALUE klass);
static VALUE _native_reset(VALUE self, VALUE now);
static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now);
static VALUE _native_should_sample(VALUE self, VALUE now);
static VALUE _native_after_sample(VALUE self, VALUE now);
static VALUE _native_probability(VALUE self);

// Native state wrapped inside each Ruby Testing::Sampler object.
typedef struct sampler_state {
  discrete_dynamic_sampler sampler;
} sampler_state;
|
271
|
+
|
272
|
+
// Registers the testing-only Ruby wrapper class
// (Datadog::Profiling::Collectors::DiscreteDynamicSampler::Testing::Sampler)
// so the sampler can be driven from RSpec.
void collectors_discrete_dynamic_sampler_init(VALUE profiling_module) {
  VALUE collectors = rb_define_module_under(profiling_module, "Collectors");
  VALUE sampler_namespace = rb_define_module_under(collectors, "DiscreteDynamicSampler");
  VALUE testing_namespace = rb_define_module_under(sampler_namespace, "Testing");
  VALUE testing_sampler_class = rb_define_class_under(testing_namespace, "Sampler", rb_cObject);

  rb_define_alloc_func(testing_sampler_class, _native_new);

  rb_define_method(testing_sampler_class, "_native_reset", _native_reset, 1);
  rb_define_method(testing_sampler_class, "_native_set_overhead_target_percentage", _native_set_overhead_target_percentage, 2);
  rb_define_method(testing_sampler_class, "_native_should_sample", _native_should_sample, 1);
  rb_define_method(testing_sampler_class, "_native_after_sample", _native_after_sample, 1);
  rb_define_method(testing_sampler_class, "_native_probability", _native_probability, 0);
}
|
286
|
+
|
287
|
+
// TypedData descriptor for the Testing::Sampler wrapper object.
static const rb_data_type_t sampler_typed_data = {
  .wrap_struct_name = "Datadog::Profiling::DiscreteDynamicSampler::Testing::Sampler",
  .function = {
    // sampler_state holds no Ruby VALUEs and owns no nested allocations, so the
    // default free (ruby_xfree on the struct) is sufficient; no dmark needed either.
    .dfree = RUBY_DEFAULT_FREE,
    .dsize = NULL,
  },
  .flags = RUBY_TYPED_FREE_IMMEDIATELY
};
|
295
|
+
|
296
|
+
// Allocator for Testing::Sampler: creates zeroed native state, initializes the
// sampler with its defaults, and wraps it in a Ruby object.
static VALUE _native_new(VALUE klass) {
  // ruby_xcalloc both allocates and zero-initializes the state.
  sampler_state *allocated_state = ruby_xcalloc(sizeof(sampler_state), 1);

  discrete_dynamic_sampler_init(&allocated_state->sampler, "test sampler");

  return TypedData_Wrap_Struct(klass, &sampler_typed_data, allocated_state);
}
|
303
|
+
|
304
|
+
// Testing shim: resets the wrapped sampler at a caller-supplied timestamp.
static VALUE _native_reset(VALUE self, VALUE now_ns) {
  ENFORCE_TYPE(now_ns, T_FIXNUM);

  sampler_state *wrapped_state;
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, wrapped_state);

  _discrete_dynamic_sampler_reset(&wrapped_state->sampler, NUM2LONG(now_ns));

  return Qtrue;
}
|
313
|
+
|
314
|
+
// Testing shim: sets a new overhead target on the wrapped sampler at a
// caller-supplied timestamp. Raises ArgumentError for targets outside ]0, 100].
static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now_ns) {
  ENFORCE_TYPE(target_overhead, T_FLOAT);
  ENFORCE_TYPE(now_ns, T_FIXNUM);

  sampler_state *wrapped_state;
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, wrapped_state);

  double new_target = NUM2DBL(target_overhead);
  _discrete_dynamic_sampler_set_overhead_target_percentage(&wrapped_state->sampler, new_target, NUM2LONG(now_ns));

  return Qnil;
}
|
325
|
+
|
326
|
+
// Testing shim: makes a sampling decision at a caller-supplied timestamp.
// `static` added to match the forward declaration above (which already gave this
// function internal linkage) and silence missing-prototype style warnings.
static VALUE _native_should_sample(VALUE self, VALUE now_ns) {
  ENFORCE_TYPE(now_ns, T_FIXNUM);

  sampler_state *state;
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);

  return _discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
}
|
334
|
+
|
335
|
+
// Testing shim: signals the end of a sample at a caller-supplied timestamp and
// returns the measured sample duration (ns). `static` added to match the forward
// declaration above, which already gave this function internal linkage.
static VALUE _native_after_sample(VALUE self, VALUE now_ns) {
  ENFORCE_TYPE(now_ns, T_FIXNUM);

  sampler_state *state;
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);

  return LONG2NUM(_discrete_dynamic_sampler_after_sample(&state->sampler, NUM2LONG(now_ns)));
}
|
343
|
+
|
344
|
+
// Testing shim: returns the wrapped sampler's current probability as a percentage.
// `static` added to match the forward declaration above, which already gave this
// function internal linkage.
static VALUE _native_probability(VALUE self) {
  sampler_state *state;
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);

  return DBL2NUM(discrete_dynamic_sampler_probability(&state->sampler));
}
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <stdbool.h>
|
4
|
+
#include <stddef.h>
|
5
|
+
|
6
|
+
// A sampler that will sample discrete events based on the overhead of their
|
7
|
+
// sampling.
|
8
|
+
//
|
9
|
+
// NOTE: For performance reasons, this sampler does systematic sampling via
|
10
|
+
// sampling intervals/skips that are dynamically adjusted over time.
|
11
|
+
// It will not perform truly random sampling by "throwing a coin" at
|
12
|
+
// every event and is thus, in theory, susceptible to some pattern
|
13
|
+
// biases. In practice, the dynamic readjustment of sampling interval
|
14
|
+
// and randomized starting point should help with avoiding heavy biases.
|
15
|
+
typedef struct discrete_dynamic_sampler {
  // --- Config ---
  // Name of this sampler for debug logs.
  const char *debug_name;
  // Value in the range ]0, 100] representing the % of time we're willing to dedicate
  // to sampling.
  double target_overhead;

  // -- Reference State ---
  // Moving average of how many events per ns we saw over the recent past.
  double events_per_ns;
  // Moving average of the sampling time of each individual event (in nanoseconds).
  long sampling_time_ns;
  // Sampling probability being applied by this sampler, as a [0, 1] fraction.
  double sampling_probability;
  // Sampling interval/skip that drives the systematic sampling done by this sampler.
  // NOTE: This is an inverted view of the probability.
  // NOTE: A value of 0 works as +inf, effectively disabling sampling (to align with probability=0)
  unsigned long sampling_interval;

  // -- Sampling State --
  // How many events have we seen since we last decided to sample.
  unsigned long events_since_last_sample;
  // Captures the time at which the last true-returning call to should_sample happened.
  // This is used in after_sample to understand the total sample time.
  long sample_start_time_ns;

  // -- Adjustment State --
  // Has this sampler already run for at least one complete adjustment window?
  bool has_completed_full_adjustment_window;
  // Time at which we last readjusted our sampling parameters.
  long last_readjust_time_ns;
  // How many events have we seen since the last readjustment.
  unsigned long events_since_last_readjustment;
  // How many samples have we seen since the last readjustment.
  unsigned long samples_since_last_readjustment;
  // How much time have we spent sampling since the last readjustment (in nanoseconds).
  unsigned long sampling_time_since_last_readjustment_ns;
  // A negative number that we add to target_overhead to serve as extra padding to
  // try and mitigate observed overshooting of max sampling time.
  double target_overhead_adjustment;
} discrete_dynamic_sampler;
|
57
|
+
|
58
|
+
|
59
|
+
// Init a new sampler with sane defaults.
|
60
|
+
void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name);
|
61
|
+
|
62
|
+
// Reset a sampler, clearing all stored state.
|
63
|
+
void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler);
|
64
|
+
|
65
|
+
// Sets a new target_overhead for the provided sampler, resetting it in the process.
|
66
|
+
// @param target_overhead A double representing the percentage of total time we are
|
67
|
+
// willing to use as overhead for the resulting sampling. Values are expected
|
68
|
+
// to be in the range ]0.0, 100.0].
|
69
|
+
void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead);
|
70
|
+
|
71
|
+
// Make a sampling decision.
|
72
|
+
//
|
73
|
+
// @return True if the event associated with this decision should be sampled, false
|
74
|
+
// otherwise.
|
75
|
+
//
|
76
|
+
// NOTE: If true is returned we implicitly assume the start of a sampling operation
|
77
|
+
// and it is expected that a follow-up after_sample call is issued.
|
78
|
+
bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler);
|
79
|
+
|
80
|
+
// Signal the end of a sampling operation.
|
81
|
+
//
|
82
|
+
// @return Sampling time in nanoseconds for the sample operation we just finished.
|
83
|
+
long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler);
|
84
|
+
|
85
|
+
// Retrieve the current sampling probability ([0.0, 100.0]) being applied by this sampler.
|
86
|
+
double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler);
|
87
|
+
|
88
|
+
// Retrieve the current number of events seen since last sample.
|
89
|
+
unsigned long discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler);
|
@@ -19,7 +19,7 @@
|
|
19
19
|
//
|
20
20
|
// Instead of sampling at a fixed sample rate, the actual sampling rate should be decided by also observing the impact
|
21
21
|
// that running the profiler is having. This protects against issues such as the profiler being deployed in very busy
|
22
|
-
//machines or containers with unrealistic CPU restrictions.
|
22
|
+
// machines or containers with unrealistic CPU restrictions.
|
23
23
|
//
|
24
24
|
// ### Implementation
|
25
25
|
//
|
@@ -35,13 +35,13 @@
|
|
35
35
|
// sample. If it's not, it will skip sampling.
|
36
36
|
//
|
37
37
|
// Finally, as an additional optimization, there's a `dynamic_sampling_rate_get_sleep()` which, given the current
|
38
|
-
// wall-time, will return the time remaining (*there's an exception, check
|
38
|
+
// wall-time, will return the time remaining (*there's an exception, check function) until the next sample.
|
39
39
|
//
|
40
40
|
// ---
|
41
41
|
|
42
42
|
// This is the wall-time overhead we're targeting. E.g. we target to spend no more than 2%, or 1.2 seconds per minute,
|
43
|
-
// taking profiling samples.
|
44
|
-
#define
|
43
|
+
// taking profiling samples by default.
|
44
|
+
#define DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // %
|
45
45
|
// See `dynamic_sampling_rate_get_sleep()` for details
|
46
46
|
#define MAX_SLEEP_TIME_NS MILLIS_AS_NS(100)
|
47
47
|
// See `dynamic_sampling_rate_after_sample()` for details
|
@@ -49,6 +49,11 @@
|
|
49
49
|
|
50
50
|
void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state) {
|
51
51
|
atomic_init(&state->next_sample_after_monotonic_wall_time_ns, 0);
|
52
|
+
dynamic_sampling_rate_set_overhead_target_percentage(state, DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE);
|
53
|
+
}
|
54
|
+
|
55
|
+
void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage) {
|
56
|
+
state->overhead_target_percentage = overhead_target_percentage;
|
52
57
|
}
|
53
58
|
|
54
59
|
void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state) {
|
@@ -76,7 +81,7 @@ bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, lon
|
|
76
81
|
}
|
77
82
|
|
78
83
|
void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long wall_time_ns_after_sample, uint64_t sampling_time_ns) {
|
79
|
-
double overhead_target =
|
84
|
+
double overhead_target = state->overhead_target_percentage;
|
80
85
|
|
81
86
|
// The idea here is that we're targeting a maximum % of wall-time spent sampling.
|
82
87
|
// So for instance, if sampling_time_ns is 2% of the time we spend working, how much is the 98% we should spend
|
@@ -93,48 +98,51 @@ void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long
|
|
93
98
|
// ---
|
94
99
|
// Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.
|
95
100
|
|
96
|
-
VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
|
97
|
-
VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
|
98
|
-
VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);
|
101
|
+
VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
|
102
|
+
VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
|
103
|
+
VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);
|
99
104
|
|
100
105
|
void collectors_dynamic_sampling_rate_init(VALUE profiling_module) {
|
101
106
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
102
107
|
VALUE dynamic_sampling_rate_module = rb_define_module_under(collectors_module, "DynamicSamplingRate");
|
103
108
|
VALUE testing_module = rb_define_module_under(dynamic_sampling_rate_module, "Testing");
|
104
109
|
|
105
|
-
rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep,
|
106
|
-
rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample,
|
107
|
-
rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample,
|
110
|
+
rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 3);
|
111
|
+
rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 3);
|
112
|
+
rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 3);
|
108
113
|
}
|
109
114
|
|
110
|
-
VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
|
115
|
+
VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
|
111
116
|
ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
|
112
117
|
ENFORCE_TYPE(current_monotonic_wall_time_ns, T_FIXNUM);
|
113
118
|
|
114
119
|
dynamic_sampling_rate_state state;
|
115
120
|
dynamic_sampling_rate_init(&state);
|
121
|
+
dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
|
116
122
|
atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));
|
117
123
|
|
118
124
|
return ULL2NUM(dynamic_sampling_rate_get_sleep(&state, NUM2LONG(current_monotonic_wall_time_ns)));
|
119
125
|
}
|
120
126
|
|
121
|
-
VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
|
127
|
+
VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
|
122
128
|
ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
|
123
129
|
ENFORCE_TYPE(wall_time_ns_before_sample, T_FIXNUM);
|
124
130
|
|
125
131
|
dynamic_sampling_rate_state state;
|
126
132
|
dynamic_sampling_rate_init(&state);
|
133
|
+
dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
|
127
134
|
atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));
|
128
135
|
|
129
136
|
return dynamic_sampling_rate_should_sample(&state, NUM2LONG(wall_time_ns_before_sample)) ? Qtrue : Qfalse;
|
130
137
|
}
|
131
138
|
|
132
|
-
VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
|
139
|
+
VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
|
133
140
|
ENFORCE_TYPE(wall_time_ns_after_sample, T_FIXNUM);
|
134
141
|
ENFORCE_TYPE(sampling_time_ns, T_FIXNUM);
|
135
142
|
|
136
143
|
dynamic_sampling_rate_state state;
|
137
144
|
dynamic_sampling_rate_init(&state);
|
145
|
+
dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
|
138
146
|
|
139
147
|
dynamic_sampling_rate_after_sample(&state, NUM2LONG(wall_time_ns_after_sample), NUM2ULL(sampling_time_ns));
|
140
148
|
|
@@ -4,10 +4,14 @@
|
|
4
4
|
#include <stdbool.h>
|
5
5
|
|
6
6
|
typedef struct {
|
7
|
+
// This is the wall-time overhead we're targeting. E.g. by default, we target to spend no more than 2%, or 1.2 seconds
|
8
|
+
// per minute, taking profiling samples.
|
9
|
+
double overhead_target_percentage;
|
7
10
|
atomic_long next_sample_after_monotonic_wall_time_ns;
|
8
11
|
} dynamic_sampling_rate_state;
|
9
12
|
|
10
13
|
void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state);
|
14
|
+
void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage);
|
11
15
|
void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state);
|
12
16
|
uint64_t dynamic_sampling_rate_get_sleep(dynamic_sampling_rate_state *state, long current_monotonic_wall_time_ns);
|
13
17
|
bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, long wall_time_ns_before_sample);
|