ddtrace 1.18.0 → 1.19.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +50 -1
- data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
- data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
- data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
- data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
- data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
- data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
- data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
- data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +159 -124
- data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
- data/ext/ddtrace_profiling_native_extension/extconf.rb +16 -0
- data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
- data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
- data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
- data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +5 -0
- data/ext/ddtrace_profiling_native_extension/profiling.c +1 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
- data/ext/ddtrace_profiling_native_extension/stack_recorder.c +329 -10
- data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
- data/lib/datadog/core/configuration/settings.rb +139 -22
- data/lib/datadog/core/telemetry/collector.rb +10 -0
- data/lib/datadog/core/telemetry/event.rb +2 -1
- data/lib/datadog/core/telemetry/ext.rb +3 -0
- data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
- data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -11
- data/lib/datadog/profiling/component.rb +197 -13
- data/lib/datadog/profiling/scheduler.rb +4 -6
- data/lib/datadog/profiling/stack_recorder.rb +13 -2
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
- data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
- data/lib/ddtrace/version.rb +1 -1
- metadata +12 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: […]
-  data.tar.gz: […]
+  metadata.gz: 37ea5c2fe193569e17d13e026b4477dd8806c00df50250fb5f69854c23e6e6a5
+  data.tar.gz: 858b756d1ef6baddb66f85fb44f3301b317e151d58e4e299390f819621d4ecb8
 SHA512:
-  metadata.gz: […]
-  data.tar.gz: […]
+  metadata.gz: 3d86acc37f0bcb7d3b680c4ee8698b45eeb10c026d7dbcdf65ca5c7991eeb561ab35b40b02e98aa24dd984fd7871da8c4906ccc1aa64f9b8c2d8ad86aded199b
+  data.tar.gz: 4463963285c39c09e2d1d090fe0f80518107b7b60fdb58a0bd993d11dfbaf64e0131cd774b16483d6bd84b5a7faf4256eb8304c3bf4b08b4b90dadfaa513adb9
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,33 @@
 
 ## [Unreleased]
 
+## [1.19.0] - 2024-01-10
+
+### Highlights
+Alpha support for memory profiling has been added. For more details, check the [release notes](https://github.com/DataDog/dd-trace-rb/releases/tag/v1.19.0)
+
+### Added
+* Tracing: Add `on_error` settings for `mysql2` ([#3316][])
+* Core: Add install_signature to app-started telemetry event ([#3349][])
+* Profiling: Heap Profiling ([#3281][]) ([#3287][]) ([#3328][]) ([#3329][]) ([#3333][]) ([#3360][])
+* Profiling: Redesign GC profiling to add timeline support and reduce overhead ([#3313][])
+* Core: Use Ruby 3.3 stable for CI testing ([#3354][])
+
+### Changed
+* Core: Bump `datadog-ci` dependency to 0.6.0 ([#3361][])
+* Core: Bump debase-ruby_core_source dependency to 3.3.1 ([#3373][])
+* Docs: Backport "List Ruby 3.3 as supported in the docs" to master branch ([#3374][])
+* Profiling: Import upstream `rb_profile_frames` fix ([#3352][])
+* Profiling: Allow the dynamic sampling rate overhead target to be set ([#3310][])
+* Profiling: Split profiling tests into ractor and non-ractor suites ([#3320][])
+
+### Fixed
+* Docs: Fix `pg` doc markdown format ([#3317][])
+* Tracing: Fix recursive `require` in Railtie ([#3365][])
+* Profiling: Fix issues stemming from rb_gc_force_recycle ([#3366][])
+* Profiling: Fix Ruby 3.3 CI being broken in master due to profiler ([#3356][])
+* Profiling: Fix "no signals" workaround detection when mariadb is in use ([#3362][])
+
 ## [1.18.0] - 2023-12-07
 
 ### Added
@@ -2680,7 +2707,8 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
 Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
 
 
-[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.[…]
+[Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.19.0...master
+[1.19.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.18.0...v1.19.0
 [1.18.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.17.0...v1.18.0
 [1.17.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.16.2...v1.17.0
 [1.16.2]: https://github.com/DataDog/dd-trace-rb/compare/v1.16.1...v1.16.2
@@ -3910,12 +3938,33 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
 [#3273]: https://github.com/DataDog/dd-trace-rb/issues/3273
 [#3279]: https://github.com/DataDog/dd-trace-rb/issues/3279
 [#3280]: https://github.com/DataDog/dd-trace-rb/issues/3280
+[#3281]: https://github.com/DataDog/dd-trace-rb/issues/3281
 [#3284]: https://github.com/DataDog/dd-trace-rb/issues/3284
 [#3286]: https://github.com/DataDog/dd-trace-rb/issues/3286
+[#3287]: https://github.com/DataDog/dd-trace-rb/issues/3287
 [#3289]: https://github.com/DataDog/dd-trace-rb/issues/3289
 [#3303]: https://github.com/DataDog/dd-trace-rb/issues/3303
 [#3307]: https://github.com/DataDog/dd-trace-rb/issues/3307
 [#3308]: https://github.com/DataDog/dd-trace-rb/issues/3308
+[#3310]: https://github.com/DataDog/dd-trace-rb/issues/3310
+[#3313]: https://github.com/DataDog/dd-trace-rb/issues/3313
+[#3316]: https://github.com/DataDog/dd-trace-rb/issues/3316
+[#3317]: https://github.com/DataDog/dd-trace-rb/issues/3317
+[#3320]: https://github.com/DataDog/dd-trace-rb/issues/3320
+[#3328]: https://github.com/DataDog/dd-trace-rb/issues/3328
+[#3329]: https://github.com/DataDog/dd-trace-rb/issues/3329
+[#3333]: https://github.com/DataDog/dd-trace-rb/issues/3333
+[#3349]: https://github.com/DataDog/dd-trace-rb/issues/3349
+[#3352]: https://github.com/DataDog/dd-trace-rb/issues/3352
+[#3354]: https://github.com/DataDog/dd-trace-rb/issues/3354
+[#3356]: https://github.com/DataDog/dd-trace-rb/issues/3356
+[#3360]: https://github.com/DataDog/dd-trace-rb/issues/3360
+[#3361]: https://github.com/DataDog/dd-trace-rb/issues/3361
+[#3362]: https://github.com/DataDog/dd-trace-rb/issues/3362
+[#3365]: https://github.com/DataDog/dd-trace-rb/issues/3365
+[#3366]: https://github.com/DataDog/dd-trace-rb/issues/3366
+[#3373]: https://github.com/DataDog/dd-trace-rb/issues/3373
+[#3374]: https://github.com/DataDog/dd-trace-rb/issues/3374
 [@AdrianLC]: https://github.com/AdrianLC
 [@Azure7111]: https://github.com/Azure7111
 [@BabyGroot]: https://github.com/BabyGroot
data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
CHANGED
@@ -75,15 +75,22 @@
 //
 // ---
 
+#ifndef NO_POSTPONED_TRIGGER
+// Used to call the rb_postponed_job_trigger from Ruby 3.3+. These get initialized in
+// `collectors_cpu_and_wall_time_worker_init` below and always get reused after that.
+static rb_postponed_job_handle_t sample_from_postponed_job_handle;
+static rb_postponed_job_handle_t after_gc_from_postponed_job_handle;
+#endif
+
 // Contains state for a single CpuAndWallTimeWorker instance
 struct cpu_and_wall_time_worker_state {
   // These are immutable after initialization
 
   bool gc_profiling_enabled;
-  bool allocation_counting_enabled;
   bool no_signals_workaround_enabled;
   bool dynamic_sampling_rate_enabled;
-  int allocation_sample_every;
+  int allocation_sample_every;
+  bool allocation_profiling_enabled;
   VALUE self_instance;
   VALUE thread_context_collector_instance;
   VALUE idle_sampling_helper_instance;
@@ -149,10 +156,11 @@ static VALUE _native_initialize(
   VALUE thread_context_collector_instance,
   VALUE gc_profiling_enabled,
   VALUE idle_sampling_helper_instance,
-  VALUE allocation_counting_enabled,
   VALUE no_signals_workaround_enabled,
   VALUE dynamic_sampling_rate_enabled,
-  VALUE […]
+  VALUE dynamic_sampling_rate_overhead_target_percentage,
+  VALUE allocation_sample_every,
+  VALUE allocation_profiling_enabled
 );
 static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
 static VALUE _native_sampling_loop(VALUE self, VALUE instance);
@@ -211,6 +219,16 @@ __thread uint64_t allocation_count = 0;
 void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
   rb_global_variable(&active_sampler_instance);
 
+  #ifndef NO_POSTPONED_TRIGGER
+    int unused_flags = 0;
+    sample_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, sample_from_postponed_job, NULL);
+    after_gc_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, after_gc_from_postponed_job, NULL);
+
+    if (sample_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
+      rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
+    }
+  #endif
+
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
   VALUE collectors_cpu_and_wall_time_worker_class = rb_define_class_under(collectors_module, "CpuAndWallTimeWorker", rb_cObject);
   // Hosts methods used for testing the native code using RSpec
@@ -226,7 +244,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
   // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
   rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
 
-  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, […]
+  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
   rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
   rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
   rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
@@ -264,10 +282,10 @@ static VALUE _native_new(VALUE klass) {
   // being leaked.
 
   state->gc_profiling_enabled = false;
-  state->allocation_counting_enabled = false;
   state->no_signals_workaround_enabled = false;
   state->dynamic_sampling_rate_enabled = true;
   state->allocation_sample_every = 0;
+  state->allocation_profiling_enabled = false;
   state->thread_context_collector_instance = Qnil;
   state->idle_sampling_helper_instance = Qnil;
   state->owner_thread = Qnil;
@@ -292,28 +310,31 @@ static VALUE _native_initialize(
   VALUE thread_context_collector_instance,
   VALUE gc_profiling_enabled,
   VALUE idle_sampling_helper_instance,
-  VALUE allocation_counting_enabled,
   VALUE no_signals_workaround_enabled,
   VALUE dynamic_sampling_rate_enabled,
-  VALUE […]
+  VALUE dynamic_sampling_rate_overhead_target_percentage,
+  VALUE allocation_sample_every,
+  VALUE allocation_profiling_enabled
 ) {
   ENFORCE_BOOLEAN(gc_profiling_enabled);
-  ENFORCE_BOOLEAN(allocation_counting_enabled);
   ENFORCE_BOOLEAN(no_signals_workaround_enabled);
   ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
   ENFORCE_TYPE(allocation_sample_every, T_FIXNUM);
+  ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
+  ENFORCE_BOOLEAN(allocation_profiling_enabled);
 
   struct cpu_and_wall_time_worker_state *state;
   TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
 
   state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue);
-  state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
   state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
   state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
+  dynamic_sampling_rate_set_overhead_target_percentage(&state->dynamic_sampling_rate, NUM2DBL(dynamic_sampling_rate_overhead_target_percentage));
   state->allocation_sample_every = NUM2INT(allocation_sample_every);
+  state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
 
-  if (state->allocation_sample_every […]
-    rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be […]
+  if (state->allocation_sample_every <= 0) {
+    rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be > 0.", state->allocation_sample_every);
   }
 
   state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
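`ENFORCE_BOOLEAN` and `ENFORCE_TYPE` in the hunk above come from the gem's own ruby_helpers.h (also changed in this release, per the file list); their exact definitions are not shown here. The following is only a hedged sketch of the argument-validation pattern `_native_initialize` relies on, with hypothetical macro names:

```c
// Illustrative sketch only: ddtrace defines ENFORCE_BOOLEAN/ENFORCE_TYPE in its
// ruby_helpers.h, and the real implementation there may differ. The point is the
// pattern: validate every VALUE coming from Ruby and fail fast with a TypeError
// instead of silently reading a mistyped object.
#include <ruby.h>

#define ENFORCE_TYPE_SKETCH(value, type) \
  do { \
    if (!RB_TYPE_P((value), (type))) \
      rb_raise(rb_eTypeError, "%s is not of the expected type", #value); \
  } while (0)

#define ENFORCE_BOOLEAN_SKETCH(value) \
  do { \
    if ((value) != Qtrue && (value) != Qfalse) \
      rb_raise(rb_eTypeError, "%s must be true or false", #value); \
  } while (0)
```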
@@ -472,20 +493,25 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
 
   // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
   // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
-  […]
+  #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
+    rb_postponed_job_trigger(sample_from_postponed_job_handle);
+    state->stats.postponed_job_success++; // Always succeeds
+  #else
+    int result = rb_postponed_job_register_one(0, sample_from_postponed_job, NULL);
+
+    // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
+    // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
+    switch (result) {
+      case 0:
+        state->stats.postponed_job_full++; break;
+      case 1:
+        state->stats.postponed_job_success++; break;
+      case 2:
+        state->stats.postponed_job_skipped_already_existed++; break;
+      default:
+        state->stats.postponed_job_unknown_result++;
+    }
+  #endif
 }
 
 // The actual sampling trigger loop always runs **without** the global vm lock.
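For context on the hunk above: Ruby 3.3 added a postponed-job API where a handle is pre-registered once and later only triggered (which is what makes it cheap and safe to call from the SIGPROF handler), while older Rubies register the callback on every signal. A minimal standalone sketch of the two paths, reusing the gem's `NO_POSTPONED_TRIGGER` guard; the function and variable names below are illustrative, not the gem's:

```c
#include <ruby.h>
#include <ruby/debug.h>

// Runs later on a Ruby thread, with the GVL held, outside the signal handler.
static void my_postponed_job(void *data) { (void) data; }

// Assumption: NO_POSTPONED_TRIGGER is defined by the extension's build setup on Rubies
// older than 3.3 (that is how the guard appears to be used in the diff above).
#ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
static rb_postponed_job_handle_t my_job_handle;

static void register_job_once(void) {
  my_job_handle = rb_postponed_job_preregister(0, my_postponed_job, NULL);
  if (my_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
    rb_raise(rb_eRuntimeError, "Failed to preregister postponed job");
  }
}

static void called_from_signal_handler(void) {
  rb_postponed_job_trigger(my_job_handle); // always succeeds once preregistered
}
#else
static void called_from_signal_handler(void) {
  // Legacy API: 0 = queue full, 1 = registered, 2 = a matching job was already pending
  // (return values observed in practice on Ruby 2.x-3.2, as the comment in the diff notes).
  int result = rb_postponed_job_register_one(0, my_postponed_job, NULL);
  (void) result;
}
#endif
```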
@@ -632,7 +658,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
   // because they may raise exceptions.
   install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal");
   if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
-  if (state-> […]
+  if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
 
   rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);
 
@@ -714,28 +740,17 @@ static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
   if (event == RUBY_INTERNAL_EVENT_GC_ENTER) {
     thread_context_collector_on_gc_start(state->thread_context_collector_instance);
   } else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) {
-    […]
-    //
-    //
-    […]
-    // making the sampling process allocation-safe (very hard); or separate stack sampling from sample recording,
-    // e.g. enabling us to capture the stack in thread_context_collector_on_gc_finish and do the rest later
-    // (medium hard).
-
-    thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
-    // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc after if
-    // fully finishes the garbage collection, so that one is allowed to do allocations and throw exceptions as usual.
-    //
-    // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
-    // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
-    rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
+    bool should_flush = thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
+
+    // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc when the
+    // thread collector flags it's time to flush.
+    if (should_flush) {
+      #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
+        rb_postponed_job_trigger(after_gc_from_postponed_job_handle);
+      #else
+        rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
+      #endif
+    }
   }
 }
 
@@ -888,9 +903,9 @@ static void sleep_for(uint64_t time_ns) {
 }
 
 static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
-  bool […]
+  bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;
 
-  return […]
+  return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
 }
 
 // Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
@@ -924,7 +939,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
 
   // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before
   // beta), and having something here allows us to test the rest of feature, sampling decision aside.
-  if ( […]
+  if (allocation_count % state->allocation_sample_every == 0) {
     // Rescue against any exceptions that happen during sampling
    safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
   }
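The TODO in this hunk calls the decision a placeholder strategy: each NEWOBJ tracepoint bumps the per-thread `allocation_count` and a sample is taken on every `allocation_sample_every`-th allocation. A stripped-down sketch of that cadence (names simplified, not the gem's code):

```c
#include <stdbool.h>
#include <stdint.h>

// Per-thread counter, mirroring the `__thread uint64_t allocation_count` the worker keeps.
static __thread uint64_t allocations_seen = 0;

// Returns true on every Nth allocation observed by the current thread.
static bool should_sample_allocation(int allocation_sample_every) {
  allocations_seen++;
  return allocation_sample_every > 0 && (allocations_seen % allocation_sample_every) == 0;
}
```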
data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c
CHANGED
@@ -19,7 +19,7 @@
 //
 // Instead of sampling at a fixed sample rate, the actual sampling rate should be decided by also observing the impact
 // that running the profiler is having. This protects against issues such as the profiler being deployed in very busy
-//machines or containers with unrealistic CPU restrictions.
+// machines or containers with unrealistic CPU restrictions.
 //
 // ### Implementation
 //
@@ -35,13 +35,13 @@
 // sample. If it's not, it will skip sampling.
 //
 // Finally, as an additional optimization, there's a `dynamic_sampling_rate_get_sleep()` which, given the current
-// wall-time, will return the time remaining (*there's an exception, check […]
+// wall-time, will return the time remaining (*there's an exception, check function) until the next sample.
 //
 // ---
 
 // This is the wall-time overhead we're targeting. E.g. we target to spend no more than 2%, or 1.2 seconds per minute,
-// taking profiling samples.
-#define […]
+// taking profiling samples by default.
+#define DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // %
 // See `dynamic_sampling_rate_get_sleep()` for details
 #define MAX_SLEEP_TIME_NS MILLIS_AS_NS(100)
 // See `dynamic_sampling_rate_after_sample()` for details
@@ -49,6 +49,11 @@
 
 void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state) {
   atomic_init(&state->next_sample_after_monotonic_wall_time_ns, 0);
+  dynamic_sampling_rate_set_overhead_target_percentage(state, DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE);
+}
+
+void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage) {
+  state->overhead_target_percentage = overhead_target_percentage;
 }
 
 void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state) {
@@ -76,7 +81,7 @@ bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, lon
 }
 
 void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long wall_time_ns_after_sample, uint64_t sampling_time_ns) {
-  double overhead_target = […]
+  double overhead_target = state->overhead_target_percentage;
 
   // The idea here is that we're targeting a maximum % of wall-time spent sampling.
   // So for instance, if sampling_time_ns is 2% of the time we spend working, how much is the 98% we should spend
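The comment above describes the core arithmetic: if a sample cost `sampling_time_ns` and sampling may only take `overhead_target`% of wall-time, the profiler should spend roughly `sampling_time_ns * (100 - overhead_target) / overhead_target` working before the next sample. A small illustrative calculation (variable names are mine, not the gem's):

```c
// Illustrative arithmetic only: at the default 2% target, a sample that took 1ms of
// wall-time means ~49ms of working time should pass before the next sample, keeping
// sampling at roughly 2 parts in 100 of total wall-time.
#include <stdint.h>
#include <stdio.h>

int main(void) {
  double overhead_target_percentage = 2.0; // % of wall-time allowed for sampling
  uint64_t sampling_time_ns = 1000000;     // last sample took 1ms

  double working_time_ns =
    sampling_time_ns * ((100.0 - overhead_target_percentage) / overhead_target_percentage);

  printf("wait ~%.1f ms before the next sample\n", working_time_ns / 1e6); // ~49.0 ms
  return 0;
}
```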
@@ -93,48 +98,51 @@ void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long
 // ---
 // Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.
 
-VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
-VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
-VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);
+VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
+VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
+VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);
 
 void collectors_dynamic_sampling_rate_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
   VALUE dynamic_sampling_rate_module = rb_define_module_under(collectors_module, "DynamicSamplingRate");
   VALUE testing_module = rb_define_module_under(dynamic_sampling_rate_module, "Testing");
 
-  rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, […]
-  rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, […]
-  rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, […]
+  rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 3);
+  rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 3);
+  rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 3);
 }
 
-VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
+VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
   ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
   ENFORCE_TYPE(current_monotonic_wall_time_ns, T_FIXNUM);
 
   dynamic_sampling_rate_state state;
   dynamic_sampling_rate_init(&state);
+  dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
   atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));
 
   return ULL2NUM(dynamic_sampling_rate_get_sleep(&state, NUM2LONG(current_monotonic_wall_time_ns)));
 }
 
-VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
+VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
   ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
   ENFORCE_TYPE(wall_time_ns_before_sample, T_FIXNUM);
 
   dynamic_sampling_rate_state state;
   dynamic_sampling_rate_init(&state);
+  dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
   atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));
 
   return dynamic_sampling_rate_should_sample(&state, NUM2LONG(wall_time_ns_before_sample)) ? Qtrue : Qfalse;
 }
 
-VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
+VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
   ENFORCE_TYPE(wall_time_ns_after_sample, T_FIXNUM);
   ENFORCE_TYPE(sampling_time_ns, T_FIXNUM);
 
   dynamic_sampling_rate_state state;
   dynamic_sampling_rate_init(&state);
+  dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
 
   dynamic_sampling_rate_after_sample(&state, NUM2LONG(wall_time_ns_after_sample), NUM2ULL(sampling_time_ns));
 
data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h
CHANGED
@@ -4,10 +4,14 @@
 #include <stdbool.h>
 
 typedef struct {
+  // This is the wall-time overhead we're targeting. E.g. by default, we target to spend no more than 2%, or 1.2 seconds
+  // per minute, taking profiling samples.
+  double overhead_target_percentage;
   atomic_long next_sample_after_monotonic_wall_time_ns;
 } dynamic_sampling_rate_state;
 
 void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state);
+void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage);
 void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state);
 uint64_t dynamic_sampling_rate_get_sleep(dynamic_sampling_rate_state *state, long current_monotonic_wall_time_ns);
 bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, long wall_time_ns_before_sample);
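Putting the header's API together: a hypothetical caller (the real consumer is the CpuAndWallTimeWorker, whose loop differs in detail) asks `dynamic_sampling_rate_should_sample` before each sample, reports the cost via `dynamic_sampling_rate_after_sample`, and otherwise sleeps for the interval `dynamic_sampling_rate_get_sleep` suggests. A sketch under those assumptions:

```c
#include <stdint.h>
#include "collectors_dynamic_sampling_rate.h"

// Hypothetical driver for the API above; now_ns, take_sample and sleep_for_ns are
// placeholders supplied by the caller, not symbols from the gem.
void sampling_loop_sketch(long (*now_ns)(void), void (*take_sample)(void), void (*sleep_for_ns)(uint64_t)) {
  dynamic_sampling_rate_state state;
  dynamic_sampling_rate_init(&state);                                 // starts at the 2% default target
  dynamic_sampling_rate_set_overhead_target_percentage(&state, 1.0);  // or pick a custom target

  for (;;) {
    long before_ns = now_ns();
    if (dynamic_sampling_rate_should_sample(&state, before_ns)) {
      take_sample();
      long after_ns = now_ns();
      dynamic_sampling_rate_after_sample(&state, after_ns, (uint64_t) (after_ns - before_ns));
    } else {
      // The implementation caps this at MAX_SLEEP_TIME_NS, per its own comments.
      sleep_for_ns(dynamic_sampling_rate_get_sleep(&state, before_ns));
    }
  }
}
```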
data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c
ADDED
@@ -0,0 +1,156 @@
+#include <ruby.h>
+#include <datadog/profiling.h>
+
+#include "collectors_gc_profiling_helper.h"
+
+// This helper is used by the Datadog::Profiling::Collectors::ThreadContext to profile garbage collection.
+// It's tested through that class' interfaces.
+// ---
+
+// Used when retrieving GC information from the VM.
+// All these are symbols, but we don't need to mark them since we ask for them to be interned (and thus live forever)
+static VALUE state_sym;
+static VALUE marking_sym;
+static VALUE sweeping_sym;
+static VALUE none_sym;
+static VALUE gc_by_sym;
+static VALUE newobj_sym;
+static VALUE malloc_sym;
+static VALUE method_sym;
+static VALUE capi_sym;
+static VALUE stress_sym;
+static VALUE major_by_sym;
+static VALUE nofree_sym;
+static VALUE oldgen_sym;
+static VALUE shady_sym;
+static VALUE force_sym;
+static VALUE oldmalloc_sym;
+
+static ddog_CharSlice major_gc_reason_pretty(VALUE major_gc_reason);
+static ddog_CharSlice gc_cause_pretty(VALUE gc_cause);
+static ddog_CharSlice gc_type_pretty(VALUE major_gc_reason, VALUE gc_state);
+
+void gc_profiling_init(void) {
+  // This function lazy-interns a few constants, which may trigger allocations. Since we want to call it during GC as
+  // well, when allocations are not allowed, we call it once here so that the constants get defined ahead of time.
+  rb_gc_latest_gc_info(rb_hash_new());
+
+  // Used to query and look up the results of GC information
+  state_sym = ID2SYM(rb_intern_const("state"));
+  marking_sym = ID2SYM(rb_intern_const("marking"));
+  sweeping_sym = ID2SYM(rb_intern_const("sweeping"));
+  none_sym = ID2SYM(rb_intern_const("none"));
+  gc_by_sym = ID2SYM(rb_intern_const("gc_by"));
+  newobj_sym = ID2SYM(rb_intern_const("newobj"));
+  malloc_sym = ID2SYM(rb_intern_const("malloc"));
+  method_sym = ID2SYM(rb_intern_const("method"));
+  capi_sym = ID2SYM(rb_intern_const("capi"));
+  stress_sym = ID2SYM(rb_intern_const("stress"));
+  major_by_sym = ID2SYM(rb_intern_const("major_by"));
+  nofree_sym = ID2SYM(rb_intern_const("nofree"));
+  oldgen_sym = ID2SYM(rb_intern_const("oldgen"));
+  shady_sym = ID2SYM(rb_intern_const("shady"));
+  force_sym = ID2SYM(rb_intern_const("force"));
+  oldmalloc_sym = ID2SYM(rb_intern_const("oldmalloc"));
+  state_sym = ID2SYM(rb_intern_const("state"));
+  none_sym = ID2SYM(rb_intern_const("none"));
+}
+
+bool gc_profiling_has_major_gc_finished(void) {
+  return rb_gc_latest_gc_info(state_sym) == none_sym && rb_gc_latest_gc_info(major_by_sym) != Qnil;
+}
+
+uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) {
+  uint8_t max_label_count =
+    1 + // thread id
+    1 + // thread name
+    1 + // state
+    1 + // event
+    1 + // gc reason
+    1 + // gc cause
+    1; // gc type
+
+  if (max_label_count > labels_length) {
+    rb_raise(rb_eArgError, "BUG: gc_profiling_set_metadata invalid labels_length (%d) < max_label_count (%d)", labels_length, max_label_count);
+  }
+
+  uint8_t label_pos = 0;
+
+  labels[label_pos++] = (ddog_prof_Label) {
+    .key = DDOG_CHARSLICE_C("thread id"),
+    .str = DDOG_CHARSLICE_C("GC"),
+    .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
+  };
+
+  labels[label_pos++] = (ddog_prof_Label) {
+    .key = DDOG_CHARSLICE_C("thread name"),
+    .str = DDOG_CHARSLICE_C("Garbage Collection"),
+    .num = 0, // Workaround, same as above
+  };
+
+  labels[label_pos++] = (ddog_prof_Label) {
+    .key = DDOG_CHARSLICE_C("state"),
+    .str = DDOG_CHARSLICE_C("had cpu"),
+    .num = 0, // Workaround, same as above
+  };
+
+  labels[label_pos++] = (ddog_prof_Label) {
+    .key = DDOG_CHARSLICE_C("event"),
+    .str = DDOG_CHARSLICE_C("gc"),
+    .num = 0, // Workaround, same as above
+  };
+
+  VALUE major_by = rb_gc_latest_gc_info(major_by_sym);
+  if (major_by != Qnil) {
+    labels[label_pos++] = (ddog_prof_Label) {
+      .key = DDOG_CHARSLICE_C("gc reason"),
+      .str = major_gc_reason_pretty(major_by),
+    };
+  }
+
+  labels[label_pos++] = (ddog_prof_Label) {
+    .key = DDOG_CHARSLICE_C("gc cause"),
+    .str = gc_cause_pretty(rb_gc_latest_gc_info(gc_by_sym)),
+  };
+
+  labels[label_pos++] = (ddog_prof_Label) {
+    .key = DDOG_CHARSLICE_C("gc type"),
+    .str = gc_type_pretty(major_by, rb_gc_latest_gc_info(state_sym)),
+  };
+
+  if (label_pos > max_label_count) {
+    rb_raise(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
+  }
+
+  return label_pos;
+}
+
+static ddog_CharSlice major_gc_reason_pretty(VALUE major_gc_reason) {
+  if (major_gc_reason == nofree_sym ) return DDOG_CHARSLICE_C("not enough free slots (NOFREE)");
+  if (major_gc_reason == oldgen_sym ) return DDOG_CHARSLICE_C("old generation full (OLDGEN)");
+  if (major_gc_reason == shady_sym ) return DDOG_CHARSLICE_C("too many objects without write barriers (SHADY)");
+  if (major_gc_reason == force_sym ) return DDOG_CHARSLICE_C("requested (FORCE)");
+  if (major_gc_reason == oldmalloc_sym) return DDOG_CHARSLICE_C("heap bytes allocated threshold (OLDMALLOC)");
+  return DDOG_CHARSLICE_C("unknown");
+}
+
+static ddog_CharSlice gc_cause_pretty(VALUE gc_cause) {
+  if (gc_cause == newobj_sym) return DDOG_CHARSLICE_C("object allocation");
+  if (gc_cause == malloc_sym) return DDOG_CHARSLICE_C("malloc()");
+  if (gc_cause == method_sym) return DDOG_CHARSLICE_C("GC.start()");
+  if (gc_cause == capi_sym ) return DDOG_CHARSLICE_C("rb_gc()");
+  if (gc_cause == stress_sym) return DDOG_CHARSLICE_C("stress");
+  return DDOG_CHARSLICE_C("unknown");
+}
+
+static ddog_CharSlice gc_type_pretty(VALUE major_gc_reason, VALUE gc_state) {
+  if (major_gc_reason != Qnil) {
+    if (gc_state == marking_sym ) return DDOG_CHARSLICE_C("major (ongoing, marking)");
+    if (gc_state == sweeping_sym) return DDOG_CHARSLICE_C("major (ongoing, sweeping)");
+    return DDOG_CHARSLICE_C("major");
+  } else {
+    // As we delay flushing events when a minor GC finishes, it's not relevant to include the observed state of the
+    // minor GC, as we often won't record a marking -> sweeping -> done cycle, as it happens too quickly.
+    return DDOG_CHARSLICE_C("minor");
+  }
+}