ddtrace 1.18.0 → 1.19.0

Files changed (38)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -1
  3. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +67 -52
  4. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +22 -14
  5. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +4 -0
  6. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  7. data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +43 -102
  9. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +10 -3
  10. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +159 -124
  11. data/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +2 -1
  12. data/ext/ddtrace_profiling_native_extension/extconf.rb +16 -0
  13. data/ext/ddtrace_profiling_native_extension/heap_recorder.c +970 -0
  14. data/ext/ddtrace_profiling_native_extension/heap_recorder.h +155 -0
  15. data/ext/ddtrace_profiling_native_extension/helpers.h +2 -0
  16. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +20 -0
  17. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +11 -0
  18. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +5 -0
  19. data/ext/ddtrace_profiling_native_extension/profiling.c +1 -0
  20. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +147 -0
  21. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -0
  22. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +329 -10
  23. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +3 -0
  24. data/lib/datadog/core/configuration/settings.rb +139 -22
  25. data/lib/datadog/core/telemetry/collector.rb +10 -0
  26. data/lib/datadog/core/telemetry/event.rb +2 -1
  27. data/lib/datadog/core/telemetry/ext.rb +3 -0
  28. data/lib/datadog/core/telemetry/v1/app_event.rb +8 -1
  29. data/lib/datadog/core/telemetry/v1/install_signature.rb +38 -0
  30. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -11
  31. data/lib/datadog/profiling/component.rb +197 -13
  32. data/lib/datadog/profiling/scheduler.rb +4 -6
  33. data/lib/datadog/profiling/stack_recorder.rb +13 -2
  34. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -0
  35. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  36. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  37. data/lib/ddtrace/version.rb +1 -1
  38. metadata +12 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
---
SHA256:
- metadata.gz: 69e775ab06a83ce14114a7287056e3d3fb575191b7ff6ccdc5c7b33f7fd58172
- data.tar.gz: 13b607a4e29e516be4988dca7827eca09b79e968cf98e0641a155039d2ec3273
+ metadata.gz: 37ea5c2fe193569e17d13e026b4477dd8806c00df50250fb5f69854c23e6e6a5
+ data.tar.gz: 858b756d1ef6baddb66f85fb44f3301b317e151d58e4e299390f819621d4ecb8
SHA512:
- metadata.gz: d345e07c8b0a654974c51a7457b3fc6d3d7eb99226cfc5555d6bc8ee3e65b17b3782b1e582591be925297c09dd104108007b2081e28ee43c103f8f2fec3ffe5b
- data.tar.gz: '085ea801f5fae16ed58cd79bab86839cd1aa23fa09261b39219264f603455633e19dbb8f60d647e407c7218d7d00052ef7b593405ec5af828af56ffecd58227d'
+ metadata.gz: 3d86acc37f0bcb7d3b680c4ee8698b45eeb10c026d7dbcdf65ca5c7991eeb561ab35b40b02e98aa24dd984fd7871da8c4906ccc1aa64f9b8c2d8ad86aded199b
+ data.tar.gz: 4463963285c39c09e2d1d090fe0f80518107b7b60fdb58a0bd993d11dfbaf64e0131cd774b16483d6bd84b5a7faf4256eb8304c3bf4b08b4b90dadfaa513adb9
data/CHANGELOG.md CHANGED
@@ -2,6 +2,33 @@

## [Unreleased]

+ ## [1.19.0] - 2024-01-10
+
+ ### Highlights
+ Alpha support for memory profiling has been added. For more details, check the [release notes](https://github.com/DataDog/dd-trace-rb/releases/tag/v1.19.0)
+
+ ### Added
+ * Tracing: Add `on_error` settings for `mysql2` ([#3316][])
+ * Core: Add install_signature to app-started telemetry event ([#3349][])
+ * Profiling: Heap Profiling ([#3281][]) ([#3287][]) ([#3328][]) ([#3329][]) ([#3333][]) ([#3360][])
+ * Profiling: Redesign GC profiling to add timeline support and reduce overhead ([#3313][])
+ * Core: Use Ruby 3.3 stable for CI testing ([#3354][])
+
+ ### Changed
+ * Core: Bump `datadog-ci` dependency to 0.6.0 ([#3361][])
+ * Core: Bump debase-ruby_core_source dependency to 3.3.1 ([#3373][])
+ * Docs: Backport "List Ruby 3.3 as supported in the docs" to master branch ([#3374][])
+ * Profiling: Import upstream `rb_profile_frames` fix ([#3352][])
+ * Profiling: Allow the dynamic sampling rate overhead target to be set ([#3310][])
+ * Profiling: Split profiling tests into ractor and non-ractor suites. ([#3320][])
+
+ ### Fixed
+ * Docs: Fix `pg` doc markdown format ([#3317][])
+ * Tracing: Fix recursive `require` in Railtie ([#3365][])
+ * Profiling: Fix issues stemming from rb_gc_force_recycle ([#3366][])
+ * Profiling: Fix Ruby 3.3 CI being broken in master due to profiler ([#3356][])
+ * Profiling: Fix "no signals" workaround detection when mariadb is in use ([#3362][])
+
## [1.18.0] - 2023-12-07

### Added
@@ -2680,7 +2707,8 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1


- [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.18.0...master
+ [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v1.19.0...master
+ [1.19.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.18.0...v1.19.0
[1.18.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.17.0...v1.18.0
[1.17.0]: https://github.com/DataDog/dd-trace-rb/compare/v1.16.2...v1.17.0
[1.16.2]: https://github.com/DataDog/dd-trace-rb/compare/v1.16.1...v1.16.2
@@ -3910,12 +3938,33 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
[#3273]: https://github.com/DataDog/dd-trace-rb/issues/3273
[#3279]: https://github.com/DataDog/dd-trace-rb/issues/3279
[#3280]: https://github.com/DataDog/dd-trace-rb/issues/3280
+ [#3281]: https://github.com/DataDog/dd-trace-rb/issues/3281
[#3284]: https://github.com/DataDog/dd-trace-rb/issues/3284
[#3286]: https://github.com/DataDog/dd-trace-rb/issues/3286
+ [#3287]: https://github.com/DataDog/dd-trace-rb/issues/3287
[#3289]: https://github.com/DataDog/dd-trace-rb/issues/3289
[#3303]: https://github.com/DataDog/dd-trace-rb/issues/3303
[#3307]: https://github.com/DataDog/dd-trace-rb/issues/3307
[#3308]: https://github.com/DataDog/dd-trace-rb/issues/3308
+ [#3310]: https://github.com/DataDog/dd-trace-rb/issues/3310
+ [#3313]: https://github.com/DataDog/dd-trace-rb/issues/3313
+ [#3316]: https://github.com/DataDog/dd-trace-rb/issues/3316
+ [#3317]: https://github.com/DataDog/dd-trace-rb/issues/3317
+ [#3320]: https://github.com/DataDog/dd-trace-rb/issues/3320
+ [#3328]: https://github.com/DataDog/dd-trace-rb/issues/3328
+ [#3329]: https://github.com/DataDog/dd-trace-rb/issues/3329
+ [#3333]: https://github.com/DataDog/dd-trace-rb/issues/3333
+ [#3349]: https://github.com/DataDog/dd-trace-rb/issues/3349
+ [#3352]: https://github.com/DataDog/dd-trace-rb/issues/3352
+ [#3354]: https://github.com/DataDog/dd-trace-rb/issues/3354
+ [#3356]: https://github.com/DataDog/dd-trace-rb/issues/3356
+ [#3360]: https://github.com/DataDog/dd-trace-rb/issues/3360
+ [#3361]: https://github.com/DataDog/dd-trace-rb/issues/3361
+ [#3362]: https://github.com/DataDog/dd-trace-rb/issues/3362
+ [#3365]: https://github.com/DataDog/dd-trace-rb/issues/3365
+ [#3366]: https://github.com/DataDog/dd-trace-rb/issues/3366
+ [#3373]: https://github.com/DataDog/dd-trace-rb/issues/3373
+ [#3374]: https://github.com/DataDog/dd-trace-rb/issues/3374
[@AdrianLC]: https://github.com/AdrianLC
[@Azure7111]: https://github.com/Azure7111
[@BabyGroot]: https://github.com/BabyGroot
data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c CHANGED
@@ -75,15 +75,22 @@
//
// ---

+ #ifndef NO_POSTPONED_TRIGGER
+ // Used to call the rb_postponed_job_trigger from Ruby 3.3+. These get initialized in
+ // `collectors_cpu_and_wall_time_worker_init` below and always get reused after that.
+ static rb_postponed_job_handle_t sample_from_postponed_job_handle;
+ static rb_postponed_job_handle_t after_gc_from_postponed_job_handle;
+ #endif
+
// Contains state for a single CpuAndWallTimeWorker instance
struct cpu_and_wall_time_worker_state {
  // These are immutable after initialization

  bool gc_profiling_enabled;
- bool allocation_counting_enabled;
  bool no_signals_workaround_enabled;
  bool dynamic_sampling_rate_enabled;
- int allocation_sample_every; // Temporarily used for development/testing of allocation profiling
+ int allocation_sample_every;
+ bool allocation_profiling_enabled;
  VALUE self_instance;
  VALUE thread_context_collector_instance;
  VALUE idle_sampling_helper_instance;
@@ -149,10 +156,11 @@ static VALUE _native_initialize(
  VALUE thread_context_collector_instance,
  VALUE gc_profiling_enabled,
  VALUE idle_sampling_helper_instance,
- VALUE allocation_counting_enabled,
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
- VALUE allocation_sample_every
+ VALUE dynamic_sampling_rate_overhead_target_percentage,
+ VALUE allocation_sample_every,
+ VALUE allocation_profiling_enabled
);
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
static VALUE _native_sampling_loop(VALUE self, VALUE instance);
@@ -211,6 +219,16 @@ __thread uint64_t allocation_count = 0;
void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
  rb_global_variable(&active_sampler_instance);

+ #ifndef NO_POSTPONED_TRIGGER
+   int unused_flags = 0;
+   sample_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, sample_from_postponed_job, NULL);
+   after_gc_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, after_gc_from_postponed_job, NULL);
+
+   if (sample_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
+     rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
+   }
+ #endif
+
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
  VALUE collectors_cpu_and_wall_time_worker_class = rb_define_class_under(collectors_module, "CpuAndWallTimeWorker", rb_cObject);
  // Hosts methods used for testing the native code using RSpec
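The `#ifndef NO_POSTPONED_TRIGGER` branches above capture the profiler's switch to Ruby 3.3's postponed-job API: handles are pre-registered once at init time and later triggered, instead of calling `rb_postponed_job_register_one` on every request. A minimal standalone sketch of that version-gated pattern, assuming (as this diff suggests) that `NO_POSTPONED_TRIGGER` is defined by the extension's build configuration on Rubies that lack the newer API; `example_job` and the surrounding function names are illustrative only:

```c
#include <ruby.h>
#include <ruby/debug.h>

// Runs later at a VM-chosen safe point, where allocating and raising are allowed again.
static void example_job(void *data) { (void) data; /* take a sample here */ }

#ifndef NO_POSTPONED_TRIGGER
  // Ruby 3.3+: pre-register once, then trigger as many times as needed; triggering is cheap
  // enough to be used directly from the SIGPROF handler, as the later hunk in this file does.
  static rb_postponed_job_handle_t example_job_handle;

  static void example_init(void) {
    example_job_handle = rb_postponed_job_preregister(0, example_job, NULL);
    if (example_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
      rb_raise(rb_eRuntimeError, "Failed to pre-register postponed job");
    }
  }

  static void example_request_run(void) { rb_postponed_job_trigger(example_job_handle); }
#else
  // Ruby <= 3.2: register on every request; registration can fail if the postponed-job queue is full.
  static void example_init(void) { }

  static void example_request_run(void) { (void) rb_postponed_job_register_one(0, example_job, NULL); }
#endif
```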
@@ -226,7 +244,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
  rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);

- rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8);
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
@@ -264,10 +282,10 @@ static VALUE _native_new(VALUE klass) {
  // being leaked.

  state->gc_profiling_enabled = false;
- state->allocation_counting_enabled = false;
  state->no_signals_workaround_enabled = false;
  state->dynamic_sampling_rate_enabled = true;
  state->allocation_sample_every = 0;
+ state->allocation_profiling_enabled = false;
  state->thread_context_collector_instance = Qnil;
  state->idle_sampling_helper_instance = Qnil;
  state->owner_thread = Qnil;
@@ -292,28 +310,31 @@ static VALUE _native_initialize(
  VALUE thread_context_collector_instance,
  VALUE gc_profiling_enabled,
  VALUE idle_sampling_helper_instance,
- VALUE allocation_counting_enabled,
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
- VALUE allocation_sample_every
+ VALUE dynamic_sampling_rate_overhead_target_percentage,
+ VALUE allocation_sample_every,
+ VALUE allocation_profiling_enabled
) {
  ENFORCE_BOOLEAN(gc_profiling_enabled);
- ENFORCE_BOOLEAN(allocation_counting_enabled);
  ENFORCE_BOOLEAN(no_signals_workaround_enabled);
  ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
  ENFORCE_TYPE(allocation_sample_every, T_FIXNUM);
+ ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
+ ENFORCE_BOOLEAN(allocation_profiling_enabled);

  struct cpu_and_wall_time_worker_state *state;
  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

  state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue);
- state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
  state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
  state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
+ dynamic_sampling_rate_set_overhead_target_percentage(&state->dynamic_sampling_rate, NUM2DBL(dynamic_sampling_rate_overhead_target_percentage));
  state->allocation_sample_every = NUM2INT(allocation_sample_every);
+ state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);

- if (state->allocation_sample_every < 0) {
-   rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every);
+ if (state->allocation_sample_every <= 0) {
+   rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be > 0.", state->allocation_sample_every);
  }

  state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
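The `ENFORCE_BOOLEAN`/`ENFORCE_TYPE` calls in the hunk above validate the arguments handed over from Ruby before they are converted with `NUM2DBL`/`NUM2INT`. The macros themselves are defined in the extension's helper headers, which are not part of this excerpt; a rough stand-in for what such checks amount to (names prefixed `EXAMPLE_` to make clear these are not the real definitions):

```c
#include <ruby.h>

// Approximate stand-ins; the real ENFORCE_* macros in this extension also report
// which argument and which call site failed.
#define EXAMPLE_ENFORCE_TYPE(value, type) Check_Type((value), (type))
#define EXAMPLE_ENFORCE_BOOLEAN(value) \
  do { \
    if ((value) != Qtrue && (value) != Qfalse) rb_raise(rb_eTypeError, "wrong argument (expected true or false)"); \
  } while (0)
```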
@@ -472,20 +493,25 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si

  // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
  // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
- int result = rb_postponed_job_register_one(0, sample_from_postponed_job, NULL);
-
- // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
- // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
- switch (result) {
-   case 0:
-     state->stats.postponed_job_full++; break;
-   case 1:
-     state->stats.postponed_job_success++; break;
-   case 2:
-     state->stats.postponed_job_skipped_already_existed++; break;
-   default:
-     state->stats.postponed_job_unknown_result++;
- }
+ #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
+   rb_postponed_job_trigger(sample_from_postponed_job_handle);
+   state->stats.postponed_job_success++; // Always succeeds
+ #else
+   int result = rb_postponed_job_register_one(0, sample_from_postponed_job, NULL);
+
+   // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
+   // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
+   switch (result) {
+     case 0:
+       state->stats.postponed_job_full++; break;
+     case 1:
+       state->stats.postponed_job_success++; break;
+     case 2:
+       state->stats.postponed_job_skipped_already_existed++; break;
+     default:
+       state->stats.postponed_job_unknown_result++;
+   }
+ #endif
}

// The actual sampling trigger loop always runs **without** the global vm lock.
@@ -632,7 +658,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
  // because they may raise exceptions.
  install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal");
  if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
- if (state->allocation_counting_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
+ if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);

  rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);

@@ -714,28 +740,17 @@ static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
  if (event == RUBY_INTERNAL_EVENT_GC_ENTER) {
    thread_context_collector_on_gc_start(state->thread_context_collector_instance);
  } else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) {
-   // Design: In an earlier iteration of this feature (see https://github.com/DataDog/dd-trace-rb/pull/2308) we
-   // actually had a single method to implement the behavior of both thread_context_collector_on_gc_finish
-   // and thread_context_collector_sample_after_gc (the latter is called via after_gc_from_postponed_job).
-   //
-   // Unfortunately, then we discovered the safety issue around no allocations, and thus decided to separate them -- so that
-   // the sampling could run outside the tight safety constraints of the garbage collection process.
-   //
-   // There is a downside: The sample is now taken very very shortly afterwards the GC finishes, and not immediately
-   // as the GC finishes, which means the stack captured may by affected by "skid", e.g. point slightly after where
-   // it should be pointing at.
-   // Alternatives to solve this would be to capture no stack for garbage collection (as we do for Java and .net);
-   // making the sampling process allocation-safe (very hard); or separate stack sampling from sample recording,
-   // e.g. enabling us to capture the stack in thread_context_collector_on_gc_finish and do the rest later
-   // (medium hard).
-
-   thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
-   // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc after if
-   // fully finishes the garbage collection, so that one is allowed to do allocations and throw exceptions as usual.
-   //
-   // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
-   // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
-   rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
+   bool should_flush = thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
+
+   // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc when the
+   // thread collector flags it's time to flush.
+   if (should_flush) {
+     #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
+       rb_postponed_job_trigger(after_gc_from_postponed_job_handle);
+     #else
+       rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
+     #endif
+   }
  }
}

@@ -888,9 +903,9 @@ static void sleep_for(uint64_t time_ns) {
}

static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
- bool is_profiler_running = active_sampler_instance_state != NULL;
+ bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;

- return is_profiler_running ? ULL2NUM(allocation_count) : Qnil;
+ return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
}

// Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
@@ -924,7 +939,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)

  // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before
  // beta), and having something here allows us to test the rest of feature, sampling decision aside.
- if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) {
+ if (allocation_count % state->allocation_sample_every == 0) {
    // Rescue against any exceptions that happen during sampling
    safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
  }
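This hunk drops the `allocation_sample_every > 0` guard because `_native_initialize` now rejects non-positive values up front, leaving only the every-Nth-allocation decision. A small sketch of that placeholder strategy in isolation; `EXAMPLE_SAMPLE_EVERY` and the hook names are illustrative (the real counter is the per-thread `allocation_count` and the tracepoint is created elsewhere in this file):

```c
#include <stdint.h>
#include <ruby.h>
#include <ruby/debug.h>

#define EXAMPLE_SAMPLE_EVERY 50 // stand-in for state->allocation_sample_every

static uint64_t example_allocation_count = 0;

// Invoked by the VM for every object allocation while the tracepoint is enabled.
static void example_on_newobj(VALUE tracepoint_data, void *unused) {
  (void) tracepoint_data; (void) unused;
  example_allocation_count++;

  // Placeholder decision strategy from the hunk above: keep 1 out of every N allocations.
  if (example_allocation_count % EXAMPLE_SAMPLE_EVERY == 0) {
    /* record an allocation sample for the newly-created object */
  }
}

static VALUE example_create_allocation_tracepoint(void) {
  return rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_NEWOBJ, example_on_newobj, NULL);
}
```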
data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c CHANGED
@@ -19,7 +19,7 @@
//
// Instead of sampling at a fixed sample rate, the actual sampling rate should be decided by also observing the impact
// that running the profiler is having. This protects against issues such as the profiler being deployed in very busy
- //machines or containers with unrealistic CPU restrictions.
+ // machines or containers with unrealistic CPU restrictions.
//
// ### Implementation
//
@@ -35,13 +35,13 @@
// sample. If it's not, it will skip sampling.
//
// Finally, as an additional optimization, there's a `dynamic_sampling_rate_get_sleep()` which, given the current
- // wall-time, will return the time remaining (*there's an exception, check below) until the next sample.
+ // wall-time, will return the time remaining (*there's an exception, check function) until the next sample.
//
// ---

// This is the wall-time overhead we're targeting. E.g. we target to spend no more than 2%, or 1.2 seconds per minute,
- // taking profiling samples.
- #define WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // %
+ // taking profiling samples by default.
+ #define DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // %
// See `dynamic_sampling_rate_get_sleep()` for details
#define MAX_SLEEP_TIME_NS MILLIS_AS_NS(100)
// See `dynamic_sampling_rate_after_sample()` for details
@@ -49,6 +49,11 @@

void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state) {
  atomic_init(&state->next_sample_after_monotonic_wall_time_ns, 0);
+ dynamic_sampling_rate_set_overhead_target_percentage(state, DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE);
+ }
+
+ void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage) {
+   state->overhead_target_percentage = overhead_target_percentage;
}

void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state) {
@@ -76,7 +81,7 @@ bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, lon
}

void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long wall_time_ns_after_sample, uint64_t sampling_time_ns) {
- double overhead_target = (double) WALL_TIME_OVERHEAD_TARGET_PERCENTAGE;
+ double overhead_target = state->overhead_target_percentage;

  // The idea here is that we're targeting a maximum % of wall-time spent sampling.
  // So for instance, if sampling_time_ns is 2% of the time we spend working, how much is the 98% we should spend
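To make the arithmetic in that comment concrete: with an overhead target of p%, a sample that cost `sampling_time_ns` should be followed by roughly `sampling_time_ns * (100 - p) / p` of non-sampling time before the next sample becomes due. A small sketch of that relationship (it mirrors the idea described here, not the exact code of `dynamic_sampling_rate_after_sample`, whose body is outside this hunk):

```c
#include <stdint.h>

// With the default 2% target, a 1ms sample is followed by roughly 49ms of "quiet" time,
// keeping sampling at about 2 parts in every 100 of wall time.
static uint64_t example_time_until_next_sample_ns(uint64_t sampling_time_ns, double overhead_target_percentage) {
  double non_sampling_multiplier = (100.0 - overhead_target_percentage) / overhead_target_percentage; // 98 / 2 = 49
  return (uint64_t) (sampling_time_ns * non_sampling_multiplier);
}
```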
@@ -93,48 +98,51 @@ void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long
// ---
// Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.

- VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
- VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
- VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);
+ VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns);
+ VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample);
+ VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns);

void collectors_dynamic_sampling_rate_init(VALUE profiling_module) {
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
  VALUE dynamic_sampling_rate_module = rb_define_module_under(collectors_module, "DynamicSamplingRate");
  VALUE testing_module = rb_define_module_under(dynamic_sampling_rate_module, "Testing");

- rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 2);
- rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 2);
- rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 2);
+ rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 3);
+ rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 3);
+ rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 3);
}

- VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
+ VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) {
  ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
  ENFORCE_TYPE(current_monotonic_wall_time_ns, T_FIXNUM);

  dynamic_sampling_rate_state state;
  dynamic_sampling_rate_init(&state);
+ dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
  atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));

  return ULL2NUM(dynamic_sampling_rate_get_sleep(&state, NUM2LONG(current_monotonic_wall_time_ns)));
}

- VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
+ VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) {
  ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM);
  ENFORCE_TYPE(wall_time_ns_before_sample, T_FIXNUM);

  dynamic_sampling_rate_state state;
  dynamic_sampling_rate_init(&state);
+ dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));
  atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns));

  return dynamic_sampling_rate_should_sample(&state, NUM2LONG(wall_time_ns_before_sample)) ? Qtrue : Qfalse;
}

- VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
+ VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) {
  ENFORCE_TYPE(wall_time_ns_after_sample, T_FIXNUM);
  ENFORCE_TYPE(sampling_time_ns, T_FIXNUM);

  dynamic_sampling_rate_state state;
  dynamic_sampling_rate_init(&state);
+ dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage));

  dynamic_sampling_rate_after_sample(&state, NUM2LONG(wall_time_ns_after_sample), NUM2ULL(sampling_time_ns));

data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h CHANGED
@@ -4,10 +4,14 @@
#include <stdbool.h>

typedef struct {
+ // This is the wall-time overhead we're targeting. E.g. by default, we target to spend no more than 2%, or 1.2 seconds
+ // per minute, taking profiling samples.
+ double overhead_target_percentage;
  atomic_long next_sample_after_monotonic_wall_time_ns;
} dynamic_sampling_rate_state;

void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state);
+ void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage);
void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state);
uint64_t dynamic_sampling_rate_get_sleep(dynamic_sampling_rate_state *state, long current_monotonic_wall_time_ns);
bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, long wall_time_ns_before_sample);
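Putting the declarations above together, a caller would drive the API roughly as follows. This is a sketch based only on this header: `example_monotonic_wall_time_now_ns` is a placeholder for whatever clock source the caller uses, and the real driver lives in the CpuAndWallTimeWorker code, which is not part of this excerpt:

```c
#include <stdint.h>

#include "collectors_dynamic_sampling_rate.h"

// Placeholder clock source; the real code has its own monotonic wall-time helper.
extern long example_monotonic_wall_time_now_ns(void);

static dynamic_sampling_rate_state example_rate;

static void example_setup(double overhead_target_percentage) {
  dynamic_sampling_rate_init(&example_rate); // starts at the built-in default target
  dynamic_sampling_rate_set_overhead_target_percentage(&example_rate, overhead_target_percentage);
}

static void example_maybe_sample(void) {
  long before_ns = example_monotonic_wall_time_now_ns();
  if (!dynamic_sampling_rate_should_sample(&example_rate, before_ns)) return;

  /* ... take and record a sample ... */

  long after_ns = example_monotonic_wall_time_now_ns();
  // Feed back how long sampling took, so the next allowed sample time gets pushed out
  // far enough to keep overall overhead near overhead_target_percentage.
  dynamic_sampling_rate_after_sample(&example_rate, after_ns, (uint64_t) (after_ns - before_ns));
}
```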
data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c ADDED
@@ -0,0 +1,156 @@
+ #include <ruby.h>
+ #include <datadog/profiling.h>
+
+ #include "collectors_gc_profiling_helper.h"
+
+ // This helper is used by the Datadog::Profiling::Collectors::ThreadContext to profile garbage collection.
+ // It's tested through that class' interfaces.
+ // ---
+
+ // Used when retrieving GC information from the VM.
+ // All these are symbols, but we don't need to mark them since we ask for them to be interned (and thus live forever)
+ static VALUE state_sym;
+ static VALUE marking_sym;
+ static VALUE sweeping_sym;
+ static VALUE none_sym;
+ static VALUE gc_by_sym;
+ static VALUE newobj_sym;
+ static VALUE malloc_sym;
+ static VALUE method_sym;
+ static VALUE capi_sym;
+ static VALUE stress_sym;
+ static VALUE major_by_sym;
+ static VALUE nofree_sym;
+ static VALUE oldgen_sym;
+ static VALUE shady_sym;
+ static VALUE force_sym;
+ static VALUE oldmalloc_sym;
+
+ static ddog_CharSlice major_gc_reason_pretty(VALUE major_gc_reason);
+ static ddog_CharSlice gc_cause_pretty(VALUE gc_cause);
+ static ddog_CharSlice gc_type_pretty(VALUE major_gc_reason, VALUE gc_state);
+
+ void gc_profiling_init(void) {
+   // This function lazy-interns a few constants, which may trigger allocations. Since we want to call it during GC as
+   // well, when allocations are not allowed, we call it once here so that the constants get defined ahead of time.
+   rb_gc_latest_gc_info(rb_hash_new());
+
+   // Used to query and look up the results of GC information
+   state_sym = ID2SYM(rb_intern_const("state"));
+   marking_sym = ID2SYM(rb_intern_const("marking"));
+   sweeping_sym = ID2SYM(rb_intern_const("sweeping"));
+   none_sym = ID2SYM(rb_intern_const("none"));
+   gc_by_sym = ID2SYM(rb_intern_const("gc_by"));
+   newobj_sym = ID2SYM(rb_intern_const("newobj"));
+   malloc_sym = ID2SYM(rb_intern_const("malloc"));
+   method_sym = ID2SYM(rb_intern_const("method"));
+   capi_sym = ID2SYM(rb_intern_const("capi"));
+   stress_sym = ID2SYM(rb_intern_const("stress"));
+   major_by_sym = ID2SYM(rb_intern_const("major_by"));
+   nofree_sym = ID2SYM(rb_intern_const("nofree"));
+   oldgen_sym = ID2SYM(rb_intern_const("oldgen"));
+   shady_sym = ID2SYM(rb_intern_const("shady"));
+   force_sym = ID2SYM(rb_intern_const("force"));
+   oldmalloc_sym = ID2SYM(rb_intern_const("oldmalloc"));
+   state_sym = ID2SYM(rb_intern_const("state"));
+   none_sym = ID2SYM(rb_intern_const("none"));
+ }
+
+ bool gc_profiling_has_major_gc_finished(void) {
+   return rb_gc_latest_gc_info(state_sym) == none_sym && rb_gc_latest_gc_info(major_by_sym) != Qnil;
+ }
+
+ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) {
+   uint8_t max_label_count =
+     1 + // thread id
+     1 + // thread name
+     1 + // state
+     1 + // event
+     1 + // gc reason
+     1 + // gc cause
+     1;  // gc type
+
+   if (max_label_count > labels_length) {
+     rb_raise(rb_eArgError, "BUG: gc_profiling_set_metadata invalid labels_length (%d) < max_label_count (%d)", labels_length, max_label_count);
+   }
+
+   uint8_t label_pos = 0;
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("thread id"),
+     .str = DDOG_CHARSLICE_C("GC"),
+     .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
+   };
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("thread name"),
+     .str = DDOG_CHARSLICE_C("Garbage Collection"),
+     .num = 0, // Workaround, same as above
+   };
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("state"),
+     .str = DDOG_CHARSLICE_C("had cpu"),
+     .num = 0, // Workaround, same as above
+   };
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("event"),
+     .str = DDOG_CHARSLICE_C("gc"),
+     .num = 0, // Workaround, same as above
+   };
+
+   VALUE major_by = rb_gc_latest_gc_info(major_by_sym);
+   if (major_by != Qnil) {
+     labels[label_pos++] = (ddog_prof_Label) {
+       .key = DDOG_CHARSLICE_C("gc reason"),
+       .str = major_gc_reason_pretty(major_by),
+     };
+   }
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("gc cause"),
+     .str = gc_cause_pretty(rb_gc_latest_gc_info(gc_by_sym)),
+   };
+
+   labels[label_pos++] = (ddog_prof_Label) {
+     .key = DDOG_CHARSLICE_C("gc type"),
+     .str = gc_type_pretty(major_by, rb_gc_latest_gc_info(state_sym)),
+   };
+
+   if (label_pos > max_label_count) {
+     rb_raise(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
+   }
+
+   return label_pos;
+ }
+
+ static ddog_CharSlice major_gc_reason_pretty(VALUE major_gc_reason) {
+   if (major_gc_reason == nofree_sym   ) return DDOG_CHARSLICE_C("not enough free slots (NOFREE)");
+   if (major_gc_reason == oldgen_sym   ) return DDOG_CHARSLICE_C("old generation full (OLDGEN)");
+   if (major_gc_reason == shady_sym    ) return DDOG_CHARSLICE_C("too many objects without write barriers (SHADY)");
+   if (major_gc_reason == force_sym    ) return DDOG_CHARSLICE_C("requested (FORCE)");
+   if (major_gc_reason == oldmalloc_sym) return DDOG_CHARSLICE_C("heap bytes allocated threshold (OLDMALLOC)");
+   return DDOG_CHARSLICE_C("unknown");
+ }
+
+ static ddog_CharSlice gc_cause_pretty(VALUE gc_cause) {
+   if (gc_cause == newobj_sym) return DDOG_CHARSLICE_C("object allocation");
+   if (gc_cause == malloc_sym) return DDOG_CHARSLICE_C("malloc()");
+   if (gc_cause == method_sym) return DDOG_CHARSLICE_C("GC.start()");
+   if (gc_cause == capi_sym  ) return DDOG_CHARSLICE_C("rb_gc()");
+   if (gc_cause == stress_sym) return DDOG_CHARSLICE_C("stress");
+   return DDOG_CHARSLICE_C("unknown");
+ }
+
+ static ddog_CharSlice gc_type_pretty(VALUE major_gc_reason, VALUE gc_state) {
+   if (major_gc_reason != Qnil) {
+     if (gc_state == marking_sym ) return DDOG_CHARSLICE_C("major (ongoing, marking)");
+     if (gc_state == sweeping_sym) return DDOG_CHARSLICE_C("major (ongoing, sweeping)");
+     return DDOG_CHARSLICE_C("major");
+   } else {
+     // As we delay flushing events when a minor GC finishes, it's not relevant to include the observed state of the
+     // minor GC, as we often won't record a marking -> sweeping -> done cycle, as it happens too quickly.
+     return DDOG_CHARSLICE_C("minor");
+   }
+ }
data/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h ADDED
@@ -0,0 +1,5 @@
+ #pragma once
+
+ void gc_profiling_init(void);
+ bool gc_profiling_has_major_gc_finished(void);
+ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length);
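Based on these three declarations, the intended call pattern looks roughly like the sketch below. In the real code the caller is the thread context collector, whose changes are listed in the file summary above but not shown in this excerpt; `EXAMPLE_MAX_GC_LABELS` and the function name are illustrative:

```c
#include <stdint.h>
#include <ruby.h>
#include <datadog/profiling.h>

#include "collectors_gc_profiling_helper.h"

#define EXAMPLE_MAX_GC_LABELS 8 // needs to fit the up-to-7 labels gc_profiling_set_metadata writes

static void example_record_gc_sample(void) {
  // Precondition: gc_profiling_init() was called once at startup, so the symbols it
  // looks up are already interned and no allocation happens here.
  ddog_prof_Label labels[EXAMPLE_MAX_GC_LABELS];
  uint8_t label_count = gc_profiling_set_metadata(labels, EXAMPLE_MAX_GC_LABELS);

  // labels[0..label_count) now carries "thread id", "gc cause", "gc type", etc.,
  // ready to be attached to the GC sample that gets pushed to the stack recorder.
  (void) label_count;
}
```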