ddtrace 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -1
  3. data/README.md +2 -2
  4. data/ext/ddtrace_profiling_loader/extconf.rb +4 -1
  5. data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +1 -1
  6. data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +3 -2
  7. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +15 -41
  8. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.h +1 -1
  9. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +284 -74
  10. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +142 -0
  11. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +14 -0
  12. data/ext/ddtrace_profiling_native_extension/collectors_idle_sampling_helper.c +241 -0
  13. data/ext/ddtrace_profiling_native_extension/collectors_idle_sampling_helper.h +3 -0
  14. data/ext/ddtrace_profiling_native_extension/extconf.rb +21 -7
  15. data/ext/ddtrace_profiling_native_extension/helpers.h +5 -0
  16. data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +8 -0
  17. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +108 -24
  18. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +9 -0
  19. data/ext/ddtrace_profiling_native_extension/profiling.c +205 -0
  20. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +86 -0
  21. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -6
  22. data/ext/ddtrace_profiling_native_extension/setup_signal_handler.c +23 -4
  23. data/ext/ddtrace_profiling_native_extension/setup_signal_handler.h +4 -0
  24. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +15 -18
  25. data/ext/ddtrace_profiling_native_extension/time_helpers.c +17 -0
  26. data/ext/ddtrace_profiling_native_extension/time_helpers.h +10 -0
  27. data/lib/datadog/core/configuration/components.rb +27 -6
  28. data/lib/datadog/core/configuration/ext.rb +18 -0
  29. data/lib/datadog/core/configuration/settings.rb +14 -341
  30. data/lib/datadog/core/diagnostics/health.rb +4 -22
  31. data/lib/datadog/core/environment/variable_helpers.rb +58 -10
  32. data/lib/datadog/core/utils.rb +0 -21
  33. data/lib/datadog/core.rb +21 -1
  34. data/lib/datadog/opentracer/distributed_headers.rb +2 -2
  35. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +16 -5
  36. data/lib/datadog/profiling/collectors/dynamic_sampling_rate.rb +14 -0
  37. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +68 -0
  38. data/lib/datadog/profiling/stack_recorder.rb +14 -0
  39. data/lib/datadog/profiling.rb +2 -0
  40. data/lib/datadog/tracing/configuration/ext.rb +33 -3
  41. data/lib/datadog/tracing/configuration/settings.rb +433 -0
  42. data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +4 -1
  43. data/lib/datadog/tracing/contrib/aws/ext.rb +1 -0
  44. data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +4 -1
  45. data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
  46. data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +5 -1
  47. data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +1 -0
  48. data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +6 -1
  49. data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
  50. data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +5 -1
  51. data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
  52. data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +5 -1
  53. data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
  54. data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +6 -1
  55. data/lib/datadog/tracing/contrib/grpc/distributed/propagation.rb +9 -4
  56. data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
  57. data/lib/datadog/tracing/contrib/http/configuration/settings.rb +6 -1
  58. data/lib/datadog/tracing/contrib/http/distributed/propagation.rb +9 -4
  59. data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
  60. data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +6 -1
  61. data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
  62. data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +6 -1
  63. data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
  64. data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +5 -1
  65. data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
  66. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -1
  67. data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
  68. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -2
  69. data/lib/datadog/tracing/contrib/patcher.rb +3 -2
  70. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +4 -1
  71. data/lib/datadog/tracing/contrib/pg/ext.rb +1 -0
  72. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +12 -2
  73. data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +4 -1
  74. data/lib/datadog/tracing/contrib/presto/ext.rb +1 -0
  75. data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +1 -0
  76. data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +10 -12
  77. data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +4 -1
  78. data/lib/datadog/tracing/contrib/redis/ext.rb +1 -0
  79. data/lib/datadog/tracing/contrib/redis/instrumentation.rb +30 -23
  80. data/lib/datadog/tracing/contrib/redis/integration.rb +34 -2
  81. data/lib/datadog/tracing/contrib/redis/patcher.rb +18 -14
  82. data/lib/datadog/tracing/contrib/redis/quantize.rb +12 -9
  83. data/lib/datadog/tracing/contrib/redis/tags.rb +4 -6
  84. data/lib/datadog/tracing/contrib/redis/trace_middleware.rb +72 -0
  85. data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +6 -1
  86. data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
  87. data/lib/datadog/{core → tracing}/diagnostics/ext.rb +1 -6
  88. data/lib/datadog/tracing/diagnostics/health.rb +40 -0
  89. data/lib/datadog/tracing/distributed/{b3.rb → b3_multi.rb} +2 -2
  90. data/lib/datadog/tracing/distributed/helpers.rb +2 -1
  91. data/lib/datadog/tracing/distributed/none.rb +19 -0
  92. data/lib/datadog/tracing/distributed/trace_context.rb +369 -0
  93. data/lib/datadog/tracing/metadata/ext.rb +1 -1
  94. data/lib/datadog/tracing/sampling/priority_sampler.rb +11 -0
  95. data/lib/datadog/tracing/sampling/rate_sampler.rb +3 -3
  96. data/lib/datadog/tracing/span.rb +3 -19
  97. data/lib/datadog/tracing/span_operation.rb +5 -4
  98. data/lib/datadog/tracing/trace_digest.rb +75 -2
  99. data/lib/datadog/tracing/trace_operation.rb +5 -4
  100. data/lib/datadog/tracing/utils.rb +50 -0
  101. data/lib/ddtrace/version.rb +1 -1
  102. metadata +20 -5
@@ -50,6 +50,100 @@ rb_nativethread_id_t pthread_id_for(VALUE thread) {
50
50
  #endif
51
51
  }
52
52
 
53
+ // Queries if the current thread is the owner of the global VM lock.
54
+ //
55
+ // @ivoanjo: Ruby has a similarly-named `ruby_thread_has_gvl_p` but that API is insufficient for our needs because it can
56
+ // still return `true` even when a thread DOES NOT HAVE the global VM lock.
57
+ // In particular, looking at the implementation, that API assumes that if a thread is not in a "blocking region" then it
58
+ // will have the GVL which is probably true for the situations that API was designed to be called from BUT this assumption
59
+ // does not hold true when calling `ruby_thread_has_gvl_p` from a signal handler. (Because the thread may have lost the
60
+ // GVL due to a scheduler decision, not because it decided to block.)
61
+ // I have also submitted https://bugs.ruby-lang.org/issues/19172 to discuss this with upstream Ruby developers.
62
+ //
63
+ // Thus we need our own gvl-checking method which actually looks at the gvl structure to determine if it is the owner.
64
+ bool is_current_thread_holding_the_gvl(void) {
65
+ current_gvl_owner owner = gvl_owner();
66
+ return owner.valid && pthread_equal(pthread_self(), owner.owner);
67
+ }
68
+
69
+ #ifndef NO_GVL_OWNER // Ruby < 2.6 doesn't have the owner/running field
70
+ // NOTE: Reading the owner in this is a racy read, because we're not grabbing the lock that Ruby uses to protect it.
71
+ //
72
+ // While we could potentially grab this lock, I (@ivoanjo) think we actually don't need it because:
73
+ // * In the case where a thread owns the GVL and calls `gvl_owner`, it will always see the correct value. That's
74
+ // because every thread sets itself as the owner when it grabs the GVL and unsets itself at the end.
75
+ // That means that `is_current_thread_holding_the_gvl` is always accurate.
76
+ // * In a case where we observe a different thread, then this may change by the time we do something with this value
77
+ // anyway. So unless we want to prevent the Ruby scheduler from switching threads, we need to deal with races here.
78
+ current_gvl_owner gvl_owner(void) {
79
+ const rb_thread_t *current_owner =
80
+ #ifndef NO_RB_THREAD_SCHED // Introduced in Ruby 3.2 as a replacement for struct rb_global_vm_lock_struct
81
+ GET_RACTOR()->threads.sched.running;
82
+ #elif HAVE_RUBY_RACTOR_H
83
+ GET_RACTOR()->threads.gvl.owner;
84
+ #else
85
+ GET_VM()->gvl.owner;
86
+ #endif
87
+
88
+ if (current_owner == NULL) return (current_gvl_owner) {.valid = false};
89
+
90
+ return (current_gvl_owner) {
91
+ .valid = true,
92
+ .owner =
93
+ #ifndef NO_RB_NATIVE_THREAD
94
+ current_owner->nt->thread_id
95
+ #else
96
+ current_owner->thread_id
97
+ #endif
98
+ };
99
+ }
100
+ #else
101
+ current_gvl_owner gvl_owner(void) {
102
+ rb_vm_t *vm =
103
+ #ifndef NO_GET_VM
104
+ GET_VM();
105
+ #else
106
+ thread_struct_from_object(rb_thread_current())->vm;
107
+ #endif
108
+
109
+ // BIG Issue: Ruby < 2.6 did not have the owner field. The really nice thing about the owner field is that it's
110
+ // "atomic" -- when a thread sets it, it "declares" two things in a single step
111
+ // * Declaration 1: Someone has the GVL
112
+ // * Declaration 2: That someone is the specific thread
113
+ //
114
+ // Observation 1: On older versions of Ruby, this ownership concept is actually split. Specifically, `gvl.acquired`
115
+ // is a boolean that represents declaration 1 above, and `vm->running_thread` (or `ruby_current_thread`/
116
+ // `ruby_current_execution_context_ptr`) represents declaration 2.
117
+ //
118
+ // Observation 2: In addition, when a thread releases the GVL, it only sets `gvl.acquired` back to 0 **BUT CRUCIALLY
119
+ // DOES NOT CHANGE THE OTHER global variables**.
120
+ //
121
+ // Observation 1+2 above lead to the following possible race:
122
+ // * Thread A grabs the GVL (`gvl.acquired == 1`)
123
+ // * Thread A sets `running_thread` (`gvl.acquired == 1` + `running_thread == Thread A`)
124
+ // * Thread A releases the GVL (`gvl.acquired == 0` + `running_thread == Thread A`)
125
+ // * Thread B grabs the GVL (`gvl.acquired == 1` + `running_thread == Thread A`)
126
+ // * Thread A calls gvl_owner. Due to the current state (`gvl.acquired == 1` + `running_thread == Thread A`), this
127
+ // function returns an incorrect result.
128
+ // * Thread B finally sets `running_thread` (`gvl.acquired == 1` + `running_thread == Thread B`)
129
+ //
130
+ // This is especially problematic because we use `gvl_owner` to implement `is_current_thread_holding_the_gvl` which
131
+ // is called in a signal handler to decide "is it safe for me to call `rb_postponed_job_register_one` or not".
132
+ // (See constraints in `collectors_cpu_and_wall_time_worker.c` comments for why).
133
+ //
134
+ // Thus an incorrect `is_current_thread_holding_the_gvl` result may lead to issues inside `rb_postponed_job_register_one`.
135
+ //
136
+ // For this reason we currently do not enable the new Ruby profiler on Ruby 2.5 and below by default, and we print a
137
+ // warning when customers force-enable it.
138
+ bool gvl_acquired = vm->gvl.acquired != 0;
139
+ rb_thread_t *current_owner = vm->running_thread;
140
+
141
+ if (!gvl_acquired || current_owner == NULL) return (current_gvl_owner) {.valid = false};
142
+
143
+ return (current_gvl_owner) {.valid = true, .owner = current_owner->thread_id};
144
+ }
145
+ #endif // NO_GVL_OWNER
146
+
53
147
  // Taken from upstream vm_core.h at commit d9cf0388599a3234b9f3c06ddd006cd59a58ab8b (November 2022, Ruby 3.2 trunk)
54
148
  // Copyright (C) 2004-2007 Koichi Sasada
55
149
  // to support tid_for (see below)
@@ -128,7 +222,12 @@ VALUE ddtrace_thread_list(void) {
128
222
  rb_ractor_t *current_ractor = GET_RACTOR();
129
223
  ccan_list_for_each(&current_ractor->threads.set, thread, lt_node) {
130
224
  #else
131
- rb_vm_t *vm = thread_struct_from_object(rb_thread_current())->vm;
225
+ rb_vm_t *vm =
226
+ #ifndef NO_GET_VM
227
+ GET_VM();
228
+ #else
229
+ thread_struct_from_object(rb_thread_current())->vm;
230
+ #endif
132
231
  list_for_each(&vm->living_threads, thread, vmlt_node) {
133
232
  #endif
134
233
  switch (thread->status) {
@@ -284,9 +383,6 @@ calc_lineno(const rb_iseq_t *iseq, const VALUE *pc)
284
383
  // * Add `end_cfp == NULL` and `end_cfp <= cfp` safety checks. These are used in a bunch of places in
285
384
  // `vm_backtrace.c` (`backtrace_each`, `backtrace_size`, `rb_ec_partial_backtrace_object`) but are conspicuously
286
385
  // absent from `rb_profile_frames`. Oversight?
287
- // * Distinguish between `end_cfp == NULL` (dead thread or some other error, returns 0) and `end_cfp <= cfp`
288
- // (alive thread which may just be executing native code and has not pushed anything on the Ruby stack, returns
289
- // PLACEHOLDER_STACK_IN_NATIVE_CODE). See comments on `record_placeholder_stack_in_native_code` for more details.
290
386
  // * Skip frames where `cfp->iseq && !cfp->pc`. These seem to be internal and are skipped by `backtrace_each` in
291
387
  // `vm_backtrace.c`.
292
388
  // * Check thread status and do not sample if thread has been killed.
@@ -340,13 +436,13 @@ int ddtrace_rb_profile_frames(VALUE thread, int start, int limit, VALUE *buff, i
340
436
  const rb_control_frame_t *cfp = ec->cfp, *end_cfp = RUBY_VM_END_CONTROL_FRAME(ec);
341
437
  const rb_callable_method_entry_t *cme;
342
438
 
343
- // `vm_backtrace.c` includes this check in several methods, and I think this happens on either dead or newly-created
344
- // threads, but I'm not entirely sure
345
- if (end_cfp == NULL) return 0;
346
-
347
439
  // Avoid sampling dead threads
348
440
  if (th->status == THREAD_KILLED) return 0;
349
441
 
442
+ // `vm_backtrace.c` includes this check in several methods. This happens on newly-created threads, and may
443
+ // also (not entirely sure) happen on dead threads
444
+ if (end_cfp == NULL) return PLACEHOLDER_STACK_IN_NATIVE_CODE;
445
+
350
446
  // Fix: Skip dummy frame that shows up in main thread.
351
447
  //
352
448
  // According to a comment in `backtrace_each` (`vm_backtrace.c`), there's two dummy frames that we should ignore
@@ -650,9 +746,6 @@ calc_lineno(const rb_iseq_t *iseq, const VALUE *pc)
650
746
  // * Add `end_cfp == NULL` and `end_cfp <= cfp` safety checks. These are used in a bunch of places in
651
747
  // `vm_backtrace.c` (`backtrace_each`, `backtrace_size`, `rb_ec_partial_backtrace_object`) but are conspicuously
652
748
  // absent from `rb_profile_frames`. Oversight?
653
- // * Distinguish between `end_cfp == NULL` (dead thread or some other error, returns 0) and `end_cfp <= cfp`
654
- // (alive thread which may just be executing native code and has not pushed anything on the Ruby stack, returns
655
- // PLACEHOLDER_STACK_IN_NATIVE_CODE). See comments on `record_placeholder_stack_in_native_code` for more details.
656
749
  // * Check thread status and do not sample if thread has been killed.
657
750
  //
658
751
  // The `rb_profile_frames` function changed quite a bit between Ruby 2.2 and 2.3. Since the change was quite complex
@@ -667,13 +760,13 @@ int ddtrace_rb_profile_frames(VALUE thread, int start, int limit, VALUE *buff, i
667
760
  rb_thread_t *th = thread_struct_from_object(thread);
668
761
  rb_control_frame_t *cfp = th->cfp, *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
669
762
 
670
- // `vm_backtrace.c` includes this check in several methods, and I think this happens on either dead or newly-created
671
- // threads, but I'm not entirely sure
672
- if (end_cfp == NULL) return 0;
673
-
674
763
  // Avoid sampling dead threads
675
764
  if (th->status == THREAD_KILLED) return 0;
676
765
 
766
+ // `vm_backtrace.c` includes this check in several methods. This happens on newly-created threads, and may
767
+ // also (not entirely sure) happen on dead threads
768
+ if (end_cfp == NULL) return PLACEHOLDER_STACK_IN_NATIVE_CODE;
769
+
677
770
  // Fix: Skip dummy frame that shows up in main thread.
678
771
  //
679
772
  // According to a comment in `backtrace_each` (`vm_backtrace.c`), there's two dummy frames that we should ignore
@@ -717,15 +810,6 @@ int ddtrace_rb_profile_frames(VALUE thread, int start, int limit, VALUE *buff, i
717
810
 
718
811
  #endif // USE_LEGACY_RB_PROFILE_FRAMES
719
812
 
720
- #ifdef NO_THREAD_HAS_GVL
721
- int ruby_thread_has_gvl_p(void) {
722
- // TODO: The CpuAndWallTimeWorker needs this function, but Ruby 2.2 doesn't expose it... For now this placeholder
723
- // will enable the profiling native extension to continue to compile on Ruby 2.2, but the CpuAndWallTimeWorker will
724
- // not work properly on 2.2. Will be addressed later.
725
- return 0;
726
- }
727
- #endif // NO_THREAD_HAS_GVL
728
-
729
813
  #ifndef NO_RACTORS
730
814
  // This API and definition are exported as a public symbol by the VM BUT the function header is not defined in any public header, so we
731
815
  // repeat it here to be able to use in our code.
@@ -7,11 +7,20 @@
7
7
  // without also dragging the incompatible includes
8
8
  #ifndef PRIVATE_VM_API_ACCESS_SKIP_RUBY_INCLUDES
9
9
  #include <ruby/thread_native.h>
10
+ #include <ruby/vm.h>
10
11
  #endif
11
12
 
12
13
  #include "extconf.h"
13
14
 
15
+ // Contains the current gvl owner, and a flag to indicate if it is valid
16
+ typedef struct {
17
+ bool valid;
18
+ rb_nativethread_id_t owner;
19
+ } current_gvl_owner;
20
+
14
21
  rb_nativethread_id_t pthread_id_for(VALUE thread);
22
+ bool is_current_thread_holding_the_gvl(void);
23
+ current_gvl_owner gvl_owner(void);
15
24
  uint64_t native_thread_id_for(VALUE thread);
16
25
  ptrdiff_t stack_depth_for(VALUE thread);
17
26
  VALUE ddtrace_thread_list(void);
@@ -1,18 +1,37 @@
1
1
  #include <ruby.h>
2
+ #include <ruby/thread.h>
3
+ #include <errno.h>
2
4
 
3
5
  #include "clock_id.h"
4
6
  #include "helpers.h"
5
7
  #include "private_vm_api_access.h"
8
+ #include "ruby_helpers.h"
9
+ #include "setup_signal_handler.h"
10
+ #include "time_helpers.h"
6
11
 
7
12
  // Each class/module here is implemented in their separate file
8
13
  void collectors_cpu_and_wall_time_init(VALUE profiling_module);
9
14
  void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module);
15
+ void collectors_dynamic_sampling_rate_init(VALUE profiling_module);
16
+ void collectors_idle_sampling_helper_init(VALUE profiling_module);
10
17
  void collectors_stack_init(VALUE profiling_module);
11
18
  void http_transport_init(VALUE profiling_module);
12
19
  void stack_recorder_init(VALUE profiling_module);
13
20
 
14
21
  static VALUE native_working_p(VALUE self);
22
+ static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE exception_class, VALUE test_message, VALUE test_message_arg, VALUE release_gvl);
23
+ static void *trigger_grab_gvl_and_raise(void *trigger_args);
24
+ static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE syserr_errno, VALUE test_message, VALUE test_message_arg, VALUE release_gvl);
25
+ static void *trigger_grab_gvl_and_raise_syserr(void *trigger_args);
15
26
  static VALUE _native_ddtrace_rb_ractor_main_p(DDTRACE_UNUSED VALUE _self);
27
+ static VALUE _native_is_current_thread_holding_the_gvl(DDTRACE_UNUSED VALUE _self);
28
+ static VALUE _native_release_gvl_and_call_is_current_thread_holding_the_gvl(DDTRACE_UNUSED VALUE _self);
29
+ static void *testing_is_current_thread_holding_the_gvl(DDTRACE_UNUSED void *_unused);
30
+ static VALUE _native_install_holding_the_gvl_signal_handler(DDTRACE_UNUSED VALUE _self);
31
+ static void holding_the_gvl_signal_handler(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
32
+ static VALUE _native_trigger_holding_the_gvl_signal_handler_on(DDTRACE_UNUSED VALUE _self, VALUE background_thread);
33
+ static VALUE _native_enforce_success(DDTRACE_UNUSED VALUE _self, VALUE syserr_errno, VALUE with_gvl);
34
+ static void *trigger_enforce_success(void *trigger_args);
16
35
 
17
36
  void DDTRACE_EXPORT Init_ddtrace_profiling_native_extension(void) {
18
37
  VALUE datadog_module = rb_define_module("Datadog");
@@ -26,13 +45,27 @@ void DDTRACE_EXPORT Init_ddtrace_profiling_native_extension(void) {
26
45
 
27
46
  collectors_cpu_and_wall_time_init(profiling_module);
28
47
  collectors_cpu_and_wall_time_worker_init(profiling_module);
48
+ collectors_dynamic_sampling_rate_init(profiling_module);
49
+ collectors_idle_sampling_helper_init(profiling_module);
29
50
  collectors_stack_init(profiling_module);
30
51
  http_transport_init(profiling_module);
31
52
  stack_recorder_init(profiling_module);
32
53
 
33
54
  // Hosts methods used for testing the native code using RSpec
34
55
  VALUE testing_module = rb_define_module_under(native_extension_module, "Testing");
56
+ rb_define_singleton_method(testing_module, "_native_grab_gvl_and_raise", _native_grab_gvl_and_raise, 4);
57
+ rb_define_singleton_method(testing_module, "_native_grab_gvl_and_raise_syserr", _native_grab_gvl_and_raise_syserr, 4);
35
58
  rb_define_singleton_method(testing_module, "_native_ddtrace_rb_ractor_main_p", _native_ddtrace_rb_ractor_main_p, 0);
59
+ rb_define_singleton_method(testing_module, "_native_is_current_thread_holding_the_gvl", _native_is_current_thread_holding_the_gvl, 0);
60
+ rb_define_singleton_method(
61
+ testing_module,
62
+ "_native_release_gvl_and_call_is_current_thread_holding_the_gvl",
63
+ _native_release_gvl_and_call_is_current_thread_holding_the_gvl,
64
+ 0
65
+ );
66
+ rb_define_singleton_method(testing_module, "_native_install_holding_the_gvl_signal_handler", _native_install_holding_the_gvl_signal_handler, 0);
67
+ rb_define_singleton_method(testing_module, "_native_trigger_holding_the_gvl_signal_handler_on", _native_trigger_holding_the_gvl_signal_handler_on, 1);
68
+ rb_define_singleton_method(testing_module, "_native_enforce_success", _native_enforce_success, 2);
36
69
  }
37
70
 
38
71
  static VALUE native_working_p(DDTRACE_UNUSED VALUE _self) {
@@ -41,6 +74,178 @@ static VALUE native_working_p(DDTRACE_UNUSED VALUE _self) {
41
74
  return Qtrue;
42
75
  }
43
76
 
77
+ struct trigger_grab_gvl_and_raise_arguments {
78
+ VALUE exception_class;
79
+ char *test_message;
80
+ int test_message_arg;
81
+ };
82
+
83
+ static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE exception_class, VALUE test_message, VALUE test_message_arg, VALUE release_gvl) {
84
+ ENFORCE_TYPE(test_message, T_STRING);
85
+
86
+ struct trigger_grab_gvl_and_raise_arguments args;
87
+
88
+ args.exception_class = exception_class;
89
+ args.test_message = StringValueCStr(test_message);
90
+ args.test_message_arg = test_message_arg != Qnil ? NUM2INT(test_message_arg) : -1;
91
+
92
+ if (RTEST(release_gvl)) {
93
+ rb_thread_call_without_gvl(trigger_grab_gvl_and_raise, &args, NULL, NULL);
94
+ } else {
95
+ grab_gvl_and_raise(args.exception_class, "%s", args.test_message);
96
+ }
97
+
98
+ rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen");
99
+ }
100
+
101
+ static void *trigger_grab_gvl_and_raise(void *trigger_args) {
102
+ struct trigger_grab_gvl_and_raise_arguments *args = (struct trigger_grab_gvl_and_raise_arguments *) trigger_args;
103
+
104
+ if (args->test_message_arg >= 0) {
105
+ grab_gvl_and_raise(args->exception_class, "%s%d", args->test_message, args->test_message_arg);
106
+ } else {
107
+ grab_gvl_and_raise(args->exception_class, "%s", args->test_message);
108
+ }
109
+
110
+ return NULL;
111
+ }
112
+
113
+ struct trigger_grab_gvl_and_raise_syserr_arguments {
114
+ int syserr_errno;
115
+ char *test_message;
116
+ int test_message_arg;
117
+ };
118
+
119
+ static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE syserr_errno, VALUE test_message, VALUE test_message_arg, VALUE release_gvl) {
120
+ ENFORCE_TYPE(test_message, T_STRING);
121
+
122
+ struct trigger_grab_gvl_and_raise_syserr_arguments args;
123
+
124
+ args.syserr_errno = NUM2INT(syserr_errno);
125
+ args.test_message = StringValueCStr(test_message);
126
+ args.test_message_arg = test_message_arg != Qnil ? NUM2INT(test_message_arg) : -1;
127
+
128
+ if (RTEST(release_gvl)) {
129
+ rb_thread_call_without_gvl(trigger_grab_gvl_and_raise_syserr, &args, NULL, NULL);
130
+ } else {
131
+ grab_gvl_and_raise_syserr(args.syserr_errno, "%s", args.test_message);
132
+ }
133
+
134
+ rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen");
135
+ }
136
+
137
+ static void *trigger_grab_gvl_and_raise_syserr(void *trigger_args) {
138
+ struct trigger_grab_gvl_and_raise_syserr_arguments *args = (struct trigger_grab_gvl_and_raise_syserr_arguments *) trigger_args;
139
+
140
+ if (args->test_message_arg >= 0) {
141
+ grab_gvl_and_raise_syserr(args->syserr_errno, "%s%d", args->test_message, args->test_message_arg);
142
+ } else {
143
+ grab_gvl_and_raise_syserr(args->syserr_errno, "%s", args->test_message);
144
+ }
145
+
146
+ return NULL;
147
+ }
148
+
44
149
  static VALUE _native_ddtrace_rb_ractor_main_p(DDTRACE_UNUSED VALUE _self) {
45
150
  return ddtrace_rb_ractor_main_p() ? Qtrue : Qfalse;
46
151
  }
152
+
153
+ static VALUE _native_is_current_thread_holding_the_gvl(DDTRACE_UNUSED VALUE _self) {
154
+ return ((bool) testing_is_current_thread_holding_the_gvl(NULL)) ? Qtrue : Qfalse;
155
+ }
156
+
157
+ static VALUE _native_release_gvl_and_call_is_current_thread_holding_the_gvl(DDTRACE_UNUSED VALUE _self) {
158
+ return ((bool) rb_thread_call_without_gvl(testing_is_current_thread_holding_the_gvl, NULL, NULL, NULL)) ? Qtrue : Qfalse;
159
+ }
160
+
161
+ static void *testing_is_current_thread_holding_the_gvl(DDTRACE_UNUSED void *_unused) {
162
+ return (void *) is_current_thread_holding_the_gvl();
163
+ }
164
+
165
+ static VALUE _native_install_holding_the_gvl_signal_handler(DDTRACE_UNUSED VALUE _self) {
166
+ install_sigprof_signal_handler(holding_the_gvl_signal_handler, "holding_the_gvl_signal_handler");
167
+ return Qtrue;
168
+ }
169
+
170
+ static pthread_mutex_t holding_the_gvl_signal_handler_mutex = PTHREAD_MUTEX_INITIALIZER;
171
+ static pthread_cond_t holding_the_gvl_signal_handler_executed = PTHREAD_COND_INITIALIZER;
172
+ static VALUE holding_the_gvl_signal_handler_result[3];
173
+
174
+ // Ruby VM API that is exported but not present in the header files. Only used by holding_the_gvl_signal_handler below and SHOULD NOT
175
+ // be used in any other situation. See the comments on is_current_thread_holding_the_gvl for details.
176
+ int ruby_thread_has_gvl_p(void);
177
+
178
+ static void holding_the_gvl_signal_handler(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext) {
179
+ pthread_mutex_lock(&holding_the_gvl_signal_handler_mutex);
180
+
181
+ VALUE test_executed = Qtrue;
182
+ VALUE ruby_thread_has_gvl_p_result = ruby_thread_has_gvl_p() ? Qtrue : Qfalse;
183
+ VALUE is_current_thread_holding_the_gvl_result = is_current_thread_holding_the_gvl() ? Qtrue : Qfalse;
184
+
185
+ holding_the_gvl_signal_handler_result[0] = test_executed;
186
+ holding_the_gvl_signal_handler_result[1] = ruby_thread_has_gvl_p_result;
187
+ holding_the_gvl_signal_handler_result[2] = is_current_thread_holding_the_gvl_result;
188
+
189
+ pthread_cond_broadcast(&holding_the_gvl_signal_handler_executed);
190
+ pthread_mutex_unlock(&holding_the_gvl_signal_handler_mutex);
191
+ }
192
+
193
+ static VALUE _native_trigger_holding_the_gvl_signal_handler_on(DDTRACE_UNUSED VALUE _self, VALUE background_thread) {
194
+ holding_the_gvl_signal_handler_result[0] = Qfalse;
195
+ holding_the_gvl_signal_handler_result[1] = Qfalse;
196
+ holding_the_gvl_signal_handler_result[2] = Qfalse;
197
+
198
+ rb_nativethread_id_t thread = pthread_id_for(background_thread);
199
+
200
+ ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&holding_the_gvl_signal_handler_mutex));
201
+
202
+ // We keep trying for ~5 seconds (500 x 10ms) to try to avoid any flakiness if the test machine is a bit slow
203
+ for (int tries = 0; holding_the_gvl_signal_handler_result[0] == Qfalse && tries < 500; tries++) {
204
+ pthread_kill(thread, SIGPROF);
205
+
206
+ // pthread_cond_timedwait is simply awful -- the deadline is based on wall-clock using a struct timespec, so we need
207
+ // all of the below complexity just to tell it "timeout is 10ms". The % limit dance below is needed because the
208
+ // `tv_nsec` part of a timespec can't go over the limit.
209
+ struct timespec deadline;
210
+ clock_gettime(CLOCK_REALTIME, &deadline);
211
+
212
+ unsigned int timeout_ns = MILLIS_AS_NS(10);
213
+ unsigned int tv_nsec_limit = SECONDS_AS_NS(1);
214
+ if ((deadline.tv_nsec + timeout_ns) < tv_nsec_limit) {
215
+ deadline.tv_nsec += timeout_ns;
216
+ } else {
217
+ deadline.tv_nsec = (deadline.tv_nsec + timeout_ns) % tv_nsec_limit;
218
+ deadline.tv_sec++;
219
+ }
220
+
221
+ int error = pthread_cond_timedwait(&holding_the_gvl_signal_handler_executed, &holding_the_gvl_signal_handler_mutex, &deadline);
222
+ if (error && error != ETIMEDOUT) ENFORCE_SUCCESS_GVL(error);
223
+ }
224
+
225
+ ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(&holding_the_gvl_signal_handler_mutex));
226
+
227
+ replace_sigprof_signal_handler_with_empty_handler(holding_the_gvl_signal_handler);
228
+
229
+ if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(rb_eRuntimeError, "Could not signal background_thread");
230
+
231
+ VALUE result = rb_hash_new();
232
+ rb_hash_aset(result, ID2SYM(rb_intern("ruby_thread_has_gvl_p")), holding_the_gvl_signal_handler_result[1]);
233
+ rb_hash_aset(result, ID2SYM(rb_intern("is_current_thread_holding_the_gvl")), holding_the_gvl_signal_handler_result[2]);
234
+ return result;
235
+ }
236
+
237
+ static VALUE _native_enforce_success(DDTRACE_UNUSED VALUE _self, VALUE syserr_errno, VALUE with_gvl) {
238
+ if (RTEST(with_gvl)) {
239
+ ENFORCE_SUCCESS_GVL(NUM2INT(syserr_errno));
240
+ } else {
241
+ rb_thread_call_without_gvl(trigger_enforce_success, (void *) (intptr_t) NUM2INT(syserr_errno), NULL, NULL);
242
+ }
243
+
244
+ return Qtrue;
245
+ }
246
+
247
// Callback run by `_native_enforce_success` via `rb_thread_call_without_gvl`: unpacks the errno value carried in the
// pointer argument and hands it to ENFORCE_SUCCESS_NO_GVL
static void *trigger_enforce_success(void *trigger_args) {
  const intptr_t syserr_errno = (intptr_t) trigger_args;

  ENFORCE_SUCCESS_NO_GVL(syserr_errno);

  return NULL;
}
@@ -1,4 +1,8 @@
1
+ #include <ruby.h>
2
+ #include <ruby/thread.h>
3
+
1
4
  #include "ruby_helpers.h"
5
+ #include "private_vm_api_access.h"
2
6
 
3
7
  void raise_unexpected_type(
4
8
  VALUE value,
@@ -22,3 +26,85 @@ void raise_unexpected_type(
22
26
  )
23
27
  );
24
28
  }
29
+
30
+ #define MAX_RAISE_MESSAGE_SIZE 256
31
+
32
+ struct raise_arguments {
33
+ VALUE exception_class;
34
+ char exception_message[MAX_RAISE_MESSAGE_SIZE];
35
+ };
36
+
37
+ static void *trigger_raise(void *raise_arguments) {
38
+ struct raise_arguments *args = (struct raise_arguments *) raise_arguments;
39
+ rb_raise(args->exception_class, "%s", args->exception_message);
40
+ }
41
+
42
+ void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...) {
43
+ struct raise_arguments args;
44
+
45
+ args.exception_class = exception_class;
46
+
47
+ va_list format_string_arguments;
48
+ va_start(format_string_arguments, format_string);
49
+ vsnprintf(args.exception_message, MAX_RAISE_MESSAGE_SIZE, format_string, format_string_arguments);
50
+
51
+ if (is_current_thread_holding_the_gvl()) {
52
+ rb_raise(
53
+ rb_eRuntimeError,
54
+ "grab_gvl_and_raise called by thread holding the global VM lock. exception_message: '%s'",
55
+ args.exception_message
56
+ );
57
+ }
58
+
59
+ rb_thread_call_with_gvl(trigger_raise, &args);
60
+
61
+ rb_bug("[DDTRACE] Unexpected: Reached the end of grab_gvl_and_raise while raising '%s'\n", args.exception_message);
62
+ }
63
+
64
+ struct syserr_raise_arguments {
65
+ int syserr_errno;
66
+ char exception_message[MAX_RAISE_MESSAGE_SIZE];
67
+ };
68
+
69
+ static void *trigger_syserr_raise(void *syserr_raise_arguments) {
70
+ struct syserr_raise_arguments *args = (struct syserr_raise_arguments *) syserr_raise_arguments;
71
+ rb_syserr_fail(args->syserr_errno, args->exception_message);
72
+ }
73
+
74
+ void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...) {
75
+ struct syserr_raise_arguments args;
76
+
77
+ args.syserr_errno = syserr_errno;
78
+
79
+ va_list format_string_arguments;
80
+ va_start(format_string_arguments, format_string);
81
+ vsnprintf(args.exception_message, MAX_RAISE_MESSAGE_SIZE, format_string, format_string_arguments);
82
+
83
+ if (is_current_thread_holding_the_gvl()) {
84
+ rb_raise(
85
+ rb_eRuntimeError,
86
+ "grab_gvl_and_raise_syserr called by thread holding the global VM lock. syserr_errno: %d, exception_message: '%s'",
87
+ syserr_errno,
88
+ args.exception_message
89
+ );
90
+ }
91
+
92
+ rb_thread_call_with_gvl(trigger_syserr_raise, &args);
93
+
94
+ rb_bug("[DDTRACE] Unexpected: Reached the end of grab_gvl_and_raise_syserr while raising '%s'\n", args.exception_message);
95
+ }
96
+
97
+ void raise_syserr(
98
+ int syserr_errno,
99
+ bool have_gvl,
100
+ const char *expression,
101
+ const char *file,
102
+ int line,
103
+ const char *function_name
104
+ ) {
105
+ if (have_gvl) {
106
+ rb_exc_raise(rb_syserr_new_str(syserr_errno, rb_sprintf("Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name)));
107
+ } else {
108
+ grab_gvl_and_raise_syserr(syserr_errno, "Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name);
109
+ }
110
+ }
@@ -1,6 +1,7 @@
1
1
  #pragma once
2
2
 
3
3
  #include <ruby.h>
4
+ #include <stdbool.h>
4
5
 
5
6
  #include "helpers.h"
6
7
 
@@ -57,11 +58,32 @@ NORETURN(void raise_unexpected_type(
57
58
  const char *type_name,
58
59
  const char *file,
59
60
  int line,
60
- const char* function_name
61
+ const char *function_name
61
62
  ));
62
63
 
63
- // This API is exported as a public symbol by the VM BUT the function header is not defined in any public header, so we
64
- // repeat it here to be able to use in our code.
65
- //
66
- // Queries if the current thread is the owner of the global VM lock.
67
- int ruby_thread_has_gvl_p(void);
64
+ #define VALUE_COUNT(array) (sizeof(array) / sizeof(VALUE))
65
+
66
+ NORETURN(
67
+ void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...)
68
+ __attribute__ ((format (printf, 2, 3)));
69
+ );
70
+ NORETURN(
71
+ void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...)
72
+ __attribute__ ((format (printf, 2, 3)));
73
+ );
74
+
75
+ #define ENFORCE_SUCCESS_GVL(expression) ENFORCE_SUCCESS_HELPER(expression, true)
76
+ #define ENFORCE_SUCCESS_NO_GVL(expression) ENFORCE_SUCCESS_HELPER(expression, false)
77
+
78
// Evaluates `expression` once and, when the result is non-zero, reports it through raise_syserr together with the
// expression text and the current file/line/function. `have_gvl` is forwarded to raise_syserr unchanged.
//
// Wrapped in do { ... } while (0) so that `ENFORCE_SUCCESS_...(x);` behaves as exactly one statement, including when
// used as the body of an `if` that has an `else`.
#define ENFORCE_SUCCESS_HELPER(expression, have_gvl) \
  do { \
    int result_syserr_errno = (expression); \
    if (RB_UNLIKELY(result_syserr_errno)) raise_syserr(result_syserr_errno, have_gvl, ADD_QUOTES(expression), __FILE__, __LINE__, __func__); \
  } while (0)
80
+
81
+ // Called by ENFORCE_SUCCESS_HELPER; should not be used directly
82
+ NORETURN(void raise_syserr(
83
+ int syserr_errno,
84
+ bool have_gvl,
85
+ const char *expression,
86
+ const char *file,
87
+ int line,
88
+ const char *function_name
89
+ ));
@@ -5,6 +5,9 @@
5
5
 
6
6
  #include "helpers.h"
7
7
  #include "setup_signal_handler.h"
8
+ #include "ruby_helpers.h"
9
+
10
+ // Used by Collectors::CpuAndWallTimeWorker to set up the SIGPROF signal handlers used for cpu/wall-time profiling.
8
11
 
9
12
  static void install_sigprof_signal_handler_internal(
10
13
  void (*signal_handler_function)(int, siginfo_t *, void *),
@@ -88,9 +91,25 @@ void remove_sigprof_signal_handler(void) {
88
91
  if (sigaction(SIGPROF, &signal_handler_config, NULL) != 0) rb_sys_fail("Failure while removing the signal handler");
89
92
  }
90
93
 
94
+ static void toggle_sigprof_signal_handler_for_current_thread(int action) {
95
+ sigset_t signals_to_toggle;
96
+ sigemptyset(&signals_to_toggle);
97
+ sigaddset(&signals_to_toggle, SIGPROF);
98
+ int error = pthread_sigmask(action, &signals_to_toggle, NULL);
99
+ if (error) rb_exc_raise(rb_syserr_new_str(error, rb_sprintf("Unexpected failure in pthread_sigmask, action=%d", action)));
100
+ }
101
+
91
102
// Adds SIGPROF to the calling thread's blocked signals (SIG_BLOCK), delegating to
// toggle_sigprof_signal_handler_for_current_thread, which raises if pthread_sigmask fails.
void block_sigprof_signal_handler_from_running_in_current_thread(void) {
  const int mask_action = SIG_BLOCK;
  toggle_sigprof_signal_handler_for_current_thread(mask_action);
}
105
+
106
+ void unblock_sigprof_signal_handler_from_running_in_current_thread(void) {
107
+ toggle_sigprof_signal_handler_for_current_thread(SIG_UNBLOCK);
108
+ }
109
+
110
+ VALUE is_sigprof_blocked_in_current_thread(void) {
111
+ sigset_t current_signals;
112
+ sigemptyset(&current_signals);
113
+ ENFORCE_SUCCESS_GVL(pthread_sigmask(0, NULL, &current_signals));
114
+ return sigismember(&current_signals, SIGPROF) ? Qtrue : Qfalse;
96
115
  }
@@ -1,7 +1,11 @@
1
1
  #pragma once
2
2
 
3
+ #include <signal.h>
4
+
3
5
  void empty_signal_handler(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
4
6
  void install_sigprof_signal_handler(void (*signal_handler_function)(int, siginfo_t *, void *), const char *handler_pretty_name);
5
7
  void replace_sigprof_signal_handler_with_empty_handler(void (*expected_existing_handler)(int, siginfo_t *, void *));
6
8
  void remove_sigprof_signal_handler(void);
7
9
  void block_sigprof_signal_handler_from_running_in_current_thread(void);
10
+ void unblock_sigprof_signal_handler_from_running_in_current_thread(void);
11
+ VALUE is_sigprof_blocked_in_current_thread(void);