ddtrace 1.7.0 → 1.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -1
  3. data/README.md +2 -2
  4. data/ext/ddtrace_profiling_loader/extconf.rb +4 -1
  5. data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +1 -1
  6. data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +3 -2
  7. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +15 -41
  8. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.h +1 -1
  9. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +284 -74
  10. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +142 -0
  11. data/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +14 -0
  12. data/ext/ddtrace_profiling_native_extension/collectors_idle_sampling_helper.c +241 -0
  13. data/ext/ddtrace_profiling_native_extension/collectors_idle_sampling_helper.h +3 -0
  14. data/ext/ddtrace_profiling_native_extension/extconf.rb +21 -7
  15. data/ext/ddtrace_profiling_native_extension/helpers.h +5 -0
  16. data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +8 -0
  17. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +108 -24
  18. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +9 -0
  19. data/ext/ddtrace_profiling_native_extension/profiling.c +205 -0
  20. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +86 -0
  21. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +28 -6
  22. data/ext/ddtrace_profiling_native_extension/setup_signal_handler.c +23 -4
  23. data/ext/ddtrace_profiling_native_extension/setup_signal_handler.h +4 -0
  24. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +15 -18
  25. data/ext/ddtrace_profiling_native_extension/time_helpers.c +17 -0
  26. data/ext/ddtrace_profiling_native_extension/time_helpers.h +10 -0
  27. data/lib/datadog/core/configuration/components.rb +27 -6
  28. data/lib/datadog/core/configuration/ext.rb +18 -0
  29. data/lib/datadog/core/configuration/settings.rb +14 -341
  30. data/lib/datadog/core/diagnostics/health.rb +4 -22
  31. data/lib/datadog/core/environment/variable_helpers.rb +58 -10
  32. data/lib/datadog/core/utils.rb +0 -21
  33. data/lib/datadog/core.rb +21 -1
  34. data/lib/datadog/opentracer/distributed_headers.rb +2 -2
  35. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +16 -5
  36. data/lib/datadog/profiling/collectors/dynamic_sampling_rate.rb +14 -0
  37. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +68 -0
  38. data/lib/datadog/profiling/stack_recorder.rb +14 -0
  39. data/lib/datadog/profiling.rb +2 -0
  40. data/lib/datadog/tracing/configuration/ext.rb +33 -3
  41. data/lib/datadog/tracing/configuration/settings.rb +433 -0
  42. data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +4 -1
  43. data/lib/datadog/tracing/contrib/aws/ext.rb +1 -0
  44. data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +4 -1
  45. data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
  46. data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +5 -1
  47. data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +1 -0
  48. data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +6 -1
  49. data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
  50. data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +5 -1
  51. data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
  52. data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +5 -1
  53. data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
  54. data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +6 -1
  55. data/lib/datadog/tracing/contrib/grpc/distributed/propagation.rb +9 -4
  56. data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
  57. data/lib/datadog/tracing/contrib/http/configuration/settings.rb +6 -1
  58. data/lib/datadog/tracing/contrib/http/distributed/propagation.rb +9 -4
  59. data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
  60. data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +6 -1
  61. data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
  62. data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +6 -1
  63. data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
  64. data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +5 -1
  65. data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
  66. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +4 -1
  67. data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
  68. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -2
  69. data/lib/datadog/tracing/contrib/patcher.rb +3 -2
  70. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +4 -1
  71. data/lib/datadog/tracing/contrib/pg/ext.rb +1 -0
  72. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +12 -2
  73. data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +4 -1
  74. data/lib/datadog/tracing/contrib/presto/ext.rb +1 -0
  75. data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +1 -0
  76. data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +10 -12
  77. data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +4 -1
  78. data/lib/datadog/tracing/contrib/redis/ext.rb +1 -0
  79. data/lib/datadog/tracing/contrib/redis/instrumentation.rb +30 -23
  80. data/lib/datadog/tracing/contrib/redis/integration.rb +34 -2
  81. data/lib/datadog/tracing/contrib/redis/patcher.rb +18 -14
  82. data/lib/datadog/tracing/contrib/redis/quantize.rb +12 -9
  83. data/lib/datadog/tracing/contrib/redis/tags.rb +4 -6
  84. data/lib/datadog/tracing/contrib/redis/trace_middleware.rb +72 -0
  85. data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +6 -1
  86. data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
  87. data/lib/datadog/{core → tracing}/diagnostics/ext.rb +1 -6
  88. data/lib/datadog/tracing/diagnostics/health.rb +40 -0
  89. data/lib/datadog/tracing/distributed/{b3.rb → b3_multi.rb} +2 -2
  90. data/lib/datadog/tracing/distributed/helpers.rb +2 -1
  91. data/lib/datadog/tracing/distributed/none.rb +19 -0
  92. data/lib/datadog/tracing/distributed/trace_context.rb +369 -0
  93. data/lib/datadog/tracing/metadata/ext.rb +1 -1
  94. data/lib/datadog/tracing/sampling/priority_sampler.rb +11 -0
  95. data/lib/datadog/tracing/sampling/rate_sampler.rb +3 -3
  96. data/lib/datadog/tracing/span.rb +3 -19
  97. data/lib/datadog/tracing/span_operation.rb +5 -4
  98. data/lib/datadog/tracing/trace_digest.rb +75 -2
  99. data/lib/datadog/tracing/trace_operation.rb +5 -4
  100. data/lib/datadog/tracing/utils.rb +50 -0
  101. data/lib/ddtrace/version.rb +1 -1
  102. metadata +20 -5
@@ -50,6 +50,100 @@ rb_nativethread_id_t pthread_id_for(VALUE thread) {
50
50
  #endif
51
51
  }
52
52
 
53
+ // Queries if the current thread is the owner of the global VM lock.
54
+ //
55
+ // @ivoanjo: Ruby has a similarly-named `ruby_thread_has_gvl_p` but that API is insufficient for our needs because it can
56
+ // still return `true` even when a thread DOES NOT HAVE the global VM lock.
57
+ // In particular, looking at the implementation, that API assumes that if a thread is not in a "blocking region" then it
58
+ // will have the GVL which is probably true for the situations that API was designed to be called from BUT this assumption
59
+ // does not hold true when calling `ruby_thread_has_gvl_p` from a signal handler. (Because the thread may have lost the
60
+ // GVL due to a scheduler decision, not because it decided to block.)
61
+ // I have also submitted https://bugs.ruby-lang.org/issues/19172 to discuss this with upstream Ruby developers.
62
+ //
63
+ // Thus we need our own gvl-checking method which actually looks at the gvl structure to determine if it is the owner.
64
+ bool is_current_thread_holding_the_gvl(void) {
65
+ current_gvl_owner owner = gvl_owner();
66
+ return owner.valid && pthread_equal(pthread_self(), owner.owner);
67
+ }
68
+
69
+ #ifndef NO_GVL_OWNER // Ruby < 2.6 doesn't have the owner/running field
70
+ // NOTE: Reading the owner here is a racy read, because we're not grabbing the lock that Ruby uses to protect it.
71
+ //
72
+ // While we could potentially grab this lock, I (@ivoanjo) think we actually don't need it because:
73
+ // * In the case where a thread owns the GVL and calls `gvl_owner`, it will always see the correct value. That's
74
+ // because every thread sets itself as the owner when it grabs the GVL and unsets itself at the end.
75
+ // That means that `is_current_thread_holding_the_gvl` is always accurate.
76
+ // * In a case where we observe a different thread, then this may change by the time we do something with this value
77
+ // anyway. So unless we want to prevent the Ruby scheduler from switching threads, we need to deal with races here.
78
+ current_gvl_owner gvl_owner(void) {
79
+ const rb_thread_t *current_owner =
80
+ #ifndef NO_RB_THREAD_SCHED // Introduced in Ruby 3.2 as a replacement for struct rb_global_vm_lock_struct
81
+ GET_RACTOR()->threads.sched.running;
82
+ #elif HAVE_RUBY_RACTOR_H
83
+ GET_RACTOR()->threads.gvl.owner;
84
+ #else
85
+ GET_VM()->gvl.owner;
86
+ #endif
87
+
88
+ if (current_owner == NULL) return (current_gvl_owner) {.valid = false};
89
+
90
+ return (current_gvl_owner) {
91
+ .valid = true,
92
+ .owner =
93
+ #ifndef NO_RB_NATIVE_THREAD
94
+ current_owner->nt->thread_id
95
+ #else
96
+ current_owner->thread_id
97
+ #endif
98
+ };
99
+ }
100
+ #else
101
+ current_gvl_owner gvl_owner(void) {
102
+ rb_vm_t *vm =
103
+ #ifndef NO_GET_VM
104
+ GET_VM();
105
+ #else
106
+ thread_struct_from_object(rb_thread_current())->vm;
107
+ #endif
108
+
109
+ // BIG Issue: Ruby < 2.6 did not have the owner field. The really nice thing about the owner field is that it's
110
+ // "atomic" -- when a thread sets it, it "declares" two things in a single step
111
+ // * Declaration 1: Someone has the GVL
112
+ // * Declaration 2: That someone is the specific thread
113
+ //
114
+ // Observation 1: On older versions of Ruby, this ownership concept is actually split. Specifically, `gvl.acquired`
115
+ // is a boolean that represents declaration 1 above, and `vm->running_thread` (or `ruby_current_thread`/
116
+ // `ruby_current_execution_context_ptr`) represents declaration 2.
117
+ //
118
+ // Observation 2: In addition, when a thread releases the GVL, it only sets `gvl.acquired` back to 0 **BUT CRUCIALLY
119
+ // DOES NOT CHANGE THE OTHER global variables**.
120
+ //
121
+ // Observation 1+2 above lead to the following possible race:
122
+ // * Thread A grabs the GVL (`gvl.acquired == 1`)
123
+ // * Thread A sets `running_thread` (`gvl.acquired == 1` + `running_thread == Thread A`)
124
+ // * Thread A releases the GVL (`gvl.acquired == 0` + `running_thread == Thread A`)
125
+ // * Thread B grabs the GVL (`gvl.acquired == 1` + `running_thread == Thread A`)
126
+ // * Thread A calls gvl_owner. Due to the current state (`gvl.acquired == 1` + `running_thread == Thread A`), this
127
+ // function returns an incorrect result.
128
+ // * Thread B finally sets `running_thread` (`gvl.acquired == 1` + `running_thread == Thread B`)
129
+ //
130
+ // This is especially problematic because we use `gvl_owner` to implement `is_current_thread_holding_the_gvl` which
131
+ // is called in a signal handler to decide "is it safe for me to call `rb_postponed_job_register_one` or not".
132
+ // (See constraints in `collectors_cpu_and_wall_time_worker.c` comments for why).
133
+ //
134
+ // Thus an incorrect `is_current_thread_holding_the_gvl` result may lead to issues inside `rb_postponed_job_register_one`.
135
+ //
136
+ // For this reason we currently do not enable the new Ruby profiler on Ruby 2.5 and below by default, and we print a
137
+ // warning when customers force-enable it.
138
+ bool gvl_acquired = vm->gvl.acquired != 0;
139
+ rb_thread_t *current_owner = vm->running_thread;
140
+
141
+ if (!gvl_acquired || current_owner == NULL) return (current_gvl_owner) {.valid = false};
142
+
143
+ return (current_gvl_owner) {.valid = true, .owner = current_owner->thread_id};
144
+ }
145
+ #endif // NO_GVL_OWNER
146
+
53
147
  // Taken from upstream vm_core.h at commit d9cf0388599a3234b9f3c06ddd006cd59a58ab8b (November 2022, Ruby 3.2 trunk)
54
148
  // Copyright (C) 2004-2007 Koichi Sasada
55
149
  // to support tid_for (see below)
@@ -128,7 +222,12 @@ VALUE ddtrace_thread_list(void) {
128
222
  rb_ractor_t *current_ractor = GET_RACTOR();
129
223
  ccan_list_for_each(&current_ractor->threads.set, thread, lt_node) {
130
224
  #else
131
- rb_vm_t *vm = thread_struct_from_object(rb_thread_current())->vm;
225
+ rb_vm_t *vm =
226
+ #ifndef NO_GET_VM
227
+ GET_VM();
228
+ #else
229
+ thread_struct_from_object(rb_thread_current())->vm;
230
+ #endif
132
231
  list_for_each(&vm->living_threads, thread, vmlt_node) {
133
232
  #endif
134
233
  switch (thread->status) {
@@ -284,9 +383,6 @@ calc_lineno(const rb_iseq_t *iseq, const VALUE *pc)
284
383
  // * Add `end_cfp == NULL` and `end_cfp <= cfp` safety checks. These are used in a bunch of places in
285
384
  // `vm_backtrace.c` (`backtrace_each`, `backtrace_size`, `rb_ec_partial_backtrace_object`) but are conspicuously
286
385
  // absent from `rb_profile_frames`. Oversight?
287
- // * Distinguish between `end_cfp == NULL` (dead thread or some other error, returns 0) and `end_cfp <= cfp`
288
- // (alive thread which may just be executing native code and has not pushed anything on the Ruby stack, returns
289
- // PLACEHOLDER_STACK_IN_NATIVE_CODE). See comments on `record_placeholder_stack_in_native_code` for more details.
290
386
  // * Skip frames where `cfp->iseq && !cfp->pc`. These seem to be internal and are skipped by `backtrace_each` in
291
387
  // `vm_backtrace.c`.
292
388
  // * Check thread status and do not sample if thread has been killed.
@@ -340,13 +436,13 @@ int ddtrace_rb_profile_frames(VALUE thread, int start, int limit, VALUE *buff, i
340
436
  const rb_control_frame_t *cfp = ec->cfp, *end_cfp = RUBY_VM_END_CONTROL_FRAME(ec);
341
437
  const rb_callable_method_entry_t *cme;
342
438
 
343
- // `vm_backtrace.c` includes this check in several methods, and I think this happens on either dead or newly-created
344
- // threads, but I'm not entirely sure
345
- if (end_cfp == NULL) return 0;
346
-
347
439
  // Avoid sampling dead threads
348
440
  if (th->status == THREAD_KILLED) return 0;
349
441
 
442
+ // `vm_backtrace.c` includes this check in several methods. This happens on newly-created threads, and may
443
+ // also (not entirely sure) happen on dead threads
444
+ if (end_cfp == NULL) return PLACEHOLDER_STACK_IN_NATIVE_CODE;
445
+
350
446
  // Fix: Skip dummy frame that shows up in main thread.
351
447
  //
352
448
  // According to a comment in `backtrace_each` (`vm_backtrace.c`), there are two dummy frames that we should ignore
@@ -650,9 +746,6 @@ calc_lineno(const rb_iseq_t *iseq, const VALUE *pc)
650
746
  // * Add `end_cfp == NULL` and `end_cfp <= cfp` safety checks. These are used in a bunch of places in
651
747
  // `vm_backtrace.c` (`backtrace_each`, `backtrace_size`, `rb_ec_partial_backtrace_object`) but are conspicuously
652
748
  // absent from `rb_profile_frames`. Oversight?
653
- // * Distinguish between `end_cfp == NULL` (dead thread or some other error, returns 0) and `end_cfp <= cfp`
654
- // (alive thread which may just be executing native code and has not pushed anything on the Ruby stack, returns
655
- // PLACEHOLDER_STACK_IN_NATIVE_CODE). See comments on `record_placeholder_stack_in_native_code` for more details.
656
749
  // * Check thread status and do not sample if thread has been killed.
657
750
  //
658
751
  // The `rb_profile_frames` function changed quite a bit between Ruby 2.2 and 2.3. Since the change was quite complex
@@ -667,13 +760,13 @@ int ddtrace_rb_profile_frames(VALUE thread, int start, int limit, VALUE *buff, i
667
760
  rb_thread_t *th = thread_struct_from_object(thread);
668
761
  rb_control_frame_t *cfp = th->cfp, *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
669
762
 
670
- // `vm_backtrace.c` includes this check in several methods, and I think this happens on either dead or newly-created
671
- // threads, but I'm not entirely sure
672
- if (end_cfp == NULL) return 0;
673
-
674
763
  // Avoid sampling dead threads
675
764
  if (th->status == THREAD_KILLED) return 0;
676
765
 
766
+ // `vm_backtrace.c` includes this check in several methods. This happens on newly-created threads, and may
767
+ // also (not entirely sure) happen on dead threads
768
+ if (end_cfp == NULL) return PLACEHOLDER_STACK_IN_NATIVE_CODE;
769
+
677
770
  // Fix: Skip dummy frame that shows up in main thread.
678
771
  //
679
772
  // According to a comment in `backtrace_each` (`vm_backtrace.c`), there are two dummy frames that we should ignore
@@ -717,15 +810,6 @@ int ddtrace_rb_profile_frames(VALUE thread, int start, int limit, VALUE *buff, i
717
810
 
718
811
  #endif // USE_LEGACY_RB_PROFILE_FRAMES
719
812
 
720
- #ifdef NO_THREAD_HAS_GVL
721
- int ruby_thread_has_gvl_p(void) {
722
- // TODO: The CpuAndWallTimeWorker needs this function, but Ruby 2.2 doesn't expose it... For now this placeholder
723
- // will enable the profiling native extension to continue to compile on Ruby 2.2, but the CpuAndWallTimeWorker will
724
- // not work properly on 2.2. Will be addressed later.
725
- return 0;
726
- }
727
- #endif // NO_THREAD_HAS_GVL
728
-
729
813
  #ifndef NO_RACTORS
730
814
  // This API and definition are exported as a public symbol by the VM BUT the function header is not defined in any public header, so we
731
815
  // repeat it here to be able to use in our code.
@@ -7,11 +7,20 @@
7
7
  // without also dragging the incompatible includes
8
8
  #ifndef PRIVATE_VM_API_ACCESS_SKIP_RUBY_INCLUDES
9
9
  #include <ruby/thread_native.h>
10
+ #include <ruby/vm.h>
10
11
  #endif
11
12
 
12
13
  #include "extconf.h"
13
14
 
15
+ // Contains the current gvl owner, and a flag to indicate if it is valid
16
+ typedef struct {
17
+ bool valid;
18
+ rb_nativethread_id_t owner;
19
+ } current_gvl_owner;
20
+
14
21
  rb_nativethread_id_t pthread_id_for(VALUE thread);
22
+ bool is_current_thread_holding_the_gvl(void);
23
+ current_gvl_owner gvl_owner(void);
15
24
  uint64_t native_thread_id_for(VALUE thread);
16
25
  ptrdiff_t stack_depth_for(VALUE thread);
17
26
  VALUE ddtrace_thread_list(void);
@@ -1,18 +1,37 @@
1
1
  #include <ruby.h>
2
+ #include <ruby/thread.h>
3
+ #include <errno.h>
2
4
 
3
5
  #include "clock_id.h"
4
6
  #include "helpers.h"
5
7
  #include "private_vm_api_access.h"
8
+ #include "ruby_helpers.h"
9
+ #include "setup_signal_handler.h"
10
+ #include "time_helpers.h"
6
11
 
7
12
  // Each class/module here is implemented in their separate file
8
13
  void collectors_cpu_and_wall_time_init(VALUE profiling_module);
9
14
  void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module);
15
+ void collectors_dynamic_sampling_rate_init(VALUE profiling_module);
16
+ void collectors_idle_sampling_helper_init(VALUE profiling_module);
10
17
  void collectors_stack_init(VALUE profiling_module);
11
18
  void http_transport_init(VALUE profiling_module);
12
19
  void stack_recorder_init(VALUE profiling_module);
13
20
 
14
21
  static VALUE native_working_p(VALUE self);
22
+ static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE exception_class, VALUE test_message, VALUE test_message_arg, VALUE release_gvl);
23
+ static void *trigger_grab_gvl_and_raise(void *trigger_args);
24
+ static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE syserr_errno, VALUE test_message, VALUE test_message_arg, VALUE release_gvl);
25
+ static void *trigger_grab_gvl_and_raise_syserr(void *trigger_args);
15
26
  static VALUE _native_ddtrace_rb_ractor_main_p(DDTRACE_UNUSED VALUE _self);
27
+ static VALUE _native_is_current_thread_holding_the_gvl(DDTRACE_UNUSED VALUE _self);
28
+ static VALUE _native_release_gvl_and_call_is_current_thread_holding_the_gvl(DDTRACE_UNUSED VALUE _self);
29
+ static void *testing_is_current_thread_holding_the_gvl(DDTRACE_UNUSED void *_unused);
30
+ static VALUE _native_install_holding_the_gvl_signal_handler(DDTRACE_UNUSED VALUE _self);
31
+ static void holding_the_gvl_signal_handler(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
32
+ static VALUE _native_trigger_holding_the_gvl_signal_handler_on(DDTRACE_UNUSED VALUE _self, VALUE background_thread);
33
+ static VALUE _native_enforce_success(DDTRACE_UNUSED VALUE _self, VALUE syserr_errno, VALUE with_gvl);
34
+ static void *trigger_enforce_success(void *trigger_args);
16
35
 
17
36
  void DDTRACE_EXPORT Init_ddtrace_profiling_native_extension(void) {
18
37
  VALUE datadog_module = rb_define_module("Datadog");
@@ -26,13 +45,27 @@ void DDTRACE_EXPORT Init_ddtrace_profiling_native_extension(void) {
26
45
 
27
46
  collectors_cpu_and_wall_time_init(profiling_module);
28
47
  collectors_cpu_and_wall_time_worker_init(profiling_module);
48
+ collectors_dynamic_sampling_rate_init(profiling_module);
49
+ collectors_idle_sampling_helper_init(profiling_module);
29
50
  collectors_stack_init(profiling_module);
30
51
  http_transport_init(profiling_module);
31
52
  stack_recorder_init(profiling_module);
32
53
 
33
54
  // Hosts methods used for testing the native code using RSpec
34
55
  VALUE testing_module = rb_define_module_under(native_extension_module, "Testing");
56
+ rb_define_singleton_method(testing_module, "_native_grab_gvl_and_raise", _native_grab_gvl_and_raise, 4);
57
+ rb_define_singleton_method(testing_module, "_native_grab_gvl_and_raise_syserr", _native_grab_gvl_and_raise_syserr, 4);
35
58
  rb_define_singleton_method(testing_module, "_native_ddtrace_rb_ractor_main_p", _native_ddtrace_rb_ractor_main_p, 0);
59
+ rb_define_singleton_method(testing_module, "_native_is_current_thread_holding_the_gvl", _native_is_current_thread_holding_the_gvl, 0);
60
+ rb_define_singleton_method(
61
+ testing_module,
62
+ "_native_release_gvl_and_call_is_current_thread_holding_the_gvl",
63
+ _native_release_gvl_and_call_is_current_thread_holding_the_gvl,
64
+ 0
65
+ );
66
+ rb_define_singleton_method(testing_module, "_native_install_holding_the_gvl_signal_handler", _native_install_holding_the_gvl_signal_handler, 0);
67
+ rb_define_singleton_method(testing_module, "_native_trigger_holding_the_gvl_signal_handler_on", _native_trigger_holding_the_gvl_signal_handler_on, 1);
68
+ rb_define_singleton_method(testing_module, "_native_enforce_success", _native_enforce_success, 2);
36
69
  }
37
70
 
38
71
  static VALUE native_working_p(DDTRACE_UNUSED VALUE _self) {
@@ -41,6 +74,178 @@ static VALUE native_working_p(DDTRACE_UNUSED VALUE _self) {
41
74
  return Qtrue;
42
75
  }
43
76
 
77
+ struct trigger_grab_gvl_and_raise_arguments {
78
+ VALUE exception_class;
79
+ char *test_message;
80
+ int test_message_arg;
81
+ };
82
+
83
+ static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE exception_class, VALUE test_message, VALUE test_message_arg, VALUE release_gvl) {
84
+ ENFORCE_TYPE(test_message, T_STRING);
85
+
86
+ struct trigger_grab_gvl_and_raise_arguments args;
87
+
88
+ args.exception_class = exception_class;
89
+ args.test_message = StringValueCStr(test_message);
90
+ args.test_message_arg = test_message_arg != Qnil ? NUM2INT(test_message_arg) : -1;
91
+
92
+ if (RTEST(release_gvl)) {
93
+ rb_thread_call_without_gvl(trigger_grab_gvl_and_raise, &args, NULL, NULL);
94
+ } else {
95
+ grab_gvl_and_raise(args.exception_class, "%s", args.test_message);
96
+ }
97
+
98
+ rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen");
99
+ }
100
+
101
+ static void *trigger_grab_gvl_and_raise(void *trigger_args) {
102
+ struct trigger_grab_gvl_and_raise_arguments *args = (struct trigger_grab_gvl_and_raise_arguments *) trigger_args;
103
+
104
+ if (args->test_message_arg >= 0) {
105
+ grab_gvl_and_raise(args->exception_class, "%s%d", args->test_message, args->test_message_arg);
106
+ } else {
107
+ grab_gvl_and_raise(args->exception_class, "%s", args->test_message);
108
+ }
109
+
110
+ return NULL;
111
+ }
112
+
113
// Bundle of arguments passed (as a `void *`) from `_native_grab_gvl_and_raise_syserr` to
// `trigger_grab_gvl_and_raise_syserr`
struct trigger_grab_gvl_and_raise_syserr_arguments {
  int syserr_errno;     // errno value forwarded to `grab_gvl_and_raise_syserr`
  char *test_message;   // Message (or message prefix) for the exception
  int test_message_arg; // Appended to the message when >= 0; -1 is stored when the Ruby caller passed nil
};
118
+
119
+ static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE syserr_errno, VALUE test_message, VALUE test_message_arg, VALUE release_gvl) {
120
+ ENFORCE_TYPE(test_message, T_STRING);
121
+
122
+ struct trigger_grab_gvl_and_raise_syserr_arguments args;
123
+
124
+ args.syserr_errno = NUM2INT(syserr_errno);
125
+ args.test_message = StringValueCStr(test_message);
126
+ args.test_message_arg = test_message_arg != Qnil ? NUM2INT(test_message_arg) : -1;
127
+
128
+ if (RTEST(release_gvl)) {
129
+ rb_thread_call_without_gvl(trigger_grab_gvl_and_raise_syserr, &args, NULL, NULL);
130
+ } else {
131
+ grab_gvl_and_raise_syserr(args.syserr_errno, "%s", args.test_message);
132
+ }
133
+
134
+ rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen");
135
+ }
136
+
137
+ static void *trigger_grab_gvl_and_raise_syserr(void *trigger_args) {
138
+ struct trigger_grab_gvl_and_raise_syserr_arguments *args = (struct trigger_grab_gvl_and_raise_syserr_arguments *) trigger_args;
139
+
140
+ if (args->test_message_arg >= 0) {
141
+ grab_gvl_and_raise_syserr(args->syserr_errno, "%s%d", args->test_message, args->test_message_arg);
142
+ } else {
143
+ grab_gvl_and_raise_syserr(args->syserr_errno, "%s", args->test_message);
144
+ }
145
+
146
+ return NULL;
147
+ }
148
+
44
149
  static VALUE _native_ddtrace_rb_ractor_main_p(DDTRACE_UNUSED VALUE _self) {
45
150
  return ddtrace_rb_ractor_main_p() ? Qtrue : Qfalse;
46
151
  }
152
+
153
+ static VALUE _native_is_current_thread_holding_the_gvl(DDTRACE_UNUSED VALUE _self) {
154
+ return ((bool) testing_is_current_thread_holding_the_gvl(NULL)) ? Qtrue : Qfalse;
155
+ }
156
+
157
+ static VALUE _native_release_gvl_and_call_is_current_thread_holding_the_gvl(DDTRACE_UNUSED VALUE _self) {
158
+ return ((bool) rb_thread_call_without_gvl(testing_is_current_thread_holding_the_gvl, NULL, NULL, NULL)) ? Qtrue : Qfalse;
159
+ }
160
+
161
+ static void *testing_is_current_thread_holding_the_gvl(DDTRACE_UNUSED void *_unused) {
162
+ return (void *) is_current_thread_holding_the_gvl();
163
+ }
164
+
165
+ static VALUE _native_install_holding_the_gvl_signal_handler(DDTRACE_UNUSED VALUE _self) {
166
+ install_sigprof_signal_handler(holding_the_gvl_signal_handler, "holding_the_gvl_signal_handler");
167
+ return Qtrue;
168
+ }
169
+
170
+ static pthread_mutex_t holding_the_gvl_signal_handler_mutex = PTHREAD_MUTEX_INITIALIZER;
171
+ static pthread_cond_t holding_the_gvl_signal_handler_executed = PTHREAD_COND_INITIALIZER;
172
+ static VALUE holding_the_gvl_signal_handler_result[3];
173
+
174
+ // Ruby VM API that is exported but not present in the header files. Only used by holding_the_gvl_signal_handler below and SHOULD NOT
175
+ // be used in any other situation. See the comments on is_current_thread_holding_the_gvl for details.
176
+ int ruby_thread_has_gvl_p(void);
177
+
178
+ static void holding_the_gvl_signal_handler(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext) {
179
+ pthread_mutex_lock(&holding_the_gvl_signal_handler_mutex);
180
+
181
+ VALUE test_executed = Qtrue;
182
+ VALUE ruby_thread_has_gvl_p_result = ruby_thread_has_gvl_p() ? Qtrue : Qfalse;
183
+ VALUE is_current_thread_holding_the_gvl_result = is_current_thread_holding_the_gvl() ? Qtrue : Qfalse;
184
+
185
+ holding_the_gvl_signal_handler_result[0] = test_executed;
186
+ holding_the_gvl_signal_handler_result[1] = ruby_thread_has_gvl_p_result;
187
+ holding_the_gvl_signal_handler_result[2] = is_current_thread_holding_the_gvl_result;
188
+
189
+ pthread_cond_broadcast(&holding_the_gvl_signal_handler_executed);
190
+ pthread_mutex_unlock(&holding_the_gvl_signal_handler_mutex);
191
+ }
192
+
193
+ static VALUE _native_trigger_holding_the_gvl_signal_handler_on(DDTRACE_UNUSED VALUE _self, VALUE background_thread) {
194
+ holding_the_gvl_signal_handler_result[0] = Qfalse;
195
+ holding_the_gvl_signal_handler_result[1] = Qfalse;
196
+ holding_the_gvl_signal_handler_result[2] = Qfalse;
197
+
198
+ rb_nativethread_id_t thread = pthread_id_for(background_thread);
199
+
200
+ ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&holding_the_gvl_signal_handler_mutex));
201
+
202
+ // We keep trying for ~5 seconds (500 x 10ms) to try to avoid any flakiness if the test machine is a bit slow
203
+ for (int tries = 0; holding_the_gvl_signal_handler_result[0] == Qfalse && tries < 500; tries++) {
204
+ pthread_kill(thread, SIGPROF);
205
+
206
+ // pthread_cond_timedwait is simply awful -- the deadline is based on wall-clock using a struct timespec, so we need
207
+ // all of the below complexity just to tell it "timeout is 10ms". The % limit dance below is needed because the
208
+ // `tv_nsec` part of a timespec can't go over the limit.
209
+ struct timespec deadline;
210
+ clock_gettime(CLOCK_REALTIME, &deadline);
211
+
212
+ unsigned int timeout_ns = MILLIS_AS_NS(10);
213
+ unsigned int tv_nsec_limit = SECONDS_AS_NS(1);
214
+ if ((deadline.tv_nsec + timeout_ns) < tv_nsec_limit) {
215
+ deadline.tv_nsec += timeout_ns;
216
+ } else {
217
+ deadline.tv_nsec = (deadline.tv_nsec + timeout_ns) % tv_nsec_limit;
218
+ deadline.tv_sec++;
219
+ }
220
+
221
+ int error = pthread_cond_timedwait(&holding_the_gvl_signal_handler_executed, &holding_the_gvl_signal_handler_mutex, &deadline);
222
+ if (error && error != ETIMEDOUT) ENFORCE_SUCCESS_GVL(error);
223
+ }
224
+
225
+ ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(&holding_the_gvl_signal_handler_mutex));
226
+
227
+ replace_sigprof_signal_handler_with_empty_handler(holding_the_gvl_signal_handler);
228
+
229
+ if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(rb_eRuntimeError, "Could not signal background_thread");
230
+
231
+ VALUE result = rb_hash_new();
232
+ rb_hash_aset(result, ID2SYM(rb_intern("ruby_thread_has_gvl_p")), holding_the_gvl_signal_handler_result[1]);
233
+ rb_hash_aset(result, ID2SYM(rb_intern("is_current_thread_holding_the_gvl")), holding_the_gvl_signal_handler_result[2]);
234
+ return result;
235
+ }
236
+
237
+ static VALUE _native_enforce_success(DDTRACE_UNUSED VALUE _self, VALUE syserr_errno, VALUE with_gvl) {
238
+ if (RTEST(with_gvl)) {
239
+ ENFORCE_SUCCESS_GVL(NUM2INT(syserr_errno));
240
+ } else {
241
+ rb_thread_call_without_gvl(trigger_enforce_success, (void *) (intptr_t) NUM2INT(syserr_errno), NULL, NULL);
242
+ }
243
+
244
+ return Qtrue;
245
+ }
246
+
247
// Callback run via `rb_thread_call_without_gvl`: decodes the errno value carried in the `void *` argument and hands
// it to ENFORCE_SUCCESS_NO_GVL.
static void *trigger_enforce_success(void *trigger_args) {
  intptr_t syserr_errno;

  syserr_errno = (intptr_t) trigger_args;
  ENFORCE_SUCCESS_NO_GVL(syserr_errno);

  return NULL;
}
@@ -1,4 +1,8 @@
1
+ #include <ruby.h>
2
+ #include <ruby/thread.h>
3
+
1
4
  #include "ruby_helpers.h"
5
+ #include "private_vm_api_access.h"
2
6
 
3
7
  void raise_unexpected_type(
4
8
  VALUE value,
@@ -22,3 +26,85 @@ void raise_unexpected_type(
22
26
  )
23
27
  );
24
28
  }
29
+
30
+ #define MAX_RAISE_MESSAGE_SIZE 256
31
+
32
+ struct raise_arguments { // Payload handed to trigger_raise through rb_thread_call_with_gvl
33
+ VALUE exception_class; // Exception class passed as the first argument to rb_raise
34
+ char exception_message[MAX_RAISE_MESSAGE_SIZE]; // Already-formatted message; vsnprintf truncates it to fit this buffer
35
+ };
36
+
37
+ static void *trigger_raise(void *raise_arguments) {
38
+ struct raise_arguments *args = (struct raise_arguments *) raise_arguments;
39
+ rb_raise(args->exception_class, "%s", args->exception_message);
40
+ }
41
+
42
+ void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...) {
43
+ struct raise_arguments args;
44
+
45
+ args.exception_class = exception_class;
46
+
47
+ va_list format_string_arguments;
48
+ va_start(format_string_arguments, format_string);
49
+ vsnprintf(args.exception_message, MAX_RAISE_MESSAGE_SIZE, format_string, format_string_arguments);
50
+
51
+ if (is_current_thread_holding_the_gvl()) {
52
+ rb_raise(
53
+ rb_eRuntimeError,
54
+ "grab_gvl_and_raise called by thread holding the global VM lock. exception_message: '%s'",
55
+ args.exception_message
56
+ );
57
+ }
58
+
59
+ rb_thread_call_with_gvl(trigger_raise, &args);
60
+
61
+ rb_bug("[DDTRACE] Unexpected: Reached the end of grab_gvl_and_raise while raising '%s'\n", args.exception_message);
62
+ }
63
+
64
+ struct syserr_raise_arguments { // Payload handed to trigger_syserr_raise through rb_thread_call_with_gvl
65
+ int syserr_errno; // errno-style error code passed as the first argument to rb_syserr_fail
66
+ char exception_message[MAX_RAISE_MESSAGE_SIZE]; // Already-formatted message; vsnprintf truncates it to fit this buffer
67
+ };
68
+
69
+ static void *trigger_syserr_raise(void *syserr_raise_arguments) {
70
+ struct syserr_raise_arguments *args = (struct syserr_raise_arguments *) syserr_raise_arguments;
71
+ rb_syserr_fail(args->syserr_errno, args->exception_message);
72
+ }
73
+
74
+ void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...) {
75
+ struct syserr_raise_arguments args;
76
+
77
+ args.syserr_errno = syserr_errno;
78
+
79
+ va_list format_string_arguments;
80
+ va_start(format_string_arguments, format_string);
81
+ vsnprintf(args.exception_message, MAX_RAISE_MESSAGE_SIZE, format_string, format_string_arguments);
82
+
83
+ if (is_current_thread_holding_the_gvl()) {
84
+ rb_raise(
85
+ rb_eRuntimeError,
86
+ "grab_gvl_and_raise_syserr called by thread holding the global VM lock. syserr_errno: %d, exception_message: '%s'",
87
+ syserr_errno,
88
+ args.exception_message
89
+ );
90
+ }
91
+
92
+ rb_thread_call_with_gvl(trigger_syserr_raise, &args);
93
+
94
+ rb_bug("[DDTRACE] Unexpected: Reached the end of grab_gvl_and_raise_syserr while raising '%s'\n", args.exception_message);
95
+ }
96
+
97
+ void raise_syserr(
98
+ int syserr_errno,
99
+ bool have_gvl,
100
+ const char *expression,
101
+ const char *file,
102
+ int line,
103
+ const char *function_name
104
+ ) {
105
+ if (have_gvl) {
106
+ rb_exc_raise(rb_syserr_new_str(syserr_errno, rb_sprintf("Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name)));
107
+ } else {
108
+ grab_gvl_and_raise_syserr(syserr_errno, "Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name);
109
+ }
110
+ }
@@ -1,6 +1,7 @@
1
1
  #pragma once
2
2
 
3
3
  #include <ruby.h>
4
+ #include <stdbool.h>
4
5
 
5
6
  #include "helpers.h"
6
7
 
@@ -57,11 +58,32 @@ NORETURN(void raise_unexpected_type(
57
58
  const char *type_name,
58
59
  const char *file,
59
60
  int line,
60
- const char* function_name
61
+ const char *function_name
61
62
  ));
62
63
 
63
- // This API is exported as a public symbol by the VM BUT the function header is not defined in any public header, so we
64
- // repeat it here to be able to use in our code.
65
- //
66
- // Queries if the current thread is the owner of the global VM lock.
67
- int ruby_thread_has_gvl_p(void);
64
+ #define VALUE_COUNT(array) (sizeof(array) / sizeof(VALUE))
65
+
66
+ NORETURN(
67
+ void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...)
68
+ __attribute__ ((format (printf, 2, 3)));
69
+ );
70
// Same as grab_gvl_and_raise, but raises the system-error exception that corresponds to `syserr_errno`. Never returns.
//
// Note: there is deliberately no ';' inside the NORETURN(...) argument -- the declaration is terminated only once, after
// the macro, so the expansion stays a single declaration however NORETURN places its attribute.
NORETURN(
  void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...)
  __attribute__ ((format (printf, 2, 3)))
);
74
+
75
+ #define ENFORCE_SUCCESS_GVL(expression) ENFORCE_SUCCESS_HELPER(expression, true)
76
+ #define ENFORCE_SUCCESS_NO_GVL(expression) ENFORCE_SUCCESS_HELPER(expression, false)
77
+
78
// Evaluates `expression` exactly once; a non-zero result is treated as an errno-style failure and reported through
// raise_syserr, together with the expression text and the file/line/function where the macro was used.
//
// The body is wrapped in do { ... } while (0) so that the macro behaves as a single statement: it must be followed by
// a ';' and can safely be used as the unbraced body of an if/else.
#define ENFORCE_SUCCESS_HELPER(expression, have_gvl) \
  do { \
    int result_syserr_errno = (expression); \
    if (RB_UNLIKELY(result_syserr_errno)) raise_syserr(result_syserr_errno, have_gvl, ADD_QUOTES(expression), __FILE__, __LINE__, __func__); \
  } while (0)
80
+
81
+ // Called by ENFORCE_SUCCESS_HELPER; should not be used directly
82
+ NORETURN(void raise_syserr(
83
+ int syserr_errno,
84
+ bool have_gvl,
85
+ const char *expression,
86
+ const char *file,
87
+ int line,
88
+ const char *function_name
89
+ ));
@@ -5,6 +5,9 @@
5
5
 
6
6
  #include "helpers.h"
7
7
  #include "setup_signal_handler.h"
8
+ #include "ruby_helpers.h"
9
+
10
+ // Used by Collectors::CpuAndWallTimeWorker to set up the SIGPROF signal handlers used for cpu/wall-time profiling.
8
11
 
9
12
  static void install_sigprof_signal_handler_internal(
10
13
  void (*signal_handler_function)(int, siginfo_t *, void *),
@@ -88,9 +91,25 @@ void remove_sigprof_signal_handler(void) {
88
91
  if (sigaction(SIGPROF, &signal_handler_config, NULL) != 0) rb_sys_fail("Failure while removing the signal handler");
89
92
  }
90
93
 
94
+ static void toggle_sigprof_signal_handler_for_current_thread(int action) {
95
+ sigset_t signals_to_toggle;
96
+ sigemptyset(&signals_to_toggle);
97
+ sigaddset(&signals_to_toggle, SIGPROF);
98
+ int error = pthread_sigmask(action, &signals_to_toggle, NULL);
99
+ if (error) rb_exc_raise(rb_syserr_new_str(error, rb_sprintf("Unexpected failure in pthread_sigmask, action=%d", action)));
100
+ }
101
+
91
102
  void block_sigprof_signal_handler_from_running_in_current_thread(void) {
92
- sigset_t signals_to_block;
93
- sigemptyset(&signals_to_block);
94
- sigaddset(&signals_to_block, SIGPROF);
95
- pthread_sigmask(SIG_BLOCK, &signals_to_block, NULL);
103
+ toggle_sigprof_signal_handler_for_current_thread(SIG_BLOCK);
104
+ }
105
+
106
+ void unblock_sigprof_signal_handler_from_running_in_current_thread(void) { // Counterpart of block_sigprof_signal_handler_from_running_in_current_thread
107
+ toggle_sigprof_signal_handler_for_current_thread(SIG_UNBLOCK); // Raises a Ruby exception if pthread_sigmask reports an error
108
+ }
109
+
110
+ VALUE is_sigprof_blocked_in_current_thread(void) {
111
+ sigset_t current_signals;
112
+ sigemptyset(&current_signals);
113
+ ENFORCE_SUCCESS_GVL(pthread_sigmask(0, NULL, &current_signals));
114
+ return sigismember(&current_signals, SIGPROF) ? Qtrue : Qfalse;
96
115
  }
@@ -1,7 +1,11 @@
1
1
  #pragma once
2
2
 
3
+ #include <signal.h>
4
+
3
5
  void empty_signal_handler(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
4
6
  void install_sigprof_signal_handler(void (*signal_handler_function)(int, siginfo_t *, void *), const char *handler_pretty_name);
5
7
  void replace_sigprof_signal_handler_with_empty_handler(void (*expected_existing_handler)(int, siginfo_t *, void *));
6
8
  void remove_sigprof_signal_handler(void);
7
9
  void block_sigprof_signal_handler_from_running_in_current_thread(void);
10
+ void unblock_sigprof_signal_handler_from_running_in_current_thread(void);
11
+ VALUE is_sigprof_blocked_in_current_thread(void);