ddtrace 1.18.0 → 1.23.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +228 -2
  3. data/LICENSE-3rdparty.csv +1 -1
  4. data/bin/ddprofrb +15 -0
  5. data/bin/ddtracerb +3 -1
  6. data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
  7. data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
  8. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +312 -117
  9. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +422 -0
  10. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +101 -0
  11. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +22 -14
  12. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +4 -0
  13. data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  14. data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  15. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +43 -102
  16. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +10 -3
  17. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +272 -136
  18. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +2 -1
  19. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +28 -7
  20. data/ext/datadog_profiling_native_extension/heap_recorder.c +1047 -0
  21. data/ext/datadog_profiling_native_extension/heap_recorder.h +166 -0
  22. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +6 -0
  23. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
  24. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +20 -0
  25. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +11 -0
  26. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +50 -4
  27. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +19 -0
  28. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
  29. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +18 -1
  30. data/ext/datadog_profiling_native_extension/ruby_helpers.c +267 -0
  31. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +33 -0
  32. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +476 -58
  33. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +3 -0
  34. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +2 -0
  35. data/lib/datadog/appsec/contrib/devise/tracking.rb +8 -0
  36. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +45 -14
  37. data/lib/datadog/appsec/event.rb +1 -1
  38. data/lib/datadog/auto_instrument.rb +3 -0
  39. data/lib/datadog/core/configuration/components.rb +7 -6
  40. data/lib/datadog/core/configuration/option.rb +8 -6
  41. data/lib/datadog/core/configuration/settings.rb +259 -60
  42. data/lib/datadog/core/configuration.rb +20 -4
  43. data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
  44. data/lib/datadog/core/environment/class_count.rb +6 -6
  45. data/lib/datadog/core/environment/git.rb +25 -0
  46. data/lib/datadog/core/environment/identity.rb +18 -48
  47. data/lib/datadog/core/environment/platform.rb +7 -1
  48. data/lib/datadog/core/git/ext.rb +2 -23
  49. data/lib/datadog/core/remote/client/capabilities.rb +1 -1
  50. data/lib/datadog/core/remote/component.rb +25 -12
  51. data/lib/datadog/core/remote/ext.rb +1 -0
  52. data/lib/datadog/core/remote/negotiation.rb +2 -2
  53. data/lib/datadog/core/remote/tie/tracing.rb +39 -0
  54. data/lib/datadog/core/remote/tie.rb +27 -0
  55. data/lib/datadog/core/remote/transport/http/config.rb +1 -1
  56. data/lib/datadog/core/remote/worker.rb +7 -4
  57. data/lib/datadog/core/telemetry/client.rb +18 -10
  58. data/lib/datadog/core/telemetry/emitter.rb +9 -13
  59. data/lib/datadog/core/telemetry/event.rb +247 -56
  60. data/lib/datadog/core/telemetry/ext.rb +4 -0
  61. data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
  62. data/lib/datadog/core/telemetry/http/ext.rb +4 -1
  63. data/lib/datadog/core/telemetry/http/response.rb +4 -0
  64. data/lib/datadog/core/telemetry/http/transport.rb +9 -4
  65. data/lib/datadog/core/telemetry/request.rb +59 -0
  66. data/lib/datadog/core/transport/ext.rb +2 -0
  67. data/lib/datadog/core/utils/url.rb +25 -0
  68. data/lib/datadog/opentelemetry/sdk/propagator.rb +3 -2
  69. data/lib/datadog/opentelemetry.rb +3 -0
  70. data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
  71. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +36 -12
  72. data/lib/datadog/profiling/collectors/info.rb +101 -0
  73. data/lib/datadog/profiling/component.rb +210 -34
  74. data/lib/datadog/profiling/exporter.rb +23 -6
  75. data/lib/datadog/profiling/ext.rb +2 -0
  76. data/lib/datadog/profiling/flush.rb +6 -3
  77. data/lib/datadog/profiling/http_transport.rb +5 -1
  78. data/lib/datadog/profiling/load_native_extension.rb +19 -6
  79. data/lib/datadog/profiling/native_extension.rb +1 -1
  80. data/lib/datadog/profiling/scheduler.rb +4 -6
  81. data/lib/datadog/profiling/stack_recorder.rb +19 -4
  82. data/lib/datadog/profiling/tag_builder.rb +5 -0
  83. data/lib/datadog/profiling/tasks/exec.rb +3 -3
  84. data/lib/datadog/profiling/tasks/help.rb +3 -3
  85. data/lib/datadog/profiling.rb +13 -2
  86. data/lib/datadog/tracing/configuration/ext.rb +0 -1
  87. data/lib/datadog/tracing/configuration/settings.rb +2 -1
  88. data/lib/datadog/tracing/contrib/action_cable/configuration/settings.rb +1 -0
  89. data/lib/datadog/tracing/contrib/action_cable/ext.rb +1 -0
  90. data/lib/datadog/tracing/contrib/action_mailer/configuration/settings.rb +1 -0
  91. data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
  92. data/lib/datadog/tracing/contrib/action_mailer/ext.rb +1 -0
  93. data/lib/datadog/tracing/contrib/action_pack/configuration/settings.rb +1 -0
  94. data/lib/datadog/tracing/contrib/action_pack/ext.rb +1 -0
  95. data/lib/datadog/tracing/contrib/action_view/configuration/settings.rb +1 -0
  96. data/lib/datadog/tracing/contrib/action_view/ext.rb +1 -0
  97. data/lib/datadog/tracing/contrib/active_job/configuration/settings.rb +1 -0
  98. data/lib/datadog/tracing/contrib/active_job/ext.rb +1 -0
  99. data/lib/datadog/tracing/contrib/active_model_serializers/configuration/settings.rb +1 -0
  100. data/lib/datadog/tracing/contrib/active_model_serializers/ext.rb +1 -0
  101. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
  102. data/lib/datadog/tracing/contrib/active_record/configuration/settings.rb +1 -0
  103. data/lib/datadog/tracing/contrib/active_record/ext.rb +1 -0
  104. data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +1 -0
  105. data/lib/datadog/tracing/contrib/active_support/ext.rb +1 -0
  106. data/lib/datadog/tracing/contrib/analytics.rb +0 -1
  107. data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +1 -0
  108. data/lib/datadog/tracing/contrib/aws/ext.rb +1 -0
  109. data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
  110. data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
  111. data/lib/datadog/tracing/contrib/configurable.rb +1 -1
  112. data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +1 -0
  113. data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
  114. data/lib/datadog/tracing/contrib/delayed_job/configuration/settings.rb +1 -0
  115. data/lib/datadog/tracing/contrib/delayed_job/ext.rb +1 -0
  116. data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +1 -0
  117. data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +1 -0
  118. data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +1 -0
  119. data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
  120. data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +1 -0
  121. data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
  122. data/lib/datadog/tracing/contrib/extensions.rb +6 -2
  123. data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +7 -0
  124. data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
  125. data/lib/datadog/tracing/contrib/faraday/middleware.rb +1 -1
  126. data/lib/datadog/tracing/contrib/grape/configuration/settings.rb +1 -0
  127. data/lib/datadog/tracing/contrib/grape/ext.rb +1 -0
  128. data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +1 -0
  129. data/lib/datadog/tracing/contrib/graphql/ext.rb +1 -0
  130. data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +1 -0
  131. data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
  132. data/lib/datadog/tracing/contrib/http/configuration/settings.rb +1 -0
  133. data/lib/datadog/tracing/contrib/http/distributed/fetcher.rb +2 -2
  134. data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
  135. data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +1 -0
  136. data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
  137. data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +1 -0
  138. data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
  139. data/lib/datadog/tracing/contrib/kafka/configuration/settings.rb +1 -0
  140. data/lib/datadog/tracing/contrib/kafka/ext.rb +1 -0
  141. data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +1 -0
  142. data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
  143. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
  144. data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
  145. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  146. data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +1 -0
  147. data/lib/datadog/tracing/contrib/opensearch/ext.rb +1 -0
  148. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +1 -0
  149. data/lib/datadog/tracing/contrib/pg/ext.rb +1 -0
  150. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
  151. data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +1 -0
  152. data/lib/datadog/tracing/contrib/presto/ext.rb +1 -0
  153. data/lib/datadog/tracing/contrib/qless/configuration/settings.rb +1 -0
  154. data/lib/datadog/tracing/contrib/qless/ext.rb +1 -0
  155. data/lib/datadog/tracing/contrib/que/configuration/settings.rb +1 -0
  156. data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
  157. data/lib/datadog/tracing/contrib/racecar/configuration/settings.rb +1 -0
  158. data/lib/datadog/tracing/contrib/racecar/ext.rb +1 -0
  159. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +1 -0
  160. data/lib/datadog/tracing/contrib/rack/ext.rb +1 -0
  161. data/lib/datadog/tracing/contrib/rack/middlewares.rb +9 -2
  162. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  163. data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +1 -0
  164. data/lib/datadog/tracing/contrib/rails/ext.rb +1 -0
  165. data/lib/datadog/tracing/contrib/rake/configuration/settings.rb +1 -0
  166. data/lib/datadog/tracing/contrib/rake/ext.rb +1 -0
  167. data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +1 -0
  168. data/lib/datadog/tracing/contrib/redis/ext.rb +1 -0
  169. data/lib/datadog/tracing/contrib/redis/instrumentation.rb +2 -2
  170. data/lib/datadog/tracing/contrib/redis/patcher.rb +34 -21
  171. data/lib/datadog/tracing/contrib/resque/configuration/settings.rb +1 -0
  172. data/lib/datadog/tracing/contrib/resque/ext.rb +1 -0
  173. data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +1 -0
  174. data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
  175. data/lib/datadog/tracing/contrib/roda/configuration/settings.rb +1 -0
  176. data/lib/datadog/tracing/contrib/roda/ext.rb +1 -0
  177. data/lib/datadog/tracing/contrib/sequel/configuration/settings.rb +1 -0
  178. data/lib/datadog/tracing/contrib/sequel/ext.rb +1 -0
  179. data/lib/datadog/tracing/contrib/shoryuken/configuration/settings.rb +1 -0
  180. data/lib/datadog/tracing/contrib/shoryuken/ext.rb +1 -0
  181. data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +1 -0
  182. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  183. data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +1 -0
  184. data/lib/datadog/tracing/contrib/sinatra/ext.rb +1 -0
  185. data/lib/datadog/tracing/contrib/sneakers/configuration/settings.rb +1 -0
  186. data/lib/datadog/tracing/contrib/sneakers/ext.rb +1 -0
  187. data/lib/datadog/tracing/contrib/stripe/configuration/settings.rb +1 -0
  188. data/lib/datadog/tracing/contrib/stripe/ext.rb +1 -0
  189. data/lib/datadog/tracing/contrib/sucker_punch/configuration/settings.rb +1 -0
  190. data/lib/datadog/tracing/contrib/sucker_punch/ext.rb +1 -0
  191. data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +58 -0
  192. data/lib/datadog/tracing/contrib/trilogy/ext.rb +27 -0
  193. data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +94 -0
  194. data/lib/datadog/tracing/contrib/trilogy/integration.rb +43 -0
  195. data/lib/datadog/tracing/contrib/trilogy/patcher.rb +31 -0
  196. data/lib/datadog/tracing/contrib.rb +1 -0
  197. data/lib/datadog/tracing/sampling/matcher.rb +23 -3
  198. data/lib/datadog/tracing/sampling/rule.rb +7 -2
  199. data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
  200. data/lib/datadog/tracing/trace_operation.rb +1 -2
  201. data/lib/datadog/tracing/transport/http.rb +1 -0
  202. data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
  203. data/lib/datadog/tracing.rb +8 -2
  204. data/lib/ddtrace/version.rb +2 -2
  205. metadata +71 -61
  206. data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
  207. data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
  208. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -110
  209. data/lib/datadog/core/telemetry/collector.rb +0 -240
  210. data/lib/datadog/core/telemetry/v1/app_event.rb +0 -52
  211. data/lib/datadog/core/telemetry/v1/application.rb +0 -92
  212. data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
  213. data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
  214. data/lib/datadog/core/telemetry/v1/host.rb +0 -59
  215. data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
  216. data/lib/datadog/core/telemetry/v1/product.rb +0 -36
  217. data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
  218. data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
  219. data/lib/datadog/core/telemetry/v2/request.rb +0 -29
  220. data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
  221. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
  222. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
  223. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
  224. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
  225. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
  226. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
  227. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
  228. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
  229. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c
@@ -12,10 +12,16 @@
  #include "collectors_thread_context.h"
  #include "collectors_dynamic_sampling_rate.h"
  #include "collectors_idle_sampling_helper.h"
+ #include "collectors_discrete_dynamic_sampler.h"
  #include "private_vm_api_access.h"
  #include "setup_signal_handler.h"
  #include "time_helpers.h"

+ #define ERR_CLOCK_FAIL "failed to get clock time"
+
+ // Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
+ unsigned int MAX_ALLOC_WEIGHT = 65535;
+
  // Used to trigger the execution of Collectors::ThreadState, which implements all of the sampling logic
  // itself; this class only implements the "when to do it" part.
  //
@@ -75,20 +81,28 @@
  //
  // ---

+ #ifndef NO_POSTPONED_TRIGGER
+ // Used to call the rb_postponed_job_trigger from Ruby 3.3+. These get initialized in
+ // `collectors_cpu_and_wall_time_worker_init` below and always get reused after that.
+ static rb_postponed_job_handle_t sample_from_postponed_job_handle;
+ static rb_postponed_job_handle_t after_gc_from_postponed_job_handle;
+ #endif
+
  // Contains state for a single CpuAndWallTimeWorker instance
  struct cpu_and_wall_time_worker_state {
  // These are immutable after initialization

  bool gc_profiling_enabled;
- bool allocation_counting_enabled;
  bool no_signals_workaround_enabled;
  bool dynamic_sampling_rate_enabled;
- int allocation_sample_every; // Temporarily used for development/testing of allocation profiling
+ bool allocation_profiling_enabled;
+ bool skip_idle_samples_for_testing;
  VALUE self_instance;
  VALUE thread_context_collector_instance;
  VALUE idle_sampling_helper_instance;
  VALUE owner_thread;
- dynamic_sampling_rate_state dynamic_sampling_rate;
+ dynamic_sampling_rate_state cpu_dynamic_sampling_rate;
+ discrete_dynamic_sampler allocation_sampler;
  VALUE gc_tracepoint; // Used to get gc start/finish information
  VALUE object_allocation_tracepoint; // Used to get allocation counts and allocation profiling

@@ -108,6 +122,7 @@ struct cpu_and_wall_time_worker_state {
  bool during_sample;

  struct stats {
+ // # Generic stats
  // How many times we tried to trigger a sample
  unsigned int trigger_sample_attempts;
  // How many times we tried to simulate signal delivery
@@ -118,25 +133,38 @@ struct cpu_and_wall_time_worker_state {
  unsigned int signal_handler_enqueued_sample;
  // How many times the signal handler was called from the wrong thread
  unsigned int signal_handler_wrong_thread;
- // How many times we actually sampled (except GC samples)
- unsigned int sampled;
- // How many times we skipped a sample because of the dynamic sampling rate mechanism
- unsigned int skipped_sample_because_of_dynamic_sampling_rate;
+ // How many times we actually tried to interrupt a thread for sampling
+ unsigned int interrupt_thread_attempts;

- // Stats for the results of calling rb_postponed_job_register_one
- // The same function was already waiting to be executed
+ // # Stats for the results of calling rb_postponed_job_register_one
+ // The same function was already waiting to be executed
  unsigned int postponed_job_skipped_already_existed;
- // The function was added to the queue successfully
+ // The function was added to the queue successfully
  unsigned int postponed_job_success;
- // The queue was full
+ // The queue was full
  unsigned int postponed_job_full;
- // The function returned an unknown result code
+ // The function returned an unknown result code
  unsigned int postponed_job_unknown_result;

- // Min/max/total wall-time spent sampling (except GC samples)
- uint64_t sampling_time_ns_min;
- uint64_t sampling_time_ns_max;
- uint64_t sampling_time_ns_total;
+ // # CPU/Walltime sampling stats
+ // How many times we actually CPU/wall sampled
+ unsigned int cpu_sampled;
+ // How many times we skipped a CPU/wall sample because of the dynamic sampling rate mechanism
+ unsigned int cpu_skipped;
+ // Min/max/total wall-time spent on CPU/wall sampling
+ uint64_t cpu_sampling_time_ns_min;
+ uint64_t cpu_sampling_time_ns_max;
+ uint64_t cpu_sampling_time_ns_total;
+
+ // # Allocation sampling stats
+ // How many times we actually allocation sampled
+ uint64_t allocation_sampled;
+ // How many times we skipped an allocation sample because of the dynamic sampling rate mechanism
+ uint64_t allocation_skipped;
+ // Min/max/total wall-time spent on allocation sampling
+ uint64_t allocation_sampling_time_ns_min;
+ uint64_t allocation_sampling_time_ns_max;
+ uint64_t allocation_sampling_time_ns_total;
  // How many times we saw allocations being done inside a sample
  unsigned int allocations_during_sample;
  } stats;
@@ -149,15 +177,17 @@ static VALUE _native_initialize(
  VALUE thread_context_collector_instance,
  VALUE gc_profiling_enabled,
  VALUE idle_sampling_helper_instance,
- VALUE allocation_counting_enabled,
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
- VALUE allocation_sample_every
+ VALUE dynamic_sampling_rate_overhead_target_percentage,
+ VALUE allocation_profiling_enabled,
+ VALUE skip_idle_samples_for_testing
  );
  static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
  static VALUE _native_sampling_loop(VALUE self, VALUE instance);
  static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE worker_thread);
  static VALUE stop(VALUE self_instance, VALUE optional_exception);
+ static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception);
  static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
  static void *run_sampling_trigger_loop(void *state_ptr);
  static void interrupt_sampling_trigger_loop(void *state_ptr);
@@ -180,15 +210,18 @@ static VALUE _native_simulate_sample_from_postponed_job(DDTRACE_UNUSED VALUE sel
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance);
  static VALUE _native_is_sigprof_blocked_in_current_thread(DDTRACE_UNUSED VALUE self);
  static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance);
+ static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance);
  void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused);
  static void grab_gvl_and_sample(void);
- static void reset_stats(struct cpu_and_wall_time_worker_state *state);
+ static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state);
  static void sleep_for(uint64_t time_ns);
  static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self);
  static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
  static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state);
  static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
  static VALUE rescued_sample_allocation(VALUE tracepoint_data);
+ static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error);
+ static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg);

  // Note on sampler global state safety:
  //
@@ -201,6 +234,11 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data);
  static VALUE active_sampler_instance = Qnil;
  static struct cpu_and_wall_time_worker_state *active_sampler_instance_state = NULL;

+ // See handle_sampling_signal for details on what this does
+ #ifdef NO_POSTPONED_TRIGGER
+ static void *gc_finalize_deferred_workaround;
+ #endif
+
  // Used to implement CpuAndWallTimeWorker._native_allocation_count . To be able to use cheap thread-local variables
  // (here with `__thread`, see https://gcc.gnu.org/onlinedocs/gcc/Thread-Local.html), this needs to be global.
  //
@@ -211,6 +249,18 @@ __thread uint64_t allocation_count = 0;
  void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
  rb_global_variable(&active_sampler_instance);

+ #ifndef NO_POSTPONED_TRIGGER
+ int unused_flags = 0;
+ sample_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, sample_from_postponed_job, NULL);
+ after_gc_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, after_gc_from_postponed_job, NULL);
+
+ if (sample_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
+ rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
+ }
+ #else
+ gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround();
+ #endif
+
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
  VALUE collectors_cpu_and_wall_time_worker_class = rb_define_class_under(collectors_module, "CpuAndWallTimeWorker", rb_cObject);
  // Hosts methods used for testing the native code using RSpec
@@ -226,13 +276,16 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
  rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);

- rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8);
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats", _native_stats, 1);
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats_reset_not_thread_safe", _native_stats_reset_not_thread_safe, 1);
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0);
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_is_running?", _native_is_running, 1);
  rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
+ // TODO: Remove `_native_is_running` from `testing_module` once `prof-correctness` has been updated to not need it
  rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
  rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
  rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
@@ -242,6 +295,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
  rb_define_singleton_method(testing_module, "_native_simulate_sample_from_postponed_job", _native_simulate_sample_from_postponed_job, 0);
  rb_define_singleton_method(testing_module, "_native_is_sigprof_blocked_in_current_thread", _native_is_sigprof_blocked_in_current_thread, 0);
  rb_define_singleton_method(testing_module, "_native_with_blocked_sigprof", _native_with_blocked_sigprof, 0);
+ rb_define_singleton_method(testing_module, "_native_delayed_error", _native_delayed_error, 2);
  }

  // This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_worker_state
@@ -264,14 +318,14 @@ static VALUE _native_new(VALUE klass) {
  // being leaked.

  state->gc_profiling_enabled = false;
- state->allocation_counting_enabled = false;
  state->no_signals_workaround_enabled = false;
  state->dynamic_sampling_rate_enabled = true;
- state->allocation_sample_every = 0;
+ state->allocation_profiling_enabled = false;
+ state->skip_idle_samples_for_testing = false;
  state->thread_context_collector_instance = Qnil;
  state->idle_sampling_helper_instance = Qnil;
  state->owner_thread = Qnil;
- dynamic_sampling_rate_init(&state->dynamic_sampling_rate);
+ dynamic_sampling_rate_init(&state->cpu_dynamic_sampling_rate);
  state->gc_tracepoint = Qnil;
  state->object_allocation_tracepoint = Qnil;

@@ -281,7 +335,15 @@

  state->during_sample = false;

- reset_stats(state);
+ reset_stats_not_thread_safe(state);
+
+ long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+ if (now == 0) {
+ ruby_xfree(state);
+ rb_raise(rb_eRuntimeError, ERR_CLOCK_FAIL);
+ }
+
+ discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation", now);

  return state->self_instance = TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
  }
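One detail worth noting in the hunk above: `_native_new` frees the freshly allocated state before raising, because `rb_raise` longjmps away and nothing else references that memory yet. A condensed standalone sketch of the same pattern (names here are illustrative, not taken from the gem):

    #include <ruby.h>

    // Sketch: raising from an allocation function without leaking the native struct.
    static const rb_data_type_t sketch_type = {
      .wrap_struct_name = "sketch",
      .function = { .dfree = RUBY_TYPED_DEFAULT_FREE },
    };

    static VALUE sketch_native_new(VALUE klass) {
      char *state = ruby_xmalloc(128);
      long now = 0; // stand-in for a failed monotonic clock read
      if (now == 0) {
        ruby_xfree(state); // must free first: rb_raise never returns
        rb_raise(rb_eRuntimeError, "failed to get clock time");
      }
      return TypedData_Wrap_Struct(klass, &sketch_type, state);
    }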
@@ -292,28 +354,37 @@ static VALUE _native_initialize(
  VALUE thread_context_collector_instance,
  VALUE gc_profiling_enabled,
  VALUE idle_sampling_helper_instance,
- VALUE allocation_counting_enabled,
  VALUE no_signals_workaround_enabled,
  VALUE dynamic_sampling_rate_enabled,
- VALUE allocation_sample_every
+ VALUE dynamic_sampling_rate_overhead_target_percentage,
+ VALUE allocation_profiling_enabled,
+ VALUE skip_idle_samples_for_testing
  ) {
  ENFORCE_BOOLEAN(gc_profiling_enabled);
- ENFORCE_BOOLEAN(allocation_counting_enabled);
  ENFORCE_BOOLEAN(no_signals_workaround_enabled);
  ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
- ENFORCE_TYPE(allocation_sample_every, T_FIXNUM);
+ ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
+ ENFORCE_BOOLEAN(allocation_profiling_enabled);
+ ENFORCE_BOOLEAN(skip_idle_samples_for_testing)

  struct cpu_and_wall_time_worker_state *state;
  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

  state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue);
- state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
  state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
  state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
- state->allocation_sample_every = NUM2INT(allocation_sample_every);
+ state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
+ state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);

- if (state->allocation_sample_every < 0) {
- rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every);
+ double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
+ if (!state->allocation_profiling_enabled) {
+ dynamic_sampling_rate_set_overhead_target_percentage(&state->cpu_dynamic_sampling_rate, total_overhead_target_percentage);
+ } else {
+ // TODO: May be nice to offer customization here? Distribute available "overhead" margin with a bias towards one or the other
+ // sampler.
+ dynamic_sampling_rate_set_overhead_target_percentage(&state->cpu_dynamic_sampling_rate, total_overhead_target_percentage / 2);
+ long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
+ discrete_dynamic_sampler_set_overhead_target_percentage(&state->allocation_sampler, total_overhead_target_percentage / 2, now);
  }

  state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
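To make the budget split above concrete: when allocation profiling is off, the CPU/wall sampler gets the entire overhead target; when it is on, the target is halved between the two samplers. A small standalone illustration (the 2.0 figure is only an example input, not necessarily the gem's default):

    #include <stdio.h>
    #include <stdbool.h>

    int main(void) {
      double total_overhead_target_percentage = 2.0; // example input
      bool allocation_profiling_enabled = true;

      // Mirrors the branch above: exclusive use vs. a 50/50 split.
      double cpu_share = allocation_profiling_enabled ? total_overhead_target_percentage / 2 : total_overhead_target_percentage;
      double allocation_share = allocation_profiling_enabled ? total_overhead_target_percentage / 2 : 0;

      printf("cpu=%.1f%% allocation=%.1f%%\n", cpu_share, allocation_share); // cpu=1.0% allocation=1.0%
      return 0;
    }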
@@ -342,6 +413,12 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
  struct cpu_and_wall_time_worker_state *state;
  TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

+ // If we already got a delayed exception registered even before starting, raise before starting
+ if (state->failure_exception != Qnil) {
+ disable_tracepoints(state);
+ rb_exc_raise(state->failure_exception);
+ }
+
  struct cpu_and_wall_time_worker_state *old_state = active_sampler_instance_state;
  if (old_state != NULL) {
  if (is_thread_alive(old_state->owner_thread)) {
@@ -366,7 +443,9 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
  if (state->stop_thread == rb_thread_current()) return Qnil;

  // Reset the dynamic sampling rate state, if any (reminder: the monotonic clock reference may change after a fork)
- dynamic_sampling_rate_reset(&state->dynamic_sampling_rate);
+ dynamic_sampling_rate_reset(&state->cpu_dynamic_sampling_rate);
+ long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
+ discrete_dynamic_sampler_reset(&state->allocation_sampler, now);

  // This write to a global is thread-safe BECAUSE we're still holding on to the global VM lock at this point
  active_sampler_instance_state = state;
@@ -428,15 +507,19 @@ static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE
  return stop(self_instance, /* optional_exception: */ Qnil);
  }

- static VALUE stop(VALUE self_instance, VALUE optional_exception) {
- struct cpu_and_wall_time_worker_state *state;
- TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
-
+ static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception) {
  atomic_store(&state->should_run, false);
  state->failure_exception = optional_exception;

  // Disable the tracepoints as soon as possible, so the VM doesn't keep on calling them
  disable_tracepoints(state);
+ }
+
+ static VALUE stop(VALUE self_instance, VALUE optional_exception) {
+ struct cpu_and_wall_time_worker_state *state;
+ TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
+
+ stop_state(state, optional_exception);

  return Qtrue;
  }
@@ -472,20 +555,50 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si

  // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
  // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
- int result = rb_postponed_job_register_one(0, sample_from_postponed_job, NULL);
-
- // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
- // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
- switch (result) {
- case 0:
- state->stats.postponed_job_full++; break;
- case 1:
- state->stats.postponed_job_success++; break;
- case 2:
- state->stats.postponed_job_skipped_already_existed++; break;
- default:
- state->stats.postponed_job_unknown_result++;
- }
+ #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
+ rb_postponed_job_trigger(sample_from_postponed_job_handle);
+ state->stats.postponed_job_success++; // Always succeeds
+ #else
+
+ // This is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
+ //
+ // TL;DR the `rb_postponed_job_register_one` API is not atomic (which is why it got replaced by `rb_postponed_job_trigger`)
+ // and in rare cases can cause VM crashes.
+ //
+ // Specifically, if we're interrupting `rb_postponed_job_flush` (the function that processes postponed jobs), the way
+ // that this function reads the jobs is not atomic, and can cause our call to
+ // `rb_postponed_job_register(function, arg)` to clobber an existing job that is getting dequeued.
+ // Clobbering an existing job is somewhat annoying, but the worst part is that it can happen that we clobber only
+ // the existing job's arguments.
+ // As surveyed in https://github.com/ruby/ruby/pull/8949#issuecomment-1821441370 clobbering the arguments turns out
+ // to not matter in many cases as usually `rb_postponed_job_register` calls in the VM and ecosystem ignore the argument.
+ //
+ // https://bugs.ruby-lang.org/issues/19991 is the exception: inside Ruby's `gc.c`, when dealing with object
+ // finalizers, Ruby calls `gc_finalize_deferred_register` which internally calls
+ // `rb_postponed_job_register_one(gc_finalize_deferred, objspace)`.
+ // Clobbering this call means that `gc_finalize_deferred` would get called with `NULL`, causing a segmentation fault.
+ //
+ // Note that this is quite rare: our signal needs to land at exactly the point where the VM has read the function
+ // to execute, but has yet to read the arguments. @ivoanjo: I could only reproduce it by manually changing the VM
+ // code to simulate this happening.
+ //
+ // Thus, our workaround is simple: we pass in objspace as our argument, just in case the clobbering happens.
+ // In the happy path, we never use this argument so it makes no difference. In the buggy path, we avoid crashing the VM.
+ int result = rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
+
+ // Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
+ // seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
+ switch (result) {
+ case 0:
+ state->stats.postponed_job_full++; break;
+ case 1:
+ state->stats.postponed_job_success++; break;
+ case 2:
+ state->stats.postponed_job_skipped_already_existed++; break;
+ default:
+ state->stats.postponed_job_unknown_result++;
+ }
+ #endif
  }

  // The actual sampling trigger loop always runs **without** the global vm lock.
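The two branches above boil down to one pattern on Ruby 3.3+: reserve a job handle once while holding the GVL, then trigger it from the signal handler. `rb_postponed_job_trigger` only marks the preregistered handle as pending, which is why it is atomic and cannot clobber another job's arguments. A minimal sketch against the Ruby 3.3+ C API (`my_job` and the handler names are placeholders):

    #include <ruby.h>
    #include <ruby/debug.h>

    static rb_postponed_job_handle_t my_job_handle;

    static void my_job(void *arg) {
      (void) arg;
      // Runs later, at a safe point, with the GVL held -- free to allocate and raise.
    }

    void init_once_with_gvl(void) {
      my_job_handle = rb_postponed_job_preregister(0, my_job, NULL);
      if (my_job_handle == POSTPONED_JOB_HANDLE_INVALID) rb_raise(rb_eRuntimeError, "preregister failed");
    }

    static void my_signal_handler(int signal) {
      (void) signal;
      rb_postponed_job_trigger(my_job_handle); // async-signal-safe: just flags the handle as pending
    }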
@@ -513,17 +626,23 @@ static void *run_sampling_trigger_loop(void *state_ptr) {
  // Note that reading the GVL owner and sending them a signal is a race -- the Ruby VM keeps on executing while
  // we're doing this, so we may still not signal the correct thread from time to time, but our signal handler
  // includes a check to see if it got called in the right thread
+ state->stats.interrupt_thread_attempts++;
  pthread_kill(owner.owner, SIGPROF);
  } else {
- // If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
- // so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
- //
- // In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
- // Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
- // for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
- // CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
- state->stats.trigger_simulated_signal_delivery_attempts++;
- idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
+ if (state->skip_idle_samples_for_testing) {
+ // This was added to make sure our tests don't accidentally pass due to idle samples. Specifically, if we
+ // comment out the thread interruption code inside `if (owner.valid)` above, our tests should not pass!
+ } else {
+ // If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
+ // so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
+ //
+ // In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
+ // Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
+ // for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
+ // CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
+ state->stats.trigger_simulated_signal_delivery_attempts++;
+ idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
+ }
  }
  }

@@ -534,7 +653,7 @@ static void *run_sampling_trigger_loop(void *state_ptr) {
  // Note that we deliberately should NOT combine this sleep_for with the one above because the result of
  // `dynamic_sampling_rate_get_sleep` may have changed while the above sleep was ongoing.
  uint64_t extra_sleep =
- dynamic_sampling_rate_get_sleep(&state->dynamic_sampling_rate, monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE));
+ dynamic_sampling_rate_get_sleep(&state->cpu_dynamic_sampling_rate, monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE));
  if (state->dynamic_sampling_rate_enabled && extra_sleep > 0) sleep_for(extra_sleep);
  }

@@ -574,12 +693,12 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {

  long wall_time_ns_before_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);

- if (state->dynamic_sampling_rate_enabled && !dynamic_sampling_rate_should_sample(&state->dynamic_sampling_rate, wall_time_ns_before_sample)) {
- state->stats.skipped_sample_because_of_dynamic_sampling_rate++;
+ if (state->dynamic_sampling_rate_enabled && !dynamic_sampling_rate_should_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_before_sample)) {
+ state->stats.cpu_skipped++;
  return Qnil;
  }

- state->stats.sampled++;
+ state->stats.cpu_sampled++;

  VALUE profiler_overhead_stack_thread = state->owner_thread; // Used to attribute profiler overhead to a different stack
  thread_context_collector_sample(state->thread_context_collector_instance, wall_time_ns_before_sample, profiler_overhead_stack_thread);
@@ -590,11 +709,11 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
  // Guard against wall-time going backwards, see https://github.com/DataDog/dd-trace-rb/pull/2336 for discussion.
  uint64_t sampling_time_ns = delta_ns < 0 ? 0 : delta_ns;

- state->stats.sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.sampling_time_ns_min);
- state->stats.sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.sampling_time_ns_max);
- state->stats.sampling_time_ns_total += sampling_time_ns;
+ state->stats.cpu_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_min);
+ state->stats.cpu_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_max);
+ state->stats.cpu_sampling_time_ns_total += sampling_time_ns;

- dynamic_sampling_rate_after_sample(&state->dynamic_sampling_rate, wall_time_ns_after_sample, sampling_time_ns);
+ dynamic_sampling_rate_after_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_after_sample, sampling_time_ns);

  // Return a dummy VALUE because we're called from rb_rescue2 which requires it
  return Qnil;
@@ -632,7 +751,10 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
  // because they may raise exceptions.
  install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal");
  if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
- if (state->allocation_counting_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
+ if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
+
+ // Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
+ rb_funcall(instance, rb_intern("signal_running"), 0);

  rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);

@@ -714,28 +836,17 @@ static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
  if (event == RUBY_INTERNAL_EVENT_GC_ENTER) {
  thread_context_collector_on_gc_start(state->thread_context_collector_instance);
  } else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) {
- // Design: In an earlier iteration of this feature (see https://github.com/DataDog/dd-trace-rb/pull/2308) we
- // actually had a single method to implement the behavior of both thread_context_collector_on_gc_finish
- // and thread_context_collector_sample_after_gc (the latter is called via after_gc_from_postponed_job).
- //
- // Unfortunately, then we discovered the safety issue around no allocations, and thus decided to separate them -- so that
- // the sampling could run outside the tight safety constraints of the garbage collection process.
- //
- // There is a downside: The sample is now taken very very shortly afterwards the GC finishes, and not immediately
- // as the GC finishes, which means the stack captured may by affected by "skid", e.g. point slightly after where
- // it should be pointing at.
- // Alternatives to solve this would be to capture no stack for garbage collection (as we do for Java and .net);
- // making the sampling process allocation-safe (very hard); or separate stack sampling from sample recording,
- // e.g. enabling us to capture the stack in thread_context_collector_on_gc_finish and do the rest later
- // (medium hard).
-
- thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
- // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc after if
- // fully finishes the garbage collection, so that one is allowed to do allocations and throw exceptions as usual.
- //
- // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
- // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
- rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
+ bool should_flush = thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
+
+ // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc when the
+ // thread collector flags it's time to flush.
+ if (should_flush) {
+ #ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
+ rb_postponed_job_trigger(after_gc_from_postponed_job_handle);
+ #else
+ rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
+ #endif
+ }
  }
  }

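For readers unfamiliar with the `gc_tracepoint` machinery this hunk touches: `RUBY_INTERNAL_EVENT_GC_ENTER`/`GC_EXIT` handlers run inside the garbage collector, where allocating is forbidden, which is exactly why the actual flush above is deferred to a postponed job. A minimal sketch of how such a tracepoint is created with Ruby's public C API (handler bodies elided):

    #include <ruby.h>
    #include <ruby/debug.h>

    static void on_gc(VALUE tracepoint_data, void *unused) {
      (void) unused;
      rb_trace_arg_t *args = rb_tracearg_from_tracepoint(tracepoint_data);
      rb_event_flag_t event = rb_tracearg_event_flag(args);
      if (event == RUBY_INTERNAL_EVENT_GC_ENTER) { /* record GC start; must not allocate */ }
      if (event == RUBY_INTERNAL_EVENT_GC_EXIT) { /* record GC end; defer heavy work to a postponed job */ }
    }

    static VALUE make_gc_tracepoint(void) {
      // Internal events can only be subscribed to from C, not from Ruby's TracePoint class.
      return rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_GC_ENTER | RUBY_INTERNAL_EVENT_GC_EXIT, on_gc, NULL);
    }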
@@ -802,7 +913,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance)
  // Disable all tracepoints, so that there are no more attempts to mutate the profile
  disable_tracepoints(state);

- reset_stats(state);
+ reset_stats_not_thread_safe(state);

  // Remove all state from the `Collectors::ThreadState` and connected downstream components
  rb_funcall(state->thread_context_collector_instance, rb_intern("reset_after_fork"), 0);
@@ -818,11 +929,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
  struct cpu_and_wall_time_worker_state *state;
  TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

- VALUE pretty_sampling_time_ns_min = state->stats.sampling_time_ns_min == UINT64_MAX ? Qnil : ULL2NUM(state->stats.sampling_time_ns_min);
- VALUE pretty_sampling_time_ns_max = state->stats.sampling_time_ns_max == 0 ? Qnil : ULL2NUM(state->stats.sampling_time_ns_max);
- VALUE pretty_sampling_time_ns_total = state->stats.sampling_time_ns_total == 0 ? Qnil : ULL2NUM(state->stats.sampling_time_ns_total);
- VALUE pretty_sampling_time_ns_avg =
- state->stats.sampled == 0 ? Qnil : DBL2NUM(((double) state->stats.sampling_time_ns_total) / state->stats.sampled);
+ unsigned long total_cpu_samples_attempted = state->stats.cpu_sampled + state->stats.cpu_skipped;
+ VALUE effective_cpu_sample_rate =
+ total_cpu_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampled) / total_cpu_samples_attempted);
+ unsigned long total_allocation_samples_attempted = state->stats.allocation_sampled + state->stats.allocation_skipped;
+ VALUE effective_allocation_sample_rate =
+ total_allocation_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampled) / total_allocation_samples_attempted);
+
+ VALUE allocation_sampler_snapshot = state->allocation_profiling_enabled && state->dynamic_sampling_rate_enabled ?
+ discrete_dynamic_sampler_state_snapshot(&state->allocation_sampler) : Qnil;

  VALUE stats_as_hash = rb_hash_new();
  VALUE arguments[] = {
@@ -831,22 +946,43 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
  ID2SYM(rb_intern("simulated_signal_delivery")), /* => */ UINT2NUM(state->stats.simulated_signal_delivery),
  ID2SYM(rb_intern("signal_handler_enqueued_sample")), /* => */ UINT2NUM(state->stats.signal_handler_enqueued_sample),
  ID2SYM(rb_intern("signal_handler_wrong_thread")), /* => */ UINT2NUM(state->stats.signal_handler_wrong_thread),
- ID2SYM(rb_intern("sampled")), /* => */ UINT2NUM(state->stats.sampled),
- ID2SYM(rb_intern("skipped_sample_because_of_dynamic_sampling_rate")), /* => */ UINT2NUM(state->stats.skipped_sample_because_of_dynamic_sampling_rate),
  ID2SYM(rb_intern("postponed_job_skipped_already_existed")), /* => */ UINT2NUM(state->stats.postponed_job_skipped_already_existed),
  ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
  ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
  ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
- ID2SYM(rb_intern("sampling_time_ns_min")), /* => */ pretty_sampling_time_ns_min,
- ID2SYM(rb_intern("sampling_time_ns_max")), /* => */ pretty_sampling_time_ns_max,
- ID2SYM(rb_intern("sampling_time_ns_total")), /* => */ pretty_sampling_time_ns_total,
- ID2SYM(rb_intern("sampling_time_ns_avg")), /* => */ pretty_sampling_time_ns_avg,
- ID2SYM(rb_intern("allocations_during_sample")), /* => */ UINT2NUM(state->stats.allocations_during_sample),
+ ID2SYM(rb_intern("interrupt_thread_attempts")), /* => */ UINT2NUM(state->stats.interrupt_thread_attempts),
+
+ // CPU Stats
+ ID2SYM(rb_intern("cpu_sampled")), /* => */ UINT2NUM(state->stats.cpu_sampled),
+ ID2SYM(rb_intern("cpu_skipped")), /* => */ UINT2NUM(state->stats.cpu_skipped),
+ ID2SYM(rb_intern("cpu_effective_sample_rate")), /* => */ effective_cpu_sample_rate,
+ ID2SYM(rb_intern("cpu_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
+ ID2SYM(rb_intern("cpu_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_max, > 0, ULL2NUM),
+ ID2SYM(rb_intern("cpu_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_total, > 0, ULL2NUM),
+ ID2SYM(rb_intern("cpu_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.cpu_sampling_time_ns_total, state->stats.cpu_sampled),
+
+ // Allocation stats
+ ID2SYM(rb_intern("allocation_sampled")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_sampled) : Qnil,
+ ID2SYM(rb_intern("allocation_skipped")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_skipped) : Qnil,
+ ID2SYM(rb_intern("allocation_effective_sample_rate")), /* => */ effective_allocation_sample_rate,
+ ID2SYM(rb_intern("allocation_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
+ ID2SYM(rb_intern("allocation_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_max, > 0, ULL2NUM),
+ ID2SYM(rb_intern("allocation_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_total, > 0, ULL2NUM),
+ ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.allocation_sampling_time_ns_total, state->stats.allocation_sampled),
+ ID2SYM(rb_intern("allocation_sampler_snapshot")), /* => */ allocation_sampler_snapshot,
+ ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
  };
  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
  return stats_as_hash;
  }

+ static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance) {
+ struct cpu_and_wall_time_worker_state *state;
+ TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
+ reset_stats_not_thread_safe(state);
+ return Qnil;
+ }
+
  void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
  struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above

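The effective sample rates introduced above are simple ratios of work done versus work requested. A standalone worked example (counts are illustrative):

    #include <stdio.h>

    int main(void) {
      unsigned int cpu_sampled = 80, cpu_skipped = 20; // illustrative counts
      unsigned long attempted = cpu_sampled + cpu_skipped;
      if (attempted > 0) {
        // 80 samples taken out of 100 attempted => 0.80 effective rate
        printf("cpu_effective_sample_rate=%.2f\n", (double) cpu_sampled / attempted);
      }
      return 0;
    }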
@@ -864,9 +1000,17 @@ void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
 
 static void grab_gvl_and_sample(void) { rb_thread_call_with_gvl(simulate_sampling_signal_delivery, NULL); }
 
-static void reset_stats(struct cpu_and_wall_time_worker_state *state) {
-  state->stats = (struct stats) {}; // Resets all stats back to zero
-  state->stats.sampling_time_ns_min = UINT64_MAX; // Since we always take the min between existing and latest sample
+static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state) {
+  // NOTE: This is not really thread safe, so ongoing sampling operations that are concurrent with a reset can have their stats:
+  // * Lost (writes after stats retrieval but before reset).
+  // * Included in the previous stats window (writes before stats retrieval and reset).
+  // * Included in the following stats window (writes after stats retrieval and reset).
+  // Given the expected infrequency of resetting (~once per 60s profile) and the auxiliary/non-critical nature of these stats,
+  // this momentary loss of accuracy is deemed acceptable to keep overhead to a minimum.
+  state->stats = (struct stats) {
+    .cpu_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
+    .allocation_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
+  };
 }
 
 static void sleep_for(uint64_t time_ns) {
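The designated initializer above zeroes every stat except the two `*_min` fields, which are seeded with `UINT64_MAX` so the first recorded sample always replaces the seed (a min seeded with 0 could never move). A minimal sketch of that accumulator pattern, using a hypothetical `timing_stats` type:

    #include <stdint.h>

    // Hypothetical min/max/total accumulator illustrating the UINT64_MAX seed.
    typedef struct { uint64_t min, max, total; } timing_stats;

    static void timing_stats_reset(timing_stats *stats) {
      // Designated initializer: max and total start at 0; min starts at the
      // largest possible value so the first sample always wins the comparison.
      *stats = (timing_stats) { .min = UINT64_MAX };
    }

    static void timing_stats_record(timing_stats *stats, uint64_t sample_ns) {
      if (sample_ns < stats->min) stats->min = sample_ns;
      if (sample_ns > stats->max) stats->max = sample_ns;
      stats->total += sample_ns;
    }

The seed also explains the `RUBY_NUM_OR_NIL(..., != UINT64_MAX, ULL2NUM)` entries in the stats hash earlier: a min still equal to `UINT64_MAX` means nothing was sampled yet, and is reported as nil rather than as a bogus huge number.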
@@ -888,9 +1032,9 @@ static void sleep_for(uint64_t time_ns) {
 }
 
 static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
-  bool is_profiler_running = active_sampler_instance_state != NULL;
+  bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;
 
-  return is_profiler_running ? ULL2NUM(allocation_count) : Qnil;
+  return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
 }
 
 // Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
@@ -916,25 +1060,53 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
     return;
   }
 
+  if (state->dynamic_sampling_rate_enabled) {
+    long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+    if (now == 0) {
+      delayed_error(state, ERR_CLOCK_FAIL);
+      return;
+    }
+    if (!discrete_dynamic_sampler_should_sample(&state->allocation_sampler, now)) {
+      state->stats.allocation_skipped++;
+      return;
+    }
+  }
+
   // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
   // invocation is still pending (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
   // to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
   // defined as not being able to allocate) sets this.
   state->during_sample = true;
 
-  // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before
-  // beta), and having something here allows us to test the rest of feature, sampling decision aside.
-  if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) {
-    // Rescue against any exceptions that happen during sampling
-    safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
+  // Rescue against any exceptions that happen during sampling
+  safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
+
+  if (state->dynamic_sampling_rate_enabled) {
+    long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+    if (now == 0) {
+      delayed_error(state, ERR_CLOCK_FAIL);
+      // NOTE: Not short-circuiting here to make sure cleanup happens
+    }
+    uint64_t sampling_time_ns = discrete_dynamic_sampler_after_sample(&state->allocation_sampler, now);
+    // NOTE: To keep things lean when dynamic sampling rate is disabled we skip clock interactions, which is
+    // why we're fine with having this inside this conditional.
+    state->stats.allocation_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_min);
+    state->stats.allocation_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_max);
+    state->stats.allocation_sampling_time_ns_total += sampling_time_ns;
   }
 
+  state->stats.allocation_sampled++;
+
   state->during_sample = false;
 }
 
 static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) {
-  rb_tracepoint_disable(state->gc_tracepoint);
-  rb_tracepoint_disable(state->object_allocation_tracepoint);
+  if (state->gc_tracepoint != Qnil) {
+    rb_tracepoint_disable(state->gc_tracepoint);
+  }
+  if (state->object_allocation_tracepoint != Qnil) {
+    rb_tracepoint_disable(state->object_allocation_tracepoint);
+  }
 }
 
 static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
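The hunk above replaces the placeholder "sample every Nth allocation" strategy with a before/after protocol: `discrete_dynamic_sampler_should_sample` decides whether this event is sampled at all, and `discrete_dynamic_sampler_after_sample` is told how long the sample took so the sampler can adapt its rate. A simplified sketch of that protocol; the `toy_*` names are illustrative stand-ins, and the fixed 1-in-100 decision here replaces the real sampler's adaptive logic (which, per the surrounding code, reacts to how long sampling takes):

    #include <stdbool.h>
    #include <stdint.h>

    // Illustrative stand-in for the discrete dynamic sampler's external protocol.
    typedef struct {
      unsigned long events_since_last_sample;
      long sample_start_ns;
    } toy_sampler;

    // Called on every event, *before* doing any sampling work.
    static bool toy_sampler_should_sample(toy_sampler *sampler, long now_ns) {
      sampler->events_since_last_sample++;
      // Toy decision: sample every 100th event. The real sampler instead
      // adapts this rate to keep sampling overhead in check.
      if (sampler->events_since_last_sample % 100 != 0) return false;
      sampler->sample_start_ns = now_ns;
      return true;
    }

    // Called *after* a sample was taken; returns how long sampling took so
    // the caller can feed its min/max/total stats (and the sampler can adapt).
    static uint64_t toy_sampler_after_sample(toy_sampler *sampler, long now_ns) {
      sampler->events_since_last_sample = 0;
      return (uint64_t) (now_ns - sampler->sample_start_ns);
    }

Note that the worker only reads the clock when dynamic sampling is enabled, matching the NOTE in the diff about keeping the disabled path lean.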
@@ -959,8 +1131,31 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
   rb_trace_arg_t *data = rb_tracearg_from_tracepoint(tracepoint_data);
   VALUE new_object = rb_tracearg_object(data);
 
-  thread_context_collector_sample_allocation(state->thread_context_collector_instance, state->allocation_sample_every, new_object);
+  unsigned long allocations_since_last_sample = state->dynamic_sampling_rate_enabled ?
+    // if we're doing dynamic sampling, ask the sampler how many events happened since the last sample
+    discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) :
+    // if we aren't, then we're sampling every event
+    1;
+  // TODO: Signal in the profile that clamping happened?
+  unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample;
+  thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
 
   // Return a dummy VALUE because we're called from rb_rescue2 which requires it
   return Qnil;
 }
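When only 1 in N allocations is sampled, the one sampled allocation is recorded with weight N so that profile totals still approximate reality; the clamp keeps a single sample taken after a long gap from dominating the profile. A sketch of that computation (the 65535 limit is illustrative only; the real `MAX_ALLOC_WEIGHT` is defined elsewhere in this file and may differ):

    // Hypothetical stand-in for MAX_ALLOC_WEIGHT.
    #define TOY_MAX_ALLOC_WEIGHT 65535

    // Upscale one sampled allocation to represent all allocations since the
    // last sample, capped so a long sampling gap can't produce an outsized weight.
    static unsigned int toy_allocation_weight(unsigned long allocations_since_last_sample) {
      return allocations_since_last_sample > TOY_MAX_ALLOC_WEIGHT
        ? TOY_MAX_ALLOC_WEIGHT
        : (unsigned int) allocations_since_last_sample;
    }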
+
+static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error) {
+  // If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop.
+  stop_state(state, rb_exc_new_cstr(rb_eRuntimeError, error));
+}
+
+static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) {
+  ENFORCE_TYPE(error_msg, T_STRING);
+
+  struct cpu_and_wall_time_worker_state *state;
+  TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
+
+  delayed_error(state, rb_string_value_cstr(&error_msg));
+
+  return Qnil;
+}
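`delayed_error` covers failures detected where raising a Ruby exception is unsafe (such as inside the allocation tracepoint): per its comment, the exception is handed to `stop_state` and surfaces asynchronously through the worker's main loop, while `_native_delayed_error` exposes the path so it can be exercised from tests. A generic sketch of this record-now/raise-later pattern, with illustrative names (the real code keeps the pending exception in the worker state, not a global):

    #include <ruby.h>

    // Illustrative global; real code would also need rb_global_variable()
    // so the stored exception stays GC-safe.
    static VALUE pending_error = Qnil;

    // Call from contexts where raising is unsafe (e.g. inside a tracepoint).
    static void toy_record_delayed_error(const char *message) {
      pending_error = rb_exc_new_cstr(rb_eRuntimeError, message);
    }

    // Call later from a safe point (e.g. a worker's main loop).
    static void toy_raise_if_pending(void) {
      if (pending_error == Qnil) return;
      VALUE error = pending_error;
      pending_error = Qnil;
      rb_exc_raise(error); // does not return
    }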