ddtrace 1.18.0 → 1.23.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +228 -2
  3. data/LICENSE-3rdparty.csv +1 -1
  4. data/bin/ddprofrb +15 -0
  5. data/bin/ddtracerb +3 -1
  6. data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
  7. data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
  8. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +312 -117
  9. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +422 -0
  10. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +101 -0
  11. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +22 -14
  12. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +4 -0
  13. data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  14. data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  15. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +43 -102
  16. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +10 -3
  17. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +272 -136
  18. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +2 -1
  19. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +28 -7
  20. data/ext/datadog_profiling_native_extension/heap_recorder.c +1047 -0
  21. data/ext/datadog_profiling_native_extension/heap_recorder.h +166 -0
  22. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +6 -0
  23. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
  24. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +20 -0
  25. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +11 -0
  26. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +50 -4
  27. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +19 -0
  28. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
  29. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +18 -1
  30. data/ext/datadog_profiling_native_extension/ruby_helpers.c +267 -0
  31. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +33 -0
  32. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +476 -58
  33. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +3 -0
  34. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +2 -0
  35. data/lib/datadog/appsec/contrib/devise/tracking.rb +8 -0
  36. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +45 -14
  37. data/lib/datadog/appsec/event.rb +1 -1
  38. data/lib/datadog/auto_instrument.rb +3 -0
  39. data/lib/datadog/core/configuration/components.rb +7 -6
  40. data/lib/datadog/core/configuration/option.rb +8 -6
  41. data/lib/datadog/core/configuration/settings.rb +259 -60
  42. data/lib/datadog/core/configuration.rb +20 -4
  43. data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
  44. data/lib/datadog/core/environment/class_count.rb +6 -6
  45. data/lib/datadog/core/environment/git.rb +25 -0
  46. data/lib/datadog/core/environment/identity.rb +18 -48
  47. data/lib/datadog/core/environment/platform.rb +7 -1
  48. data/lib/datadog/core/git/ext.rb +2 -23
  49. data/lib/datadog/core/remote/client/capabilities.rb +1 -1
  50. data/lib/datadog/core/remote/component.rb +25 -12
  51. data/lib/datadog/core/remote/ext.rb +1 -0
  52. data/lib/datadog/core/remote/negotiation.rb +2 -2
  53. data/lib/datadog/core/remote/tie/tracing.rb +39 -0
  54. data/lib/datadog/core/remote/tie.rb +27 -0
  55. data/lib/datadog/core/remote/transport/http/config.rb +1 -1
  56. data/lib/datadog/core/remote/worker.rb +7 -4
  57. data/lib/datadog/core/telemetry/client.rb +18 -10
  58. data/lib/datadog/core/telemetry/emitter.rb +9 -13
  59. data/lib/datadog/core/telemetry/event.rb +247 -56
  60. data/lib/datadog/core/telemetry/ext.rb +4 -0
  61. data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
  62. data/lib/datadog/core/telemetry/http/ext.rb +4 -1
  63. data/lib/datadog/core/telemetry/http/response.rb +4 -0
  64. data/lib/datadog/core/telemetry/http/transport.rb +9 -4
  65. data/lib/datadog/core/telemetry/request.rb +59 -0
  66. data/lib/datadog/core/transport/ext.rb +2 -0
  67. data/lib/datadog/core/utils/url.rb +25 -0
  68. data/lib/datadog/opentelemetry/sdk/propagator.rb +3 -2
  69. data/lib/datadog/opentelemetry.rb +3 -0
  70. data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
  71. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +36 -12
  72. data/lib/datadog/profiling/collectors/info.rb +101 -0
  73. data/lib/datadog/profiling/component.rb +210 -34
  74. data/lib/datadog/profiling/exporter.rb +23 -6
  75. data/lib/datadog/profiling/ext.rb +2 -0
  76. data/lib/datadog/profiling/flush.rb +6 -3
  77. data/lib/datadog/profiling/http_transport.rb +5 -1
  78. data/lib/datadog/profiling/load_native_extension.rb +19 -6
  79. data/lib/datadog/profiling/native_extension.rb +1 -1
  80. data/lib/datadog/profiling/scheduler.rb +4 -6
  81. data/lib/datadog/profiling/stack_recorder.rb +19 -4
  82. data/lib/datadog/profiling/tag_builder.rb +5 -0
  83. data/lib/datadog/profiling/tasks/exec.rb +3 -3
  84. data/lib/datadog/profiling/tasks/help.rb +3 -3
  85. data/lib/datadog/profiling.rb +13 -2
  86. data/lib/datadog/tracing/configuration/ext.rb +0 -1
  87. data/lib/datadog/tracing/configuration/settings.rb +2 -1
  88. data/lib/datadog/tracing/contrib/action_cable/configuration/settings.rb +1 -0
  89. data/lib/datadog/tracing/contrib/action_cable/ext.rb +1 -0
  90. data/lib/datadog/tracing/contrib/action_mailer/configuration/settings.rb +1 -0
  91. data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
  92. data/lib/datadog/tracing/contrib/action_mailer/ext.rb +1 -0
  93. data/lib/datadog/tracing/contrib/action_pack/configuration/settings.rb +1 -0
  94. data/lib/datadog/tracing/contrib/action_pack/ext.rb +1 -0
  95. data/lib/datadog/tracing/contrib/action_view/configuration/settings.rb +1 -0
  96. data/lib/datadog/tracing/contrib/action_view/ext.rb +1 -0
  97. data/lib/datadog/tracing/contrib/active_job/configuration/settings.rb +1 -0
  98. data/lib/datadog/tracing/contrib/active_job/ext.rb +1 -0
  99. data/lib/datadog/tracing/contrib/active_model_serializers/configuration/settings.rb +1 -0
  100. data/lib/datadog/tracing/contrib/active_model_serializers/ext.rb +1 -0
  101. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
  102. data/lib/datadog/tracing/contrib/active_record/configuration/settings.rb +1 -0
  103. data/lib/datadog/tracing/contrib/active_record/ext.rb +1 -0
  104. data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +1 -0
  105. data/lib/datadog/tracing/contrib/active_support/ext.rb +1 -0
  106. data/lib/datadog/tracing/contrib/analytics.rb +0 -1
  107. data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +1 -0
  108. data/lib/datadog/tracing/contrib/aws/ext.rb +1 -0
  109. data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
  110. data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
  111. data/lib/datadog/tracing/contrib/configurable.rb +1 -1
  112. data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +1 -0
  113. data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
  114. data/lib/datadog/tracing/contrib/delayed_job/configuration/settings.rb +1 -0
  115. data/lib/datadog/tracing/contrib/delayed_job/ext.rb +1 -0
  116. data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +1 -0
  117. data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +1 -0
  118. data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +1 -0
  119. data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
  120. data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +1 -0
  121. data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
  122. data/lib/datadog/tracing/contrib/extensions.rb +6 -2
  123. data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +7 -0
  124. data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
  125. data/lib/datadog/tracing/contrib/faraday/middleware.rb +1 -1
  126. data/lib/datadog/tracing/contrib/grape/configuration/settings.rb +1 -0
  127. data/lib/datadog/tracing/contrib/grape/ext.rb +1 -0
  128. data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +1 -0
  129. data/lib/datadog/tracing/contrib/graphql/ext.rb +1 -0
  130. data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +1 -0
  131. data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
  132. data/lib/datadog/tracing/contrib/http/configuration/settings.rb +1 -0
  133. data/lib/datadog/tracing/contrib/http/distributed/fetcher.rb +2 -2
  134. data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
  135. data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +1 -0
  136. data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
  137. data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +1 -0
  138. data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
  139. data/lib/datadog/tracing/contrib/kafka/configuration/settings.rb +1 -0
  140. data/lib/datadog/tracing/contrib/kafka/ext.rb +1 -0
  141. data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +1 -0
  142. data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
  143. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
  144. data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
  145. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +2 -1
  146. data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +1 -0
  147. data/lib/datadog/tracing/contrib/opensearch/ext.rb +1 -0
  148. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +1 -0
  149. data/lib/datadog/tracing/contrib/pg/ext.rb +1 -0
  150. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
  151. data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +1 -0
  152. data/lib/datadog/tracing/contrib/presto/ext.rb +1 -0
  153. data/lib/datadog/tracing/contrib/qless/configuration/settings.rb +1 -0
  154. data/lib/datadog/tracing/contrib/qless/ext.rb +1 -0
  155. data/lib/datadog/tracing/contrib/que/configuration/settings.rb +1 -0
  156. data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
  157. data/lib/datadog/tracing/contrib/racecar/configuration/settings.rb +1 -0
  158. data/lib/datadog/tracing/contrib/racecar/ext.rb +1 -0
  159. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +1 -0
  160. data/lib/datadog/tracing/contrib/rack/ext.rb +1 -0
  161. data/lib/datadog/tracing/contrib/rack/middlewares.rb +9 -2
  162. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  163. data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +1 -0
  164. data/lib/datadog/tracing/contrib/rails/ext.rb +1 -0
  165. data/lib/datadog/tracing/contrib/rake/configuration/settings.rb +1 -0
  166. data/lib/datadog/tracing/contrib/rake/ext.rb +1 -0
  167. data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +1 -0
  168. data/lib/datadog/tracing/contrib/redis/ext.rb +1 -0
  169. data/lib/datadog/tracing/contrib/redis/instrumentation.rb +2 -2
  170. data/lib/datadog/tracing/contrib/redis/patcher.rb +34 -21
  171. data/lib/datadog/tracing/contrib/resque/configuration/settings.rb +1 -0
  172. data/lib/datadog/tracing/contrib/resque/ext.rb +1 -0
  173. data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +1 -0
  174. data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
  175. data/lib/datadog/tracing/contrib/roda/configuration/settings.rb +1 -0
  176. data/lib/datadog/tracing/contrib/roda/ext.rb +1 -0
  177. data/lib/datadog/tracing/contrib/sequel/configuration/settings.rb +1 -0
  178. data/lib/datadog/tracing/contrib/sequel/ext.rb +1 -0
  179. data/lib/datadog/tracing/contrib/shoryuken/configuration/settings.rb +1 -0
  180. data/lib/datadog/tracing/contrib/shoryuken/ext.rb +1 -0
  181. data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +1 -0
  182. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  183. data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +1 -0
  184. data/lib/datadog/tracing/contrib/sinatra/ext.rb +1 -0
  185. data/lib/datadog/tracing/contrib/sneakers/configuration/settings.rb +1 -0
  186. data/lib/datadog/tracing/contrib/sneakers/ext.rb +1 -0
  187. data/lib/datadog/tracing/contrib/stripe/configuration/settings.rb +1 -0
  188. data/lib/datadog/tracing/contrib/stripe/ext.rb +1 -0
  189. data/lib/datadog/tracing/contrib/sucker_punch/configuration/settings.rb +1 -0
  190. data/lib/datadog/tracing/contrib/sucker_punch/ext.rb +1 -0
  191. data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +58 -0
  192. data/lib/datadog/tracing/contrib/trilogy/ext.rb +27 -0
  193. data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +94 -0
  194. data/lib/datadog/tracing/contrib/trilogy/integration.rb +43 -0
  195. data/lib/datadog/tracing/contrib/trilogy/patcher.rb +31 -0
  196. data/lib/datadog/tracing/contrib.rb +1 -0
  197. data/lib/datadog/tracing/sampling/matcher.rb +23 -3
  198. data/lib/datadog/tracing/sampling/rule.rb +7 -2
  199. data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
  200. data/lib/datadog/tracing/trace_operation.rb +1 -2
  201. data/lib/datadog/tracing/transport/http.rb +1 -0
  202. data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
  203. data/lib/datadog/tracing.rb +8 -2
  204. data/lib/ddtrace/version.rb +2 -2
  205. metadata +71 -61
  206. data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
  207. data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
  208. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -110
  209. data/lib/datadog/core/telemetry/collector.rb +0 -240
  210. data/lib/datadog/core/telemetry/v1/app_event.rb +0 -52
  211. data/lib/datadog/core/telemetry/v1/application.rb +0 -92
  212. data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
  213. data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
  214. data/lib/datadog/core/telemetry/v1/host.rb +0 -59
  215. data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
  216. data/lib/datadog/core/telemetry/v1/product.rb +0 -36
  217. data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
  218. data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
  219. data/lib/datadog/core/telemetry/v2/request.rb +0 -29
  220. data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
  221. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
  222. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
  223. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
  224. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
  225. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
  226. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
  227. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
  228. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
  229. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
@@ -3,6 +3,7 @@
3
3
  #include "collectors_thread_context.h"
4
4
  #include "clock_id.h"
5
5
  #include "collectors_stack.h"
6
+ #include "collectors_gc_profiling_helper.h"
6
7
  #include "helpers.h"
7
8
  #include "libdatadog_helpers.h"
8
9
  #include "private_vm_api_access.h"
@@ -37,24 +38,29 @@
37
38
  // When `thread_context_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
38
39
  // context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
39
40
  //
40
- // While these fields are set, regular samples (if any) do not account for any time that passes after these two
41
- // timestamps.
41
+ // While `cpu_time_at_gc_start_ns` is set, regular samples (if any) do not account for cpu-time any time that passes
42
+ // after this timestamp. The idea is that this cpu-time will be blamed separately on GC, and not on the user thread.
43
+ // Wall-time accounting is not affected by this (e.g. we still record 60 seconds every 60 seconds).
42
44
  //
43
- // (Regular samples can still account for the time between the previous sample and the start of GC.)
45
+ // (Regular samples can still account for the cpu-time between the previous sample and the start of GC.)
44
46
  //
45
- // When `thread_context_collector_on_gc_finish` gets called, the current cpu and wall-time again get recorded to the
46
- // thread context: `cpu_time_at_gc_finish_ns` and `wall_time_at_gc_finish_ns`.
47
+ // When `thread_context_collector_on_gc_finish` gets called, the cpu-time and wall-time spent during GC gets recorded
48
+ // into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns`
49
+ // of the thread that did GC also gets adjusted to avoid double-accounting.)
47
50
  //
48
- // Finally, when `thread_context_collector_sample_after_gc` gets called, the following happens:
51
+ // Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded with a stack having
52
+ // a single placeholder `Garbage Collection` frame. This sample gets
53
+ // assigned the cpu-time and wall-time that was recorded between calls to `on_gc_start` and `on_gc_finish`, as well
54
+ // as metadata for the last GC.
49
55
  //
50
- // 1. A sample gets taken, using the special `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder
51
- // `Garbage Collection` frame as the latest frame. This sample gets assigned the cpu-time and wall-time period that was
52
- // recorded between calls to `on_gc_start` and `on_gc_finish`.
53
- //
54
- // 2. The thread is no longer marked as being in gc (all gc tracking fields get reset back to `INVALID_TIME`).
55
- //
56
- // 3. The `cpu_time_at_previous_sample_ns` and `wall_time_at_previous_sample_ns` get updated with the elapsed time in
57
- // GC, so that all time is accounted for -- e.g. the next sample will not get "blamed" by time spent in GC.
56
+ // Note that the Ruby GC does not usually do all of the GC work in one go. Instead, it breaks it up into smaller steps
57
+ // so that the application can keep doing user work in between GC steps.
58
+ // The `on_gc_start` / `on_gc_finish` will trigger each time the VM executes these smaller steps, and on a benchmark
59
+ // that executes `Object.new` in a loop, I measured more than 50k of these steps per second (!!).
60
+ // Creating this many events for every GC step is a lot of overhead, so instead `on_gc_finish` coalesces time
61
+ // spent in GC and only flushes it at most every 10 ms/every complete GC collection. This reduces the amount of
62
+ // individual GC events we need to record. We use the latest GC metadata for this event, reflecting the last GC that
63
+ // happened in the coalesced period.
58
64
  //
59
65
  // In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
60
66
  // discovered that we needed to factor the sampling work away from `thread_context_collector_on_gc_finish` and into a
@@ -68,6 +74,7 @@
68
74
  #define IS_WALL_TIME true
69
75
  #define IS_NOT_WALL_TIME false
70
76
  #define MISSING_TRACER_CONTEXT_KEY 0
77
+ #define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)
71
78
 
72
79
  static ID at_active_span_id; // id of :@active_span in Ruby
73
80
  static ID at_active_trace_id; // id of :@active_trace in Ruby
@@ -75,6 +82,9 @@ static ID at_id_id; // id of :@id in Ruby
75
82
  static ID at_resource_id; // id of :@resource in Ruby
76
83
  static ID at_root_span_id; // id of :@root_span in Ruby
77
84
  static ID at_type_id; // id of :@type in Ruby
85
+ static ID at_otel_values_id; // id of :@otel_values in Ruby
86
+ static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
87
+ static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby
78
88
 
79
89
  // Contains state for a single ThreadContext instance
80
90
  struct thread_context_collector_state {
@@ -107,6 +117,8 @@ struct thread_context_collector_state {
107
117
  monotonic_to_system_epoch_state time_converter_state;
108
118
  // Used to identify the main thread, to give it a fallback name
109
119
  VALUE main_thread;
120
+ // Used when extracting trace identifiers from otel spans. Lazily initialized.
121
+ VALUE otel_current_span_key;
110
122
 
111
123
  struct stats {
112
124
  // Track how many garbage collection samples we've taken.
@@ -114,6 +126,14 @@ struct thread_context_collector_state {
114
126
  // See thread_context_collector_on_gc_start for details
115
127
  unsigned int gc_samples_missed_due_to_missing_context;
116
128
  } stats;
129
+
130
+ struct {
131
+ unsigned long accumulated_cpu_time_ns;
132
+ unsigned long accumulated_wall_time_ns;
133
+
134
+ long wall_time_at_previous_gc_ns; // Will be INVALID_TIME unless there's accumulated time above
135
+ long wall_time_at_last_flushed_gc_event_ns; // Starts at 0 and then will always be valid
136
+ } gc_tracking;
117
137
  };
118
138
 
119
139
  // Tracks per-thread state
@@ -127,15 +147,10 @@ struct per_thread_context {
127
147
  long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
128
148
 
129
149
  struct {
130
- // Both of these fields are set by on_gc_start and kept until sample_after_gc is called.
150
+ // Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
131
151
  // Outside of this window, they will be INVALID_TIME.
132
152
  long cpu_time_at_start_ns;
133
153
  long wall_time_at_start_ns;
134
-
135
- // Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
136
- // Outside of this window, they will be INVALID_TIME.
137
- long cpu_time_at_finish_ns;
138
- long wall_time_at_finish_ns;
139
154
  } gc_tracking;
140
155
  };
141
156
 
@@ -180,7 +195,6 @@ static void trigger_sample_for_thread(
180
195
  VALUE stack_from_thread,
181
196
  struct per_thread_context *thread_context,
182
197
  sample_values values,
183
- sample_type type,
184
198
  long current_monotonic_wall_time_ns,
185
199
  ddog_CharSlice *ruby_vm_type,
186
200
  ddog_CharSlice *class_name
@@ -193,6 +207,7 @@ static VALUE _native_inspect(VALUE self, VALUE collector_instance);
193
207
  static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
194
208
  static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
195
209
  static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state);
210
+ static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state);
196
211
  static void remove_context_for_dead_threads(struct thread_context_collector_state *state);
197
212
  static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
198
213
  static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
@@ -200,13 +215,22 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
200
215
  static long cpu_time_now_ns(struct per_thread_context *thread_context);
201
216
  static long thread_id_for(VALUE thread);
202
217
  static VALUE _native_stats(VALUE self, VALUE collector_instance);
218
+ static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
203
219
  static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
204
- static bool should_collect_resource(VALUE root_span_type);
220
+ static bool should_collect_resource(VALUE root_span);
205
221
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
206
222
  static VALUE thread_list(struct thread_context_collector_state *state);
207
223
  static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
208
224
  static VALUE _native_new_empty_thread(VALUE self);
209
- ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
225
+ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
226
+ static void ddtrace_otel_trace_identifiers_for(
227
+ struct thread_context_collector_state *state,
228
+ VALUE *active_trace,
229
+ VALUE *root_span,
230
+ VALUE *numeric_span_id,
231
+ VALUE active_span,
232
+ VALUE otel_values
233
+ );
210
234
 
211
235
  void collectors_thread_context_init(VALUE profiling_module) {
212
236
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -235,6 +259,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
235
259
  rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
236
260
  rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
237
261
  rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
262
+ rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
238
263
  rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
239
264
 
240
265
  at_active_span_id = rb_intern_const("@active_span");
@@ -243,6 +268,11 @@ void collectors_thread_context_init(VALUE profiling_module) {
243
268
  at_resource_id = rb_intern_const("@resource");
244
269
  at_root_span_id = rb_intern_const("@root_span");
245
270
  at_type_id = rb_intern_const("@type");
271
+ at_otel_values_id = rb_intern_const("@otel_values");
272
+ at_parent_span_id_id = rb_intern_const("@parent_span_id");
273
+ at_datadog_trace_id = rb_intern_const("@datadog_trace");
274
+
275
+ gc_profiling_init();
246
276
  }
247
277
 
248
278
  // This structure is used to define a Ruby object that stores a pointer to a struct thread_context_collector_state
@@ -268,6 +298,7 @@ static void thread_context_collector_typed_data_mark(void *state_ptr) {
268
298
  st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
269
299
  rb_gc_mark(state->thread_list_buffer);
270
300
  rb_gc_mark(state->main_thread);
301
+ rb_gc_mark(state->otel_current_span_key);
271
302
  }
272
303
 
273
304
  static void thread_context_collector_typed_data_free(void *state_ptr) {
@@ -320,6 +351,9 @@ static VALUE _native_new(VALUE klass) {
320
351
  state->allocation_type_enabled = true;
321
352
  state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
322
353
  state->main_thread = rb_thread_main();
354
+ state->otel_current_span_key = Qnil;
355
+ state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
356
+ state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
323
357
 
324
358
  return TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
325
359
  }
@@ -465,7 +499,11 @@ void update_metrics_and_sample(
465
499
  long wall_time_elapsed_ns = update_time_since_previous_sample(
466
500
  &thread_context->wall_time_at_previous_sample_ns,
467
501
  current_monotonic_wall_time_ns,
468
- thread_context->gc_tracking.wall_time_at_start_ns,
502
+ // We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
503
+ // accounting to change during GC.
504
+ // E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing
505
+ // GC or not.
506
+ INVALID_TIME,
469
507
  IS_WALL_TIME
470
508
  );
471
509
 
@@ -475,7 +513,6 @@ void update_metrics_and_sample(
475
513
  stack_from_thread,
476
514
  thread_context,
477
515
  (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
478
- SAMPLE_REGULAR,
479
516
  current_monotonic_wall_time_ns,
480
517
  NULL,
481
518
  NULL
@@ -484,7 +521,7 @@ void update_metrics_and_sample(
484
521
 
485
522
  // This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
486
523
  // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
487
- // create a stack sample that blames the cpu/wall time spent from now until the end of the garbage collector work.
524
+ // create an event including the cpu/wall time spent in garbage collector work.
488
525
  //
489
526
  // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
490
527
  // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
@@ -509,27 +546,14 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
509
546
  return;
510
547
  }
511
548
 
512
- // If these fields are set, there's an existing GC sample that still needs to be written out by `sample_after_gc`.
513
- //
514
- // When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
515
- // called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
516
- // before we can actually run that method.
517
- //
518
- // We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
519
- // `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
520
- // then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
521
- // there was a single, longer GC period.
522
- if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
523
- thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
524
-
525
- // Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
549
+ // Here we record the wall-time first and in on_gc_finish we record it second to try to avoid having wall-time be slightly < cpu-time
526
550
  thread_context->gc_tracking.wall_time_at_start_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
527
551
  thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
528
552
  }
529
553
 
530
554
  // This function gets called when Ruby has finished running the Garbage Collector on the current thread.
531
- // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
532
- // create a stack sample that blames the cpu/wall time spent from the start of garbage collector work until now.
555
+ // It records the cpu/wall-time observed during GC, which will be used to later
556
+ // create an event including the cpu/wall time spent from the start of garbage collector work until now.
533
557
  //
534
558
  // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
535
559
  // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
@@ -537,9 +561,9 @@ void thread_context_collector_on_gc_start(VALUE self_instance) {
537
561
  //
538
562
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
539
563
  // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
540
- void thread_context_collector_on_gc_finish(VALUE self_instance) {
564
+ bool thread_context_collector_on_gc_finish(VALUE self_instance) {
541
565
  struct thread_context_collector_state *state;
542
- if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
566
+ if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
543
567
  // This should never fail the the above check passes
544
568
  TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
545
569
 
@@ -547,29 +571,70 @@ void thread_context_collector_on_gc_finish(VALUE self_instance) {
547
571
 
548
572
  // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
549
573
  // how often this happens -- see on_gc_start.
550
- if (thread_context == NULL) return;
574
+ if (thread_context == NULL) return false;
551
575
 
552
- if (thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME &&
553
- thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) {
576
+ long cpu_time_at_start_ns = thread_context->gc_tracking.cpu_time_at_start_ns;
577
+ long wall_time_at_start_ns = thread_context->gc_tracking.wall_time_at_start_ns;
578
+
579
+ if (cpu_time_at_start_ns == INVALID_TIME && wall_time_at_start_ns == INVALID_TIME) {
554
580
  // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
555
581
  // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
556
582
  // do nothing.
557
- return;
583
+ return false;
584
+ }
585
+
586
+ // Mark thread as no longer in GC
587
+ thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
588
+ thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
589
+
590
+ // Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time
591
+ long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
592
+ long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
593
+
594
+ // If our end timestamp is not OK, we bail out
595
+ if (wall_time_at_finish_ns == 0) return false;
596
+
597
+ long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - cpu_time_at_start_ns;
598
+ long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - wall_time_at_start_ns;
599
+
600
+ // Wall-time can go backwards if the system clock gets changed (and we observed spurious jumps back on macOS as well)
601
+ // so let's ensure we don't get negative values for time deltas.
602
+ gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0);
603
+ gc_wall_time_elapsed_ns = long_max_of(gc_wall_time_elapsed_ns, 0);
604
+
605
+ if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
606
+ state->gc_tracking.accumulated_cpu_time_ns = 0;
607
+ state->gc_tracking.accumulated_wall_time_ns = 0;
608
+ }
609
+
610
+ state->gc_tracking.accumulated_cpu_time_ns += gc_cpu_time_elapsed_ns;
611
+ state->gc_tracking.accumulated_wall_time_ns += gc_wall_time_elapsed_ns;
612
+ state->gc_tracking.wall_time_at_previous_gc_ns = wall_time_at_finish_ns;
613
+
614
+ // Update cpu-time accounting so it doesn't include the cpu-time spent in GC during the next sample
615
+ // We don't update the wall-time because we don't subtract the wall-time spent in GC (see call to
616
+ // `update_time_since_previous_sample` for wall-time in `update_metrics_and_sample`).
617
+ if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
618
+ thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
558
619
  }
559
620
 
560
- // Here we record the wall-time second and in on_gc_start we record it first to avoid having wall-time be slightly < cpu-time
561
- thread_context->gc_tracking.cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
562
- thread_context->gc_tracking.wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
621
+ // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
622
+ // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
623
+ // samples first.
624
+ bool over_flush_time_treshold =
625
+ (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
626
+
627
+ if (over_flush_time_treshold) {
628
+ return true;
629
+ } else {
630
+ return gc_profiling_has_major_gc_finished();
631
+ }
563
632
  }
564
633
 
565
- // This function gets called shortly after Ruby has finished running the Garbage Collector.
634
+ // This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
566
635
  // It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
567
636
  // GC-related tracking.
568
637
  //
569
- // Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
570
- // and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
571
- // set on their context.
572
- //
573
638
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
574
639
  // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
575
640
  // Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
@@ -578,70 +643,45 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
578
643
  struct thread_context_collector_state *state;
579
644
  TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
580
645
 
581
- VALUE threads = thread_list(state);
582
- bool sampled_any_thread = false;
583
-
584
- const long thread_count = RARRAY_LEN(threads);
585
- for (long i = 0; i < thread_count; i++) {
586
- VALUE thread = RARRAY_AREF(threads, i);
587
- struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
646
+ if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
647
+ rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
648
+ }
588
649
 
589
- if (
590
- thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME ||
591
- thread_context->gc_tracking.cpu_time_at_finish_ns == INVALID_TIME ||
592
- thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
593
- thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
594
- ) continue; // Ignore threads with no/incomplete garbage collection data
595
-
596
- sampled_any_thread = true;
597
-
598
- long gc_cpu_time_elapsed_ns =
599
- thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
600
- long gc_wall_time_elapsed_ns =
601
- thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
602
-
603
- // We don't expect non-wall time to go backwards, so let's flag this as a bug
604
- if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
605
- // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
606
- // was a bug.
607
- // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
608
- // https://github.com/DataDog/dd-trace-rb/pull/2336.
609
- if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
610
-
611
- if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
612
- // Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
613
- // come up with a crazy value for the frame
614
- rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
615
- }
650
+ int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
651
+ ddog_prof_Label labels[max_labels_needed_for_gc];
652
+ uint8_t label_pos = gc_profiling_set_metadata(labels, max_labels_needed_for_gc);
616
653
 
617
- trigger_sample_for_thread(
618
- state,
619
- /* thread: */ thread,
620
- /* stack_from_thread: */ thread,
621
- thread_context,
622
- (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
623
- SAMPLE_IN_GC,
624
- INVALID_TIME, // For now we're not collecting timestamps for these events
625
- NULL,
626
- NULL
627
- );
654
+ ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
628
655
 
629
- // Mark thread as no longer in GC
630
- thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
631
- thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
632
- thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
633
- thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
656
+ // The end_timestamp_ns is treated specially by libdatadog and that's why it's not added as a ddog_prof_Label
657
+ int64_t end_timestamp_ns = 0;
634
658
 
635
- // Update counters so that they won't include the time in GC during the next sample
636
- if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
637
- thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
638
- }
639
- if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
640
- thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
641
- }
659
+ if (state->timeline_enabled) {
660
+ end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns);
642
661
  }
643
662
 
644
- if (sampled_any_thread) state->stats.gc_samples++;
663
+ record_placeholder_stack(
664
+ state->sampling_buffer,
665
+ state->recorder_instance,
666
+ (sample_values) {
667
+ // This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
668
+ // timeline duration.
669
+ // This is done to enable two use-cases:
670
+ // * regular cpu/wall-time makes this event show up as a regular stack in the flamegraph
671
+ // * the timeline duration is used when the event shows up in the timeline
672
+ .cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns,
673
+ .cpu_or_wall_samples = 1,
674
+ .wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
675
+ .timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
676
+ },
677
+ (sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns},
678
+ DDOG_CHARSLICE_C("Garbage Collection")
679
+ );
680
+
681
+ state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns;
682
+ state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
683
+
684
+ state->stats.gc_samples++;
645
685
 
646
686
  // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
647
687
  return Qnil;
@@ -653,7 +693,6 @@ static void trigger_sample_for_thread(
653
693
  VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
654
694
  struct per_thread_context *thread_context,
655
695
  sample_values values,
656
- sample_type type,
657
696
  long current_monotonic_wall_time_ns,
658
697
  // These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
659
698
  ddog_CharSlice *ruby_vm_type,
@@ -776,8 +815,7 @@ static void trigger_sample_for_thread(
776
815
  state->sampling_buffer,
777
816
  state->recorder_instance,
778
817
  values,
779
- (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns},
780
- type
818
+ (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}
781
819
  );
782
820
  }
783
821
 
@@ -874,9 +912,7 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_c
874
912
 
875
913
  // These will only be used during a GC operation
876
914
  thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
877
- thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
878
915
  thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
879
- thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
880
916
  }
881
917
 
882
918
  static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -901,6 +937,8 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
901
937
  state->time_converter_state.delta_to_epoch_ns
902
938
  ));
903
939
  rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
940
+ rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
941
+ rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
904
942
 
905
943
  return result;
906
944
  }
@@ -927,9 +965,7 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
927
965
  ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),
928
966
 
929
967
  ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
930
- ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
931
968
  ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
932
- ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
933
969
  };
934
970
  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);
935
971
 
@@ -947,6 +983,19 @@ static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state) {
947
983
  return stats_as_hash;
948
984
  }
949
985
 
986
+ static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state) {
987
+ // Update this when modifying state struct (gc_tracking inner struct)
988
+ VALUE result = rb_hash_new();
989
+ VALUE arguments[] = {
990
+ ID2SYM(rb_intern("accumulated_cpu_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_cpu_time_ns),
991
+ ID2SYM(rb_intern("accumulated_wall_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_wall_time_ns),
992
+ ID2SYM(rb_intern("wall_time_at_previous_gc_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_previous_gc_ns),
993
+ ID2SYM(rb_intern("wall_time_at_last_flushed_gc_event_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_last_flushed_gc_event_ns),
994
+ };
995
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(result, arguments[i], arguments[i+1]);
996
+ return result;
997
+ }
998
+
950
999
  static void remove_context_for_dead_threads(struct thread_context_collector_state *state) {
951
1000
  st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
952
1001
  }
@@ -1049,8 +1098,6 @@ VALUE enforce_thread_context_collector_instance(VALUE object) {
1049
1098
 
1050
1099
  // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
1051
1100
  // It SHOULD NOT be used for other purposes.
1052
- //
1053
- // Returns the whole contents of the per_thread_context structs being tracked.
1054
1101
  static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
1055
1102
  struct thread_context_collector_state *state;
1056
1103
  TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
@@ -1058,6 +1105,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance)
1058
1105
  return stats_as_ruby_hash(state);
1059
1106
  }
1060
1107
 
1108
+ // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
1109
+ // It SHOULD NOT be used for other purposes.
1110
+ static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
1111
+ struct thread_context_collector_state *state;
1112
+ TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
1113
+
1114
+ return gc_tracking_as_ruby_hash(state);
1115
+ }
1116
+
1061
1117
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
1062
1118
  static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
1063
1119
  if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
@@ -1070,10 +1126,19 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
1070
1126
 
1071
1127
  VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
1072
1128
  VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
1073
- if (root_span == Qnil || active_span == Qnil) return;
1129
+ // Note: On Ruby 3.x `rb_attr_get` is exactly the same as `rb_ivar_get`. For Ruby 2.x, the difference is that
1130
+ // `rb_ivar_get` can trigger "warning: instance variable @otel_values not initialized" if warnings are enabled and
1131
+ // opentelemetry is not in use, whereas `rb_attr_get` does the lookup without generating the warning.
1132
+ VALUE otel_values = rb_attr_get(active_trace, at_otel_values_id /* @otel_values */);
1133
+
1134
+ VALUE numeric_span_id = Qnil;
1135
+
1136
+ if (otel_values != Qnil) ddtrace_otel_trace_identifiers_for(state, &active_trace, &root_span, &numeric_span_id, active_span, otel_values);
1137
+
1138
+ if (root_span == Qnil || (active_span == Qnil && numeric_span_id == Qnil)) return;
1074
1139
 
1075
1140
  VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
1076
- VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
1141
+ if (active_span != Qnil && numeric_span_id == Qnil) numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
1077
1142
  if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
1078
1143
 
1079
1144
  trace_identifiers_result->local_root_span_id = NUM2ULL(numeric_local_root_span_id);
@@ -1081,10 +1146,7 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
1081
1146
 
1082
1147
  trace_identifiers_result->valid = true;
1083
1148
 
1084
- if (!state->endpoint_collection_enabled) return;
1085
-
1086
- VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
1087
- if (root_span_type == Qnil || !should_collect_resource(root_span_type)) return;
1149
+ if (!state->endpoint_collection_enabled || !should_collect_resource(root_span)) return;
1088
1150
 
1089
1151
  VALUE trace_resource = rb_ivar_get(active_trace, at_resource_id /* @resource */);
1090
1152
  if (RB_TYPE_P(trace_resource, T_STRING)) {
@@ -1095,21 +1157,32 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
1095
1157
  }
1096
1158
  }
1097
1159
 
1098
- // We only collect the resource for spans of types:
1160
+ // We opt-in to collecting the resource for spans of types:
1099
1161
  // * 'web', for web requests
1100
- // * proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request)
1162
+ // * 'proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request)
1163
+ // * 'worker', used for sidekiq and similar background job processors
1101
1164
  //
1102
- // NOTE: Currently we're only interested in HTTP service endpoints. Over time, this list may be expanded.
1165
+ // Over time, this list may be expanded.
1103
1166
  // Resources MUST NOT include personal identifiable information (PII); this should not be the case with
1104
1167
  // ddtrace integrations, but worth mentioning just in case :)
1105
- static bool should_collect_resource(VALUE root_span_type) {
1168
+ static bool should_collect_resource(VALUE root_span) {
1169
+ VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */);
1170
+ if (root_span_type == Qnil) return false;
1106
1171
  ENFORCE_TYPE(root_span_type, T_STRING);
1107
1172
 
1108
1173
  int root_span_type_length = RSTRING_LEN(root_span_type);
1109
1174
  const char *root_span_type_value = StringValuePtr(root_span_type);
1110
1175
 
1111
- return (root_span_type_length == strlen("web") && (memcmp("web", root_span_type_value, strlen("web")) == 0)) ||
1176
+ bool is_web_request =
1177
+ (root_span_type_length == strlen("web") && (memcmp("web", root_span_type_value, strlen("web")) == 0)) ||
1112
1178
  (root_span_type_length == strlen("proxy") && (memcmp("proxy", root_span_type_value, strlen("proxy")) == 0));
1179
+
1180
+ if (is_web_request) return true;
1181
+
1182
+ bool is_worker_request =
1183
+ (root_span_type_length == strlen("worker") && (memcmp("worker", root_span_type_value, strlen("worker")) == 0));
1184
+
1185
+ return is_worker_request;
1113
1186
  }
1114
1187
 
1115
1188
  // After the Ruby VM forks, this method gets called in the child process to clean up any leftover state from the parent.
@@ -1210,13 +1283,14 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
1210
1283
  }
1211
1284
  }
1212
1285
 
1286
+ track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);
1287
+
1213
1288
  trigger_sample_for_thread(
1214
1289
  state,
1215
1290
  /* thread: */ current_thread,
1216
1291
  /* stack_from_thread: */ current_thread,
1217
1292
  get_or_create_context_for(current_thread, state),
1218
1293
  (sample_values) {.alloc_samples = sample_weight},
1219
- SAMPLE_REGULAR,
1220
1294
  INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
1221
1295
  &ruby_vm_type,
1222
1296
  optional_class_name
@@ -1239,7 +1313,7 @@ static VALUE _native_new_empty_thread(DDTRACE_UNUSED VALUE self) {
1239
1313
  return rb_thread_create(new_empty_thread_inner, NULL);
1240
1314
  }
1241
1315
 
1242
- ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
1316
+ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
1243
1317
  switch (type) {
1244
1318
  case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("Object");
1245
1319
  case(RUBY_T_CLASS ): return DDOG_CHARSLICE_C("Class");
@@ -1264,3 +1338,65 @@ ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
1264
1338
  default: return DDOG_CHARSLICE_C("(VM Internal, Missing class)");
1265
1339
  }
1266
1340
  }
1341
+
1342
+ static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
1343
+ if (state->otel_current_span_key == Qnil) {
1344
+ VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog"));
1345
+ VALUE opentelemetry_module = rb_const_get(datadog_module, rb_intern("OpenTelemetry"));
1346
+ VALUE api_module = rb_const_get(opentelemetry_module, rb_intern("API"));
1347
+ VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
1348
+ VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
1349
+
1350
+ if (current_span_key == Qnil) {
1351
+ rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
1352
+ }
1353
+
1354
+ state->otel_current_span_key = current_span_key;
1355
+ }
1356
+
1357
+ return state->otel_current_span_key;
1358
+ }
1359
+
1360
+ // This method gets used when ddtrace is being used indirectly via the otel APIs. Information gets stored slightly
1361
+ // differently, and this codepath handles it.
1362
+ static void ddtrace_otel_trace_identifiers_for(
1363
+ struct thread_context_collector_state *state,
1364
+ VALUE *active_trace,
1365
+ VALUE *root_span,
1366
+ VALUE *numeric_span_id,
1367
+ VALUE active_span,
1368
+ VALUE otel_values
1369
+ ) {
1370
+ VALUE resolved_numeric_span_id =
1371
+ active_span == Qnil ?
1372
+ // For traces started from otel spans, the span id will be empty, and the @parent_span_id has the right value
1373
+ rb_ivar_get(*active_trace, at_parent_span_id_id /* @parent_span_id */) :
1374
+ // Regular span created by ddtrace
1375
+ rb_ivar_get(active_span, at_id_id /* @id */);
1376
+
1377
+ if (resolved_numeric_span_id == Qnil) return;
1378
+
1379
+ VALUE otel_current_span_key = get_otel_current_span_key(state);
1380
+ VALUE current_trace = *active_trace;
1381
+
1382
+ // ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
1383
+ // trace and span representing it. Each ddtrace trace is then connected to the previous otel span, forming a linked
1384
+ // list. The local root span is going to be the trace/span we find at the end of this linked list.
1385
+ while (otel_values != Qnil) {
1386
+ VALUE otel_span = rb_hash_lookup(otel_values, otel_current_span_key);
1387
+ if (otel_span == Qnil) break;
1388
+ VALUE next_trace = rb_ivar_get(otel_span, at_datadog_trace_id);
1389
+ if (next_trace == Qnil) break;
1390
+
1391
+ current_trace = next_trace;
1392
+ otel_values = rb_ivar_get(current_trace, at_otel_values_id /* @otel_values */);
1393
+ }
1394
+
1395
+ // We found the last trace in the linked list. This contains the local root span
1396
+ VALUE resolved_root_span = rb_ivar_get(current_trace, at_root_span_id /* @root_span */);
1397
+ if (resolved_root_span == Qnil) return;
1398
+
1399
+ *root_span = resolved_root_span;
1400
+ *active_trace = current_trace;
1401
+ *numeric_span_id = resolved_numeric_span_id;
1402
+ }