ddtrace 1.12.1 → 1.23.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (509) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +613 -9
  3. data/LICENSE-3rdparty.csv +1 -1
  4. data/bin/ddprofrb +15 -0
  5. data/bin/ddtracerb +3 -1
  6. data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
  7. data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
  8. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +3 -5
  9. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -3
  10. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +3 -22
  11. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -1
  12. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +338 -108
  13. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +422 -0
  14. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +101 -0
  15. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +22 -14
  16. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +4 -0
  17. data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
  18. data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
  19. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +3 -0
  20. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +111 -118
  21. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +11 -4
  22. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +545 -144
  23. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +3 -2
  24. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +68 -17
  25. data/ext/datadog_profiling_native_extension/heap_recorder.c +1047 -0
  26. data/ext/datadog_profiling_native_extension/heap_recorder.h +166 -0
  27. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +6 -0
  28. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +60 -32
  29. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +62 -0
  30. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +42 -0
  31. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +50 -4
  32. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +155 -32
  33. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +16 -0
  34. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +19 -3
  35. data/ext/datadog_profiling_native_extension/ruby_helpers.c +267 -0
  36. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +33 -0
  37. data/ext/datadog_profiling_native_extension/stack_recorder.c +1040 -0
  38. data/ext/datadog_profiling_native_extension/stack_recorder.h +27 -0
  39. data/ext/datadog_profiling_native_extension/time_helpers.c +53 -0
  40. data/ext/datadog_profiling_native_extension/time_helpers.h +26 -0
  41. data/lib/datadog/appsec/assets/waf_rules/processors.json +92 -0
  42. data/lib/datadog/appsec/assets/waf_rules/recommended.json +698 -75
  43. data/lib/datadog/appsec/assets/waf_rules/scanners.json +114 -0
  44. data/lib/datadog/appsec/assets/waf_rules/strict.json +98 -8
  45. data/lib/datadog/appsec/assets.rb +8 -0
  46. data/lib/datadog/appsec/component.rb +21 -2
  47. data/lib/datadog/appsec/configuration/settings.rb +167 -189
  48. data/lib/datadog/appsec/configuration.rb +0 -79
  49. data/lib/datadog/appsec/contrib/auto_instrument.rb +2 -4
  50. data/lib/datadog/appsec/contrib/devise/event.rb +57 -0
  51. data/lib/datadog/appsec/contrib/devise/ext.rb +13 -0
  52. data/lib/datadog/appsec/contrib/devise/integration.rb +42 -0
  53. data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +76 -0
  54. data/lib/datadog/appsec/contrib/devise/patcher/registration_controller_patch.rb +54 -0
  55. data/lib/datadog/appsec/contrib/devise/patcher.rb +45 -0
  56. data/lib/datadog/appsec/contrib/devise/resource.rb +35 -0
  57. data/lib/datadog/appsec/contrib/devise/tracking.rb +57 -0
  58. data/lib/datadog/appsec/contrib/rack/ext.rb +2 -1
  59. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +6 -2
  60. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +8 -6
  61. data/lib/datadog/appsec/contrib/rack/reactive/request.rb +3 -8
  62. data/lib/datadog/appsec/contrib/rack/reactive/request_body.rb +3 -6
  63. data/lib/datadog/appsec/contrib/rack/reactive/response.rb +3 -6
  64. data/lib/datadog/appsec/contrib/rack/request_body_middleware.rb +3 -2
  65. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +77 -27
  66. data/lib/datadog/appsec/contrib/rails/ext.rb +3 -2
  67. data/lib/datadog/appsec/contrib/rails/framework.rb +1 -3
  68. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +3 -2
  69. data/lib/datadog/appsec/contrib/rails/patcher.rb +17 -11
  70. data/lib/datadog/appsec/contrib/rails/reactive/action.rb +3 -6
  71. data/lib/datadog/appsec/contrib/sinatra/ext.rb +2 -1
  72. data/lib/datadog/appsec/contrib/sinatra/framework.rb +1 -3
  73. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +6 -4
  74. data/lib/datadog/appsec/contrib/sinatra/patcher.rb +13 -7
  75. data/lib/datadog/appsec/contrib/sinatra/reactive/routed.rb +3 -6
  76. data/lib/datadog/appsec/event.rb +106 -50
  77. data/lib/datadog/appsec/extensions.rb +1 -130
  78. data/lib/datadog/appsec/monitor/gateway/watcher.rb +3 -3
  79. data/lib/datadog/appsec/monitor/reactive/set_user.rb +3 -6
  80. data/lib/datadog/appsec/processor/actions.rb +49 -0
  81. data/lib/datadog/appsec/processor/rule_loader.rb +60 -0
  82. data/lib/datadog/appsec/processor/rule_merger.rb +22 -2
  83. data/lib/datadog/appsec/processor.rb +35 -7
  84. data/lib/datadog/appsec/rate_limiter.rb +1 -1
  85. data/lib/datadog/appsec/remote.rb +17 -11
  86. data/lib/datadog/appsec/response.rb +82 -4
  87. data/lib/datadog/appsec/sample_rate.rb +21 -0
  88. data/lib/datadog/appsec.rb +3 -4
  89. data/lib/datadog/auto_instrument.rb +3 -0
  90. data/lib/datadog/core/backport.rb +51 -0
  91. data/lib/datadog/core/configuration/agent_settings_resolver.rb +38 -29
  92. data/lib/datadog/core/configuration/base.rb +6 -16
  93. data/lib/datadog/core/configuration/components.rb +20 -7
  94. data/lib/datadog/core/configuration/ext.rb +28 -5
  95. data/lib/datadog/core/configuration/option.rb +271 -21
  96. data/lib/datadog/core/configuration/option_definition.rb +73 -32
  97. data/lib/datadog/core/configuration/options.rb +27 -15
  98. data/lib/datadog/core/configuration/settings.rb +398 -119
  99. data/lib/datadog/core/configuration.rb +24 -4
  100. data/lib/datadog/core/diagnostics/environment_logger.rb +132 -235
  101. data/lib/datadog/core/environment/class_count.rb +6 -6
  102. data/lib/datadog/core/environment/execution.rb +103 -0
  103. data/lib/datadog/core/environment/ext.rb +13 -11
  104. data/lib/datadog/core/environment/git.rb +25 -0
  105. data/lib/datadog/core/environment/identity.rb +18 -48
  106. data/lib/datadog/core/environment/platform.rb +7 -1
  107. data/lib/datadog/core/environment/variable_helpers.rb +0 -69
  108. data/lib/datadog/core/environment/yjit.rb +58 -0
  109. data/lib/datadog/core/error.rb +1 -0
  110. data/lib/datadog/core/git/ext.rb +6 -23
  111. data/lib/datadog/core/logging/ext.rb +3 -1
  112. data/lib/datadog/core/metrics/ext.rb +7 -5
  113. data/lib/datadog/core/remote/client/capabilities.rb +7 -2
  114. data/lib/datadog/core/remote/client.rb +3 -0
  115. data/lib/datadog/core/remote/component.rb +52 -48
  116. data/lib/datadog/core/remote/configuration/content.rb +28 -1
  117. data/lib/datadog/core/remote/configuration/repository.rb +3 -1
  118. data/lib/datadog/core/remote/ext.rb +2 -1
  119. data/lib/datadog/core/remote/negotiation.rb +20 -7
  120. data/lib/datadog/core/remote/tie/tracing.rb +39 -0
  121. data/lib/datadog/core/remote/tie.rb +27 -0
  122. data/lib/datadog/core/remote/transport/config.rb +60 -0
  123. data/lib/datadog/core/remote/transport/http/api/instance.rb +39 -0
  124. data/lib/datadog/core/remote/transport/http/api/spec.rb +21 -0
  125. data/lib/datadog/core/remote/transport/http/api.rb +58 -0
  126. data/lib/datadog/core/remote/transport/http/builder.rb +219 -0
  127. data/lib/datadog/core/remote/transport/http/client.rb +48 -0
  128. data/lib/datadog/core/remote/transport/http/config.rb +280 -0
  129. data/lib/datadog/core/remote/transport/http/negotiation.rb +146 -0
  130. data/lib/datadog/core/remote/transport/http.rb +179 -0
  131. data/lib/datadog/core/{transport → remote/transport}/negotiation.rb +25 -23
  132. data/lib/datadog/core/remote/worker.rb +11 -5
  133. data/lib/datadog/core/runtime/ext.rb +22 -12
  134. data/lib/datadog/core/runtime/metrics.rb +43 -0
  135. data/lib/datadog/core/telemetry/client.rb +28 -10
  136. data/lib/datadog/core/telemetry/emitter.rb +9 -11
  137. data/lib/datadog/core/telemetry/event.rb +250 -44
  138. data/lib/datadog/core/telemetry/ext.rb +8 -1
  139. data/lib/datadog/core/telemetry/heartbeat.rb +3 -7
  140. data/lib/datadog/core/telemetry/http/ext.rb +13 -8
  141. data/lib/datadog/core/telemetry/http/response.rb +4 -0
  142. data/lib/datadog/core/telemetry/http/transport.rb +10 -3
  143. data/lib/datadog/core/telemetry/request.rb +59 -0
  144. data/lib/datadog/core/transport/ext.rb +49 -0
  145. data/lib/datadog/core/transport/http/adapters/net.rb +168 -0
  146. data/lib/datadog/core/transport/http/adapters/registry.rb +29 -0
  147. data/lib/datadog/core/transport/http/adapters/test.rb +89 -0
  148. data/lib/datadog/core/transport/http/adapters/unix_socket.rb +83 -0
  149. data/lib/datadog/core/transport/http/api/endpoint.rb +31 -0
  150. data/lib/datadog/core/transport/http/api/fallbacks.rb +26 -0
  151. data/lib/datadog/core/transport/http/api/map.rb +18 -0
  152. data/lib/datadog/core/transport/http/env.rb +62 -0
  153. data/lib/datadog/core/transport/http/response.rb +60 -0
  154. data/lib/datadog/core/transport/parcel.rb +22 -0
  155. data/lib/datadog/core/transport/request.rb +17 -0
  156. data/lib/datadog/core/transport/response.rb +64 -0
  157. data/lib/datadog/core/utils/duration.rb +52 -0
  158. data/lib/datadog/core/utils/hash.rb +47 -0
  159. data/lib/datadog/core/utils/network.rb +1 -1
  160. data/lib/datadog/core/utils/safe_dup.rb +27 -20
  161. data/lib/datadog/core/utils/url.rb +25 -0
  162. data/lib/datadog/core/utils.rb +1 -1
  163. data/lib/datadog/core/workers/async.rb +3 -2
  164. data/lib/datadog/core/workers/polling.rb +2 -2
  165. data/lib/datadog/kit/appsec/events.rb +139 -89
  166. data/lib/datadog/kit/enable_core_dumps.rb +5 -6
  167. data/lib/datadog/kit/identity.rb +80 -65
  168. data/lib/datadog/opentelemetry/api/context.rb +10 -3
  169. data/lib/datadog/opentelemetry/sdk/propagator.rb +5 -3
  170. data/lib/datadog/opentelemetry/sdk/span_processor.rb +48 -5
  171. data/lib/datadog/opentelemetry/sdk/trace/span.rb +167 -0
  172. data/lib/datadog/opentelemetry/trace.rb +58 -0
  173. data/lib/datadog/opentelemetry.rb +4 -0
  174. data/lib/datadog/opentracer/text_map_propagator.rb +2 -1
  175. data/lib/datadog/opentracer.rb +9 -0
  176. data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
  177. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +43 -20
  178. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +3 -1
  179. data/lib/datadog/profiling/collectors/info.rb +101 -0
  180. data/lib/datadog/profiling/collectors/thread_context.rb +17 -2
  181. data/lib/datadog/profiling/component.rb +248 -97
  182. data/lib/datadog/profiling/exporter.rb +26 -5
  183. data/lib/datadog/profiling/ext.rb +2 -12
  184. data/lib/datadog/profiling/flush.rb +10 -5
  185. data/lib/datadog/profiling/http_transport.rb +23 -6
  186. data/lib/datadog/profiling/load_native_extension.rb +25 -6
  187. data/lib/datadog/profiling/native_extension.rb +1 -22
  188. data/lib/datadog/profiling/profiler.rb +36 -13
  189. data/lib/datadog/profiling/scheduler.rb +20 -15
  190. data/lib/datadog/profiling/stack_recorder.rb +19 -4
  191. data/lib/datadog/profiling/tag_builder.rb +5 -0
  192. data/lib/datadog/profiling/tasks/exec.rb +3 -3
  193. data/lib/datadog/profiling/tasks/help.rb +3 -3
  194. data/lib/datadog/profiling.rb +28 -79
  195. data/lib/datadog/tracing/component.rb +70 -11
  196. data/lib/datadog/tracing/configuration/agent_settings_resolver.rb +13 -0
  197. data/lib/datadog/tracing/configuration/dynamic/option.rb +71 -0
  198. data/lib/datadog/tracing/configuration/dynamic.rb +64 -0
  199. data/lib/datadog/tracing/configuration/ext.rb +40 -33
  200. data/lib/datadog/tracing/configuration/http.rb +74 -0
  201. data/lib/datadog/tracing/configuration/settings.rb +136 -99
  202. data/lib/datadog/tracing/contrib/action_cable/configuration/settings.rb +10 -6
  203. data/lib/datadog/tracing/contrib/action_cable/ext.rb +21 -18
  204. data/lib/datadog/tracing/contrib/action_mailer/configuration/settings.rb +10 -6
  205. data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
  206. data/lib/datadog/tracing/contrib/action_mailer/ext.rb +21 -18
  207. data/lib/datadog/tracing/contrib/action_pack/configuration/settings.rb +10 -7
  208. data/lib/datadog/tracing/contrib/action_pack/ext.rb +11 -8
  209. data/lib/datadog/tracing/contrib/action_view/configuration/settings.rb +10 -6
  210. data/lib/datadog/tracing/contrib/action_view/ext.rb +13 -10
  211. data/lib/datadog/tracing/contrib/active_job/configuration/settings.rb +14 -7
  212. data/lib/datadog/tracing/contrib/active_job/ext.rb +26 -23
  213. data/lib/datadog/tracing/contrib/active_job/log_injection.rb +1 -1
  214. data/lib/datadog/tracing/contrib/active_job/patcher.rb +1 -1
  215. data/lib/datadog/tracing/contrib/active_model_serializers/configuration/settings.rb +10 -6
  216. data/lib/datadog/tracing/contrib/active_model_serializers/ext.rb +13 -10
  217. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +29 -15
  218. data/lib/datadog/tracing/contrib/active_record/configuration/settings.rb +10 -7
  219. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +2 -6
  220. data/lib/datadog/tracing/contrib/active_record/ext.rb +18 -15
  221. data/lib/datadog/tracing/contrib/active_record/utils.rb +1 -1
  222. data/lib/datadog/tracing/contrib/active_support/cache/instrumentation.rb +106 -202
  223. data/lib/datadog/tracing/contrib/active_support/cache/patcher.rb +3 -0
  224. data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +10 -7
  225. data/lib/datadog/tracing/contrib/active_support/ext.rb +19 -16
  226. data/lib/datadog/tracing/contrib/analytics.rb +0 -1
  227. data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +15 -7
  228. data/lib/datadog/tracing/contrib/aws/ext.rb +38 -24
  229. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +16 -5
  230. data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
  231. data/lib/datadog/tracing/contrib/concurrent_ruby/configuration/settings.rb +3 -2
  232. data/lib/datadog/tracing/contrib/concurrent_ruby/context_composite_executor_service.rb +14 -14
  233. data/lib/datadog/tracing/contrib/concurrent_ruby/ext.rb +4 -2
  234. data/lib/datadog/tracing/contrib/concurrent_ruby/future_patch.rb +3 -10
  235. data/lib/datadog/tracing/contrib/concurrent_ruby/integration.rb +2 -1
  236. data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +19 -2
  237. data/lib/datadog/tracing/contrib/concurrent_ruby/promises_future_patch.rb +22 -0
  238. data/lib/datadog/tracing/contrib/configurable.rb +1 -1
  239. data/lib/datadog/tracing/contrib/configuration/settings.rb +1 -1
  240. data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +21 -7
  241. data/lib/datadog/tracing/contrib/dalli/ext.rb +27 -11
  242. data/lib/datadog/tracing/contrib/dalli/instrumentation.rb +17 -8
  243. data/lib/datadog/tracing/contrib/delayed_job/configuration/settings.rb +14 -7
  244. data/lib/datadog/tracing/contrib/delayed_job/ext.rb +17 -14
  245. data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +15 -7
  246. data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +22 -15
  247. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +104 -99
  248. data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +17 -9
  249. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +48 -3
  250. data/lib/datadog/tracing/contrib/ethon/ext.rb +20 -11
  251. data/lib/datadog/tracing/contrib/ethon/multi_patch.rb +6 -3
  252. data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +20 -10
  253. data/lib/datadog/tracing/contrib/excon/ext.rb +17 -8
  254. data/lib/datadog/tracing/contrib/excon/middleware.rb +25 -5
  255. data/lib/datadog/tracing/contrib/ext.rb +26 -1
  256. data/lib/datadog/tracing/contrib/extensions.rb +38 -2
  257. data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +27 -10
  258. data/lib/datadog/tracing/contrib/faraday/ext.rb +17 -8
  259. data/lib/datadog/tracing/contrib/faraday/middleware.rb +22 -6
  260. data/lib/datadog/tracing/contrib/grape/configuration/settings.rb +9 -6
  261. data/lib/datadog/tracing/contrib/grape/ext.rb +17 -14
  262. data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +9 -6
  263. data/lib/datadog/tracing/contrib/graphql/ext.rb +8 -5
  264. data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +40 -9
  265. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +39 -20
  266. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +37 -18
  267. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor.rb +0 -4
  268. data/lib/datadog/tracing/contrib/grpc/ext.rb +17 -13
  269. data/lib/datadog/tracing/contrib/grpc/formatting.rb +127 -0
  270. data/lib/datadog/tracing/contrib/hanami/configuration/settings.rb +3 -2
  271. data/lib/datadog/tracing/contrib/hanami/ext.rb +10 -8
  272. data/lib/datadog/tracing/contrib/http/circuit_breaker.rb +5 -8
  273. data/lib/datadog/tracing/contrib/http/configuration/settings.rb +34 -11
  274. data/lib/datadog/tracing/contrib/http/distributed/fetcher.rb +2 -2
  275. data/lib/datadog/tracing/contrib/http/ext.rb +17 -9
  276. data/lib/datadog/tracing/contrib/http/instrumentation.rb +27 -7
  277. data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +34 -11
  278. data/lib/datadog/tracing/contrib/httpclient/ext.rb +18 -9
  279. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +22 -5
  280. data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +34 -11
  281. data/lib/datadog/tracing/contrib/httprb/ext.rb +17 -9
  282. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +22 -5
  283. data/lib/datadog/tracing/contrib/kafka/configuration/settings.rb +10 -6
  284. data/lib/datadog/tracing/contrib/kafka/ext.rb +43 -39
  285. data/lib/datadog/tracing/contrib/lograge/configuration/settings.rb +3 -2
  286. data/lib/datadog/tracing/contrib/lograge/ext.rb +3 -1
  287. data/lib/datadog/tracing/contrib/lograge/instrumentation.rb +2 -17
  288. data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +15 -7
  289. data/lib/datadog/tracing/contrib/mongodb/ext.rb +21 -16
  290. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +16 -5
  291. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +22 -14
  292. data/lib/datadog/tracing/contrib/mysql2/ext.rb +16 -10
  293. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +22 -7
  294. data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +53 -0
  295. data/lib/datadog/tracing/contrib/opensearch/ext.rb +38 -0
  296. data/lib/datadog/tracing/contrib/opensearch/integration.rb +44 -0
  297. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +135 -0
  298. data/lib/datadog/tracing/contrib/opensearch/quantize.rb +81 -0
  299. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +23 -14
  300. data/lib/datadog/tracing/contrib/pg/ext.rb +23 -19
  301. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +49 -9
  302. data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +15 -7
  303. data/lib/datadog/tracing/contrib/presto/ext.rb +26 -20
  304. data/lib/datadog/tracing/contrib/presto/instrumentation.rb +14 -5
  305. data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +12 -10
  306. data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +1 -1
  307. data/lib/datadog/tracing/contrib/qless/configuration/settings.rb +13 -8
  308. data/lib/datadog/tracing/contrib/qless/ext.rb +15 -12
  309. data/lib/datadog/tracing/contrib/que/configuration/settings.rb +22 -12
  310. data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
  311. data/lib/datadog/tracing/contrib/racecar/configuration/settings.rb +10 -7
  312. data/lib/datadog/tracing/contrib/racecar/event.rb +5 -5
  313. data/lib/datadog/tracing/contrib/racecar/ext.rb +21 -18
  314. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +17 -12
  315. data/lib/datadog/tracing/contrib/rack/ext.rb +19 -16
  316. data/lib/datadog/tracing/contrib/rack/header_collection.rb +3 -0
  317. data/lib/datadog/tracing/contrib/rack/header_tagging.rb +63 -0
  318. data/lib/datadog/tracing/contrib/rack/middlewares.rb +16 -50
  319. data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
  320. data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +20 -15
  321. data/lib/datadog/tracing/contrib/rails/ext.rb +8 -5
  322. data/lib/datadog/tracing/contrib/rails/log_injection.rb +7 -10
  323. data/lib/datadog/tracing/contrib/rails/patcher.rb +10 -41
  324. data/lib/datadog/tracing/contrib/rails/railtie.rb +3 -3
  325. data/lib/datadog/tracing/contrib/rake/configuration/settings.rb +14 -10
  326. data/lib/datadog/tracing/contrib/rake/ext.rb +15 -12
  327. data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +18 -9
  328. data/lib/datadog/tracing/contrib/redis/ext.rb +23 -15
  329. data/lib/datadog/tracing/contrib/redis/instrumentation.rb +5 -40
  330. data/lib/datadog/tracing/contrib/redis/patcher.rb +34 -21
  331. data/lib/datadog/tracing/contrib/redis/tags.rb +16 -7
  332. data/lib/datadog/tracing/contrib/redis/trace_middleware.rb +46 -33
  333. data/lib/datadog/tracing/contrib/resque/configuration/settings.rb +14 -7
  334. data/lib/datadog/tracing/contrib/resque/ext.rb +10 -7
  335. data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +17 -9
  336. data/lib/datadog/tracing/contrib/rest_client/ext.rb +16 -8
  337. data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +25 -5
  338. data/lib/datadog/tracing/contrib/roda/configuration/settings.rb +10 -6
  339. data/lib/datadog/tracing/contrib/roda/ext.rb +1 -0
  340. data/lib/datadog/tracing/contrib/semantic_logger/configuration/settings.rb +3 -2
  341. data/lib/datadog/tracing/contrib/semantic_logger/ext.rb +3 -1
  342. data/lib/datadog/tracing/contrib/semantic_logger/instrumentation.rb +4 -20
  343. data/lib/datadog/tracing/contrib/sequel/configuration/settings.rb +10 -6
  344. data/lib/datadog/tracing/contrib/sequel/ext.rb +11 -8
  345. data/lib/datadog/tracing/contrib/sequel/utils.rb +7 -7
  346. data/lib/datadog/tracing/contrib/shoryuken/configuration/settings.rb +15 -8
  347. data/lib/datadog/tracing/contrib/shoryuken/ext.rb +15 -12
  348. data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +19 -11
  349. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +33 -30
  350. data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +12 -9
  351. data/lib/datadog/tracing/contrib/sinatra/env.rb +0 -17
  352. data/lib/datadog/tracing/contrib/sinatra/ext.rb +22 -19
  353. data/lib/datadog/tracing/contrib/sinatra/tracer_middleware.rb +3 -14
  354. data/lib/datadog/tracing/contrib/sneakers/configuration/settings.rb +15 -8
  355. data/lib/datadog/tracing/contrib/sneakers/ext.rb +2 -0
  356. data/lib/datadog/tracing/contrib/sneakers/tracer.rb +1 -1
  357. data/lib/datadog/tracing/contrib/span_attribute_schema.rb +74 -10
  358. data/lib/datadog/tracing/contrib/stripe/configuration/settings.rb +10 -6
  359. data/lib/datadog/tracing/contrib/stripe/ext.rb +1 -0
  360. data/lib/datadog/tracing/contrib/sucker_punch/configuration/settings.rb +10 -6
  361. data/lib/datadog/tracing/contrib/sucker_punch/ext.rb +16 -13
  362. data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +58 -0
  363. data/lib/datadog/tracing/contrib/trilogy/ext.rb +27 -0
  364. data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +94 -0
  365. data/lib/datadog/tracing/contrib/trilogy/integration.rb +43 -0
  366. data/lib/datadog/{ci/contrib/cucumber → tracing/contrib/trilogy}/patcher.rb +10 -6
  367. data/lib/datadog/tracing/contrib/utils/database.rb +5 -3
  368. data/lib/datadog/tracing/contrib/utils/quantization/http.rb +11 -11
  369. data/lib/datadog/tracing/contrib.rb +2 -0
  370. data/lib/datadog/tracing/correlation.rb +29 -12
  371. data/lib/datadog/tracing/diagnostics/environment_logger.rb +165 -0
  372. data/lib/datadog/tracing/diagnostics/ext.rb +21 -19
  373. data/lib/datadog/tracing/distributed/b3_multi.rb +2 -2
  374. data/lib/datadog/tracing/distributed/b3_single.rb +1 -1
  375. data/lib/datadog/tracing/distributed/datadog.rb +0 -1
  376. data/lib/datadog/tracing/distributed/propagation.rb +35 -34
  377. data/lib/datadog/tracing/distributed/trace_context.rb +52 -17
  378. data/lib/datadog/tracing/metadata/ext.rb +9 -6
  379. data/lib/datadog/tracing/metadata/tagging.rb +3 -3
  380. data/lib/datadog/tracing/remote.rb +78 -0
  381. data/lib/datadog/tracing/sampling/matcher.rb +23 -3
  382. data/lib/datadog/tracing/sampling/rule.rb +7 -2
  383. data/lib/datadog/tracing/sampling/rule_sampler.rb +31 -0
  384. data/lib/datadog/tracing/span_operation.rb +3 -15
  385. data/lib/datadog/tracing/sync_writer.rb +3 -3
  386. data/lib/datadog/tracing/trace_digest.rb +31 -0
  387. data/lib/datadog/tracing/trace_operation.rb +17 -5
  388. data/lib/datadog/tracing/trace_segment.rb +5 -2
  389. data/lib/datadog/tracing/tracer.rb +12 -1
  390. data/lib/datadog/{core → tracing}/transport/http/api/instance.rb +1 -1
  391. data/lib/datadog/{core → tracing}/transport/http/api/spec.rb +1 -1
  392. data/lib/datadog/tracing/transport/http/api.rb +43 -0
  393. data/lib/datadog/{core → tracing}/transport/http/builder.rb +13 -68
  394. data/lib/datadog/tracing/transport/http/client.rb +57 -0
  395. data/lib/datadog/tracing/transport/http/statistics.rb +47 -0
  396. data/lib/datadog/tracing/transport/http/traces.rb +152 -0
  397. data/lib/datadog/tracing/transport/http.rb +125 -0
  398. data/lib/datadog/tracing/transport/io/client.rb +89 -0
  399. data/lib/datadog/tracing/transport/io/response.rb +27 -0
  400. data/lib/datadog/tracing/transport/io/traces.rb +101 -0
  401. data/lib/datadog/tracing/transport/io.rb +30 -0
  402. data/lib/datadog/tracing/transport/serializable_trace.rb +126 -0
  403. data/lib/datadog/tracing/transport/statistics.rb +77 -0
  404. data/lib/datadog/tracing/transport/trace_formatter.rb +240 -0
  405. data/lib/datadog/tracing/transport/traces.rb +224 -0
  406. data/lib/datadog/tracing/workers/trace_writer.rb +6 -4
  407. data/lib/datadog/tracing/workers.rb +4 -2
  408. data/lib/datadog/tracing/writer.rb +5 -2
  409. data/lib/datadog/tracing.rb +8 -2
  410. data/lib/ddtrace/transport/ext.rb +22 -14
  411. data/lib/ddtrace/version.rb +9 -12
  412. data/lib/ddtrace.rb +1 -1
  413. metadata +157 -139
  414. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +0 -25
  415. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -110
  416. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +0 -591
  417. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +0 -14
  418. data/ext/ddtrace_profiling_native_extension/time_helpers.c +0 -17
  419. data/ext/ddtrace_profiling_native_extension/time_helpers.h +0 -10
  420. data/lib/datadog/ci/configuration/components.rb +0 -32
  421. data/lib/datadog/ci/configuration/settings.rb +0 -53
  422. data/lib/datadog/ci/contrib/cucumber/configuration/settings.rb +0 -33
  423. data/lib/datadog/ci/contrib/cucumber/ext.rb +0 -20
  424. data/lib/datadog/ci/contrib/cucumber/formatter.rb +0 -94
  425. data/lib/datadog/ci/contrib/cucumber/instrumentation.rb +0 -28
  426. data/lib/datadog/ci/contrib/cucumber/integration.rb +0 -47
  427. data/lib/datadog/ci/contrib/rspec/configuration/settings.rb +0 -33
  428. data/lib/datadog/ci/contrib/rspec/example.rb +0 -68
  429. data/lib/datadog/ci/contrib/rspec/ext.rb +0 -19
  430. data/lib/datadog/ci/contrib/rspec/integration.rb +0 -48
  431. data/lib/datadog/ci/contrib/rspec/patcher.rb +0 -27
  432. data/lib/datadog/ci/ext/app_types.rb +0 -9
  433. data/lib/datadog/ci/ext/environment.rb +0 -575
  434. data/lib/datadog/ci/ext/settings.rb +0 -10
  435. data/lib/datadog/ci/ext/test.rb +0 -35
  436. data/lib/datadog/ci/extensions.rb +0 -19
  437. data/lib/datadog/ci/flush.rb +0 -38
  438. data/lib/datadog/ci/test.rb +0 -81
  439. data/lib/datadog/ci.rb +0 -20
  440. data/lib/datadog/core/configuration/dependency_resolver.rb +0 -28
  441. data/lib/datadog/core/configuration/option_definition_set.rb +0 -22
  442. data/lib/datadog/core/configuration/option_set.rb +0 -10
  443. data/lib/datadog/core/telemetry/collector.rb +0 -231
  444. data/lib/datadog/core/telemetry/v1/app_event.rb +0 -52
  445. data/lib/datadog/core/telemetry/v1/application.rb +0 -92
  446. data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
  447. data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
  448. data/lib/datadog/core/telemetry/v1/host.rb +0 -59
  449. data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
  450. data/lib/datadog/core/telemetry/v1/product.rb +0 -36
  451. data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
  452. data/lib/datadog/core/transport/config.rb +0 -58
  453. data/lib/datadog/core/transport/http/api.rb +0 -57
  454. data/lib/datadog/core/transport/http/client.rb +0 -45
  455. data/lib/datadog/core/transport/http/config.rb +0 -268
  456. data/lib/datadog/core/transport/http/negotiation.rb +0 -144
  457. data/lib/datadog/core/transport/http.rb +0 -169
  458. data/lib/datadog/core/utils/object_set.rb +0 -43
  459. data/lib/datadog/core/utils/string_table.rb +0 -47
  460. data/lib/datadog/profiling/backtrace_location.rb +0 -34
  461. data/lib/datadog/profiling/buffer.rb +0 -43
  462. data/lib/datadog/profiling/collectors/old_stack.rb +0 -301
  463. data/lib/datadog/profiling/encoding/profile.rb +0 -41
  464. data/lib/datadog/profiling/event.rb +0 -15
  465. data/lib/datadog/profiling/events/stack.rb +0 -82
  466. data/lib/datadog/profiling/old_recorder.rb +0 -107
  467. data/lib/datadog/profiling/pprof/builder.rb +0 -125
  468. data/lib/datadog/profiling/pprof/converter.rb +0 -102
  469. data/lib/datadog/profiling/pprof/message_set.rb +0 -16
  470. data/lib/datadog/profiling/pprof/payload.rb +0 -20
  471. data/lib/datadog/profiling/pprof/pprof.proto +0 -212
  472. data/lib/datadog/profiling/pprof/pprof_pb.rb +0 -81
  473. data/lib/datadog/profiling/pprof/stack_sample.rb +0 -139
  474. data/lib/datadog/profiling/pprof/string_table.rb +0 -12
  475. data/lib/datadog/profiling/pprof/template.rb +0 -118
  476. data/lib/datadog/profiling/trace_identifiers/ddtrace.rb +0 -43
  477. data/lib/datadog/profiling/trace_identifiers/helper.rb +0 -45
  478. data/lib/datadog/tracing/contrib/sinatra/headers.rb +0 -35
  479. data/lib/ddtrace/transport/http/adapters/net.rb +0 -168
  480. data/lib/ddtrace/transport/http/adapters/registry.rb +0 -27
  481. data/lib/ddtrace/transport/http/adapters/test.rb +0 -85
  482. data/lib/ddtrace/transport/http/adapters/unix_socket.rb +0 -77
  483. data/lib/ddtrace/transport/http/api/endpoint.rb +0 -29
  484. data/lib/ddtrace/transport/http/api/fallbacks.rb +0 -24
  485. data/lib/ddtrace/transport/http/api/instance.rb +0 -35
  486. data/lib/ddtrace/transport/http/api/map.rb +0 -16
  487. data/lib/ddtrace/transport/http/api/spec.rb +0 -17
  488. data/lib/ddtrace/transport/http/api.rb +0 -39
  489. data/lib/ddtrace/transport/http/builder.rb +0 -176
  490. data/lib/ddtrace/transport/http/client.rb +0 -52
  491. data/lib/ddtrace/transport/http/env.rb +0 -58
  492. data/lib/ddtrace/transport/http/response.rb +0 -58
  493. data/lib/ddtrace/transport/http/statistics.rb +0 -43
  494. data/lib/ddtrace/transport/http/traces.rb +0 -144
  495. data/lib/ddtrace/transport/http.rb +0 -117
  496. data/lib/ddtrace/transport/io/client.rb +0 -85
  497. data/lib/ddtrace/transport/io/response.rb +0 -25
  498. data/lib/ddtrace/transport/io/traces.rb +0 -99
  499. data/lib/ddtrace/transport/io.rb +0 -28
  500. data/lib/ddtrace/transport/parcel.rb +0 -20
  501. data/lib/ddtrace/transport/request.rb +0 -15
  502. data/lib/ddtrace/transport/response.rb +0 -60
  503. data/lib/ddtrace/transport/serializable_trace.rb +0 -122
  504. data/lib/ddtrace/transport/statistics.rb +0 -75
  505. data/lib/ddtrace/transport/trace_formatter.rb +0 -198
  506. data/lib/ddtrace/transport/traces.rb +0 -216
  507. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
  508. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
  509. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
@@ -0,0 +1,1047 @@
1
+ #include "heap_recorder.h"
2
+ #include <pthread.h>
3
+ #include "ruby/st.h"
4
+ #include "ruby_helpers.h"
5
+ #include <errno.h>
6
+ #include "collectors_stack.h"
7
+ #include "libdatadog_helpers.h"
8
+
9
+ #if (defined(HAVE_WORKING_RB_GC_FORCE_RECYCLE) && ! defined(NO_SEEN_OBJ_ID_FLAG))
10
+ #define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
11
+ #endif
12
+
13
+ // Minimum age (in GC generations) of heap objects we want to include in heap
14
+ // recorder iterations. Objects with age 0 represent objects that have yet to undergo
15
+ // a GC and, thus, may just be noise/trash at instant of iteration and are usually not
16
+ // relevant for heap profiles as the great majority should be trivially reclaimed
17
+ // during the next GC.
18
+ #define ITERATION_MIN_AGE 1
19
+
20
+ // A compact representation of a stacktrace frame for a heap allocation.
21
+ typedef struct {
22
+ char *name;
23
+ char *filename;
24
+ int32_t line;
25
+ } heap_frame;
26
+ static st_index_t heap_frame_hash(heap_frame*, st_index_t seed);
27
+
28
+ // A compact representation of a stacktrace for a heap allocation.
29
+ //
30
+ // We could use a ddog_prof_Slice_Location instead but it has a lot of
31
+ // unused fields. Because we have to keep these stacks around for at
32
+ // least the lifetime of the objects allocated therein, we would be
33
+ // incurring a non-negligible memory overhead for little purpose.
34
+ typedef struct {
35
+ uint16_t frames_len;
36
+ heap_frame frames[];
37
+ } heap_stack;
38
+ static heap_stack* heap_stack_new(ddog_prof_Slice_Location);
39
+ static void heap_stack_free(heap_stack*);
40
+ static st_index_t heap_stack_hash(heap_stack*, st_index_t);
41
+
42
+ #if MAX_FRAMES_LIMIT > UINT16_MAX
43
+ #error Frames len type not compatible with MAX_FRAMES_LIMIT
44
+ #endif
45
+
46
+ enum heap_record_key_type {
47
+ HEAP_STACK,
48
+ LOCATION_SLICE
49
+ };
50
+ // This struct allows us to use two different types of stacks when
51
+ // interacting with a heap_record hash.
52
+ //
53
+ // The idea is that we'll always want to use heap_stack-keys when
54
+ // adding new entries to the hash since that's the compact stack
55
+ // representation we rely on internally.
56
+ //
57
+ // However, when querying for an existing heap record, we'd save a
58
+ // lot of allocations if we could query with the
59
+ // ddog_prof_Slice_Location we receive in our external API.
60
+ //
61
+ // To allow this interchange, we need a union and need to ensure
62
+ // that whatever shape of the union, the heap_record_key_cmp_st
63
+ // and heap_record_hash_st functions return the same results for
64
+ // equivalent stacktraces.
65
+ typedef struct {
66
+ enum heap_record_key_type type;
67
+ union {
68
+ // key never owns this if set
69
+ heap_stack *heap_stack;
70
+ // key never owns this if set
71
+ ddog_prof_Slice_Location *location_slice;
72
+ };
73
+ } heap_record_key;
74
+ static heap_record_key* heap_record_key_new(heap_stack*);
75
+ static void heap_record_key_free(heap_record_key*);
76
+ static int heap_record_key_cmp_st(st_data_t, st_data_t);
77
+ static st_index_t heap_record_key_hash_st(st_data_t);
78
+ static const struct st_hash_type st_hash_type_heap_record_key = {
79
+ heap_record_key_cmp_st,
80
+ heap_record_key_hash_st,
81
+ };
82
+
83
+ // Need to implement these functions to support the location-slice based keys
84
+ static st_index_t ddog_location_hash(ddog_prof_Location, st_index_t seed);
85
+ static st_index_t ddog_location_slice_hash(ddog_prof_Slice_Location, st_index_t seed);
86
+
87
+ // A heap record is used for deduping heap allocation stacktraces across multiple
88
+ // objects sharing the same allocation location.
89
+ typedef struct {
90
+ // How many objects are currently tracked by the heap recorder for this heap record.
91
+ uint32_t num_tracked_objects;
92
+ // stack is owned by the associated record and gets cleaned up alongside it
93
+ heap_stack *stack;
94
+ } heap_record;
95
+ static heap_record* heap_record_new(heap_stack*);
96
+ static void heap_record_free(heap_record*);
97
+
98
+ // An object record is used for storing data about currently tracked live objects
99
+ typedef struct {
100
+ long obj_id;
101
+ heap_record *heap_record;
102
+ live_object_data object_data;
103
+ } object_record;
104
+ static object_record* object_record_new(long, heap_record*, live_object_data);
105
+ static void object_record_free(object_record*);
106
+ static VALUE object_record_inspect(object_record*);
107
+ static object_record SKIPPED_RECORD = {0};
108
+
109
+ // A wrapper around an object record that is in the process of being recorded and was not
110
+ // yet committed.
111
+ typedef struct {
112
+ // Pointer to the (potentially partial) object_record containing metadata about an ongoing recording.
113
+ // When NULL, this symbolizes an unstarted/invalid recording.
114
+ object_record *object_record;
115
+ // A flag to track whether we had to force set the RUBY_FL_SEEN_OBJ_ID flag on this object
116
+ // as part of our workaround around rb_gc_force_recycle issues.
117
+ bool did_recycle_workaround;
118
+ } recording;
119
+
120
+ struct heap_recorder {
121
+ // Config
122
+ // Whether the recorder should try to determine approximate sizes for tracked objects.
123
+ bool size_enabled;
124
+ uint sample_rate;
125
+
126
+ // Map[key: heap_record_key*, record: heap_record*]
127
+ // NOTE: We always use heap_record_key.type == HEAP_STACK for storage but support lookups
128
+ // via heap_record_key.type == LOCATION_SLICE to allow for allocation-free fast-paths.
129
+ // NOTE: This table is currently only protected by the GVL since we never interact with it
130
+ // outside the GVL.
131
+ // NOTE: This table has ownership of both its heap_record_keys and heap_records.
132
+ st_table *heap_records;
133
+
134
+ // Map[obj_id: long, record: object_record*]
135
+ // NOTE: This table is currently only protected by the GVL since we never interact with it
136
+ // outside the GVL.
137
+ // NOTE: This table has ownership of its object_records. The keys are longs and so are
138
+ // passed as values.
139
+ st_table *object_records;
140
+
141
+ // Map[obj_id: long, record: object_record*]
142
+ // NOTE: This is a snapshot of object_records built ahead of an iteration. Outside of an
143
+ // iteration context, this table will be NULL. During an iteration, there will be no
144
+ // mutation of the data so iteration can occur without acquiring a lock.
145
+ // NOTE: Contrary to object_records, this table has no ownership of its data.
146
+ st_table *object_records_snapshot;
147
+ // The GC gen/epoch/count in which we prepared the current iteration.
148
+ //
149
+ // This enables us to calculate the age of iterated objects in the above snapshot by
150
+ // comparing it against an object's alloc_gen.
151
+ size_t iteration_gen;
152
+
153
+ // Data for a heap recording that was started but not yet ended
154
+ recording active_recording;
155
+
156
+ // Reusable location array, implementing a flyweight pattern for things like iteration.
157
+ ddog_prof_Location *reusable_locations;
158
+
159
+ // Sampling state
160
+ uint num_recordings_skipped;
161
+
162
+ struct stats_last_update {
163
+ size_t objects_alive;
164
+ size_t objects_dead;
165
+ size_t objects_skipped;
166
+ size_t objects_frozen;
167
+ } stats_last_update;
168
+ };
169
+ static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Location);
170
+ static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
171
+ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, object_record *record);
172
+ static int st_heap_record_entry_free(st_data_t, st_data_t, st_data_t);
173
+ static int st_object_record_entry_free(st_data_t, st_data_t, st_data_t);
174
+ static int st_object_record_update(st_data_t, st_data_t, st_data_t);
175
+ static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
176
+ static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra);
177
+ static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
178
+ static void commit_recording(heap_recorder*, heap_record*, recording);
179
+
180
+ // ==========================
181
+ // Heap Recorder External API
182
+ //
183
+ // WARN: All these APIs should support receiving a NULL heap_recorder, resulting in a noop.
184
+ //
185
+ // WARN: Except for ::heap_recorder_for_each_live_object, we always assume interaction with these APIs
186
+ // happens under the GVL.
187
+ //
188
+ // ==========================
189
+ heap_recorder* heap_recorder_new(void) {
190
+ heap_recorder *recorder = ruby_xcalloc(1, sizeof(heap_recorder));
191
+
192
+ recorder->heap_records = st_init_table(&st_hash_type_heap_record_key);
193
+ recorder->object_records = st_init_numtable();
194
+ recorder->object_records_snapshot = NULL;
195
+ recorder->reusable_locations = ruby_xcalloc(MAX_FRAMES_LIMIT, sizeof(ddog_prof_Location));
196
+ recorder->active_recording = (recording) {0};
197
+ recorder->size_enabled = true;
198
+ recorder->sample_rate = 1; // By default do no sampling on top of what allocation profiling already does
199
+
200
+ return recorder;
201
+ }
202
+
203
+ void heap_recorder_free(heap_recorder *heap_recorder) {
204
+ if (heap_recorder == NULL) {
205
+ return;
206
+ }
207
+
208
+ if (heap_recorder->object_records_snapshot != NULL) {
209
+ // if there's an unfinished iteration, clean it up now
210
+ // before we clean up any other state it might depend on
211
+ heap_recorder_finish_iteration(heap_recorder);
212
+ }
213
+
214
+ // Clean-up all object records
215
+ st_foreach(heap_recorder->object_records, st_object_record_entry_free, 0);
216
+ st_free_table(heap_recorder->object_records);
217
+
218
+ // Clean-up all heap records (this includes those only referred to by queued_samples)
219
+ st_foreach(heap_recorder->heap_records, st_heap_record_entry_free, 0);
220
+ st_free_table(heap_recorder->heap_records);
221
+
222
+ if (heap_recorder->active_recording.object_record != NULL) {
223
+ // If there's a partial object record, clean it up as well
224
+ object_record_free(heap_recorder->active_recording.object_record);
225
+ }
226
+
227
+ ruby_xfree(heap_recorder->reusable_locations);
228
+
229
+ ruby_xfree(heap_recorder);
230
+ }
231
+
232
+ void heap_recorder_set_size_enabled(heap_recorder *heap_recorder, bool size_enabled) {
233
+ if (heap_recorder == NULL) {
234
+ return;
235
+ }
236
+
237
+ heap_recorder->size_enabled = size_enabled;
238
+ }
239
+
240
+ void heap_recorder_set_sample_rate(heap_recorder *heap_recorder, int sample_rate) {
241
+ if (heap_recorder == NULL) {
242
+ return;
243
+ }
244
+
245
+ if (sample_rate <= 0) {
246
+ rb_raise(rb_eArgError, "Heap sample rate must be a positive integer value but was %d", sample_rate);
247
+ }
248
+
249
+ heap_recorder->sample_rate = sample_rate;
250
+ heap_recorder->num_recordings_skipped = 0;
251
+ }
252
+
253
+ // WARN: Assumes this gets called before profiler is reinitialized on the fork
254
+ void heap_recorder_after_fork(heap_recorder *heap_recorder) {
255
+ if (heap_recorder == NULL) {
256
+ return;
257
+ }
258
+
259
+ // When forking, the child process gets a copy of the entire state of the parent process, minus
260
+ // threads.
261
+ //
262
+ // This means anything the heap recorder is tracking will still be alive after the fork and
263
+ // should thus be kept. Because this heap recorder implementation does not rely on free
264
+ // tracepoints to track liveness, any frees that happen until we fully reinitialize, will
265
+ // simply be noticed on next heap_recorder_prepare_iteration.
266
+ //
267
+ // There is one small caveat though: fork only preserves one thread and in a Ruby app, that
268
+ // will be the thread holding on to the GVL. Since we support iteration on the heap recorder
269
+ // outside of the GVL, any state specific to that interaction may be inconsistent after fork
270
+ // (e.g. an acquired lock for thread safety). Iteration operates on object_records_snapshot
271
+ // though and that one will be updated on next heap_recorder_prepare_iteration so we really
272
+ // only need to finish any iteration that might have been left unfinished.
273
+ if (heap_recorder->object_records_snapshot != NULL) {
274
+ heap_recorder_finish_iteration(heap_recorder);
275
+ }
276
+ }
277
+
278
+ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice *alloc_class) {
279
+ if (heap_recorder == NULL) {
280
+ return;
281
+ }
282
+
283
+ if (heap_recorder->active_recording.object_record != NULL) {
284
+ rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end.");
285
+ }
286
+
287
+ if (heap_recorder->num_recordings_skipped + 1 < heap_recorder->sample_rate) {
288
+ heap_recorder->active_recording.object_record = &SKIPPED_RECORD;
289
+ heap_recorder->num_recordings_skipped++;
290
+ return;
291
+ }
292
+
293
+ heap_recorder->num_recordings_skipped = 0;
294
+
295
+ VALUE ruby_obj_id = rb_obj_id(new_obj);
296
+ if (!FIXNUM_P(ruby_obj_id)) {
297
+ rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling.");
298
+ }
299
+
300
+ bool did_recycle_workaround = false;
301
+
302
+ #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
303
+ // If we are in a ruby version that has a working rb_gc_force_recycle implementation,
304
+ // its usage may lead to an object being re-used outside of the typical GC cycle.
305
+ //
306
+ // This re-use is in theory invisible to us unless we're lucky enough to sample both
307
+ // the original object and the replacement that uses the recycled slot.
308
+ //
309
+ // In practice, we've observed (https://github.com/DataDog/dd-trace-rb/pull/3366)
310
+ // that non-noop implementations of rb_gc_force_recycle have an implementation bug
311
+ // which results in the object that re-used the recycled slot inheriting the same
312
+ // object id without setting the FL_SEEN_OBJ_ID flag. We rely on this knowledge to
313
+ // "observe" implicit frees when an object we are tracking is force-recycled.
314
+ //
315
+ // However, it may happen that we start tracking a new object and that object was
316
+ // allocated on a recycled slot. Due to the bug, this object would be missing the
317
+ // FL_SEEN_OBJ_ID flag even though it was not recycled itself. If we left it be,
318
+ // when we're doing our liveness check, the absence of the flag would trigger our
319
+ // implicit free workaround and the object would be inferred as recycled even though
320
+ // it might still be alive.
321
+ //
322
+ // Thus, if we detect that this new allocation is already missing the flag at the start
323
+ // of the heap allocation recording, we force-set it. This should be safe since we
324
+ // just called rb_obj_id on it above and the expectation is that any flaggable object
325
+ // that goes through it ends up with the flag set (as evidenced by the GC_ASSERT
326
+ // lines in https://github.com/ruby/ruby/blob/4a8d7246d15b2054eacb20f8ab3d29d39a3e7856/gc.c#L4050C14-L4050C14).
327
+ if (RB_FL_ABLE(new_obj) && !RB_FL_TEST(new_obj, RUBY_FL_SEEN_OBJ_ID)) {
328
+ RB_FL_SET(new_obj, RUBY_FL_SEEN_OBJ_ID);
329
+ did_recycle_workaround = true;
330
+ }
331
+ #endif
332
+
333
+ heap_recorder->active_recording = (recording) {
334
+ .object_record = object_record_new(FIX2LONG(ruby_obj_id), NULL, (live_object_data) {
335
+ .weight = weight * heap_recorder->sample_rate,
336
+ .class = alloc_class != NULL ? string_from_char_slice(*alloc_class) : NULL,
337
+ .alloc_gen = rb_gc_count(),
338
+ }),
339
+ .did_recycle_workaround = did_recycle_workaround,
340
+ };
341
+ }
342
+
343
+ void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
344
+ if (heap_recorder == NULL) {
345
+ return;
346
+ }
347
+
348
+ recording active_recording = heap_recorder->active_recording;
349
+
350
+ if (active_recording.object_record == NULL) {
351
+ // Recording ended without having been started?
352
+ rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started");
353
+ }
354
+ // From now on, mark the global active recording as invalid so we can short-circuit at any point
355
+ // and not end up with a still active recording. The local active_recording still holds the
356
+ // data required for committing though.
357
+ heap_recorder->active_recording = (recording) {0};
358
+
359
+ if (active_recording.object_record == &SKIPPED_RECORD) {
360
+ // special marker when we decided to skip due to sampling
361
+ return;
362
+ }
363
+
364
+ heap_record *heap_record = get_or_create_heap_record(heap_recorder, locations);
365
+
366
+ // And then commit the new allocation.
367
+ commit_recording(heap_recorder, heap_record, active_recording);
368
+ }
369
+
370
+ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
371
+ if (heap_recorder == NULL) {
372
+ return;
373
+ }
374
+
375
+ heap_recorder->iteration_gen = rb_gc_count();
376
+
377
+ if (heap_recorder->object_records_snapshot != NULL) {
378
+ // we could trivially handle this but we raise to highlight and catch unexpected usages.
379
+ rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished.");
380
+ }
381
+
382
+ // Reset last update stats, we'll be building them from scratch during the st_foreach call below
383
+ heap_recorder->stats_last_update = (struct stats_last_update) {};
384
+
385
+ st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder);
386
+
387
+ heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records);
388
+ if (heap_recorder->object_records_snapshot == NULL) {
389
+ rb_raise(rb_eRuntimeError, "Failed to create heap snapshot.");
390
+ }
391
+ }
392
+
393
+ void heap_recorder_finish_iteration(heap_recorder *heap_recorder) {
394
+ if (heap_recorder == NULL) {
395
+ return;
396
+ }
397
+
398
+ if (heap_recorder->object_records_snapshot == NULL) {
399
+ // we could trivially handle this but we raise to highlight and catch unexpected usages.
400
+ rb_raise(rb_eRuntimeError, "Heap recorder iteration finished without having been prepared.");
401
+ }
402
+
403
+ st_free_table(heap_recorder->object_records_snapshot);
404
+ heap_recorder->object_records_snapshot = NULL;
405
+ }
406
+
407
+ // Internal data we need while performing iteration over live objects.
408
+ typedef struct {
409
+ // The callback we need to call for each object.
410
+ bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg);
411
+ // The extra arg to pass as the second parameter to the callback.
412
+ void *for_each_callback_extra_arg;
413
+ // A reference to the heap recorder so we can access extra stuff like reusable_locations.
414
+ heap_recorder *heap_recorder;
415
+ } iteration_context;
416
+
417
+ // WARN: Assume iterations can run without the GVL for performance reasons. Do not raise, allocate or
418
+ // do NoGVL-unsafe interactions with the Ruby runtime. Any such interactions should be done during
419
+ // heap_recorder_prepare_iteration or heap_recorder_finish_iteration.
420
+ bool heap_recorder_for_each_live_object(
421
+ heap_recorder *heap_recorder,
422
+ bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg),
423
+ void *for_each_callback_extra_arg) {
424
+ if (heap_recorder == NULL) {
425
+ return true;
426
+ }
427
+
428
+ if (heap_recorder->object_records_snapshot == NULL) {
429
+ return false;
430
+ }
431
+
432
+ iteration_context context;
433
+ context.for_each_callback = for_each_callback;
434
+ context.for_each_callback_extra_arg = for_each_callback_extra_arg;
435
+ context.heap_recorder = heap_recorder;
436
+ st_foreach(heap_recorder->object_records_snapshot, st_object_records_iterate, (st_data_t) &context);
437
+ return true;
438
+ }
439
+
440
+ VALUE heap_recorder_state_snapshot(heap_recorder *heap_recorder) {
441
+ VALUE arguments[] = {
442
+ ID2SYM(rb_intern("num_object_records")), /* => */ LONG2NUM(heap_recorder->object_records->num_entries),
443
+ ID2SYM(rb_intern("num_heap_records")), /* => */ LONG2NUM(heap_recorder->heap_records->num_entries),
444
+
445
+ // Stats as of last update
446
+ ID2SYM(rb_intern("last_update_objects_alive")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_alive),
447
+ ID2SYM(rb_intern("last_update_objects_dead")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_dead),
448
+ ID2SYM(rb_intern("last_update_objects_skipped")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_skipped),
449
+ ID2SYM(rb_intern("last_update_objects_frozen")), /* => */ LONG2NUM(heap_recorder->stats_last_update.objects_frozen),
450
+ };
451
+ VALUE hash = rb_hash_new();
452
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(hash, arguments[i], arguments[i+1]);
453
+ return hash;
454
+ }
455
+
456
+ void heap_recorder_testonly_assert_hash_matches(ddog_prof_Slice_Location locations) {
457
+ heap_stack *stack = heap_stack_new(locations);
458
+ heap_record_key stack_based_key = (heap_record_key) {
459
+ .type = HEAP_STACK,
460
+ .heap_stack = stack,
461
+ };
462
+ heap_record_key location_based_key = (heap_record_key) {
463
+ .type = LOCATION_SLICE,
464
+ .location_slice = &locations,
465
+ };
466
+
467
+ st_index_t stack_hash = heap_record_key_hash_st((st_data_t) &stack_based_key);
468
+ st_index_t location_hash = heap_record_key_hash_st((st_data_t) &location_based_key);
469
+
470
+ heap_stack_free(stack);
471
+
472
+ if (stack_hash != location_hash) {
473
+ rb_raise(rb_eRuntimeError, "Heap record key hashes built from the same locations differ. stack_based_hash=%"PRI_VALUE_PREFIX"u location_based_hash=%"PRI_VALUE_PREFIX"u", stack_hash, location_hash);
474
+ }
475
+ }
476
+
477
+ VALUE heap_recorder_testonly_debug(heap_recorder *heap_recorder) {
478
+ if (heap_recorder == NULL) {
479
+ return rb_str_new2("NULL heap_recorder");
480
+ }
481
+
482
+ VALUE debug_str = rb_str_new2("object records:\n");
483
+ st_foreach(heap_recorder->object_records, st_object_records_debug, (st_data_t) debug_str);
484
+ return debug_str;
485
+ }
486
+
487
+ // ==========================
488
+ // Heap Recorder Internal API
489
+ // ==========================
490
+ static int st_heap_record_entry_free(st_data_t key, st_data_t value, DDTRACE_UNUSED st_data_t extra_arg) {
491
+ heap_record_key *record_key = (heap_record_key*) key;
492
+ heap_record_key_free(record_key);
493
+ heap_record_free((heap_record *) value);
494
+ return ST_DELETE;
495
+ }
496
+
497
+ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t value, DDTRACE_UNUSED st_data_t extra_arg) {
498
+ object_record_free((object_record *) value);
499
+ return ST_DELETE;
500
+ }
501
+
502
+ // Check to see if an object should not be included in a heap recorder iteration.
503
+ // This centralizes the checking logic to ensure it's equally applied between
504
+ // preparation and iteration codepaths.
505
+ static inline bool should_exclude_from_iteration(object_record *obj_record) {
506
+ return obj_record->object_data.gen_age < ITERATION_MIN_AGE;
507
+ }
508
+
509
+ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
510
+ long obj_id = (long) key;
511
+ object_record *record = (object_record*) value;
512
+ heap_recorder *recorder = (heap_recorder*) extra_arg;
513
+
514
+ VALUE ref;
515
+
516
+ size_t iteration_gen = recorder->iteration_gen;
517
+ size_t alloc_gen = record->object_data.alloc_gen;
518
+ // Guard against potential overflows given unsigned types here.
519
+ record->object_data.gen_age = alloc_gen < iteration_gen ? iteration_gen - alloc_gen : 0;
520
+
521
+ if (should_exclude_from_iteration(record)) {
522
+ // If an object won't be included in the current iteration, there's
523
+ // no point checking for liveness or updating its size, so exit early.
524
+ // NOTE: This means that there should be an equivalent check during actual
525
+ // iteration otherwise we'd iterate/expose stale object data.
526
+ recorder->stats_last_update.objects_skipped++;
527
+ return ST_CONTINUE;
528
+ }
529
+
530
+ if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
531
+ // Id no longer associated with a valid ref. Need to delete this object record!
532
+ on_committed_object_record_cleanup(recorder, record);
533
+ recorder->stats_last_update.objects_dead++;
534
+ return ST_DELETE;
535
+ }
536
+
537
+ // If we got this far, then we found a valid live object for the tracked id.
538
+
539
+ #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
540
+ // If we are in a ruby version that has a working rb_gc_force_recycle implementation,
541
+ // its usage may lead to an object being re-used outside of the typical GC cycle.
542
+ //
543
+ // This re-use is in theory invisible to us and would mean that the ref from which we
544
+ // collected the object_record metadata may not be the same as the current ref and
545
+ // thus any further reporting would be inaccurately attributed to stale metadata.
546
+ //
547
+ // In practice, there is a way for us to notice that this happened because of a bug
548
+ // in the implementation of rb_gc_force_recycle. Our heap profiler relies on object
549
+ // ids and id2ref to detect whether objects are still alive. Turns out that when an
550
+ // object with an id is re-used via rb_gc_force_recycle, it will "inherit" the ID
551
+ // of the old object but it will NOT have the FL_SEEN_OBJ_ID as per the experiment
552
+ // in https://github.com/DataDog/dd-trace-rb/pull/3360#discussion_r1442823517
553
+ //
554
+ // Thus, if we detect that the ref we just resolved above is missing this flag, we can
555
+ // safely say re-use happened and thus treat it as an implicit free of the object
556
+ // we were tracking (the original one which got recycled).
557
+ if (RB_FL_ABLE(ref) && !RB_FL_TEST(ref, RUBY_FL_SEEN_OBJ_ID)) {
558
+
559
+ // NOTE: We don't really need to set this flag for heap recorder to work correctly
560
+ // but doing so partially mitigates a bug in runtimes with working rb_gc_force_recycle
561
+ // which leads to broken invariants and leaking of entries in obj_to_id and id_to_obj
562
+ // tables in objspace. We already do the same thing when we sample a recycled object,
563
+ // here we apply it as well to objects that replace recycled objects that were being
564
+ // tracked. More details in https://github.com/DataDog/dd-trace-rb/pull/3366
565
+ RB_FL_SET(ref, RUBY_FL_SEEN_OBJ_ID);
566
+
567
+ on_committed_object_record_cleanup(recorder, record);
568
+ recorder->stats_last_update.objects_dead++;
569
+ return ST_DELETE;
570
+ }
571
+
572
+ #endif
573
+
574
+ if (recorder->size_enabled && !record->object_data.is_frozen) {
575
+ // if we were asked to update sizes and this object was not already seen as being frozen,
576
+ // update size again.
577
+ record->object_data.size = ruby_obj_memsize_of(ref);
578
+ // Check if it's now frozen so we skip a size update next time
579
+ record->object_data.is_frozen = RB_OBJ_FROZEN(ref);
580
+ }
581
+
582
+ recorder->stats_last_update.objects_alive++;
583
+ if (record->object_data.is_frozen) {
584
+ recorder->stats_last_update.objects_frozen++;
585
+ }
586
+
587
+ return ST_CONTINUE;
588
+ }
589
+
590
+ // WARN: This can get called outside the GVL. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED.
591
+ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra) {
592
+ object_record *record = (object_record*) value;
593
+ const heap_stack *stack = record->heap_record->stack;
594
+ iteration_context *context = (iteration_context*) extra;
595
+
596
+ const heap_recorder *recorder = context->heap_recorder;
597
+
598
+ if (should_exclude_from_iteration(record)) {
599
+ // Skip objects that should not be included in iteration
600
+ // NOTE: This matches the short-circuiting condition in st_object_record_update
601
+ // and prevents iteration over stale objects.
602
+ return ST_CONTINUE;
603
+ }
604
+
605
+ ddog_prof_Location *locations = recorder->reusable_locations;
606
+ for (uint16_t i = 0; i < stack->frames_len; i++) {
607
+ const heap_frame *frame = &stack->frames[i];
608
+ ddog_prof_Location *location = &locations[i];
609
+ location->function.name.ptr = frame->name;
610
+ location->function.name.len = strlen(frame->name);
611
+ location->function.filename.ptr = frame->filename;
612
+ location->function.filename.len = strlen(frame->filename);
613
+ location->line = frame->line;
614
+ }
615
+
616
+ heap_recorder_iteration_data iteration_data;
617
+ iteration_data.object_data = record->object_data;
618
+ iteration_data.locations = (ddog_prof_Slice_Location) {.ptr = locations, .len = stack->frames_len};
619
+
620
+ if (!context->for_each_callback(iteration_data, context->for_each_callback_extra_arg)) {
621
+ return ST_STOP;
622
+ }
623
+
624
+ return ST_CONTINUE;
625
+ }
626
+
627
+ static int st_object_records_debug(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra) {
628
+ VALUE debug_str = (VALUE) extra;
629
+
630
+ object_record *record = (object_record*) value;
631
+
632
+ rb_str_catf(debug_str, "%"PRIsVALUE"\n", object_record_inspect(record));
633
+
634
+ return ST_CONTINUE;
635
+ }
636
+
637
+ // Struct holding data required for an update operation on object_records
638
+ typedef struct {
639
+ // [in] The recording containing the new object record we want to add.
640
+ // NOTE: Transfer of ownership of the contained object record is assumed, do not re-use it after call to ::update_object_record_entry
641
+ recording recording;
642
+
643
+ // [in] The heap recorder where the update is happening.
644
+ heap_recorder *heap_recorder;
645
+ } object_record_update_data;
646
+
647
+ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t *value, st_data_t data, int existing) {
648
+ object_record_update_data *update_data = (object_record_update_data*) data;
649
+ recording recording = update_data->recording;
650
+ object_record *new_object_record = recording.object_record;
651
+ if (existing) {
652
+ object_record *existing_record = (object_record*) (*value);
653
+ if (recording.did_recycle_workaround) {
654
+ // In this case, it's possible for an object id to be re-used and we were lucky enough to have
655
+ // sampled both the original object and the replacement so cleanup the old one and replace it with
656
+ // the new object_record (i.e. treat this as a combined free+allocation).
657
+ on_committed_object_record_cleanup(update_data->heap_recorder, existing_record);
658
+ } else {
659
+ // This is not supposed to happen, raising...
660
+ VALUE existing_inspect = object_record_inspect(existing_record);
661
+ VALUE new_inspect = object_record_inspect(new_object_record);
662
+ rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
663
+ "the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect);
664
+ }
665
+ }
666
+ // Always carry on with the update, we want the new record to be there at the end
667
+ (*value) = (st_data_t) new_object_record;
668
+ return ST_CONTINUE;
669
+ }
670
+
671
+ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_record, recording recording) {
672
+ // Link the object record with the corresponding heap record. This was the last remaining thing we
673
+ // needed to fully build the object_record.
674
+ recording.object_record->heap_record = heap_record;
675
+ if (heap_record->num_tracked_objects == UINT32_MAX) {
676
+ rb_raise(rb_eRuntimeError, "Reached maximum number of tracked objects for heap record");
677
+ }
678
+ heap_record->num_tracked_objects++;
679
+
680
+ // Update object_records with the data for this new recording
681
+ object_record_update_data update_data = (object_record_update_data) {
682
+ .heap_recorder = heap_recorder,
683
+ .recording = recording,
684
+ };
685
+ st_update(heap_recorder->object_records, recording.object_record->obj_id, update_object_record_entry, (st_data_t) &update_data);
686
+ }
687
+
688
+ // Struct holding data required for an update operation on heap_records
689
+ typedef struct {
690
+ // [in] The locations we did this update with
691
+ ddog_prof_Slice_Location locations;
692
+ // [out] Pointer that will be updated to the updated heap record to prevent having to do
693
+ // another lookup to access the updated heap record.
694
+ heap_record **record;
695
+ } heap_record_update_data;
696
+
697
+ // This function assumes ownership of stack_data is passed on to it so it'll either transfer ownership or clean-up.
698
+ static int update_heap_record_entry_with_new_allocation(st_data_t *key, st_data_t *value, st_data_t data, int existing) {
699
+ heap_record_update_data *update_data = (heap_record_update_data*) data;
700
+
701
+ if (!existing) {
702
+ // there was no matching heap record so lets create a new one...
703
+ // we need to initialize a heap_record_key with a new stack and use that for the key storage. We can't use the
704
+ // locations-based key we used for the update call because we don't own its lifecycle. So we create a new
705
+ // heap stack and will pass ownership of it to the heap_record.
706
+ heap_stack *stack = heap_stack_new(update_data->locations);
707
+ (*key) = (st_data_t) heap_record_key_new(stack);
708
+ (*value) = (st_data_t) heap_record_new(stack);
709
+ }
710
+
711
+ heap_record *record = (heap_record*) (*value);
712
+ (*update_data->record) = record;
713
+
714
+ return ST_CONTINUE;
715
+ }
716
+
717
+ static heap_record* get_or_create_heap_record(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
718
+ // For performance reasons we use a stack-allocated location-slice based key. This allows us
719
+ // to do allocation-free lookups and reuse of a matching existing heap record.
720
+ // NOTE: If we end up creating a new record, we'll create a heap-allocated key we own and use that for storage
721
+ // instead of this one.
722
+ heap_record_key lookup_key = (heap_record_key) {
723
+ .type = LOCATION_SLICE,
724
+ .location_slice = &locations,
725
+ };
726
+
727
+ heap_record *heap_record = NULL;
728
+ heap_record_update_data update_data = (heap_record_update_data) {
729
+ .locations = locations,
730
+ .record = &heap_record,
731
+ };
732
+ st_update(heap_recorder->heap_records, (st_data_t) &lookup_key, update_heap_record_entry_with_new_allocation, (st_data_t) &update_data);
733
+
734
+ return heap_record;
735
+ }
736
+
737
+ static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_record *heap_record) {
738
+ if (heap_record->num_tracked_objects > 0) {
739
+ // still being used! do nothing...
740
+ return;
741
+ }
742
+
743
+ heap_record_key heap_key = (heap_record_key) {
744
+ .type = HEAP_STACK,
745
+ .heap_stack = heap_record->stack,
746
+ };
747
+ // We need to access the deleted key to free it since we gave ownership of the keys to the hash.
748
+ // st_delete will change this pointer to point to the removed key if one is found.
749
+ heap_record_key *deleted_key = &heap_key;
750
+ if (!st_delete(heap_recorder->heap_records, (st_data_t*) &deleted_key, NULL)) {
751
+ rb_raise(rb_eRuntimeError, "Attempted to cleanup an untracked heap_record");
752
+ };
753
+ heap_record_key_free(deleted_key);
754
+ heap_record_free(heap_record);
755
+ }
756
+
757
+ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, object_record *record) {
758
+ // Starting with the associated heap record. There will now be one less tracked object pointing to it
759
+ heap_record *heap_record = record->heap_record;
760
+ heap_record->num_tracked_objects--;
761
+
762
+ // One less object using this heap record, it may have become unused...
763
+ cleanup_heap_record_if_unused(heap_recorder, heap_record);
764
+
765
+ object_record_free(record);
766
+ }
767
+
768
+ // ===============
769
+ // Heap Record API
770
+ // ===============
771
+ heap_record* heap_record_new(heap_stack *stack) {
772
+ heap_record *record = ruby_xcalloc(1, sizeof(heap_record));
773
+ record->num_tracked_objects = 0;
774
+ record->stack = stack;
775
+ return record;
776
+ }
777
+
778
+ void heap_record_free(heap_record *record) {
779
+ heap_stack_free(record->stack);
780
+ ruby_xfree(record);
781
+ }
782
+
783
+
784
+ // =================
785
+ // Object Record API
786
+ // =================
787
+ object_record* object_record_new(long obj_id, heap_record *heap_record, live_object_data object_data) {
788
+ object_record *record = ruby_xcalloc(1, sizeof(object_record));
789
+ record->obj_id = obj_id;
790
+ record->heap_record = heap_record;
791
+ record->object_data = object_data;
792
+ return record;
793
+ }
794
+
795
+ void object_record_free(object_record *record) {
796
+ if (record->object_data.class != NULL) {
797
+ ruby_xfree(record->object_data.class);
798
+ }
799
+ ruby_xfree(record);
800
+ }
801
+
802
+ VALUE object_record_inspect(object_record *record) {
803
+ heap_frame top_frame = record->heap_record->stack->frames[0];
804
+ live_object_data object_data = record->object_data;
805
+ VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu gen_age=%zu frozen=%d ",
806
+ record->obj_id, object_data.weight, object_data.size, top_frame.filename,
807
+ (int) top_frame.line, object_data.alloc_gen, object_data.gen_age, object_data.is_frozen);
808
+
809
+ const char *class = record->object_data.class;
810
+ if (class != NULL) {
811
+ rb_str_catf(inspect, "class=%s ", class);
812
+ }
813
+ VALUE ref;
814
+
815
+ if (!ruby_ref_from_id(LONG2NUM(record->obj_id), &ref)) {
816
+ rb_str_catf(inspect, "object=<invalid>");
817
+ } else {
818
+ VALUE ruby_inspect = ruby_safe_inspect(ref);
819
+ if (ruby_inspect != Qnil) {
820
+ rb_str_catf(inspect, "object=%"PRIsVALUE, ruby_inspect);
821
+ } else {
822
+ rb_str_catf(inspect, "object=%s", ruby_value_type_to_string(rb_type(ref)));
823
+ }
824
+ }
825
+
826
+ return inspect;
827
+ }
828
+
829
+ // ==============
830
+ // Heap Frame API
831
+ // ==============
832
+ int heap_frame_cmp(heap_frame *f1, heap_frame *f2) {
833
+ int line_diff = (int) (f1->line - f2->line);
834
+ if (line_diff != 0) {
835
+ return line_diff;
836
+ }
837
+ int cmp = strcmp(f1->name, f2->name);
838
+ if (cmp != 0) {
839
+ return cmp;
840
+ }
841
+ return strcmp(f1->filename, f2->filename);
842
+ }
843
+
844
+ // TODO: Research potential performance improvements around hashing stuff here
845
+ // once we have a benchmarking suite.
846
+ // Example: Each call to st_hash is calling murmur_finish and we may want
847
+ // to only finish once per structure, not per field?
848
+ // Example: There may be a more efficient hashing for line that is not the
849
+ // generic st_hash algorithm?
850
+
851
+ // WARN: Must be kept in-sync with ::char_slice_hash
852
+ st_index_t string_hash(char *str, st_index_t seed) {
853
+ return st_hash(str, strlen(str), seed);
854
+ }
855
+
856
+ // WARN: Must be kept in-sync with ::string_hash
857
+ st_index_t char_slice_hash(ddog_CharSlice char_slice, st_index_t seed) {
858
+ return st_hash(char_slice.ptr, char_slice.len, seed);
859
+ }
860
+
861
+ // WARN: Must be kept in-sync with ::ddog_location_hash
862
+ st_index_t heap_frame_hash(heap_frame *frame, st_index_t seed) {
863
+ st_index_t hash = string_hash(frame->name, seed);
864
+ hash = string_hash(frame->filename, hash);
865
+ hash = st_hash(&frame->line, sizeof(frame->line), hash);
866
+ return hash;
867
+ }
868
+
869
+ // WARN: Must be kept in-sync with ::heap_frame_hash
870
+ st_index_t ddog_location_hash(ddog_prof_Location location, st_index_t seed) {
871
+ st_index_t hash = char_slice_hash(location.function.name, seed);
872
+ hash = char_slice_hash(location.function.filename, hash);
873
+ // Convert ddog_prof line type to the same type we use for our heap_frames to
874
+ // ensure we have compatible hashes
875
+ int32_t line_as_int32 = (int32_t) location.line;
876
+ hash = st_hash(&line_as_int32, sizeof(line_as_int32), hash);
877
+ return hash;
878
+ }
879
+
880
+ // ==============
881
+ // Heap Stack API
882
+ // ==============
883
+ heap_stack* heap_stack_new(ddog_prof_Slice_Location locations) {
884
+ uint16_t frames_len = locations.len;
885
+ if (frames_len > MAX_FRAMES_LIMIT) {
886
+ // This should not be happening anyway since MAX_FRAMES_LIMIT should be shared with
887
+ // the stacktrace construction mechanism. If it happens, lets just raise. This should
888
+ // be safe since only allocate with the GVL anyway.
889
+ rb_raise(rb_eRuntimeError, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len);
890
+ }
891
+ heap_stack *stack = ruby_xcalloc(1, sizeof(heap_stack) + frames_len * sizeof(heap_frame));
892
+ stack->frames_len = frames_len;
893
+ for (uint16_t i = 0; i < stack->frames_len; i++) {
894
+ const ddog_prof_Location *location = &locations.ptr[i];
895
+ stack->frames[i] = (heap_frame) {
896
+ .name = string_from_char_slice(location->function.name),
897
+ .filename = string_from_char_slice(location->function.filename),
898
+ // ddog_prof_Location is a int64_t. We don't expect to have to profile files with more than
899
+ // 2M lines so this cast should be fairly safe?
900
+ .line = (int32_t) location->line,
901
+ };
902
+ }
903
+ return stack;
904
+ }
905
+
906
+ void heap_stack_free(heap_stack *stack) {
907
+ for (uint64_t i = 0; i < stack->frames_len; i++) {
908
+ heap_frame *frame = &stack->frames[i];
909
+ ruby_xfree(frame->name);
910
+ ruby_xfree(frame->filename);
911
+ }
912
+ ruby_xfree(stack);
913
+ }
914
+
915
+ // WARN: Must be kept in-sync with ::ddog_location_slice_hash
916
+ st_index_t heap_stack_hash(heap_stack *stack, st_index_t seed) {
917
+ st_index_t hash = seed;
918
+ for (uint64_t i = 0; i < stack->frames_len; i++) {
919
+ hash = heap_frame_hash(&stack->frames[i], hash);
920
+ }
921
+ return hash;
922
+ }
923
+
924
+ // WARN: Must be kept in-sync with ::heap_stack_hash
925
+ st_index_t ddog_location_slice_hash(ddog_prof_Slice_Location locations, st_index_t seed) {
926
+ st_index_t hash = seed;
927
+ for (uint64_t i = 0; i < locations.len; i++) {
928
+ hash = ddog_location_hash(locations.ptr[i], hash);
929
+ }
930
+ return hash;
931
+ }
932
+
933
+ // ===================
934
+ // Heap Record Key API
935
+ // ===================
936
+ heap_record_key* heap_record_key_new(heap_stack *stack) {
937
+ heap_record_key *key = ruby_xmalloc(sizeof(heap_record_key));
938
+ key->type = HEAP_STACK;
939
+ key->heap_stack = stack;
940
+ return key;
941
+ }
942
+
943
+ void heap_record_key_free(heap_record_key *key) {
944
+ ruby_xfree(key);
945
+ }
946
+
947
+ static inline size_t heap_record_key_len(heap_record_key *key) {
948
+ if (key->type == HEAP_STACK) {
949
+ return key->heap_stack->frames_len;
950
+ } else {
951
+ return key->location_slice->len;
952
+ }
953
+ }
954
+
955
+ static inline int64_t heap_record_key_entry_line(heap_record_key *key, size_t entry_i) {
956
+ if (key->type == HEAP_STACK) {
957
+ return key->heap_stack->frames[entry_i].line;
958
+ } else {
959
+ return key->location_slice->ptr[entry_i].line;
960
+ }
961
+ }
962
+
963
+ static inline size_t heap_record_key_entry_name(heap_record_key *key, size_t entry_i, const char **name_ptr) {
964
+ if (key->type == HEAP_STACK) {
965
+ char *name = key->heap_stack->frames[entry_i].name;
966
+ (*name_ptr) = name;
967
+ return strlen(name);
968
+ } else {
969
+ ddog_CharSlice name = key->location_slice->ptr[entry_i].function.name;
970
+ (*name_ptr) = name.ptr;
971
+ return name.len;
972
+ }
973
+ }
974
+
975
+ static inline size_t heap_record_key_entry_filename(heap_record_key *key, size_t entry_i, const char **filename_ptr) {
976
+ if (key->type == HEAP_STACK) {
977
+ char *filename = key->heap_stack->frames[entry_i].filename;
978
+ (*filename_ptr) = filename;
979
+ return strlen(filename);
980
+ } else {
981
+ ddog_CharSlice filename = key->location_slice->ptr[entry_i].function.filename;
982
+ (*filename_ptr) = filename.ptr;
983
+ return filename.len;
984
+ }
985
+ }
986
+
987
+ int heap_record_key_cmp_st(st_data_t key1, st_data_t key2) {
988
+ heap_record_key *key_record1 = (heap_record_key*) key1;
989
+ heap_record_key *key_record2 = (heap_record_key*) key2;
990
+
991
+ // Fast path, check if lengths differ
992
+ size_t key_record1_len = heap_record_key_len(key_record1);
993
+ size_t key_record2_len = heap_record_key_len(key_record2);
994
+
995
+ if (key_record1_len != key_record2_len) {
996
+ return ((int) key_record1_len) - ((int) key_record2_len);
997
+ }
998
+
999
+ // If we got this far, we have same lengths so need to check item-by-item
1000
+ for (size_t i = 0; i < key_record1_len; i++) {
1001
+ // Lines are faster to compare, lets do that first
1002
+ size_t line1 = heap_record_key_entry_line(key_record1, i);
1003
+ size_t line2 = heap_record_key_entry_line(key_record2, i);
1004
+ if (line1 != line2) {
1005
+ return ((int) line1) - ((int)line2);
1006
+ }
1007
+
1008
+ // Then come names, they are usually smaller than filenames
1009
+ const char *name1, *name2;
1010
+ size_t name1_len = heap_record_key_entry_name(key_record1, i, &name1);
1011
+ size_t name2_len = heap_record_key_entry_name(key_record2, i, &name2);
1012
+ if (name1_len != name2_len) {
1013
+ return ((int) name1_len) - ((int) name2_len);
1014
+ }
1015
+ int name_cmp_result = strncmp(name1, name2, name1_len);
1016
+ if (name_cmp_result != 0) {
1017
+ return name_cmp_result;
1018
+ }
1019
+
1020
+ // Then come filenames
1021
+ const char *filename1, *filename2;
1022
+ int64_t filename1_len = heap_record_key_entry_filename(key_record1, i, &filename1);
1023
+ int64_t filename2_len = heap_record_key_entry_filename(key_record2, i, &filename2);
1024
+ if (filename1_len != filename2_len) {
1025
+ return ((int) filename1_len) - ((int) filename2_len);
1026
+ }
1027
+ int filename_cmp_result = strncmp(filename1, filename2, filename1_len);
1028
+ if (filename_cmp_result != 0) {
1029
+ return filename_cmp_result;
1030
+ }
1031
+ }
1032
+
1033
+ // If we survived the above for, then everything matched
1034
+ return 0;
1035
+ }
1036
+
1037
+ // Initial seed for hash functions
1038
+ #define FNV1_32A_INIT 0x811c9dc5
1039
+
1040
+ st_index_t heap_record_key_hash_st(st_data_t key) {
1041
+ heap_record_key *record_key = (heap_record_key*) key;
1042
+ if (record_key->type == HEAP_STACK) {
1043
+ return heap_stack_hash(record_key->heap_stack, FNV1_32A_INIT);
1044
+ } else {
1045
+ return ddog_location_slice_hash(*record_key->location_slice, FNV1_32A_INIT);
1046
+ }
1047
+ }