datadog 2.7.1 → 2.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (441)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +353 -1
  3. data/ext/datadog_profiling_native_extension/clock_id.h +2 -2
  4. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +78 -102
  5. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +1 -1
  6. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +1 -1
  7. data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +16 -16
  8. data/ext/datadog_profiling_native_extension/collectors_stack.c +235 -57
  9. data/ext/datadog_profiling_native_extension/collectors_stack.h +21 -5
  10. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +376 -156
  11. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
  12. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +1 -4
  13. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +10 -0
  14. data/ext/datadog_profiling_native_extension/encoded_profile.c +79 -0
  15. data/ext/datadog_profiling_native_extension/encoded_profile.h +8 -0
  16. data/ext/datadog_profiling_native_extension/extconf.rb +14 -8
  17. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +2 -0
  18. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +0 -8
  19. data/ext/datadog_profiling_native_extension/heap_recorder.c +295 -532
  20. data/ext/datadog_profiling_native_extension/heap_recorder.h +6 -8
  21. data/ext/datadog_profiling_native_extension/http_transport.c +64 -98
  22. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +22 -0
  23. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +8 -5
  24. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +69 -1
  25. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +16 -4
  26. data/ext/datadog_profiling_native_extension/profiling.c +19 -8
  27. data/ext/datadog_profiling_native_extension/ruby_helpers.c +9 -21
  28. data/ext/datadog_profiling_native_extension/ruby_helpers.h +2 -10
  29. data/ext/datadog_profiling_native_extension/stack_recorder.c +231 -181
  30. data/ext/datadog_profiling_native_extension/stack_recorder.h +2 -2
  31. data/ext/datadog_profiling_native_extension/time_helpers.h +1 -1
  32. data/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c +47 -0
  33. data/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h +31 -0
  34. data/ext/libdatadog_api/crashtracker.c +17 -15
  35. data/ext/libdatadog_api/crashtracker.h +5 -0
  36. data/ext/libdatadog_api/datadog_ruby_common.c +1 -4
  37. data/ext/libdatadog_api/datadog_ruby_common.h +10 -0
  38. data/ext/libdatadog_api/extconf.rb +2 -2
  39. data/ext/libdatadog_api/init.c +15 -0
  40. data/ext/libdatadog_api/library_config.c +164 -0
  41. data/ext/libdatadog_api/library_config.h +25 -0
  42. data/ext/libdatadog_api/macos_development.md +3 -3
  43. data/ext/libdatadog_api/process_discovery.c +112 -0
  44. data/ext/libdatadog_api/process_discovery.h +5 -0
  45. data/ext/libdatadog_extconf_helpers.rb +2 -2
  46. data/lib/datadog/appsec/actions_handler/serializable_backtrace.rb +89 -0
  47. data/lib/datadog/appsec/actions_handler.rb +49 -0
  48. data/lib/datadog/appsec/anonymizer.rb +16 -0
  49. data/lib/datadog/appsec/api_security/lru_cache.rb +56 -0
  50. data/lib/datadog/appsec/api_security/route_extractor.rb +65 -0
  51. data/lib/datadog/appsec/api_security/sampler.rb +59 -0
  52. data/lib/datadog/appsec/api_security.rb +23 -0
  53. data/lib/datadog/appsec/assets/waf_rules/README.md +50 -5
  54. data/lib/datadog/appsec/assets/waf_rules/recommended.json +623 -253
  55. data/lib/datadog/appsec/assets/waf_rules/strict.json +69 -107
  56. data/lib/datadog/appsec/autoload.rb +1 -1
  57. data/lib/datadog/appsec/component.rb +49 -65
  58. data/lib/datadog/appsec/compressed_json.rb +40 -0
  59. data/lib/datadog/appsec/configuration/settings.rb +212 -27
  60. data/lib/datadog/appsec/context.rb +74 -0
  61. data/lib/datadog/appsec/contrib/active_record/instrumentation.rb +92 -0
  62. data/lib/datadog/appsec/contrib/active_record/integration.rb +41 -0
  63. data/lib/datadog/appsec/contrib/active_record/patcher.rb +101 -0
  64. data/lib/datadog/appsec/contrib/auto_instrument.rb +1 -1
  65. data/lib/datadog/appsec/contrib/devise/configuration.rb +52 -0
  66. data/lib/datadog/appsec/contrib/devise/data_extractor.rb +78 -0
  67. data/lib/datadog/appsec/contrib/devise/ext.rb +22 -0
  68. data/lib/datadog/appsec/contrib/devise/integration.rb +1 -2
  69. data/lib/datadog/appsec/contrib/devise/patcher.rb +33 -25
  70. data/lib/datadog/appsec/contrib/devise/patches/signin_tracking_patch.rb +102 -0
  71. data/lib/datadog/appsec/contrib/devise/patches/signup_tracking_patch.rb +69 -0
  72. data/lib/datadog/appsec/contrib/devise/{patcher/rememberable_patch.rb → patches/skip_signin_tracking_patch.rb} +3 -3
  73. data/lib/datadog/appsec/contrib/devise/tracking_middleware.rb +106 -0
  74. data/lib/datadog/appsec/contrib/excon/integration.rb +41 -0
  75. data/lib/datadog/appsec/contrib/excon/patcher.rb +28 -0
  76. data/lib/datadog/appsec/contrib/excon/ssrf_detection_middleware.rb +42 -0
  77. data/lib/datadog/appsec/contrib/faraday/connection_patch.rb +22 -0
  78. data/lib/datadog/appsec/contrib/faraday/integration.rb +42 -0
  79. data/lib/datadog/appsec/contrib/faraday/patcher.rb +53 -0
  80. data/lib/datadog/appsec/contrib/faraday/rack_builder_patch.rb +22 -0
  81. data/lib/datadog/appsec/contrib/faraday/ssrf_detection_middleware.rb +41 -0
  82. data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +1 -7
  83. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +17 -30
  84. data/lib/datadog/appsec/contrib/graphql/integration.rb +1 -1
  85. data/lib/datadog/appsec/contrib/graphql/patcher.rb +0 -3
  86. data/lib/datadog/appsec/contrib/rack/ext.rb +34 -0
  87. data/lib/datadog/appsec/contrib/rack/gateway/response.rb +3 -3
  88. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +78 -98
  89. data/lib/datadog/appsec/contrib/rack/integration.rb +1 -1
  90. data/lib/datadog/appsec/contrib/rack/patcher.rb +0 -3
  91. data/lib/datadog/appsec/contrib/rack/request_body_middleware.rb +10 -11
  92. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +73 -78
  93. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +16 -33
  94. data/lib/datadog/appsec/contrib/rails/integration.rb +1 -1
  95. data/lib/datadog/appsec/contrib/rails/patcher.rb +25 -38
  96. data/lib/datadog/appsec/contrib/rest_client/integration.rb +45 -0
  97. data/lib/datadog/appsec/contrib/rest_client/patcher.rb +28 -0
  98. data/lib/datadog/appsec/contrib/rest_client/request_ssrf_detection_patch.rb +38 -0
  99. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +31 -68
  100. data/lib/datadog/appsec/contrib/sinatra/integration.rb +1 -1
  101. data/lib/datadog/appsec/contrib/sinatra/patcher.rb +5 -31
  102. data/lib/datadog/appsec/event.rb +96 -135
  103. data/lib/datadog/appsec/ext.rb +12 -3
  104. data/lib/datadog/appsec/instrumentation/gateway/argument.rb +7 -2
  105. data/lib/datadog/appsec/instrumentation/gateway/middleware.rb +24 -0
  106. data/lib/datadog/appsec/instrumentation/gateway.rb +17 -22
  107. data/lib/datadog/appsec/metrics/collector.rb +38 -0
  108. data/lib/datadog/appsec/metrics/exporter.rb +35 -0
  109. data/lib/datadog/appsec/metrics/telemetry.rb +23 -0
  110. data/lib/datadog/appsec/metrics.rb +13 -0
  111. data/lib/datadog/appsec/monitor/gateway/watcher.rb +52 -32
  112. data/lib/datadog/appsec/processor/rule_loader.rb +30 -36
  113. data/lib/datadog/appsec/remote.rb +31 -57
  114. data/lib/datadog/appsec/response.rb +19 -85
  115. data/lib/datadog/appsec/security_engine/engine.rb +194 -0
  116. data/lib/datadog/appsec/security_engine/result.rb +67 -0
  117. data/lib/datadog/appsec/security_engine/runner.rb +87 -0
  118. data/lib/datadog/appsec/security_engine.rb +9 -0
  119. data/lib/datadog/appsec/security_event.rb +39 -0
  120. data/lib/datadog/appsec/utils.rb +0 -2
  121. data/lib/datadog/appsec.rb +22 -12
  122. data/lib/datadog/auto_instrument.rb +3 -0
  123. data/lib/datadog/core/buffer/random.rb +18 -2
  124. data/lib/datadog/core/configuration/agent_settings.rb +52 -0
  125. data/lib/datadog/core/configuration/agent_settings_resolver.rb +4 -18
  126. data/lib/datadog/core/configuration/agentless_settings_resolver.rb +176 -0
  127. data/lib/datadog/core/configuration/components.rb +74 -32
  128. data/lib/datadog/core/configuration/components_state.rb +23 -0
  129. data/lib/datadog/core/configuration/ext.rb +5 -1
  130. data/lib/datadog/core/configuration/option.rb +81 -45
  131. data/lib/datadog/core/configuration/option_definition.rb +6 -4
  132. data/lib/datadog/core/configuration/options.rb +3 -3
  133. data/lib/datadog/core/configuration/settings.rb +121 -50
  134. data/lib/datadog/core/configuration/stable_config.rb +22 -0
  135. data/lib/datadog/core/configuration.rb +43 -11
  136. data/lib/datadog/{tracing → core}/contrib/rails/utils.rb +1 -3
  137. data/lib/datadog/core/crashtracking/component.rb +4 -13
  138. data/lib/datadog/core/crashtracking/tag_builder.rb +4 -22
  139. data/lib/datadog/core/diagnostics/environment_logger.rb +1 -1
  140. data/lib/datadog/core/encoding.rb +17 -1
  141. data/lib/datadog/core/environment/agent_info.rb +78 -0
  142. data/lib/datadog/core/environment/cgroup.rb +10 -12
  143. data/lib/datadog/core/environment/container.rb +38 -40
  144. data/lib/datadog/core/environment/ext.rb +6 -6
  145. data/lib/datadog/core/environment/git.rb +1 -0
  146. data/lib/datadog/core/environment/identity.rb +3 -3
  147. data/lib/datadog/core/environment/platform.rb +3 -3
  148. data/lib/datadog/core/environment/variable_helpers.rb +1 -1
  149. data/lib/datadog/core/error.rb +11 -9
  150. data/lib/datadog/core/logger.rb +2 -2
  151. data/lib/datadog/core/metrics/client.rb +27 -27
  152. data/lib/datadog/core/metrics/logging.rb +5 -5
  153. data/lib/datadog/core/process_discovery/tracer_memfd.rb +15 -0
  154. data/lib/datadog/core/process_discovery.rb +36 -0
  155. data/lib/datadog/core/rate_limiter.rb +4 -2
  156. data/lib/datadog/core/remote/client/capabilities.rb +6 -0
  157. data/lib/datadog/core/remote/client.rb +107 -92
  158. data/lib/datadog/core/remote/component.rb +18 -19
  159. data/lib/datadog/core/remote/configuration/digest.rb +7 -7
  160. data/lib/datadog/core/remote/configuration/path.rb +1 -1
  161. data/lib/datadog/core/remote/configuration/repository.rb +14 -1
  162. data/lib/datadog/core/remote/negotiation.rb +9 -9
  163. data/lib/datadog/core/remote/transport/config.rb +4 -3
  164. data/lib/datadog/core/remote/transport/http/api.rb +13 -18
  165. data/lib/datadog/core/remote/transport/http/client.rb +5 -4
  166. data/lib/datadog/core/remote/transport/http/config.rb +27 -55
  167. data/lib/datadog/core/remote/transport/http/negotiation.rb +8 -51
  168. data/lib/datadog/core/remote/transport/http.rb +25 -94
  169. data/lib/datadog/core/remote/transport/negotiation.rb +17 -4
  170. data/lib/datadog/core/remote/worker.rb +10 -7
  171. data/lib/datadog/core/runtime/metrics.rb +12 -5
  172. data/lib/datadog/core/tag_builder.rb +56 -0
  173. data/lib/datadog/core/telemetry/component.rb +84 -49
  174. data/lib/datadog/core/telemetry/emitter.rb +23 -11
  175. data/lib/datadog/core/telemetry/event/app_client_configuration_change.rb +66 -0
  176. data/lib/datadog/core/telemetry/event/app_closing.rb +18 -0
  177. data/lib/datadog/core/telemetry/event/app_dependencies_loaded.rb +33 -0
  178. data/lib/datadog/core/telemetry/event/app_heartbeat.rb +18 -0
  179. data/lib/datadog/core/telemetry/event/app_integrations_change.rb +58 -0
  180. data/lib/datadog/core/telemetry/event/app_started.rb +269 -0
  181. data/lib/datadog/core/telemetry/event/base.rb +40 -0
  182. data/lib/datadog/core/telemetry/event/distributions.rb +18 -0
  183. data/lib/datadog/core/telemetry/event/generate_metrics.rb +43 -0
  184. data/lib/datadog/core/telemetry/event/log.rb +76 -0
  185. data/lib/datadog/core/telemetry/event/message_batch.rb +42 -0
  186. data/lib/datadog/core/telemetry/event/synth_app_client_configuration_change.rb +43 -0
  187. data/lib/datadog/core/telemetry/event.rb +17 -383
  188. data/lib/datadog/core/telemetry/ext.rb +1 -0
  189. data/lib/datadog/core/telemetry/http/adapters/net.rb +12 -97
  190. data/lib/datadog/core/telemetry/logger.rb +5 -4
  191. data/lib/datadog/core/telemetry/logging.rb +12 -6
  192. data/lib/datadog/core/telemetry/metric.rb +28 -6
  193. data/lib/datadog/core/telemetry/request.rb +4 -4
  194. data/lib/datadog/core/telemetry/transport/http/api.rb +43 -0
  195. data/lib/datadog/core/telemetry/transport/http/client.rb +49 -0
  196. data/lib/datadog/core/telemetry/transport/http/telemetry.rb +92 -0
  197. data/lib/datadog/core/telemetry/transport/http.rb +63 -0
  198. data/lib/datadog/core/telemetry/transport/telemetry.rb +51 -0
  199. data/lib/datadog/core/telemetry/worker.rb +128 -25
  200. data/lib/datadog/core/transport/http/adapters/net.rb +17 -2
  201. data/lib/datadog/core/transport/http/adapters/test.rb +2 -1
  202. data/lib/datadog/core/transport/http/adapters/unix_socket.rb +1 -1
  203. data/lib/datadog/{tracing → core}/transport/http/api/instance.rb +18 -1
  204. data/lib/datadog/core/transport/http/api/spec.rb +36 -0
  205. data/lib/datadog/{tracing → core}/transport/http/builder.rb +53 -31
  206. data/lib/datadog/core/transport/http/env.rb +8 -0
  207. data/lib/datadog/core/transport/http.rb +75 -0
  208. data/lib/datadog/core/transport/response.rb +4 -0
  209. data/lib/datadog/core/utils/at_fork_monkey_patch.rb +6 -6
  210. data/lib/datadog/core/utils/duration.rb +32 -32
  211. data/lib/datadog/core/utils/forking.rb +2 -2
  212. data/lib/datadog/core/utils/network.rb +6 -6
  213. data/lib/datadog/core/utils/only_once_successful.rb +16 -5
  214. data/lib/datadog/core/utils/time.rb +20 -0
  215. data/lib/datadog/core/utils/truncation.rb +21 -0
  216. data/lib/datadog/core/utils.rb +7 -0
  217. data/lib/datadog/core/vendor/multipart-post/multipart/post/composite_read_io.rb +1 -1
  218. data/lib/datadog/core/vendor/multipart-post/multipart/post/multipartable.rb +8 -8
  219. data/lib/datadog/core/vendor/multipart-post/multipart/post/parts.rb +7 -7
  220. data/lib/datadog/core/worker.rb +1 -1
  221. data/lib/datadog/core/workers/async.rb +29 -12
  222. data/lib/datadog/core/workers/interval_loop.rb +12 -1
  223. data/lib/datadog/core/workers/runtime_metrics.rb +2 -2
  224. data/lib/datadog/core.rb +8 -0
  225. data/lib/datadog/di/base.rb +115 -0
  226. data/lib/datadog/di/boot.rb +34 -0
  227. data/lib/datadog/di/code_tracker.rb +26 -15
  228. data/lib/datadog/di/component.rb +23 -14
  229. data/lib/datadog/di/configuration/settings.rb +25 -1
  230. data/lib/datadog/di/contrib/active_record.rb +1 -0
  231. data/lib/datadog/di/contrib/railtie.rb +15 -0
  232. data/lib/datadog/di/contrib.rb +28 -0
  233. data/lib/datadog/di/error.rb +5 -0
  234. data/lib/datadog/di/instrumenter.rb +162 -21
  235. data/lib/datadog/di/logger.rb +30 -0
  236. data/lib/datadog/di/preload.rb +18 -0
  237. data/lib/datadog/di/probe.rb +14 -7
  238. data/lib/datadog/di/probe_builder.rb +1 -0
  239. data/lib/datadog/di/probe_manager.rb +11 -5
  240. data/lib/datadog/di/probe_notification_builder.rb +54 -38
  241. data/lib/datadog/di/probe_notifier_worker.rb +60 -26
  242. data/lib/datadog/di/redactor.rb +0 -1
  243. data/lib/datadog/di/remote.rb +147 -0
  244. data/lib/datadog/di/serializer.rb +19 -8
  245. data/lib/datadog/di/transport/diagnostics.rb +62 -0
  246. data/lib/datadog/di/transport/http/api.rb +42 -0
  247. data/lib/datadog/di/transport/http/client.rb +47 -0
  248. data/lib/datadog/di/transport/http/diagnostics.rb +65 -0
  249. data/lib/datadog/di/transport/http/input.rb +77 -0
  250. data/lib/datadog/di/transport/http.rb +57 -0
  251. data/lib/datadog/di/transport/input.rb +70 -0
  252. data/lib/datadog/di/utils.rb +103 -0
  253. data/lib/datadog/di.rb +14 -76
  254. data/lib/datadog/error_tracking/collector.rb +87 -0
  255. data/lib/datadog/error_tracking/component.rb +167 -0
  256. data/lib/datadog/error_tracking/configuration/settings.rb +63 -0
  257. data/lib/datadog/error_tracking/configuration.rb +11 -0
  258. data/lib/datadog/error_tracking/ext.rb +18 -0
  259. data/lib/datadog/error_tracking/extensions.rb +16 -0
  260. data/lib/datadog/error_tracking/filters.rb +77 -0
  261. data/lib/datadog/error_tracking.rb +18 -0
  262. data/lib/datadog/kit/appsec/events.rb +15 -3
  263. data/lib/datadog/kit/identity.rb +9 -5
  264. data/lib/datadog/opentelemetry/api/baggage.rb +90 -0
  265. data/lib/datadog/opentelemetry/api/baggage.rbs +26 -0
  266. data/lib/datadog/opentelemetry/api/context.rb +16 -2
  267. data/lib/datadog/opentelemetry/sdk/trace/span.rb +1 -1
  268. data/lib/datadog/opentelemetry.rb +2 -1
  269. data/lib/datadog/profiling/collectors/code_provenance.rb +18 -9
  270. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +4 -0
  271. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
  272. data/lib/datadog/profiling/collectors/info.rb +3 -0
  273. data/lib/datadog/profiling/collectors/thread_context.rb +17 -2
  274. data/lib/datadog/profiling/component.rb +64 -82
  275. data/lib/datadog/profiling/encoded_profile.rb +11 -0
  276. data/lib/datadog/profiling/exporter.rb +3 -4
  277. data/lib/datadog/profiling/ext.rb +0 -14
  278. data/lib/datadog/profiling/flush.rb +5 -8
  279. data/lib/datadog/profiling/http_transport.rb +8 -87
  280. data/lib/datadog/profiling/load_native_extension.rb +1 -33
  281. data/lib/datadog/profiling/profiler.rb +2 -0
  282. data/lib/datadog/profiling/scheduler.rb +10 -2
  283. data/lib/datadog/profiling/stack_recorder.rb +9 -9
  284. data/lib/datadog/profiling/tag_builder.rb +5 -41
  285. data/lib/datadog/profiling/tasks/setup.rb +2 -0
  286. data/lib/datadog/profiling.rb +6 -2
  287. data/lib/datadog/tracing/analytics.rb +1 -1
  288. data/lib/datadog/tracing/component.rb +16 -12
  289. data/lib/datadog/tracing/configuration/ext.rb +8 -1
  290. data/lib/datadog/tracing/configuration/settings.rb +22 -10
  291. data/lib/datadog/tracing/context_provider.rb +1 -1
  292. data/lib/datadog/tracing/contrib/action_cable/integration.rb +5 -2
  293. data/lib/datadog/tracing/contrib/action_mailer/integration.rb +6 -2
  294. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +15 -0
  295. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +19 -12
  296. data/lib/datadog/tracing/contrib/action_pack/ext.rb +2 -0
  297. data/lib/datadog/tracing/contrib/action_pack/integration.rb +5 -2
  298. data/lib/datadog/tracing/contrib/action_view/integration.rb +5 -2
  299. data/lib/datadog/tracing/contrib/active_job/integration.rb +5 -2
  300. data/lib/datadog/tracing/contrib/active_record/integration.rb +7 -3
  301. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +7 -2
  302. data/lib/datadog/tracing/contrib/active_support/cache/instrumentation.rb +36 -1
  303. data/lib/datadog/tracing/contrib/active_support/cache/patcher.rb +4 -0
  304. data/lib/datadog/tracing/contrib/active_support/cache/redis.rb +14 -4
  305. data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +10 -0
  306. data/lib/datadog/tracing/contrib/active_support/integration.rb +5 -2
  307. data/lib/datadog/tracing/contrib/auto_instrument.rb +2 -2
  308. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +10 -0
  309. data/lib/datadog/tracing/contrib/aws/integration.rb +3 -0
  310. data/lib/datadog/tracing/contrib/aws/parsed_context.rb +5 -1
  311. data/lib/datadog/tracing/contrib/concurrent_ruby/integration.rb +3 -0
  312. data/lib/datadog/tracing/contrib/configuration/settings.rb +1 -1
  313. data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +4 -0
  314. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +6 -1
  315. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +4 -5
  316. data/lib/datadog/tracing/contrib/excon/middleware.rb +5 -3
  317. data/lib/datadog/tracing/contrib/ext.rb +1 -0
  318. data/lib/datadog/tracing/contrib/extensions.rb +29 -3
  319. data/lib/datadog/tracing/contrib/faraday/middleware.rb +5 -3
  320. data/lib/datadog/tracing/contrib/graphql/configuration/error_extension_env_parser.rb +21 -0
  321. data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +11 -0
  322. data/lib/datadog/tracing/contrib/graphql/ext.rb +5 -0
  323. data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +102 -11
  324. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +7 -1
  325. data/lib/datadog/tracing/contrib/grpc/distributed/propagation.rb +3 -0
  326. data/lib/datadog/tracing/contrib/http/circuit_breaker.rb +0 -15
  327. data/lib/datadog/tracing/contrib/http/distributed/propagation.rb +4 -1
  328. data/lib/datadog/tracing/contrib/http/instrumentation.rb +6 -10
  329. data/lib/datadog/tracing/contrib/http/integration.rb +3 -0
  330. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -16
  331. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +7 -15
  332. data/lib/datadog/tracing/contrib/httprb/integration.rb +3 -0
  333. data/lib/datadog/tracing/contrib/kafka/integration.rb +3 -0
  334. data/lib/datadog/tracing/contrib/karafka/configuration/settings.rb +27 -0
  335. data/lib/datadog/tracing/contrib/karafka/distributed/propagation.rb +48 -0
  336. data/lib/datadog/tracing/contrib/karafka/ext.rb +27 -0
  337. data/lib/datadog/tracing/contrib/karafka/integration.rb +45 -0
  338. data/lib/datadog/tracing/contrib/karafka/monitor.rb +66 -0
  339. data/lib/datadog/tracing/contrib/karafka/patcher.rb +71 -0
  340. data/lib/datadog/tracing/contrib/karafka.rb +37 -0
  341. data/lib/datadog/tracing/contrib/lograge/patcher.rb +4 -2
  342. data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +8 -0
  343. data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
  344. data/lib/datadog/tracing/contrib/mongodb/integration.rb +3 -0
  345. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +18 -1
  346. data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +17 -0
  347. data/lib/datadog/tracing/contrib/opensearch/ext.rb +9 -0
  348. data/lib/datadog/tracing/contrib/opensearch/integration.rb +3 -0
  349. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +5 -1
  350. data/lib/datadog/tracing/contrib/patcher.rb +5 -2
  351. data/lib/datadog/tracing/contrib/presto/integration.rb +3 -0
  352. data/lib/datadog/tracing/contrib/rack/header_collection.rb +11 -1
  353. data/lib/datadog/tracing/contrib/rack/integration.rb +2 -2
  354. data/lib/datadog/tracing/contrib/rack/middlewares.rb +1 -1
  355. data/lib/datadog/tracing/contrib/rack/request_queue.rb +1 -1
  356. data/lib/datadog/tracing/contrib/rails/framework.rb +2 -2
  357. data/lib/datadog/tracing/contrib/rails/patcher.rb +1 -1
  358. data/lib/datadog/tracing/contrib/rest_client/integration.rb +3 -0
  359. data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +5 -3
  360. data/lib/datadog/tracing/contrib/sidekiq/client_tracer.rb +6 -1
  361. data/lib/datadog/tracing/contrib/sidekiq/distributed/propagation.rb +3 -0
  362. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  363. data/lib/datadog/tracing/contrib/sidekiq/server_tracer.rb +5 -2
  364. data/lib/datadog/tracing/contrib/span_attribute_schema.rb +6 -1
  365. data/lib/datadog/tracing/contrib/support.rb +28 -0
  366. data/lib/datadog/tracing/contrib.rb +1 -0
  367. data/lib/datadog/tracing/correlation.rb +9 -2
  368. data/lib/datadog/tracing/distributed/b3_multi.rb +1 -1
  369. data/lib/datadog/tracing/distributed/b3_single.rb +1 -1
  370. data/lib/datadog/tracing/distributed/baggage.rb +131 -0
  371. data/lib/datadog/tracing/distributed/datadog.rb +4 -2
  372. data/lib/datadog/tracing/distributed/propagation.rb +25 -4
  373. data/lib/datadog/tracing/distributed/propagation_policy.rb +42 -0
  374. data/lib/datadog/tracing/metadata/errors.rb +4 -4
  375. data/lib/datadog/tracing/metadata/ext.rb +5 -0
  376. data/lib/datadog/tracing/metadata/metastruct.rb +36 -0
  377. data/lib/datadog/tracing/metadata/metastruct_tagging.rb +42 -0
  378. data/lib/datadog/tracing/metadata.rb +2 -0
  379. data/lib/datadog/tracing/sampling/rate_sampler.rb +2 -1
  380. data/lib/datadog/tracing/sampling/span/rule.rb +0 -1
  381. data/lib/datadog/tracing/span.rb +22 -5
  382. data/lib/datadog/tracing/span_event.rb +124 -4
  383. data/lib/datadog/tracing/span_operation.rb +52 -16
  384. data/lib/datadog/tracing/sync_writer.rb +10 -6
  385. data/lib/datadog/tracing/trace_digest.rb +9 -2
  386. data/lib/datadog/tracing/trace_operation.rb +55 -27
  387. data/lib/datadog/tracing/trace_segment.rb +6 -4
  388. data/lib/datadog/tracing/tracer.rb +66 -14
  389. data/lib/datadog/tracing/transport/http/api.rb +5 -4
  390. data/lib/datadog/tracing/transport/http/client.rb +5 -4
  391. data/lib/datadog/tracing/transport/http/traces.rb +13 -44
  392. data/lib/datadog/tracing/transport/http.rb +13 -70
  393. data/lib/datadog/tracing/transport/serializable_trace.rb +31 -7
  394. data/lib/datadog/tracing/transport/trace_formatter.rb +7 -0
  395. data/lib/datadog/tracing/transport/traces.rb +47 -13
  396. data/lib/datadog/tracing/utils.rb +1 -1
  397. data/lib/datadog/tracing/workers/trace_writer.rb +8 -5
  398. data/lib/datadog/tracing/workers.rb +5 -4
  399. data/lib/datadog/tracing/writer.rb +10 -6
  400. data/lib/datadog/tracing.rb +16 -3
  401. data/lib/datadog/version.rb +2 -2
  402. data/lib/datadog.rb +2 -0
  403. metadata +149 -54
  404. data/ext/datadog_profiling_loader/datadog_profiling_loader.c +0 -142
  405. data/ext/datadog_profiling_loader/extconf.rb +0 -60
  406. data/lib/datadog/appsec/assets/waf_rules/processors.json +0 -92
  407. data/lib/datadog/appsec/assets/waf_rules/scanners.json +0 -114
  408. data/lib/datadog/appsec/contrib/devise/event.rb +0 -57
  409. data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +0 -77
  410. data/lib/datadog/appsec/contrib/devise/patcher/registration_controller_patch.rb +0 -54
  411. data/lib/datadog/appsec/contrib/devise/resource.rb +0 -35
  412. data/lib/datadog/appsec/contrib/devise/tracking.rb +0 -57
  413. data/lib/datadog/appsec/contrib/graphql/reactive/multiplex.rb +0 -46
  414. data/lib/datadog/appsec/contrib/patcher.rb +0 -12
  415. data/lib/datadog/appsec/contrib/rack/reactive/request.rb +0 -69
  416. data/lib/datadog/appsec/contrib/rack/reactive/request_body.rb +0 -47
  417. data/lib/datadog/appsec/contrib/rack/reactive/response.rb +0 -53
  418. data/lib/datadog/appsec/contrib/rails/reactive/action.rb +0 -53
  419. data/lib/datadog/appsec/contrib/sinatra/ext.rb +0 -14
  420. data/lib/datadog/appsec/contrib/sinatra/reactive/routed.rb +0 -48
  421. data/lib/datadog/appsec/monitor/reactive/set_user.rb +0 -45
  422. data/lib/datadog/appsec/processor/actions.rb +0 -49
  423. data/lib/datadog/appsec/processor/context.rb +0 -107
  424. data/lib/datadog/appsec/processor/rule_merger.rb +0 -170
  425. data/lib/datadog/appsec/processor.rb +0 -106
  426. data/lib/datadog/appsec/reactive/address_hash.rb +0 -22
  427. data/lib/datadog/appsec/reactive/engine.rb +0 -47
  428. data/lib/datadog/appsec/reactive/operation.rb +0 -68
  429. data/lib/datadog/appsec/reactive/subscriber.rb +0 -19
  430. data/lib/datadog/appsec/scope.rb +0 -58
  431. data/lib/datadog/appsec/utils/trace_operation.rb +0 -15
  432. data/lib/datadog/core/crashtracking/agent_base_url.rb +0 -21
  433. data/lib/datadog/core/remote/transport/http/api/instance.rb +0 -39
  434. data/lib/datadog/core/remote/transport/http/api/spec.rb +0 -21
  435. data/lib/datadog/core/remote/transport/http/builder.rb +0 -219
  436. data/lib/datadog/core/telemetry/http/env.rb +0 -20
  437. data/lib/datadog/core/telemetry/http/ext.rb +0 -28
  438. data/lib/datadog/core/telemetry/http/response.rb +0 -70
  439. data/lib/datadog/core/telemetry/http/transport.rb +0 -90
  440. data/lib/datadog/di/transport.rb +0 -81
  441. data/lib/datadog/tracing/transport/http/api/spec.rb +0 -19
@@ -1,15 +1,21 @@
1
1
  #include "heap_recorder.h"
2
- #include <pthread.h>
3
2
  #include "ruby/st.h"
4
3
  #include "ruby_helpers.h"
5
- #include <errno.h>
6
4
  #include "collectors_stack.h"
7
5
  #include "libdatadog_helpers.h"
8
6
  #include "time_helpers.h"
9
7
 
10
- #if (defined(HAVE_WORKING_RB_GC_FORCE_RECYCLE) && ! defined(NO_SEEN_OBJ_ID_FLAG))
11
- #define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
12
- #endif
8
+ // Note on calloc vs ruby_xcalloc use:
9
+ // * Whenever we're allocating memory after being called by the Ruby VM in a "regular" situation (e.g. initializer)
10
+ // we should use `ruby_xcalloc` to give the VM visibility into what we're doing + give it a chance to manage GC
11
+ // * BUT, when we're being called during a sample, being in the middle of an object allocation is a very special
12
+ // situation for the VM to be in, and we've found the hard way (e.g. https://bugs.ruby-lang.org/issues/20629 and
13
+ // https://github.com/DataDog/dd-trace-rb/pull/4240 ) that it can be easy to do things the VM didn't expect.
14
+ // * Thus, out of caution and to avoid future potential issues such as the ones above, whenever we allocate memory
15
+ // during **sampling** we use `calloc` instead of `ruby_xcalloc`. Note that we've never seen issues from using
16
+ // `ruby_xcalloc` at any time, so this is a **precaution** not a "we've seen it break". But it seems a harmless
17
+ // one to use.
18
+ // This applies to both heap_recorder.c and collectors_thread_context.c
13
19
 
14
20
  // Minimum age (in GC generations) of heap objects we want to include in heap
15
21
  // recorder iterations. Object with age 0 represent objects that have yet to undergo
@@ -30,81 +36,37 @@
30
36
 
31
37
  // A compact representation of a stacktrace frame for a heap allocation.
32
38
  typedef struct {
33
- char *name;
34
- char *filename;
39
+ ddog_prof_ManagedStringId name;
40
+ ddog_prof_ManagedStringId filename;
35
41
  int32_t line;
36
42
  } heap_frame;
37
- static st_index_t heap_frame_hash(heap_frame*, st_index_t seed);
43
+
44
+ // We use memcmp/st_hash below to compare/hash an entire array of heap_frames, so want to make sure no padding is added
45
+ // We could define the structure to be packed, but that seems even weirder across compilers, and this seems more portable?
46
+ _Static_assert(
47
+ sizeof(heap_frame) == sizeof(ddog_prof_ManagedStringId) * 2 + sizeof(int32_t),
48
+ "Size of heap_frame does not match the sum of its members. Padding detected."
49
+ );
38
50
 
39
51
  // A compact representation of a stacktrace for a heap allocation.
40
- //
41
- // We could use a ddog_prof_Slice_Location instead but it has a lot of
42
- // unused fields. Because we have to keep these stacks around for at
43
- // least the lifetime of the objects allocated therein, we would be
44
- // incurring a non-negligible memory overhead for little purpose.
52
+ // Used to dedup heap allocation stacktraces across multiple objects sharing the same allocation location.
45
53
  typedef struct {
54
+ // How many objects are currently tracked in object_records recorder for this heap record.
55
+ uint32_t num_tracked_objects;
56
+
46
57
  uint16_t frames_len;
47
58
  heap_frame frames[];
48
- } heap_stack;
49
- static heap_stack* heap_stack_new(ddog_prof_Slice_Location);
50
- static void heap_stack_free(heap_stack*);
51
- static st_index_t heap_stack_hash(heap_stack*, st_index_t);
59
+ } heap_record;
60
+ static heap_record* heap_record_new(heap_recorder*, ddog_prof_Slice_Location);
61
+ static void heap_record_free(heap_recorder*, heap_record*);
52
62
 
53
63
  #if MAX_FRAMES_LIMIT > UINT16_MAX
54
64
  #error Frames len type not compatible with MAX_FRAMES_LIMIT
55
65
  #endif
56
66
 
57
- enum heap_record_key_type {
58
- HEAP_STACK,
59
- LOCATION_SLICE
60
- };
61
- // This struct allows us to use two different types of stacks when
62
- // interacting with a heap_record hash.
63
- //
64
- // The idea is that we'll always want to use heap_stack-keys when
65
- // adding new entries to the hash since that's the compact stack
66
- // representation we rely on internally.
67
- //
68
- // However, when querying for an existing heap record, we'd save a
69
- // lot of allocations if we could query with the
70
- // ddog_prof_Slice_Location we receive in our external API.
71
- //
72
- // To allow this interchange, we need a union and need to ensure
73
- // that whatever shape of the union, the heap_record_key_cmp_st
74
- // and heap_record_hash_st functions return the same results for
75
- // equivalent stacktraces.
76
- typedef struct {
77
- enum heap_record_key_type type;
78
- union {
79
- // key never owns this if set
80
- heap_stack *heap_stack;
81
- // key never owns this if set
82
- ddog_prof_Slice_Location *location_slice;
83
- };
84
- } heap_record_key;
85
- static heap_record_key* heap_record_key_new(heap_stack*);
86
- static void heap_record_key_free(heap_record_key*);
87
- static int heap_record_key_cmp_st(st_data_t, st_data_t);
88
- static st_index_t heap_record_key_hash_st(st_data_t);
89
- static const struct st_hash_type st_hash_type_heap_record_key = {
90
- heap_record_key_cmp_st,
91
- heap_record_key_hash_st,
92
- };
93
-
94
- // Need to implement these functions to support the location-slice based keys
95
- static st_index_t ddog_location_hash(ddog_prof_Location, st_index_t seed);
96
- static st_index_t ddog_location_slice_hash(ddog_prof_Slice_Location, st_index_t seed);
97
-
98
- // A heap record is used for deduping heap allocation stacktraces across multiple
99
- // objects sharing the same allocation location.
100
- typedef struct {
101
- // How many objects are currently tracked by the heap recorder for this heap record.
102
- uint32_t num_tracked_objects;
103
- // stack is owned by the associated record and gets cleaned up alongside it
104
- heap_stack *stack;
105
- } heap_record;
106
- static heap_record* heap_record_new(heap_stack*);
107
- static void heap_record_free(heap_record*);
67
+ static int heap_record_cmp_st(st_data_t, st_data_t);
68
+ static st_index_t heap_record_hash_st(st_data_t);
69
+ static const struct st_hash_type st_hash_type_heap_record = { .compare = heap_record_cmp_st, .hash = heap_record_hash_st };
108
70
 
109
71
  // An object record is used for storing data about currently tracked live objects
110
72
  typedef struct {
@@ -113,33 +75,25 @@ typedef struct {
113
75
  live_object_data object_data;
114
76
  } object_record;
115
77
  static object_record* object_record_new(long, heap_record*, live_object_data);
116
- static void object_record_free(object_record*);
117
- static VALUE object_record_inspect(object_record*);
78
+ static void object_record_free(heap_recorder*, object_record*);
79
+ static VALUE object_record_inspect(heap_recorder*, object_record*);
118
80
  static object_record SKIPPED_RECORD = {0};
119
81
 
120
- // A wrapper around an object record that is in the process of being recorded and was not
121
- // yet committed.
122
- typedef struct {
123
- // Pointer to the (potentially partial) object_record containing metadata about an ongoing recording.
124
- // When NULL, this symbolizes an unstarted/invalid recording.
125
- object_record *object_record;
126
- // A flag to track whether we had to force set the RUBY_FL_SEEN_OBJ_ID flag on this object
127
- // as part of our workaround around rb_gc_force_recycle issues.
128
- bool did_recycle_workaround;
129
- } recording;
130
-
131
82
  struct heap_recorder {
132
83
  // Config
133
84
  // Whether the recorder should try to determine approximate sizes for tracked objects.
134
85
  bool size_enabled;
135
86
  uint sample_rate;
136
87
 
137
- // Map[key: heap_record_key*, record: heap_record*]
138
- // NOTE: We always use heap_record_key.type == HEAP_STACK for storage but support lookups
139
- // via heap_record_key.type == LOCATION_SLICE to allow for allocation-free fast-paths.
88
+ // Map[key: heap_record*, record: nothing] (This is a set, basically)
140
89
  // NOTE: This table is currently only protected by the GVL since we never interact with it
141
90
  // outside the GVL.
142
- // NOTE: This table has ownership of both its heap_record_keys and heap_records.
91
+ // NOTE: This table has ownership of its heap_records.
92
+ //
93
+ // This is a cpu/memory trade-off: Maintaining the "heap_records" map means we spend extra CPU when sampling as we need
94
+ // to do de-duplication, but we reduce the memory footprint of the heap profiler.
95
+ // In the future, it may be worth revisiting if we can move this inside libdatadog: if libdatadog was able to track
96
+ // entire stacks for us, then we wouldn't need to do it on the Ruby side.
143
97
  st_table *heap_records;
144
98
 
145
99
  // Map[obj_id: long, record: object_record*]
@@ -147,6 +101,11 @@ struct heap_recorder {
147
101
  // outside the GVL.
148
102
  // NOTE: This table has ownership of its object_records. The keys are longs and so are
149
103
  // passed as values.
104
+ //
105
+ // TODO: @ivoanjo We've evolved to actually never need to look up on object_records (we only insert and iterate),
106
+ // so right now this seems to be just a really really fancy self-resizing list/set.
107
+ // If we replace this with a list, we could record the latest id and compare it when inserting to make sure our
108
+ // assumption of ids never reused + always increasing always holds. (This as an alternative to checking for duplicates)
150
109
  st_table *object_records;
151
110
 
152
111
  // Map[obj_id: long, record: object_record*]
@@ -169,14 +128,21 @@ struct heap_recorder {
169
128
  long last_update_ns;
170
129
 
171
130
  // Data for a heap recording that was started but not yet ended
172
- recording active_recording;
131
+ object_record *active_recording;
173
132
 
174
- // Reusable location array, implementing a flyweight pattern for things like iteration.
133
+ // Reusable arrays, implementing a flyweight pattern for things like iteration
134
+ #define REUSABLE_LOCATIONS_SIZE MAX_FRAMES_LIMIT
175
135
  ddog_prof_Location *reusable_locations;
176
136
 
137
+ #define REUSABLE_FRAME_DETAILS_SIZE (2 * MAX_FRAMES_LIMIT) // because it'll be used for both function names AND file names)
138
+ ddog_prof_ManagedStringId *reusable_ids;
139
+ ddog_CharSlice *reusable_char_slices;
140
+
177
141
  // Sampling state
178
142
  uint num_recordings_skipped;
179
143
 
144
+ ddog_prof_ManagedStringStorage string_storage;
145
+
180
146
  struct stats_last_update {
181
147
  size_t objects_alive;
182
148
  size_t objects_dead;
@@ -200,10 +166,10 @@ struct heap_recorder {
200
166
  } stats_lifetime;
201
167
  };
202
168
 
203
- struct end_heap_allocation_args {
204
- struct heap_recorder *heap_recorder;
169
+ typedef struct {
170
+ heap_recorder *heap_recorder;
205
171
  ddog_prof_Slice_Location locations;
206
- };
172
+ } end_heap_allocation_args;
207
173
 
208
174
  static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Location);
209
175
  static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
@@ -214,10 +180,13 @@ static int st_object_record_update(st_data_t, st_data_t, st_data_t);
214
180
  static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
215
181
  static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra);
216
182
  static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
217
- static void commit_recording(heap_recorder*, heap_record*, recording);
183
+ static void commit_recording(heap_recorder *, heap_record *, object_record *active_recording);
218
184
  static VALUE end_heap_allocation_recording(VALUE end_heap_allocation_args);
219
185
  static void heap_recorder_update(heap_recorder *heap_recorder, bool full_update);
220
186
  static inline double ewma_stat(double previous, double current);
187
+ static void unintern_or_raise(heap_recorder *, ddog_prof_ManagedStringId);
188
+ static void unintern_all_or_raise(heap_recorder *recorder, ddog_prof_Slice_ManagedStringId ids);
189
+ static VALUE get_ruby_string_or_raise(heap_recorder*, ddog_prof_ManagedStringId);
221
190
 
222
191
  // ==========================
223
192
  // Heap Recorder External API
@@ -228,16 +197,19 @@ static inline double ewma_stat(double previous, double current);
228
197
  // happens under the GVL.
229
198
  //
230
199
  // ==========================
231
- heap_recorder* heap_recorder_new(void) {
200
+ heap_recorder* heap_recorder_new(ddog_prof_ManagedStringStorage string_storage) {
232
201
  heap_recorder *recorder = ruby_xcalloc(1, sizeof(heap_recorder));
233
202
 
234
- recorder->heap_records = st_init_table(&st_hash_type_heap_record_key);
203
+ recorder->heap_records = st_init_table(&st_hash_type_heap_record);
235
204
  recorder->object_records = st_init_numtable();
236
205
  recorder->object_records_snapshot = NULL;
237
- recorder->reusable_locations = ruby_xcalloc(MAX_FRAMES_LIMIT, sizeof(ddog_prof_Location));
238
- recorder->active_recording = (recording) {0};
206
+ recorder->reusable_locations = ruby_xcalloc(REUSABLE_LOCATIONS_SIZE, sizeof(ddog_prof_Location));
207
+ recorder->reusable_ids = ruby_xcalloc(REUSABLE_FRAME_DETAILS_SIZE, sizeof(ddog_prof_ManagedStringId));
208
+ recorder->reusable_char_slices = ruby_xcalloc(REUSABLE_FRAME_DETAILS_SIZE, sizeof(ddog_CharSlice));
209
+ recorder->active_recording = NULL;
239
210
  recorder->size_enabled = true;
240
211
  recorder->sample_rate = 1; // By default do no sampling on top of what allocation profiling already does
212
+ recorder->string_storage = string_storage;
241
213
 
242
214
  return recorder;
243
215
  }
@@ -254,19 +226,21 @@ void heap_recorder_free(heap_recorder *heap_recorder) {
254
226
  }
255
227
 
256
228
  // Clean-up all object records
257
- st_foreach(heap_recorder->object_records, st_object_record_entry_free, 0);
229
+ st_foreach(heap_recorder->object_records, st_object_record_entry_free, (st_data_t) heap_recorder);
258
230
  st_free_table(heap_recorder->object_records);
259
231
 
260
232
  // Clean-up all heap records (this includes those only referred to by queued_samples)
261
- st_foreach(heap_recorder->heap_records, st_heap_record_entry_free, 0);
233
+ st_foreach(heap_recorder->heap_records, st_heap_record_entry_free, (st_data_t) heap_recorder);
262
234
  st_free_table(heap_recorder->heap_records);
263
235
 
264
- if (heap_recorder->active_recording.object_record != NULL && heap_recorder->active_recording.object_record != &SKIPPED_RECORD) {
236
+ if (heap_recorder->active_recording != NULL && heap_recorder->active_recording != &SKIPPED_RECORD) {
265
237
  // If there's a partial object record, clean it up as well
266
- object_record_free(heap_recorder->active_recording.object_record);
238
+ object_record_free(heap_recorder, heap_recorder->active_recording);
267
239
  }
268
240
 
269
241
  ruby_xfree(heap_recorder->reusable_locations);
242
+ ruby_xfree(heap_recorder->reusable_ids);
243
+ ruby_xfree(heap_recorder->reusable_char_slices);
270
244
 
271
245
  ruby_xfree(heap_recorder);
272
246
  }
@@ -308,7 +282,7 @@ void heap_recorder_after_fork(heap_recorder *heap_recorder) {
308
282
  //
309
283
  // There is one small caveat though: fork only preserves one thread and in a Ruby app, that
310
284
  // will be the thread holding on to the GVL. Since we support iteration on the heap recorder
311
- // outside of the GVL, any state specific to that interaction may be incosistent after fork
285
+ // outside of the GVL, any state specific to that interaction may be inconsistent after fork
312
286
  // (e.g. an acquired lock for thread safety). Iteration operates on object_records_snapshot
313
287
  // though and that one will be updated on next heap_recorder_prepare_iteration so we really
314
288
  // only need to finish any iteration that might have been left unfinished.
@@ -320,18 +294,28 @@ void heap_recorder_after_fork(heap_recorder *heap_recorder) {
320
294
  heap_recorder->stats_lifetime = (struct stats_lifetime) {0};
321
295
  }
322
296
 
323
- void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice *alloc_class) {
297
+ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice alloc_class) {
324
298
  if (heap_recorder == NULL) {
325
299
  return;
326
300
  }
327
301
 
328
- if (heap_recorder->active_recording.object_record != NULL) {
302
+ if (heap_recorder->active_recording != NULL) {
329
303
  rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end.");
330
304
  }
331
305
 
332
- if (heap_recorder->num_recordings_skipped + 1 < heap_recorder->sample_rate) {
333
- heap_recorder->active_recording.object_record = &SKIPPED_RECORD;
334
- heap_recorder->num_recordings_skipped++;
306
+ if (++heap_recorder->num_recordings_skipped < heap_recorder->sample_rate ||
307
+ #ifdef NO_IMEMO_OBJECT_ID
308
+ // On Ruby 3.5, we can't ask the object_id from IMEMOs (https://github.com/ruby/ruby/pull/13347)
309
+ RB_BUILTIN_TYPE(new_obj) == RUBY_T_IMEMO
310
+ #else
311
+ false
312
+ #endif
313
+ // If we got really unlucky and an allocation showed up during an update (because it triggered an allocation
314
+ // directly OR because the GVL got released in the middle of an update), let's skip this sample as well.
315
+ // See notes on `heap_recorder_update` for details.
316
+ || heap_recorder->updating
317
+ ) {
318
+ heap_recorder->active_recording = &SKIPPED_RECORD;
335
319
  return;
336
320
  }
337
321
 
@@ -342,85 +326,60 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
342
326
  rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling.");
343
327
  }
344
328
 
345
- bool did_recycle_workaround = false;
346
-
347
- #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
348
- // If we are in a ruby version that has a working rb_gc_force_recycle implementation,
349
- // its usage may lead to an object being re-used outside of the typical GC cycle.
350
- //
351
- // This re-use is in theory invisible to us unless we're lucky enough to sample both
352
- // the original object and the replacement that uses the recycled slot.
353
- //
354
- // In practice, we've observed (https://github.com/DataDog/dd-trace-rb/pull/3366)
355
- // that non-noop implementations of rb_gc_force_recycle have an implementation bug
356
- // which results in the object that re-used the recycled slot inheriting the same
357
- // object id without setting the FL_SEEN_OBJ_ID flag. We rely on this knowledge to
358
- // "observe" implicit frees when an object we are tracking is force-recycled.
359
- //
360
- // However, it may happen that we start tracking a new object and that object was
361
- // allocated on a recycled slot. Due to the bug, this object would be missing the
362
- // FL_SEEN_OBJ_ID flag even though it was not recycled itself. If we left it be,
363
- // when we're doing our liveness check, the absence of the flag would trigger our
364
- // implicit free workaround and the object would be inferred as recycled even though
365
- // it might still be alive.
366
- //
367
- // Thus, if we detect that this new allocation is already missing the flag at the start
368
- // of the heap allocation recording, we force-set it. This should be safe since we
369
- // just called rb_obj_id on it above and the expectation is that any flaggable object
370
- // that goes through it ends up with the flag set (as evidenced by the GC_ASSERT
371
- // lines in https://github.com/ruby/ruby/blob/4a8d7246d15b2054eacb20f8ab3d29d39a3e7856/gc.c#L4050C14-L4050C14).
372
- if (RB_FL_ABLE(new_obj) && !RB_FL_TEST(new_obj, RUBY_FL_SEEN_OBJ_ID)) {
373
- RB_FL_SET(new_obj, RUBY_FL_SEEN_OBJ_ID);
374
- did_recycle_workaround = true;
329
+ heap_recorder->active_recording = object_record_new(
330
+ FIX2LONG(ruby_obj_id),
331
+ NULL,
332
+ (live_object_data) {
333
+ .weight = weight * heap_recorder->sample_rate,
334
+ .class = intern_or_raise(heap_recorder->string_storage, alloc_class),
335
+ .alloc_gen = rb_gc_count(),
375
336
  }
376
- #endif
377
-
378
- heap_recorder->active_recording = (recording) {
379
- .object_record = object_record_new(FIX2LONG(ruby_obj_id), NULL, (live_object_data) {
380
- .weight = weight * heap_recorder->sample_rate,
381
- .class = alloc_class != NULL ? string_from_char_slice(*alloc_class) : NULL,
382
- .alloc_gen = rb_gc_count(),
383
- }),
384
- .did_recycle_workaround = did_recycle_workaround,
385
- };
337
+ );
386
338
  }
387
339
 
388
340
  // end_heap_allocation_recording_with_rb_protect gets called while the stack_recorder is holding one of the profile
389
341
  // locks. To enable us to correctly unlock the profile on exception, we wrap the call to end_heap_allocation_recording
390
342
  // with an rb_protect.
391
343
  __attribute__((warn_unused_result))
392
- int end_heap_allocation_recording_with_rb_protect(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
344
+ int end_heap_allocation_recording_with_rb_protect(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
345
+ if (heap_recorder == NULL) {
346
+ return 0;
347
+ }
348
+ if (heap_recorder->active_recording == &SKIPPED_RECORD) {
349
+ // Short circuit, in this case there's nothing to be done
350
+ heap_recorder->active_recording = NULL;
351
+ return 0;
352
+ }
353
+
354
+
393
355
  int exception_state;
394
- struct end_heap_allocation_args end_heap_allocation_args = {
356
+ end_heap_allocation_args args = {
395
357
  .heap_recorder = heap_recorder,
396
358
  .locations = locations,
397
359
  };
398
- rb_protect(end_heap_allocation_recording, (VALUE) &end_heap_allocation_args, &exception_state);
360
+ rb_protect(end_heap_allocation_recording, (VALUE) &args, &exception_state);
399
361
  return exception_state;
400
362
  }
401
363
 
402
- static VALUE end_heap_allocation_recording(VALUE end_heap_allocation_args) {
403
- struct end_heap_allocation_args *args = (struct end_heap_allocation_args *) end_heap_allocation_args;
364
+ static VALUE end_heap_allocation_recording(VALUE protect_args) {
365
+ end_heap_allocation_args *args = (end_heap_allocation_args *) protect_args;
404
366
 
405
- struct heap_recorder *heap_recorder = args->heap_recorder;
367
+ heap_recorder *heap_recorder = args->heap_recorder;
406
368
  ddog_prof_Slice_Location locations = args->locations;
407
369
 
408
- if (heap_recorder == NULL) {
409
- return Qnil;
410
- }
370
+ object_record *active_recording = heap_recorder->active_recording;
411
371
 
412
- recording active_recording = heap_recorder->active_recording;
413
-
414
- if (active_recording.object_record == NULL) {
372
+ if (active_recording == NULL) {
415
373
  // Recording ended without having been started?
416
374
  rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started");
417
375
  }
418
376
  // From now on, mark the global active recording as invalid so we can short-circuit at any point
419
377
  // and not end up with a still active recording. the local active_recording still holds the
420
378
  // data required for committing though.
421
- heap_recorder->active_recording = (recording) {0};
379
+ heap_recorder->active_recording = NULL;
422
380
 
423
- if (active_recording.object_record == &SKIPPED_RECORD) { // special marker when we decided to skip due to sampling
381
+ if (active_recording == &SKIPPED_RECORD) { // special marker when we decided to skip due to sampling
382
+ // Note: Remember to update the short circuit in end_heap_allocation_recording_with_rb_protect if this logic changes
424
383
  return Qnil;
425
384
  }
426
385
 
@@ -440,15 +399,28 @@ void heap_recorder_update_young_objects(heap_recorder *heap_recorder) {
440
399
  heap_recorder_update(heap_recorder, /* full_update: */ false);
441
400
  }
442
401
 
402
+ // NOTE: This function needs and assumes it gets called with the GVL being held.
403
+ // But importantly **some of the operations inside `st_object_record_update` may cause a thread switch**,
404
+ // so we can't assume a single update happens in a single "atomic" step -- other threads may get some running time
405
+ // in the meanwhile.
443
406
  static void heap_recorder_update(heap_recorder *heap_recorder, bool full_update) {
444
407
  if (heap_recorder->updating) {
445
- if (full_update) rb_raise(rb_eRuntimeError, "BUG: full_update should not be triggered during another update");
446
-
447
- // If we try to update while another update is still running, short-circuit.
448
- // NOTE: This runs while holding the GVL. But since updates may be triggered from GC activity, there's still
449
- // a chance for updates to be attempted concurrently if scheduling gods so determine.
450
- heap_recorder->stats_lifetime.updates_skipped_concurrent++;
451
- return;
408
+ if (full_update) {
409
+ // There's another thread that's already doing an update :(
410
+ //
411
+ // Because there's a lock on the `StackRecorder` (see @no_concurrent_serialize_mutex) then it's not possible that
412
+ // the other update is a full update.
413
+ // Thus we expect is happening is that the GVL got released by the other thread in the middle of a non-full update
414
+ // and the scheduler thread decided now was a great time to serialize the profile.
415
+ //
416
+ // So, let's yield the time on the current thread until Ruby goes back to the other thread doing the update and
417
+ // it finishes cleanly.
418
+ while (heap_recorder->updating) { rb_thread_schedule(); }
419
+ } else {
420
+ // Non-full updates are optional, so let's walk away
421
+ heap_recorder->stats_lifetime.updates_skipped_concurrent++;
422
+ return;
423
+ }
452
424
  }
453
425
 
454
426
  if (heap_recorder->object_records_snapshot != NULL) {
@@ -602,26 +574,10 @@ VALUE heap_recorder_state_snapshot(heap_recorder *heap_recorder) {
602
574
  return hash;
603
575
  }
604
576
 
605
- void heap_recorder_testonly_assert_hash_matches(ddog_prof_Slice_Location locations) {
606
- heap_stack *stack = heap_stack_new(locations);
607
- heap_record_key stack_based_key = (heap_record_key) {
608
- .type = HEAP_STACK,
609
- .heap_stack = stack,
610
- };
611
- heap_record_key location_based_key = (heap_record_key) {
612
- .type = LOCATION_SLICE,
613
- .location_slice = &locations,
614
- };
615
-
616
- st_index_t stack_hash = heap_record_key_hash_st((st_data_t) &stack_based_key);
617
- st_index_t location_hash = heap_record_key_hash_st((st_data_t) &location_based_key);
618
-
619
- heap_stack_free(stack);
620
-
621
- if (stack_hash != location_hash) {
622
- rb_raise(rb_eRuntimeError, "Heap record key hashes built from the same locations differ. stack_based_hash=%"PRI_VALUE_PREFIX"u location_based_hash=%"PRI_VALUE_PREFIX"u", stack_hash, location_hash);
623
- }
624
- }
577
+ typedef struct {
578
+ heap_recorder *recorder;
579
+ VALUE debug_str;
580
+ } debug_context;
625
581
 
626
582
  VALUE heap_recorder_testonly_debug(heap_recorder *heap_recorder) {
627
583
  if (heap_recorder == NULL) {
@@ -629,7 +585,8 @@ VALUE heap_recorder_testonly_debug(heap_recorder *heap_recorder) {
629
585
  }
630
586
 
631
587
  VALUE debug_str = rb_str_new2("object records:\n");
632
- st_foreach(heap_recorder->object_records, st_object_records_debug, (st_data_t) debug_str);
588
+ debug_context context = (debug_context) {.recorder = heap_recorder, .debug_str = debug_str};
589
+ st_foreach(heap_recorder->object_records, st_object_records_debug, (st_data_t) &context);
633
590
 
634
591
  rb_str_catf(debug_str, "state snapshot: %"PRIsVALUE"\n------\n", heap_recorder_state_snapshot(heap_recorder));
635
592
 
@@ -639,18 +596,19 @@ VALUE heap_recorder_testonly_debug(heap_recorder *heap_recorder) {
639
596
  // ==========================
640
597
  // Heap Recorder Internal API
641
598
  // ==========================
642
- static int st_heap_record_entry_free(st_data_t key, st_data_t value, DDTRACE_UNUSED st_data_t extra_arg) {
643
- heap_record_key *record_key = (heap_record_key*) key;
644
- heap_record_key_free(record_key);
645
- heap_record_free((heap_record *) value);
599
+ static int st_heap_record_entry_free(st_data_t key, DDTRACE_UNUSED st_data_t value, st_data_t extra_arg) {
600
+ heap_recorder *recorder = (heap_recorder *) extra_arg;
601
+ heap_record_free(recorder, (heap_record *) key);
646
602
  return ST_DELETE;
647
603
  }
648
604
 
649
- static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t value, DDTRACE_UNUSED st_data_t extra_arg) {
650
- object_record_free((object_record *) value);
605
+ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra_arg) {
606
+ heap_recorder *recorder = (heap_recorder *) extra_arg;
607
+ object_record_free(recorder, (object_record *) value);
651
608
  return ST_DELETE;
652
609
  }
653
610
 
611
+ // NOTE: Some operations inside this function can cause the GVL to be released! Plan accordingly.
654
612
  static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
655
613
  long obj_id = (long) key;
656
614
  object_record *record = (object_record*) value;
@@ -676,7 +634,7 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
676
634
  return ST_CONTINUE;
677
635
  }
678
636
 
679
- if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
637
+ if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) { // Note: This function call can cause the GVL to be released
680
638
  // Id no longer associated with a valid ref. Need to delete this object record!
681
639
  on_committed_object_record_cleanup(recorder, record);
682
640
  recorder->stats_last_update.objects_dead++;
@@ -685,41 +643,6 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
685
643
 
686
644
  // If we got this far, then we found a valid live object for the tracked id.
687
645
 
688
- #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
689
- // If we are in a ruby version that has a working rb_gc_force_recycle implementation,
690
- // its usage may lead to an object being re-used outside of the typical GC cycle.
691
- //
692
- // This re-use is in theory invisible to us and would mean that the ref from which we
693
- // collected the object_record metadata may not be the same as the current ref and
694
- // thus any further reporting would be innacurately attributed to stale metadata.
695
- //
696
- // In practice, there is a way for us to notice that this happened because of a bug
697
- // in the implementation of rb_gc_force_recycle. Our heap profiler relies on object
698
- // ids and id2ref to detect whether objects are still alive. Turns out that when an
699
- // object with an id is re-used via rb_gc_force_recycle, it will "inherit" the ID
700
- // of the old object but it will NOT have the FL_SEEN_OBJ_ID as per the experiment
701
- // in https://github.com/DataDog/dd-trace-rb/pull/3360#discussion_r1442823517
702
- //
703
- // Thus, if we detect that the ref we just resolved above is missing this flag, we can
704
- // safely say re-use happened and thus treat it as an implicit free of the object
705
- // we were tracking (the original one which got recycled).
706
- if (RB_FL_ABLE(ref) && !RB_FL_TEST(ref, RUBY_FL_SEEN_OBJ_ID)) {
707
-
708
- // NOTE: We don't really need to set this flag for heap recorder to work correctly
709
- // but doing so partially mitigates a bug in runtimes with working rb_gc_force_recycle
710
- // which leads to broken invariants and leaking of entries in obj_to_id and id_to_obj
711
- // tables in objspace. We already do the same thing when we sample a recycled object,
712
- // here we apply it as well to objects that replace recycled objects that were being
713
- // tracked. More details in https://github.com/DataDog/dd-trace-rb/pull/3366
714
- RB_FL_SET(ref, RUBY_FL_SEEN_OBJ_ID);
715
-
716
- on_committed_object_record_cleanup(recorder, record);
717
- recorder->stats_last_update.objects_dead++;
718
- return ST_DELETE;
719
- }
720
-
721
- #endif
722
-
723
646
  if (
724
647
  recorder->size_enabled &&
725
648
  recorder->update_include_old && // We only update sizes when doing a full update
@@ -727,11 +650,16 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
727
650
  ) {
728
651
  // if we were asked to update sizes and this object was not already seen as being frozen,
729
652
  // update size again.
730
- record->object_data.size = ruby_obj_memsize_of(ref);
653
+ record->object_data.size = ruby_obj_memsize_of(ref); // Note: This function call can cause the GVL to be released... maybe?
654
+ // (With T_DATA for instance, since it can be a custom method supplied by extensions)
731
655
  // Check if it's now frozen so we skip a size update next time
732
656
  record->object_data.is_frozen = RB_OBJ_FROZEN(ref);
733
657
  }
734
658
 
659
+ // Ensure that ref is kept on the stack so the Ruby garbage collector does not try to clean up the object before this
660
+ // point.
661
+ RB_GC_GUARD(ref);
662
+
735
663
  recorder->stats_last_update.objects_alive++;
736
664
  if (record->object_data.is_frozen) {
737
665
  recorder->stats_last_update.objects_frozen++;
@@ -743,7 +671,7 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
743
671
  // WARN: This can get called outside the GVL. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED.
744
672
  static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra) {
745
673
  object_record *record = (object_record*) value;
746
- const heap_stack *stack = record->heap_record->stack;
674
+ const heap_record *stack = record->heap_record;
747
675
  iteration_context *context = (iteration_context*) extra;
748
676
 
749
677
  const heap_recorder *recorder = context->heap_recorder;
@@ -757,10 +685,12 @@ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t val
757
685
  for (uint16_t i = 0; i < stack->frames_len; i++) {
758
686
  const heap_frame *frame = &stack->frames[i];
759
687
  locations[i] = (ddog_prof_Location) {
760
- .mapping = {.filename = DDOG_CHARSLICE_C(""), .build_id = DDOG_CHARSLICE_C("")},
688
+ .mapping = {.filename = DDOG_CHARSLICE_C(""), .build_id = DDOG_CHARSLICE_C(""), .build_id_id = {}},
761
689
  .function = {
762
- .name = {.ptr = frame->name, .len = strlen(frame->name)},
763
- .filename = {.ptr = frame->filename, .len = strlen(frame->filename)},
690
+ .name = DDOG_CHARSLICE_C(""),
691
+ .name_id = frame->name,
692
+ .filename = DDOG_CHARSLICE_C(""),
693
+ .filename_id = frame->filename,
764
694
  },
765
695
  .line = frame->line,
766
696
  };
@@ -770,6 +700,7 @@ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t val
770
700
  iteration_data.object_data = record->object_data;
771
701
  iteration_data.locations = (ddog_prof_Slice_Location) {.ptr = locations, .len = stack->frames_len};
772
702
 
703
+ // This is expected to be StackRecorder's add_heap_sample_to_active_profile_without_gvl
773
704
  if (!context->for_each_callback(iteration_data, context->for_each_callback_extra_arg)) {
774
705
  return ST_STOP;
775
706
  }
@@ -778,113 +709,69 @@ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t val
778
709
  }
779
710
 
780
711
  static int st_object_records_debug(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra) {
781
- VALUE debug_str = (VALUE) extra;
712
+ debug_context *context = (debug_context*) extra;
713
+ VALUE debug_str = context->debug_str;
782
714
 
783
715
  object_record *record = (object_record*) value;
784
716
 
785
- rb_str_catf(debug_str, "%"PRIsVALUE"\n", object_record_inspect(record));
717
+ rb_str_catf(debug_str, "%"PRIsVALUE"\n", object_record_inspect(context->recorder, record));
786
718
 
787
719
  return ST_CONTINUE;
788
720
  }
789
721
 
790
- // Struct holding data required for an update operation on heap_records
791
- typedef struct {
792
- // [in] The recording containing the new object record we want to add.
793
- // NOTE: Transfer of ownership of the contained object record is assumed, do not re-use it after call to ::update_object_record_entry
794
- recording recording;
795
-
796
- // [in] The heap recorder where the update is happening.
797
- heap_recorder *heap_recorder;
798
- } object_record_update_data;
799
-
800
- static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t *value, st_data_t data, int existing) {
801
- object_record_update_data *update_data = (object_record_update_data*) data;
802
- recording recording = update_data->recording;
803
- object_record *new_object_record = recording.object_record;
804
- if (existing) {
805
- object_record *existing_record = (object_record*) (*value);
806
- if (recording.did_recycle_workaround) {
807
- // In this case, it's possible for an object id to be re-used and we were lucky enough to have
808
- // sampled both the original object and the replacement so cleanup the old one and replace it with
809
- // the new object_record (i.e. treat this as a combined free+allocation).
810
- on_committed_object_record_cleanup(update_data->heap_recorder, existing_record);
811
- } else {
812
- // This is not supposed to happen, raising...
813
- VALUE existing_inspect = object_record_inspect(existing_record);
814
- VALUE new_inspect = object_record_inspect(new_object_record);
815
- rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
816
- "the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect);
817
- }
722
+ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t *value, st_data_t new_object_record, int existing) {
723
+ if (!existing) {
724
+ (*value) = (st_data_t) new_object_record; // Expected to be a `object_record *`
725
+ } else {
726
+ // If key already existed, we don't touch the existing value, so it can be used for diagnostics
818
727
  }
819
- // Always carry on with the update, we want the new record to be there at the end
820
- (*value) = (st_data_t) new_object_record;
821
728
  return ST_CONTINUE;
822
729
  }
823
730
 
824
- static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_record, recording recording) {
731
+ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_record, object_record *active_recording) {
825
732
  // Link the object record with the corresponding heap record. This was the last remaining thing we
826
733
  // needed to fully build the object_record.
827
- recording.object_record->heap_record = heap_record;
734
+ active_recording->heap_record = heap_record;
828
735
  if (heap_record->num_tracked_objects == UINT32_MAX) {
829
736
  rb_raise(rb_eRuntimeError, "Reached maximum number of tracked objects for heap record");
830
737
  }
831
738
  heap_record->num_tracked_objects++;
832
739
 
833
- // Update object_records with the data for this new recording
834
- object_record_update_data update_data = (object_record_update_data) {
835
- .heap_recorder = heap_recorder,
836
- .recording = recording,
837
- };
838
- st_update(heap_recorder->object_records, recording.object_record->obj_id, update_object_record_entry, (st_data_t) &update_data);
839
- }
740
+ int existing_error = st_update(heap_recorder->object_records, active_recording->obj_id, update_object_record_entry, (st_data_t) active_recording);
741
+ if (existing_error) {
742
+ object_record *existing_record = NULL;
743
+ st_lookup(heap_recorder->object_records, active_recording->obj_id, (st_data_t *) &existing_record);
744
+ if (existing_record == NULL) rb_raise(rb_eRuntimeError, "Unexpected NULL when reading existing record");
840
745
 
841
- // Struct holding data required for an update operation on heap_records
842
- typedef struct {
843
- // [in] The locations we did this update with
844
- ddog_prof_Slice_Location locations;
845
- // [out] Pointer that will be updated to the updated heap record to prevent having to do
846
- // another lookup to access the updated heap record.
847
- heap_record **record;
848
- } heap_record_update_data;
746
+ VALUE existing_inspect = object_record_inspect(heap_recorder, existing_record);
747
+ VALUE new_inspect = object_record_inspect(heap_recorder, active_recording);
748
+ rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
749
+ "the same id. previous={%"PRIsVALUE"} new={%"PRIsVALUE"}", existing_inspect, new_inspect);
750
+ }
751
+ }
849
752
 
850
- // This function assumes ownership of stack_data is passed on to it so it'll either transfer ownership or clean-up.
851
753
  static int update_heap_record_entry_with_new_allocation(st_data_t *key, st_data_t *value, st_data_t data, int existing) {
852
- heap_record_update_data *update_data = (heap_record_update_data*) data;
754
+ heap_record **new_or_existing_record = (heap_record **) data;
755
+ (*new_or_existing_record) = (heap_record *) (*key);
853
756
 
854
757
  if (!existing) {
855
- // there was no matching heap record so lets create a new one...
856
- // we need to initialize a heap_record_key with a new stack and use that for the key storage. We can't use the
857
- // locations-based key we used for the update call because we don't own its lifecycle. So we create a new
858
- // heap stack and will pass ownership of it to the heap_record.
859
- heap_stack *stack = heap_stack_new(update_data->locations);
860
- (*key) = (st_data_t) heap_record_key_new(stack);
861
- (*value) = (st_data_t) heap_record_new(stack);
758
+ (*value) = (st_data_t) true; // We're only using this hash as a set
862
759
  }
863
760
 
864
- heap_record *record = (heap_record*) (*value);
865
- (*update_data->record) = record;
866
-
867
761
  return ST_CONTINUE;
868
762
  }
869
763
 
870
764
  static heap_record* get_or_create_heap_record(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
871
- // For performance reasons we use a stack-allocated location-slice based key. This allows us
872
- // to do allocation-free lookups and reuse of a matching existing heap record.
873
- // NOTE: If we end up creating a new record, we'll create a heap-allocated key we own and use that for storage
874
- // instead of this one.
875
- heap_record_key lookup_key = (heap_record_key) {
876
- .type = LOCATION_SLICE,
877
- .location_slice = &locations,
878
- };
765
+ // See note on "heap_records" definition for why we keep this map.
766
+ heap_record *stack = heap_record_new(heap_recorder, locations);
879
767
 
880
- heap_record *heap_record = NULL;
881
- heap_record_update_data update_data = (heap_record_update_data) {
882
- .locations = locations,
883
- .record = &heap_record,
884
- };
885
- st_update(heap_recorder->heap_records, (st_data_t) &lookup_key, update_heap_record_entry_with_new_allocation, (st_data_t) &update_data);
768
+ heap_record *new_or_existing_record = NULL; // Will be set inside update_heap_record_entry_with_new_allocation
769
+ bool existing = st_update(heap_recorder->heap_records, (st_data_t) stack, update_heap_record_entry_with_new_allocation, (st_data_t) &new_or_existing_record);
770
+ if (existing) {
771
+ heap_record_free(heap_recorder, stack);
772
+ }
886
773
 
887
- return heap_record;
774
+ return new_or_existing_record;
888
775
  }
889
776
 
890
777
  static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_record *heap_record) {
@@ -893,18 +780,10 @@ static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_rec
893
780
  return;
894
781
  }
895
782
 
896
- heap_record_key heap_key = (heap_record_key) {
897
- .type = HEAP_STACK,
898
- .heap_stack = heap_record->stack,
899
- };
900
- // We need to access the deleted key to free it since we gave ownership of the keys to the hash.
901
- // st_delete will change this pointer to point to the removed key if one is found.
902
- heap_record_key *deleted_key = &heap_key;
903
- if (!st_delete(heap_recorder->heap_records, (st_data_t*) &deleted_key, NULL)) {
783
+ if (!st_delete(heap_recorder->heap_records, (st_data_t*) &heap_record, NULL)) {
904
784
  rb_raise(rb_eRuntimeError, "Attempted to cleanup an untracked heap_record");
905
785
  };
906
- heap_record_key_free(deleted_key);
907
- heap_record_free(heap_record);
786
+ heap_record_free(heap_recorder, heap_record);
908
787
  }
909
788
 
910
789
  static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, object_record *record) {
@@ -920,60 +799,44 @@ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, obj
920
799
  heap_record *heap_record = record->heap_record;
921
800
 
922
801
  if (heap_record == NULL) rb_raise(rb_eRuntimeError, "heap_record was NULL in on_committed_object_record_cleanup");
923
- if (heap_record->stack == NULL) rb_raise(rb_eRuntimeError, "heap_record->stack was NULL in on_committed_object_record_cleanup");
924
802
 
925
803
  heap_record->num_tracked_objects--;
926
804
 
927
805
  // One less object using this heap record, it may have become unused...
928
806
  cleanup_heap_record_if_unused(heap_recorder, heap_record);
929
807
 
930
- object_record_free(record);
931
- }
932
-
933
- // ===============
934
- // Heap Record API
935
- // ===============
936
- heap_record* heap_record_new(heap_stack *stack) {
937
- heap_record *record = ruby_xcalloc(1, sizeof(heap_record));
938
- record->num_tracked_objects = 0;
939
- record->stack = stack;
940
- return record;
941
- }
942
-
943
- void heap_record_free(heap_record *record) {
944
- heap_stack_free(record->stack);
945
- ruby_xfree(record);
808
+ object_record_free(heap_recorder, record);
946
809
  }
947
810
 
948
-
949
811
  // =================
950
812
  // Object Record API
951
813
  // =================
952
814
  object_record* object_record_new(long obj_id, heap_record *heap_record, live_object_data object_data) {
953
- object_record *record = ruby_xcalloc(1, sizeof(object_record));
815
+ object_record *record = calloc(1, sizeof(object_record)); // See "note on calloc vs ruby_xcalloc use" above
954
816
  record->obj_id = obj_id;
955
817
  record->heap_record = heap_record;
956
818
  record->object_data = object_data;
957
819
  return record;
958
820
  }
959
821
 
960
- void object_record_free(object_record *record) {
961
- if (record->object_data.class != NULL) {
962
- ruby_xfree(record->object_data.class);
963
- }
964
- ruby_xfree(record);
822
+ void object_record_free(heap_recorder *recorder, object_record *record) {
823
+ unintern_or_raise(recorder, record->object_data.class);
824
+ free(record); // See "note on calloc vs ruby_xcalloc use" above
965
825
  }
966
826
 
967
- VALUE object_record_inspect(object_record *record) {
968
- heap_frame top_frame = record->heap_record->stack->frames[0];
827
+ VALUE object_record_inspect(heap_recorder *recorder, object_record *record) {
828
+ heap_frame top_frame = record->heap_record->frames[0];
829
+ VALUE filename = get_ruby_string_or_raise(recorder, top_frame.filename);
969
830
  live_object_data object_data = record->object_data;
970
- VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu gen_age=%zu frozen=%d ",
971
- record->obj_id, object_data.weight, object_data.size, top_frame.filename,
831
+
832
+ VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%"PRIsVALUE":%d alloc_gen=%zu gen_age=%zu frozen=%d ",
833
+ record->obj_id, object_data.weight, object_data.size, filename,
972
834
  (int) top_frame.line, object_data.alloc_gen, object_data.gen_age, object_data.is_frozen);
973
835
 
974
- const char *class = record->object_data.class;
975
- if (class != NULL) {
976
- rb_str_catf(inspect, "class=%s ", class);
836
+ if (record->object_data.class.value > 0) {
837
+ VALUE class = get_ruby_string_or_raise(recorder, record->object_data.class);
838
+
839
+ rb_str_catf(inspect, "class=%"PRIsVALUE" ", class);
977
840
  }
978
841
  VALUE ref;
979
842
 
@@ -993,223 +856,103 @@ VALUE object_record_inspect(object_record *record) {
993
856
  }
994
857
 
995
858
  // ==============
996
- // Heap Frame API
997
- // ==============
998
- int heap_frame_cmp(heap_frame *f1, heap_frame *f2) {
999
- int line_diff = (int) (f1->line - f2->line);
1000
- if (line_diff != 0) {
1001
- return line_diff;
1002
- }
1003
- int cmp = strcmp(f1->name, f2->name);
1004
- if (cmp != 0) {
1005
- return cmp;
1006
- }
1007
- return strcmp(f1->filename, f2->filename);
1008
- }
1009
-
1010
- // TODO: Research potential performance improvements around hashing stuff here
1011
- // once we have a benchmarking suite.
1012
- // Example: Each call to st_hash is calling murmur_finish and we may want
1013
- // to only finish once per structure, not per field?
1014
- // Example: There may be a more efficient hashing for line that is not the
1015
- // generic st_hash algorithm?
1016
-
1017
- // WARN: Must be kept in-sync with ::char_slice_hash
1018
- st_index_t string_hash(char *str, st_index_t seed) {
1019
- return st_hash(str, strlen(str), seed);
1020
- }
1021
-
1022
- // WARN: Must be kept in-sync with ::string_hash
1023
- st_index_t char_slice_hash(ddog_CharSlice char_slice, st_index_t seed) {
1024
- return st_hash(char_slice.ptr, char_slice.len, seed);
1025
- }
1026
-
1027
- // WARN: Must be kept in-sync with ::ddog_location_hash
1028
- st_index_t heap_frame_hash(heap_frame *frame, st_index_t seed) {
1029
- st_index_t hash = string_hash(frame->name, seed);
1030
- hash = string_hash(frame->filename, hash);
1031
- hash = st_hash(&frame->line, sizeof(frame->line), hash);
1032
- return hash;
1033
- }
1034
-
1035
- // WARN: Must be kept in-sync with ::heap_frame_hash
1036
- st_index_t ddog_location_hash(ddog_prof_Location location, st_index_t seed) {
1037
- st_index_t hash = char_slice_hash(location.function.name, seed);
1038
- hash = char_slice_hash(location.function.filename, hash);
1039
- // Convert ddog_prof line type to the same type we use for our heap_frames to
1040
- // ensure we have compatible hashes
1041
- int32_t line_as_int32 = (int32_t) location.line;
1042
- hash = st_hash(&line_as_int32, sizeof(line_as_int32), hash);
1043
- return hash;
1044
- }
1045
-
1046
- // ==============
1047
- // Heap Stack API
859
+ // Heap Record API
1048
860
  // ==============
1049
- heap_stack* heap_stack_new(ddog_prof_Slice_Location locations) {
861
+ heap_record* heap_record_new(heap_recorder *recorder, ddog_prof_Slice_Location locations) {
1050
862
  uint16_t frames_len = locations.len;
1051
863
  if (frames_len > MAX_FRAMES_LIMIT) {
1052
- // This should not be happening anyway since MAX_FRAMES_LIMIT should be shared with
1053
- // the stacktrace construction mechanism. If it happens, lets just raise. This should
1054
- // be safe since only allocate with the GVL anyway.
864
+ // This is not expected as MAX_FRAMES_LIMIT is shared with the stacktrace construction mechanism
1055
865
  rb_raise(rb_eRuntimeError, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len);
1056
866
  }
1057
- heap_stack *stack = ruby_xcalloc(1, sizeof(heap_stack) + frames_len * sizeof(heap_frame));
867
+ heap_record *stack = calloc(1, sizeof(heap_record) + frames_len * sizeof(heap_frame)); // See "note on calloc vs ruby_xcalloc use" above
868
+ stack->num_tracked_objects = 0;
1058
869
  stack->frames_len = frames_len;
870
+
871
+ // Intern all these strings...
872
+ ddog_CharSlice *strings = recorder->reusable_char_slices;
873
+ // Put all the char slices in the same array; we'll pull them out in the same order from the ids array
1059
874
  for (uint16_t i = 0; i < stack->frames_len; i++) {
1060
875
  const ddog_prof_Location *location = &locations.ptr[i];
876
+ strings[i] = location->function.filename;
877
+ strings[i + stack->frames_len] = location->function.name;
878
+ }
879
+ intern_all_or_raise(recorder->string_storage, (ddog_prof_Slice_CharSlice) { .ptr = strings, .len = stack->frames_len * 2 }, recorder->reusable_ids, stack->frames_len * 2);
880
+
881
+ // ...and record them for later use
882
+ for (uint16_t i = 0; i < stack->frames_len; i++) {
1061
883
  stack->frames[i] = (heap_frame) {
1062
- .name = string_from_char_slice(location->function.name),
1063
- .filename = string_from_char_slice(location->function.filename),
884
+ .filename = recorder->reusable_ids[i],
885
+ .name = recorder->reusable_ids[i + stack->frames_len],
1064
886
  // ddog_prof_Location is a int64_t. We don't expect to have to profile files with more than
1065
887
  // 2M lines so this cast should be fairly safe?
1066
- .line = (int32_t) location->line,
888
+ .line = (int32_t) locations.ptr[i].line,
1067
889
  };
1068
890
  }
891
+
1069
892
  return stack;
1070
893
  }
1071
894
 
1072
- void heap_stack_free(heap_stack *stack) {
1073
- for (uint64_t i = 0; i < stack->frames_len; i++) {
1074
- heap_frame *frame = &stack->frames[i];
1075
- ruby_xfree(frame->name);
1076
- ruby_xfree(frame->filename);
1077
- }
1078
- ruby_xfree(stack);
1079
- }
895
+ void heap_record_free(heap_recorder *recorder, heap_record *stack) {
896
+ ddog_prof_ManagedStringId *ids = recorder->reusable_ids;
1080
897
 
1081
- // WARN: Must be kept in-sync with ::ddog_location_slice_hash
1082
- st_index_t heap_stack_hash(heap_stack *stack, st_index_t seed) {
1083
- st_index_t hash = seed;
1084
- for (uint64_t i = 0; i < stack->frames_len; i++) {
1085
- hash = heap_frame_hash(&stack->frames[i], hash);
898
+ // Put all the ids in the same array; doesn't really matter the order
899
+ for (u_int16_t i = 0; i < stack->frames_len; i++) {
900
+ ids[i] = stack->frames[i].filename;
901
+ ids[i + stack->frames_len] = stack->frames[i].name;
1086
902
  }
1087
- return hash;
1088
- }
1089
-
1090
- // WARN: Must be kept in-sync with ::heap_stack_hash
1091
- st_index_t ddog_location_slice_hash(ddog_prof_Slice_Location locations, st_index_t seed) {
1092
- st_index_t hash = seed;
1093
- for (uint64_t i = 0; i < locations.len; i++) {
1094
- hash = ddog_location_hash(locations.ptr[i], hash);
1095
- }
1096
- return hash;
1097
- }
903
+ unintern_all_or_raise(recorder, (ddog_prof_Slice_ManagedStringId) { .ptr = ids, .len = stack->frames_len * 2 });
1098
904
 
1099
- // ===================
1100
- // Heap Record Key API
1101
- // ===================
1102
- heap_record_key* heap_record_key_new(heap_stack *stack) {
1103
- heap_record_key *key = ruby_xmalloc(sizeof(heap_record_key));
1104
- key->type = HEAP_STACK;
1105
- key->heap_stack = stack;
1106
- return key;
905
+ free(stack); // See "note on calloc vs ruby_xcalloc use" above
1107
906
  }
1108
907
 
1109
- void heap_record_key_free(heap_record_key *key) {
1110
- ruby_xfree(key);
1111
- }
1112
-
1113
- static inline size_t heap_record_key_len(heap_record_key *key) {
1114
- if (key->type == HEAP_STACK) {
1115
- return key->heap_stack->frames_len;
1116
- } else {
1117
- return key->location_slice->len;
1118
- }
1119
- }
908
+ // The entire stack is represented by ids (name, filename) and lines (integers) so we can treat is as just
909
+ // a big string of bytes and compare it all in one go.
910
+ int heap_record_cmp_st(st_data_t key1, st_data_t key2) {
911
+ heap_record *stack1 = (heap_record*) key1;
912
+ heap_record *stack2 = (heap_record*) key2;
1120
913
 
1121
- static inline int64_t heap_record_key_entry_line(heap_record_key *key, size_t entry_i) {
1122
- if (key->type == HEAP_STACK) {
1123
- return key->heap_stack->frames[entry_i].line;
914
+ if (stack1->frames_len != stack2->frames_len) {
915
+ return ((int) stack1->frames_len) - ((int) stack2->frames_len);
1124
916
  } else {
1125
- return key->location_slice->ptr[entry_i].line;
917
+ return memcmp(stack1->frames, stack2->frames, stack1->frames_len * sizeof(heap_frame));
1126
918
  }
1127
919
  }
1128
920
 
1129
- static inline size_t heap_record_key_entry_name(heap_record_key *key, size_t entry_i, const char **name_ptr) {
1130
- if (key->type == HEAP_STACK) {
1131
- char *name = key->heap_stack->frames[entry_i].name;
1132
- (*name_ptr) = name;
1133
- return strlen(name);
1134
- } else {
1135
- ddog_CharSlice name = key->location_slice->ptr[entry_i].function.name;
1136
- (*name_ptr) = name.ptr;
1137
- return name.len;
1138
- }
1139
- }
921
+ // Initial seed for hash function, same as Ruby uses
922
+ #define FNV1_32A_INIT 0x811c9dc5
1140
923
 
1141
- static inline size_t heap_record_key_entry_filename(heap_record_key *key, size_t entry_i, const char **filename_ptr) {
1142
- if (key->type == HEAP_STACK) {
1143
- char *filename = key->heap_stack->frames[entry_i].filename;
1144
- (*filename_ptr) = filename;
1145
- return strlen(filename);
1146
- } else {
1147
- ddog_CharSlice filename = key->location_slice->ptr[entry_i].function.filename;
1148
- (*filename_ptr) = filename.ptr;
1149
- return filename.len;
1150
- }
924
+ // The entire stack is represented by ids (name, filename) and lines (integers) so we can treat is as just
925
+ // a big string of bytes and hash it all in one go.
926
+ st_index_t heap_record_hash_st(st_data_t key) {
927
+ heap_record *stack = (heap_record*) key;
928
+ return st_hash(stack->frames, stack->frames_len * sizeof(heap_frame), FNV1_32A_INIT);
1151
929
  }
1152
930
 
1153
- int heap_record_key_cmp_st(st_data_t key1, st_data_t key2) {
1154
- heap_record_key *key_record1 = (heap_record_key*) key1;
1155
- heap_record_key *key_record2 = (heap_record_key*) key2;
1156
-
1157
- // Fast path, check if lengths differ
1158
- size_t key_record1_len = heap_record_key_len(key_record1);
1159
- size_t key_record2_len = heap_record_key_len(key_record2);
931
+ static void unintern_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId id) {
932
+ if (id.value == 0) return; // Empty string, nothing to do
1160
933
 
1161
- if (key_record1_len != key_record2_len) {
1162
- return ((int) key_record1_len) - ((int) key_record2_len);
934
+ ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern(recorder->string_storage, id);
935
+ if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) {
936
+ rb_raise(rb_eRuntimeError, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some));
1163
937
  }
938
+ }
1164
939
 
1165
- // If we got this far, we have same lengths so need to check item-by-item
1166
- for (size_t i = 0; i < key_record1_len; i++) {
1167
- // Lines are faster to compare, lets do that first
1168
- size_t line1 = heap_record_key_entry_line(key_record1, i);
1169
- size_t line2 = heap_record_key_entry_line(key_record2, i);
1170
- if (line1 != line2) {
1171
- return ((int) line1) - ((int)line2);
1172
- }
1173
-
1174
- // Then come names, they are usually smaller than filenames
1175
- const char *name1, *name2;
1176
- size_t name1_len = heap_record_key_entry_name(key_record1, i, &name1);
1177
- size_t name2_len = heap_record_key_entry_name(key_record2, i, &name2);
1178
- if (name1_len != name2_len) {
1179
- return ((int) name1_len) - ((int) name2_len);
1180
- }
1181
- int name_cmp_result = strncmp(name1, name2, name1_len);
1182
- if (name_cmp_result != 0) {
1183
- return name_cmp_result;
1184
- }
1185
-
1186
- // Then come filenames
1187
- const char *filename1, *filename2;
1188
- int64_t filename1_len = heap_record_key_entry_filename(key_record1, i, &filename1);
1189
- int64_t filename2_len = heap_record_key_entry_filename(key_record2, i, &filename2);
1190
- if (filename1_len != filename2_len) {
1191
- return ((int) filename1_len) - ((int) filename2_len);
1192
- }
1193
- int filename_cmp_result = strncmp(filename1, filename2, filename1_len);
1194
- if (filename_cmp_result != 0) {
1195
- return filename_cmp_result;
1196
- }
940
+ static void unintern_all_or_raise(heap_recorder *recorder, ddog_prof_Slice_ManagedStringId ids) {
941
+ ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern_all(recorder->string_storage, ids);
942
+ if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) {
943
+ rb_raise(rb_eRuntimeError, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some));
1197
944
  }
1198
-
1199
- // If we survived the above for, then everything matched
1200
- return 0;
1201
945
  }
1202
946
 
1203
- // Initial seed for hash functions
1204
- #define FNV1_32A_INIT 0x811c9dc5
1205
-
1206
- st_index_t heap_record_key_hash_st(st_data_t key) {
1207
- heap_record_key *record_key = (heap_record_key*) key;
1208
- if (record_key->type == HEAP_STACK) {
1209
- return heap_stack_hash(record_key->heap_stack, FNV1_32A_INIT);
1210
- } else {
1211
- return ddog_location_slice_hash(*record_key->location_slice, FNV1_32A_INIT);
947
+ static VALUE get_ruby_string_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId id) {
948
+ ddog_StringWrapperResult get_string_result = ddog_prof_ManagedStringStorage_get_string(recorder->string_storage, id);
949
+ if (get_string_result.tag == DDOG_STRING_WRAPPER_RESULT_ERR) {
950
+ rb_raise(rb_eRuntimeError, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err));
1212
951
  }
952
+ VALUE ruby_string = ruby_string_from_vec_u8(get_string_result.ok.message);
953
+ ddog_StringWrapper_drop((ddog_StringWrapper *) &get_string_result.ok);
954
+
955
+ return ruby_string;
1213
956
  }
1214
957
 
1215
958
  static inline double ewma_stat(double previous, double current) {
@@ -1233,3 +976,23 @@ void heap_recorder_testonly_reset_last_update(heap_recorder *heap_recorder) {
1233
976
 
1234
977
  heap_recorder->last_update_ns = 0;
1235
978
  }
979
+
980
+ void heap_recorder_testonly_benchmark_intern(heap_recorder *heap_recorder, ddog_CharSlice string, int times, bool use_all) {
981
+ if (heap_recorder == NULL) rb_raise(rb_eArgError, "heap profiling must be enabled");
982
+ if (times > REUSABLE_FRAME_DETAILS_SIZE) rb_raise(rb_eArgError, "times cannot be > than REUSABLE_FRAME_DETAILS_SIZE");
983
+
984
+ if (use_all) {
985
+ ddog_CharSlice *strings = heap_recorder->reusable_char_slices;
986
+
987
+ for (int i = 0; i < times; i++) strings[i] = string;
988
+
989
+ intern_all_or_raise(
990
+ heap_recorder->string_storage,
991
+ (ddog_prof_Slice_CharSlice) { .ptr = strings, .len = times },
992
+ heap_recorder->reusable_ids,
993
+ times
994
+ );
995
+ } else {
996
+ for (int i = 0; i < times; i++) intern_or_raise(heap_recorder->string_storage, string);
997
+ }
998
+ }