datadog 2.12.1 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +243 -2
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +63 -56
  4. data/ext/datadog_profiling_native_extension/collectors_stack.c +263 -76
  5. data/ext/datadog_profiling_native_extension/collectors_stack.h +20 -3
  6. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +78 -26
  7. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
  8. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +1 -4
  9. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +10 -0
  10. data/ext/datadog_profiling_native_extension/encoded_profile.c +79 -0
  11. data/ext/datadog_profiling_native_extension/encoded_profile.h +8 -0
  12. data/ext/datadog_profiling_native_extension/extconf.rb +10 -0
  13. data/ext/datadog_profiling_native_extension/heap_recorder.c +247 -364
  14. data/ext/datadog_profiling_native_extension/heap_recorder.h +4 -6
  15. data/ext/datadog_profiling_native_extension/http_transport.c +60 -94
  16. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +22 -0
  17. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +8 -5
  18. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +41 -21
  19. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +6 -4
  20. data/ext/datadog_profiling_native_extension/profiling.c +2 -0
  21. data/ext/datadog_profiling_native_extension/ruby_helpers.c +1 -13
  22. data/ext/datadog_profiling_native_extension/ruby_helpers.h +3 -11
  23. data/ext/datadog_profiling_native_extension/stack_recorder.c +173 -76
  24. data/ext/libdatadog_api/crashtracker.c +11 -12
  25. data/ext/libdatadog_api/crashtracker.h +5 -0
  26. data/ext/libdatadog_api/datadog_ruby_common.c +1 -4
  27. data/ext/libdatadog_api/datadog_ruby_common.h +10 -0
  28. data/ext/libdatadog_api/extconf.rb +2 -2
  29. data/ext/libdatadog_api/init.c +15 -0
  30. data/ext/libdatadog_api/library_config.c +164 -0
  31. data/ext/libdatadog_api/library_config.h +25 -0
  32. data/ext/libdatadog_api/macos_development.md +3 -3
  33. data/ext/libdatadog_api/process_discovery.c +112 -0
  34. data/ext/libdatadog_api/process_discovery.h +5 -0
  35. data/ext/libdatadog_extconf_helpers.rb +2 -2
  36. data/lib/datadog/appsec/actions_handler/serializable_backtrace.rb +89 -0
  37. data/lib/datadog/appsec/actions_handler.rb +24 -2
  38. data/lib/datadog/appsec/anonymizer.rb +16 -0
  39. data/lib/datadog/appsec/api_security/lru_cache.rb +56 -0
  40. data/lib/datadog/appsec/api_security/route_extractor.rb +71 -0
  41. data/lib/datadog/appsec/api_security/sampler.rb +59 -0
  42. data/lib/datadog/appsec/api_security.rb +23 -0
  43. data/lib/datadog/appsec/assets/waf_rules/README.md +50 -5
  44. data/lib/datadog/appsec/assets/waf_rules/recommended.json +257 -85
  45. data/lib/datadog/appsec/assets/waf_rules/strict.json +10 -78
  46. data/lib/datadog/appsec/autoload.rb +1 -1
  47. data/lib/datadog/appsec/component.rb +46 -61
  48. data/lib/datadog/appsec/compressed_json.rb +40 -0
  49. data/lib/datadog/appsec/configuration/settings.rb +153 -30
  50. data/lib/datadog/appsec/context.rb +7 -7
  51. data/lib/datadog/appsec/contrib/active_record/instrumentation.rb +10 -12
  52. data/lib/datadog/appsec/contrib/active_record/integration.rb +2 -2
  53. data/lib/datadog/appsec/contrib/active_record/patcher.rb +22 -22
  54. data/lib/datadog/appsec/contrib/auto_instrument.rb +1 -1
  55. data/lib/datadog/appsec/contrib/devise/configuration.rb +7 -31
  56. data/lib/datadog/appsec/contrib/devise/data_extractor.rb +78 -0
  57. data/lib/datadog/appsec/contrib/devise/ext.rb +22 -0
  58. data/lib/datadog/appsec/contrib/devise/integration.rb +1 -2
  59. data/lib/datadog/appsec/contrib/devise/patcher.rb +34 -23
  60. data/lib/datadog/appsec/contrib/devise/patches/signin_tracking_patch.rb +102 -0
  61. data/lib/datadog/appsec/contrib/devise/patches/signup_tracking_patch.rb +69 -0
  62. data/lib/datadog/appsec/contrib/devise/{patcher/rememberable_patch.rb → patches/skip_signin_tracking_patch.rb} +2 -2
  63. data/lib/datadog/appsec/contrib/devise/tracking_middleware.rb +106 -0
  64. data/lib/datadog/appsec/contrib/excon/integration.rb +1 -1
  65. data/lib/datadog/appsec/contrib/excon/ssrf_detection_middleware.rb +9 -10
  66. data/lib/datadog/appsec/contrib/faraday/integration.rb +1 -1
  67. data/lib/datadog/appsec/contrib/faraday/ssrf_detection_middleware.rb +8 -9
  68. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +8 -9
  69. data/lib/datadog/appsec/contrib/graphql/integration.rb +1 -1
  70. data/lib/datadog/appsec/contrib/rack/ext.rb +34 -0
  71. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +49 -32
  72. data/lib/datadog/appsec/contrib/rack/integration.rb +1 -1
  73. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +42 -30
  74. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +11 -13
  75. data/lib/datadog/appsec/contrib/rails/integration.rb +1 -1
  76. data/lib/datadog/appsec/contrib/rails/patcher.rb +21 -21
  77. data/lib/datadog/appsec/contrib/rest_client/integration.rb +1 -1
  78. data/lib/datadog/appsec/contrib/rest_client/request_ssrf_detection_patch.rb +10 -11
  79. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +17 -23
  80. data/lib/datadog/appsec/contrib/sinatra/integration.rb +1 -1
  81. data/lib/datadog/appsec/event.rb +96 -135
  82. data/lib/datadog/appsec/ext.rb +4 -2
  83. data/lib/datadog/appsec/instrumentation/gateway/argument.rb +7 -2
  84. data/lib/datadog/appsec/instrumentation/gateway/middleware.rb +24 -0
  85. data/lib/datadog/appsec/instrumentation/gateway.rb +17 -22
  86. data/lib/datadog/appsec/metrics/telemetry.rb +1 -1
  87. data/lib/datadog/appsec/monitor/gateway/watcher.rb +49 -14
  88. data/lib/datadog/appsec/processor/rule_loader.rb +30 -33
  89. data/lib/datadog/appsec/remote.rb +31 -59
  90. data/lib/datadog/appsec/response.rb +6 -6
  91. data/lib/datadog/appsec/security_engine/engine.rb +194 -0
  92. data/lib/datadog/appsec/security_engine/runner.rb +13 -14
  93. data/lib/datadog/appsec/security_event.rb +39 -0
  94. data/lib/datadog/appsec/utils.rb +0 -2
  95. data/lib/datadog/appsec.rb +5 -8
  96. data/lib/datadog/core/buffer/random.rb +18 -2
  97. data/lib/datadog/core/configuration/agent_settings.rb +52 -0
  98. data/lib/datadog/core/configuration/agent_settings_resolver.rb +4 -46
  99. data/lib/datadog/core/configuration/agentless_settings_resolver.rb +176 -0
  100. data/lib/datadog/core/configuration/components.rb +48 -31
  101. data/lib/datadog/core/configuration/components_state.rb +23 -0
  102. data/lib/datadog/core/configuration/ext.rb +4 -0
  103. data/lib/datadog/core/configuration/option.rb +81 -45
  104. data/lib/datadog/core/configuration/option_definition.rb +4 -4
  105. data/lib/datadog/core/configuration/options.rb +3 -3
  106. data/lib/datadog/core/configuration/settings.rb +109 -44
  107. data/lib/datadog/core/configuration/stable_config.rb +22 -0
  108. data/lib/datadog/core/configuration.rb +40 -16
  109. data/lib/datadog/core/crashtracking/component.rb +3 -10
  110. data/lib/datadog/core/crashtracking/tag_builder.rb +4 -22
  111. data/lib/datadog/core/diagnostics/environment_logger.rb +1 -1
  112. data/lib/datadog/core/encoding.rb +1 -1
  113. data/lib/datadog/core/environment/agent_info.rb +4 -3
  114. data/lib/datadog/core/environment/cgroup.rb +10 -12
  115. data/lib/datadog/core/environment/container.rb +38 -40
  116. data/lib/datadog/core/environment/ext.rb +6 -6
  117. data/lib/datadog/core/environment/git.rb +1 -0
  118. data/lib/datadog/core/environment/identity.rb +3 -3
  119. data/lib/datadog/core/environment/platform.rb +3 -3
  120. data/lib/datadog/core/environment/variable_helpers.rb +1 -1
  121. data/lib/datadog/core/error.rb +11 -9
  122. data/lib/datadog/core/logger.rb +2 -2
  123. data/lib/datadog/core/metrics/client.rb +20 -21
  124. data/lib/datadog/core/metrics/logging.rb +5 -5
  125. data/lib/datadog/core/process_discovery/tracer_memfd.rb +15 -0
  126. data/lib/datadog/core/process_discovery.rb +36 -0
  127. data/lib/datadog/core/rate_limiter.rb +4 -2
  128. data/lib/datadog/core/remote/client.rb +40 -32
  129. data/lib/datadog/core/remote/component.rb +6 -9
  130. data/lib/datadog/core/remote/configuration/digest.rb +7 -7
  131. data/lib/datadog/core/remote/configuration/path.rb +1 -1
  132. data/lib/datadog/core/remote/configuration/repository.rb +14 -1
  133. data/lib/datadog/core/remote/negotiation.rb +9 -9
  134. data/lib/datadog/core/remote/transport/config.rb +4 -3
  135. data/lib/datadog/core/remote/transport/http/client.rb +5 -4
  136. data/lib/datadog/core/remote/transport/http/config.rb +27 -37
  137. data/lib/datadog/core/remote/transport/http/negotiation.rb +7 -33
  138. data/lib/datadog/core/remote/transport/http.rb +22 -57
  139. data/lib/datadog/core/remote/transport/negotiation.rb +4 -3
  140. data/lib/datadog/core/runtime/metrics.rb +12 -5
  141. data/lib/datadog/core/tag_builder.rb +56 -0
  142. data/lib/datadog/core/telemetry/component.rb +81 -52
  143. data/lib/datadog/core/telemetry/emitter.rb +23 -11
  144. data/lib/datadog/core/telemetry/event/app_client_configuration_change.rb +66 -0
  145. data/lib/datadog/core/telemetry/event/app_closing.rb +18 -0
  146. data/lib/datadog/core/telemetry/event/app_dependencies_loaded.rb +33 -0
  147. data/lib/datadog/core/telemetry/event/app_heartbeat.rb +18 -0
  148. data/lib/datadog/core/telemetry/event/app_integrations_change.rb +58 -0
  149. data/lib/datadog/core/telemetry/event/app_started.rb +287 -0
  150. data/lib/datadog/core/telemetry/event/base.rb +40 -0
  151. data/lib/datadog/core/telemetry/event/distributions.rb +18 -0
  152. data/lib/datadog/core/telemetry/event/generate_metrics.rb +43 -0
  153. data/lib/datadog/core/telemetry/event/log.rb +76 -0
  154. data/lib/datadog/core/telemetry/event/message_batch.rb +42 -0
  155. data/lib/datadog/core/telemetry/event/synth_app_client_configuration_change.rb +43 -0
  156. data/lib/datadog/core/telemetry/event.rb +17 -472
  157. data/lib/datadog/core/telemetry/http/adapters/net.rb +12 -97
  158. data/lib/datadog/core/telemetry/logger.rb +5 -4
  159. data/lib/datadog/core/telemetry/logging.rb +11 -5
  160. data/lib/datadog/core/telemetry/metric.rb +8 -8
  161. data/lib/datadog/core/telemetry/request.rb +4 -4
  162. data/lib/datadog/core/telemetry/transport/http/api.rb +43 -0
  163. data/lib/datadog/core/telemetry/transport/http/client.rb +49 -0
  164. data/lib/datadog/core/telemetry/transport/http/telemetry.rb +92 -0
  165. data/lib/datadog/core/telemetry/transport/http.rb +63 -0
  166. data/lib/datadog/core/telemetry/transport/telemetry.rb +51 -0
  167. data/lib/datadog/core/telemetry/worker.rb +90 -24
  168. data/lib/datadog/core/transport/http/adapters/net.rb +17 -2
  169. data/lib/datadog/core/transport/http/adapters/test.rb +2 -1
  170. data/lib/datadog/core/transport/http/api/instance.rb +17 -0
  171. data/lib/datadog/core/transport/http/api/spec.rb +17 -0
  172. data/lib/datadog/core/transport/http/builder.rb +19 -17
  173. data/lib/datadog/core/transport/http/env.rb +8 -0
  174. data/lib/datadog/core/transport/http.rb +39 -2
  175. data/lib/datadog/core/utils/at_fork_monkey_patch.rb +6 -6
  176. data/lib/datadog/core/utils/duration.rb +32 -32
  177. data/lib/datadog/core/utils/forking.rb +2 -2
  178. data/lib/datadog/core/utils/network.rb +6 -6
  179. data/lib/datadog/core/utils/only_once_successful.rb +16 -5
  180. data/lib/datadog/core/utils/time.rb +20 -0
  181. data/lib/datadog/core/utils/truncation.rb +21 -0
  182. data/lib/datadog/core/utils.rb +7 -0
  183. data/lib/datadog/core/vendor/multipart-post/multipart/post/composite_read_io.rb +1 -1
  184. data/lib/datadog/core/vendor/multipart-post/multipart/post/multipartable.rb +8 -8
  185. data/lib/datadog/core/vendor/multipart-post/multipart/post/parts.rb +7 -7
  186. data/lib/datadog/core/worker.rb +1 -1
  187. data/lib/datadog/core/workers/async.rb +29 -12
  188. data/lib/datadog/core/workers/interval_loop.rb +12 -1
  189. data/lib/datadog/core/workers/runtime_metrics.rb +2 -2
  190. data/lib/datadog/core.rb +8 -0
  191. data/lib/datadog/di/boot.rb +34 -0
  192. data/lib/datadog/di/component.rb +0 -2
  193. data/lib/datadog/di/instrumenter.rb +48 -5
  194. data/lib/datadog/di/probe_notification_builder.rb +38 -43
  195. data/lib/datadog/di/probe_notifier_worker.rb +25 -17
  196. data/lib/datadog/di/remote.rb +2 -0
  197. data/lib/datadog/di/serializer.rb +10 -2
  198. data/lib/datadog/di/transport/diagnostics.rb +4 -3
  199. data/lib/datadog/di/transport/http/api.rb +2 -12
  200. data/lib/datadog/di/transport/http/client.rb +4 -3
  201. data/lib/datadog/di/transport/http/diagnostics.rb +7 -34
  202. data/lib/datadog/di/transport/http/input.rb +18 -35
  203. data/lib/datadog/di/transport/http.rb +14 -62
  204. data/lib/datadog/di/transport/input.rb +14 -5
  205. data/lib/datadog/di/utils.rb +5 -0
  206. data/lib/datadog/di.rb +0 -33
  207. data/lib/datadog/error_tracking/collector.rb +87 -0
  208. data/lib/datadog/error_tracking/component.rb +167 -0
  209. data/lib/datadog/error_tracking/configuration/settings.rb +63 -0
  210. data/lib/datadog/error_tracking/configuration.rb +11 -0
  211. data/lib/datadog/error_tracking/ext.rb +18 -0
  212. data/lib/datadog/error_tracking/extensions.rb +16 -0
  213. data/lib/datadog/error_tracking/filters.rb +77 -0
  214. data/lib/datadog/error_tracking.rb +18 -0
  215. data/lib/datadog/kit/appsec/events/v2.rb +195 -0
  216. data/lib/datadog/kit/appsec/events.rb +12 -0
  217. data/lib/datadog/kit/identity.rb +5 -1
  218. data/lib/datadog/opentelemetry/api/baggage.rb +90 -0
  219. data/lib/datadog/opentelemetry/api/baggage.rbs +26 -0
  220. data/lib/datadog/opentelemetry/api/context.rb +16 -2
  221. data/lib/datadog/opentelemetry/sdk/trace/span.rb +1 -1
  222. data/lib/datadog/opentelemetry.rb +2 -1
  223. data/lib/datadog/profiling/collectors/code_provenance.rb +18 -9
  224. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -0
  225. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
  226. data/lib/datadog/profiling/collectors/info.rb +44 -0
  227. data/lib/datadog/profiling/collectors/thread_context.rb +17 -2
  228. data/lib/datadog/profiling/component.rb +8 -9
  229. data/lib/datadog/profiling/encoded_profile.rb +11 -0
  230. data/lib/datadog/profiling/exporter.rb +12 -7
  231. data/lib/datadog/profiling/ext.rb +0 -14
  232. data/lib/datadog/profiling/flush.rb +5 -8
  233. data/lib/datadog/profiling/http_transport.rb +7 -61
  234. data/lib/datadog/profiling/profiler.rb +2 -0
  235. data/lib/datadog/profiling/scheduler.rb +10 -2
  236. data/lib/datadog/profiling/sequence_tracker.rb +44 -0
  237. data/lib/datadog/profiling/stack_recorder.rb +9 -9
  238. data/lib/datadog/profiling/tag_builder.rb +7 -41
  239. data/lib/datadog/profiling/tasks/setup.rb +2 -0
  240. data/lib/datadog/profiling.rb +7 -2
  241. data/lib/datadog/single_step_instrument.rb +9 -0
  242. data/lib/datadog/tracing/analytics.rb +1 -1
  243. data/lib/datadog/tracing/component.rb +15 -12
  244. data/lib/datadog/tracing/configuration/ext.rb +7 -1
  245. data/lib/datadog/tracing/configuration/settings.rb +18 -2
  246. data/lib/datadog/tracing/context_provider.rb +1 -1
  247. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +15 -0
  248. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +19 -12
  249. data/lib/datadog/tracing/contrib/action_pack/ext.rb +2 -0
  250. data/lib/datadog/tracing/contrib/active_record/integration.rb +1 -1
  251. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +11 -2
  252. data/lib/datadog/tracing/contrib/active_support/cache/instrumentation.rb +33 -0
  253. data/lib/datadog/tracing/contrib/active_support/cache/patcher.rb +4 -0
  254. data/lib/datadog/tracing/contrib/active_support/cache/redis.rb +2 -4
  255. data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +13 -0
  256. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +10 -0
  257. data/lib/datadog/tracing/contrib/aws/parsed_context.rb +5 -1
  258. data/lib/datadog/tracing/contrib/configuration/settings.rb +1 -1
  259. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +4 -5
  260. data/lib/datadog/tracing/contrib/excon/middleware.rb +5 -3
  261. data/lib/datadog/tracing/contrib/ext.rb +1 -0
  262. data/lib/datadog/tracing/contrib/faraday/middleware.rb +5 -3
  263. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +7 -1
  264. data/lib/datadog/tracing/contrib/grpc/distributed/propagation.rb +3 -0
  265. data/lib/datadog/tracing/contrib/http/circuit_breaker.rb +0 -15
  266. data/lib/datadog/tracing/contrib/http/distributed/propagation.rb +4 -1
  267. data/lib/datadog/tracing/contrib/http/instrumentation.rb +6 -10
  268. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -16
  269. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +7 -15
  270. data/lib/datadog/tracing/contrib/karafka/configuration/settings.rb +27 -0
  271. data/lib/datadog/tracing/contrib/karafka/distributed/propagation.rb +48 -0
  272. data/lib/datadog/tracing/contrib/karafka/ext.rb +27 -0
  273. data/lib/datadog/tracing/contrib/karafka/integration.rb +45 -0
  274. data/lib/datadog/tracing/contrib/karafka/monitor.rb +66 -0
  275. data/lib/datadog/tracing/contrib/karafka/patcher.rb +71 -0
  276. data/lib/datadog/tracing/contrib/karafka.rb +37 -0
  277. data/lib/datadog/tracing/contrib/lograge/patcher.rb +4 -2
  278. data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +8 -0
  279. data/lib/datadog/tracing/contrib/mongodb/ext.rb +1 -0
  280. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +18 -1
  281. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +16 -6
  282. data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +17 -0
  283. data/lib/datadog/tracing/contrib/opensearch/ext.rb +9 -0
  284. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +5 -1
  285. data/lib/datadog/tracing/contrib/patcher.rb +5 -2
  286. data/lib/datadog/tracing/contrib/rack/request_queue.rb +1 -1
  287. data/lib/datadog/tracing/contrib/rails/patcher.rb +4 -1
  288. data/lib/datadog/tracing/contrib/rails/runner.rb +61 -40
  289. data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +5 -3
  290. data/lib/datadog/tracing/contrib/sidekiq/client_tracer.rb +6 -1
  291. data/lib/datadog/tracing/contrib/sidekiq/distributed/propagation.rb +3 -0
  292. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  293. data/lib/datadog/tracing/contrib/sidekiq/server_tracer.rb +5 -2
  294. data/lib/datadog/tracing/contrib/support.rb +28 -0
  295. data/lib/datadog/tracing/contrib.rb +1 -0
  296. data/lib/datadog/tracing/correlation.rb +9 -2
  297. data/lib/datadog/tracing/diagnostics/environment_logger.rb +3 -1
  298. data/lib/datadog/tracing/distributed/b3_multi.rb +1 -1
  299. data/lib/datadog/tracing/distributed/b3_single.rb +1 -1
  300. data/lib/datadog/tracing/distributed/baggage.rb +131 -0
  301. data/lib/datadog/tracing/distributed/datadog.rb +4 -2
  302. data/lib/datadog/tracing/distributed/propagation.rb +25 -4
  303. data/lib/datadog/tracing/distributed/propagation_policy.rb +42 -0
  304. data/lib/datadog/tracing/metadata/errors.rb +4 -4
  305. data/lib/datadog/tracing/metadata/ext.rb +5 -0
  306. data/lib/datadog/tracing/metadata/metastruct.rb +36 -0
  307. data/lib/datadog/tracing/metadata/metastruct_tagging.rb +42 -0
  308. data/lib/datadog/tracing/metadata.rb +2 -0
  309. data/lib/datadog/tracing/sampling/rate_sampler.rb +2 -1
  310. data/lib/datadog/tracing/sampling/span/rule.rb +0 -1
  311. data/lib/datadog/tracing/span.rb +10 -1
  312. data/lib/datadog/tracing/span_event.rb +2 -2
  313. data/lib/datadog/tracing/span_operation.rb +68 -16
  314. data/lib/datadog/tracing/sync_writer.rb +2 -3
  315. data/lib/datadog/tracing/trace_digest.rb +9 -2
  316. data/lib/datadog/tracing/trace_operation.rb +55 -27
  317. data/lib/datadog/tracing/trace_segment.rb +6 -4
  318. data/lib/datadog/tracing/tracer.rb +51 -7
  319. data/lib/datadog/tracing/transport/http/api.rb +2 -10
  320. data/lib/datadog/tracing/transport/http/client.rb +5 -4
  321. data/lib/datadog/tracing/transport/http/traces.rb +13 -41
  322. data/lib/datadog/tracing/transport/http.rb +11 -44
  323. data/lib/datadog/tracing/transport/serializable_trace.rb +3 -1
  324. data/lib/datadog/tracing/transport/trace_formatter.rb +7 -0
  325. data/lib/datadog/tracing/transport/traces.rb +26 -9
  326. data/lib/datadog/tracing/utils.rb +1 -1
  327. data/lib/datadog/tracing/workers/trace_writer.rb +2 -6
  328. data/lib/datadog/tracing/writer.rb +2 -6
  329. data/lib/datadog/tracing.rb +16 -3
  330. data/lib/datadog/version.rb +2 -2
  331. data/lib/datadog.rb +8 -2
  332. metadata +88 -23
  333. data/lib/datadog/appsec/assets/waf_rules/processors.json +0 -92
  334. data/lib/datadog/appsec/assets/waf_rules/scanners.json +0 -114
  335. data/lib/datadog/appsec/contrib/devise/event.rb +0 -54
  336. data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +0 -72
  337. data/lib/datadog/appsec/contrib/devise/patcher/registration_controller_patch.rb +0 -47
  338. data/lib/datadog/appsec/contrib/devise/resource.rb +0 -35
  339. data/lib/datadog/appsec/contrib/devise/tracking.rb +0 -57
  340. data/lib/datadog/appsec/processor/rule_merger.rb +0 -170
  341. data/lib/datadog/appsec/processor.rb +0 -107
  342. data/lib/datadog/appsec/utils/trace_operation.rb +0 -15
  343. data/lib/datadog/core/telemetry/http/env.rb +0 -20
  344. data/lib/datadog/core/telemetry/http/ext.rb +0 -28
  345. data/lib/datadog/core/telemetry/http/response.rb +0 -70
  346. data/lib/datadog/core/telemetry/http/transport.rb +0 -90

data/ext/datadog_profiling_native_extension/heap_recorder.c

@@ -5,6 +5,18 @@
  #include "libdatadog_helpers.h"
  #include "time_helpers.h"

+ // Note on calloc vs ruby_xcalloc use:
+ // * Whenever we're allocating memory after being called by the Ruby VM in a "regular" situation (e.g. initializer)
+ // we should use `ruby_xcalloc` to give the VM visibility into what we're doing + give it a chance to manage GC
+ // * BUT, when we're being called during a sample, being in the middle of an object allocation is a very special
+ // situation for the VM to be in, and we've found the hard way (e.g. https://bugs.ruby-lang.org/issues/20629 and
+ // https://github.com/DataDog/dd-trace-rb/pull/4240 ) that it can be easy to do things the VM didn't expect.
+ // * Thus, out of caution and to avoid future potential issues such as the ones above, whenever we allocate memory
+ // during **sampling** we use `calloc` instead of `ruby_xcalloc`. Note that we've never seen issues from using
+ // `ruby_xcalloc` at any time, so this is a **precaution** not a "we've seen it break". But it seems a harmless
+ // one to use.
+ // This applies to both heap_recorder.c and collectors_thread_context.c
+
  // Minimum age (in GC generations) of heap objects we want to include in heap
  // recorder iterations. Object with age 0 represent objects that have yet to undergo
  // a GC and, thus, may just be noise/trash at instant of iteration and are usually not
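
The note above boils down to a simple rule: allocations made on a normal VM entry point go through the Ruby allocator, while allocations made mid-sample go through plain libc. A minimal sketch of that rule, assuming hypothetical wrapper names (only `ruby_xcalloc`/`calloc` themselves come from the diff):

#include <stdlib.h>
#include <ruby.h>

// Hypothetical helpers illustrating the convention; not part of the diff.
static void *alloc_on_regular_path(size_t count, size_t size) {
  // Called from a normal VM entry point (e.g. an initializer): let the VM account
  // for this memory and potentially run GC. Pair with ruby_xfree.
  return ruby_xcalloc(count, size);
}

static void *alloc_during_sampling(size_t count, size_t size) {
  // Called in the middle of a sample (possibly mid object allocation): stay on the
  // plain libc allocator so the VM is never asked to do work in that special state. Pair with free.
  return calloc(count, size);
}
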
@@ -24,80 +36,37 @@

  // A compact representation of a stacktrace frame for a heap allocation.
  typedef struct {
- char *name;
- char *filename;
+ ddog_prof_ManagedStringId name;
+ ddog_prof_ManagedStringId filename;
  int32_t line;
  } heap_frame;

+ // We use memcmp/st_hash below to compare/hash an entire array of heap_frames, so want to make sure no padding is added
+ // We could define the structure to be packed, but that seems even weirder across compilers, and this seems more portable?
+ _Static_assert(
+ sizeof(heap_frame) == sizeof(ddog_prof_ManagedStringId) * 2 + sizeof(int32_t),
+ "Size of heap_frame does not match the sum of its members. Padding detected."
+ );
+
  // A compact representation of a stacktrace for a heap allocation.
- //
- // We could use a ddog_prof_Slice_Location instead but it has a lot of
- // unused fields. Because we have to keep these stacks around for at
- // least the lifetime of the objects allocated therein, we would be
- // incurring a non-negligible memory overhead for little purpose.
+ // Used to dedup heap allocation stacktraces across multiple objects sharing the same allocation location.
  typedef struct {
+ // How many objects are currently tracked in object_records recorder for this heap record.
+ uint32_t num_tracked_objects;
+
  uint16_t frames_len;
  heap_frame frames[];
- } heap_stack;
- static heap_stack* heap_stack_new(ddog_prof_Slice_Location);
- static void heap_stack_free(heap_stack*);
- static st_index_t heap_stack_hash(heap_stack*, st_index_t);
+ } heap_record;
+ static heap_record* heap_record_new(heap_recorder*, ddog_prof_Slice_Location);
+ static void heap_record_free(heap_recorder*, heap_record*);

  #if MAX_FRAMES_LIMIT > UINT16_MAX
  #error Frames len type not compatible with MAX_FRAMES_LIMIT
  #endif

- enum heap_record_key_type {
- HEAP_STACK,
- LOCATION_SLICE
- };
- // This struct allows us to use two different types of stacks when
- // interacting with a heap_record hash.
- //
- // The idea is that we'll always want to use heap_stack-keys when
- // adding new entries to the hash since that's the compact stack
- // representation we rely on internally.
- //
- // However, when querying for an existing heap record, we'd save a
- // lot of allocations if we could query with the
- // ddog_prof_Slice_Location we receive in our external API.
- //
- // To allow this interchange, we need a union and need to ensure
- // that whatever shape of the union, the heap_record_key_cmp_st
- // and heap_record_hash_st functions return the same results for
- // equivalent stacktraces.
- typedef struct {
- enum heap_record_key_type type;
- union {
- // key never owns this if set
- heap_stack *heap_stack;
- // key never owns this if set
- ddog_prof_Slice_Location *location_slice;
- };
- } heap_record_key;
- static heap_record_key* heap_record_key_new(heap_stack*);
- static void heap_record_key_free(heap_record_key*);
- static int heap_record_key_cmp_st(st_data_t, st_data_t);
- static st_index_t heap_record_key_hash_st(st_data_t);
- static const struct st_hash_type st_hash_type_heap_record_key = {
- heap_record_key_cmp_st,
- heap_record_key_hash_st,
- };
-
- // Need to implement these functions to support the location-slice based keys
- static st_index_t ddog_location_hash(ddog_prof_Location, st_index_t seed);
- static st_index_t ddog_location_slice_hash(ddog_prof_Slice_Location, st_index_t seed);
-
- // A heap record is used for deduping heap allocation stacktraces across multiple
- // objects sharing the same allocation location.
- typedef struct {
- // How many objects are currently tracked by the heap recorder for this heap record.
- uint32_t num_tracked_objects;
- // stack is owned by the associated record and gets cleaned up alongside it
- heap_stack *stack;
- } heap_record;
- static heap_record* heap_record_new(heap_stack*);
- static void heap_record_free(heap_record*);
+ static int heap_record_cmp_st(st_data_t, st_data_t);
+ static st_index_t heap_record_hash_st(st_data_t);
+ static const struct st_hash_type st_hash_type_heap_record = { .compare = heap_record_cmp_st, .hash = heap_record_hash_st };

  // An object record is used for storing data about currently tracked live objects
  typedef struct {
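
The hunk above only declares heap_record_cmp_st and heap_record_hash_st; their bodies are outside this excerpt. The _Static_assert matters because, with no padding bytes in heap_frame, the whole frames array can safely be compared and hashed as a flat byte range. A sketch of what such functions could look like, using the heap_record type from the diff (illustrative names and seed choice, not the library's actual implementation):

// Illustrative only: compare/hash a heap_record by treating its frames as raw bytes.
static int example_heap_record_cmp(st_data_t key1, st_data_t key2) {
  heap_record *record1 = (heap_record *) key1;
  heap_record *record2 = (heap_record *) key2;

  if (record1->frames_len != record2->frames_len) return 1; // nonzero means "not equal" for st

  // Only well-defined because the _Static_assert above guarantees heap_frame has no padding.
  return memcmp(record1->frames, record2->frames, record1->frames_len * sizeof(heap_frame));
}

static st_index_t example_heap_record_hash(st_data_t key) {
  heap_record *record = (heap_record *) key;
  // st_hash (from Ruby's st.h) hashes an arbitrary byte range; the seed here is an arbitrary choice.
  return st_hash(record->frames, record->frames_len * sizeof(heap_frame), (st_index_t) record->frames_len);
}
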
@@ -106,8 +75,8 @@ typedef struct {
  live_object_data object_data;
  } object_record;
  static object_record* object_record_new(long, heap_record*, live_object_data);
- static void object_record_free(object_record*);
- static VALUE object_record_inspect(object_record*);
+ static void object_record_free(heap_recorder*, object_record*);
+ static VALUE object_record_inspect(heap_recorder*, object_record*);
  static object_record SKIPPED_RECORD = {0};

  struct heap_recorder {
@@ -116,12 +85,15 @@ struct heap_recorder {
  bool size_enabled;
  uint sample_rate;

- // Map[key: heap_record_key*, record: heap_record*]
- // NOTE: We always use heap_record_key.type == HEAP_STACK for storage but support lookups
- // via heap_record_key.type == LOCATION_SLICE to allow for allocation-free fast-paths.
+ // Map[key: heap_record*, record: nothing] (This is a set, basically)
  // NOTE: This table is currently only protected by the GVL since we never interact with it
  // outside the GVL.
- // NOTE: This table has ownership of both its heap_record_keys and heap_records.
+ // NOTE: This table has ownership of its heap_records.
+ //
+ // This is a cpu/memory trade-off: Maintaining the "heap_records" map means we spend extra CPU when sampling as we need
+ // to do de-duplication, but we reduce the memory footprint of the heap profiler.
+ // In the future, it may be worth revisiting if we can move this inside libdatadog: if libdatadog was able to track
+ // entire stacks for us, then we wouldn't need to do it on the Ruby side.
  st_table *heap_records;

  // Map[obj_id: long, record: object_record*]
@@ -132,6 +104,8 @@ struct heap_recorder {
  //
  // TODO: @ivoanjo We've evolved to actually never need to look up on object_records (we only insert and iterate),
  // so right now this seems to be just a really really fancy self-resizing list/set.
+ // If we replace this with a list, we could record the latest id and compare it when inserting to make sure our
+ // assumption of ids never reused + always increasing always holds. (This as an alternative to checking for duplicates)
  st_table *object_records;

  // Map[obj_id: long, record: object_record*]
@@ -156,12 +130,19 @@ struct heap_recorder {
  // Data for a heap recording that was started but not yet ended
  object_record *active_recording;

- // Reusable location array, implementing a flyweight pattern for things like iteration.
+ // Reusable arrays, implementing a flyweight pattern for things like iteration
+ #define REUSABLE_LOCATIONS_SIZE MAX_FRAMES_LIMIT
  ddog_prof_Location *reusable_locations;

+ #define REUSABLE_FRAME_DETAILS_SIZE (2 * MAX_FRAMES_LIMIT) // because it'll be used for both function names AND file names)
+ ddog_prof_ManagedStringId *reusable_ids;
+ ddog_CharSlice *reusable_char_slices;
+
  // Sampling state
  uint num_recordings_skipped;

+ ddog_prof_ManagedStringStorage string_storage;
+
  struct stats_last_update {
  size_t objects_alive;
  size_t objects_dead;
@@ -185,10 +166,10 @@ struct heap_recorder {
  } stats_lifetime;
  };

- struct end_heap_allocation_args {
- struct heap_recorder *heap_recorder;
+ typedef struct {
+ heap_recorder *heap_recorder;
  ddog_prof_Slice_Location locations;
- };
+ } end_heap_allocation_args;

  static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Location);
  static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
@@ -203,6 +184,9 @@ static void commit_recording(heap_recorder *, heap_record *, object_record *acti
  static VALUE end_heap_allocation_recording(VALUE end_heap_allocation_args);
  static void heap_recorder_update(heap_recorder *heap_recorder, bool full_update);
  static inline double ewma_stat(double previous, double current);
+ static void unintern_or_raise(heap_recorder *, ddog_prof_ManagedStringId);
+ static void unintern_all_or_raise(heap_recorder *recorder, ddog_prof_Slice_ManagedStringId ids);
+ static VALUE get_ruby_string_or_raise(heap_recorder*, ddog_prof_ManagedStringId);

  // ==========================
  // Heap Recorder External API
@@ -213,16 +197,19 @@ static inline double ewma_stat(double previous, double current);
  // happens under the GVL.
  //
  // ==========================
- heap_recorder* heap_recorder_new(void) {
+ heap_recorder* heap_recorder_new(ddog_prof_ManagedStringStorage string_storage) {
  heap_recorder *recorder = ruby_xcalloc(1, sizeof(heap_recorder));

- recorder->heap_records = st_init_table(&st_hash_type_heap_record_key);
+ recorder->heap_records = st_init_table(&st_hash_type_heap_record);
  recorder->object_records = st_init_numtable();
  recorder->object_records_snapshot = NULL;
- recorder->reusable_locations = ruby_xcalloc(MAX_FRAMES_LIMIT, sizeof(ddog_prof_Location));
+ recorder->reusable_locations = ruby_xcalloc(REUSABLE_LOCATIONS_SIZE, sizeof(ddog_prof_Location));
+ recorder->reusable_ids = ruby_xcalloc(REUSABLE_FRAME_DETAILS_SIZE, sizeof(ddog_prof_ManagedStringId));
+ recorder->reusable_char_slices = ruby_xcalloc(REUSABLE_FRAME_DETAILS_SIZE, sizeof(ddog_CharSlice));
  recorder->active_recording = NULL;
  recorder->size_enabled = true;
  recorder->sample_rate = 1; // By default do no sampling on top of what allocation profiling already does
+ recorder->string_storage = string_storage;

  return recorder;
  }
@@ -239,19 +226,21 @@ void heap_recorder_free(heap_recorder *heap_recorder) {
  }

  // Clean-up all object records
- st_foreach(heap_recorder->object_records, st_object_record_entry_free, 0);
+ st_foreach(heap_recorder->object_records, st_object_record_entry_free, (st_data_t) heap_recorder);
  st_free_table(heap_recorder->object_records);

  // Clean-up all heap records (this includes those only referred to by queued_samples)
- st_foreach(heap_recorder->heap_records, st_heap_record_entry_free, 0);
+ st_foreach(heap_recorder->heap_records, st_heap_record_entry_free, (st_data_t) heap_recorder);
  st_free_table(heap_recorder->heap_records);

  if (heap_recorder->active_recording != NULL && heap_recorder->active_recording != &SKIPPED_RECORD) {
  // If there's a partial object record, clean it up as well
- object_record_free(heap_recorder->active_recording);
+ object_record_free(heap_recorder, heap_recorder->active_recording);
  }

  ruby_xfree(heap_recorder->reusable_locations);
+ ruby_xfree(heap_recorder->reusable_ids);
+ ruby_xfree(heap_recorder->reusable_char_slices);

  ruby_xfree(heap_recorder);
  }
@@ -314,7 +303,18 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
  rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end.");
  }

- if (++heap_recorder->num_recordings_skipped < heap_recorder->sample_rate) {
+ if (++heap_recorder->num_recordings_skipped < heap_recorder->sample_rate ||
+ #ifdef NO_IMEMO_OBJECT_ID
+ // On Ruby 3.5, we can't ask the object_id from IMEMOs (https://github.com/ruby/ruby/pull/13347)
+ RB_BUILTIN_TYPE(new_obj) == RUBY_T_IMEMO
+ #else
+ false
+ #endif
+ // If we got really unlucky and an allocation showed up during an update (because it triggered an allocation
+ // directly OR because the GVL got released in the middle of an update), let's skip this sample as well.
+ // See notes on `heap_recorder_update` for details.
+ || heap_recorder->updating
+ ) {
  heap_recorder->active_recording = &SKIPPED_RECORD;
  return;
  }
@@ -331,7 +331,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
  NULL,
  (live_object_data) {
  .weight = weight * heap_recorder->sample_rate,
- .class = string_from_char_slice(alloc_class),
+ .class = intern_or_raise(heap_recorder->string_storage, alloc_class),
  .alloc_gen = rb_gc_count(),
  }
  );
@@ -341,24 +341,30 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
  // locks. To enable us to correctly unlock the profile on exception, we wrap the call to end_heap_allocation_recording
  // with an rb_protect.
  __attribute__((warn_unused_result))
- int end_heap_allocation_recording_with_rb_protect(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
+ int end_heap_allocation_recording_with_rb_protect(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
  if (heap_recorder == NULL) {
  return 0;
  }
+ if (heap_recorder->active_recording == &SKIPPED_RECORD) {
+ // Short circuit, in this case there's nothing to be done
+ heap_recorder->active_recording = NULL;
+ return 0;
+ }
+

  int exception_state;
- struct end_heap_allocation_args end_heap_allocation_args = {
+ end_heap_allocation_args args = {
  .heap_recorder = heap_recorder,
  .locations = locations,
  };
- rb_protect(end_heap_allocation_recording, (VALUE) &end_heap_allocation_args, &exception_state);
+ rb_protect(end_heap_allocation_recording, (VALUE) &args, &exception_state);
  return exception_state;
  }

- static VALUE end_heap_allocation_recording(VALUE end_heap_allocation_args) {
- struct end_heap_allocation_args *args = (struct end_heap_allocation_args *) end_heap_allocation_args;
+ static VALUE end_heap_allocation_recording(VALUE protect_args) {
+ end_heap_allocation_args *args = (end_heap_allocation_args *) protect_args;

- struct heap_recorder *heap_recorder = args->heap_recorder;
+ heap_recorder *heap_recorder = args->heap_recorder;
  ddog_prof_Slice_Location locations = args->locations;

  object_record *active_recording = heap_recorder->active_recording;
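
For context on the wrapper above: end_heap_allocation_recording can raise while the caller is holding locks, so it is invoked through rb_protect, which turns a raised exception into a nonzero state code instead of unwinding through the caller. A stripped-down sketch of that idiom, with made-up function names (only rb_protect/rb_jump_tag are real Ruby C API):

// Illustrative shape of the rb_protect idiom; the function names here are hypothetical.
static VALUE operation_that_may_raise(VALUE arg) {
  // ... anything in here may call rb_raise(...) ...
  return Qnil;
}

static int call_without_unwinding(VALUE arg) {
  int exception_state = 0;
  rb_protect(operation_that_may_raise, arg, &exception_state);
  // 0 means no exception was raised; a nonzero tag means one was captured and can be
  // re-raised later (e.g. after releasing a lock) via rb_jump_tag(exception_state).
  return exception_state;
}
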
@@ -373,6 +379,7 @@ static VALUE end_heap_allocation_recording(VALUE end_heap_allocation_args) {
  heap_recorder->active_recording = NULL;

  if (active_recording == &SKIPPED_RECORD) { // special marker when we decided to skip due to sampling
+ // Note: Remember to update the short circuit in end_heap_allocation_recording_with_rb_protect if this logic changes
  return Qnil;
  }

@@ -392,15 +399,28 @@ void heap_recorder_update_young_objects(heap_recorder *heap_recorder) {
  heap_recorder_update(heap_recorder, /* full_update: */ false);
  }

+ // NOTE: This function needs and assumes it gets called with the GVL being held.
+ // But importantly **some of the operations inside `st_object_record_update` may cause a thread switch**,
+ // so we can't assume a single update happens in a single "atomic" step -- other threads may get some running time
+ // in the meanwhile.
  static void heap_recorder_update(heap_recorder *heap_recorder, bool full_update) {
  if (heap_recorder->updating) {
- if (full_update) rb_raise(rb_eRuntimeError, "BUG: full_update should not be triggered during another update");
-
- // If we try to update while another update is still running, short-circuit.
- // NOTE: This runs while holding the GVL. But since updates may be triggered from GC activity, there's still
- // a chance for updates to be attempted concurrently if scheduling gods so determine.
- heap_recorder->stats_lifetime.updates_skipped_concurrent++;
- return;
+ if (full_update) {
+ // There's another thread that's already doing an update :(
+ //
+ // Because there's a lock on the `StackRecorder` (see @no_concurrent_serialize_mutex) then it's not possible that
+ // the other update is a full update.
+ // Thus we expect is happening is that the GVL got released by the other thread in the middle of a non-full update
+ // and the scheduler thread decided now was a great time to serialize the profile.
+ //
+ // So, let's yield the time on the current thread until Ruby goes back to the other thread doing the update and
+ // it finishes cleanly.
+ while (heap_recorder->updating) { rb_thread_schedule(); }
+ } else {
+ // Non-full updates are optional, so let's walk away
+ heap_recorder->stats_lifetime.updates_skipped_concurrent++;
+ return;
+ }
  }

  if (heap_recorder->object_records_snapshot != NULL) {
@@ -554,26 +574,10 @@ VALUE heap_recorder_state_snapshot(heap_recorder *heap_recorder) {
  return hash;
  }

- void heap_recorder_testonly_assert_hash_matches(ddog_prof_Slice_Location locations) {
- heap_stack *stack = heap_stack_new(locations);
- heap_record_key stack_based_key = (heap_record_key) {
- .type = HEAP_STACK,
- .heap_stack = stack,
- };
- heap_record_key location_based_key = (heap_record_key) {
- .type = LOCATION_SLICE,
- .location_slice = &locations,
- };
-
- st_index_t stack_hash = heap_record_key_hash_st((st_data_t) &stack_based_key);
- st_index_t location_hash = heap_record_key_hash_st((st_data_t) &location_based_key);
-
- heap_stack_free(stack);
-
- if (stack_hash != location_hash) {
- rb_raise(rb_eRuntimeError, "Heap record key hashes built from the same locations differ. stack_based_hash=%"PRI_VALUE_PREFIX"u location_based_hash=%"PRI_VALUE_PREFIX"u", stack_hash, location_hash);
- }
- }
+ typedef struct {
+ heap_recorder *recorder;
+ VALUE debug_str;
+ } debug_context;

  VALUE heap_recorder_testonly_debug(heap_recorder *heap_recorder) {
  if (heap_recorder == NULL) {
@@ -581,7 +585,8 @@ VALUE heap_recorder_testonly_debug(heap_recorder *heap_recorder) {
  }

  VALUE debug_str = rb_str_new2("object records:\n");
- st_foreach(heap_recorder->object_records, st_object_records_debug, (st_data_t) debug_str);
+ debug_context context = (debug_context) {.recorder = heap_recorder, .debug_str = debug_str};
+ st_foreach(heap_recorder->object_records, st_object_records_debug, (st_data_t) &context);

  rb_str_catf(debug_str, "state snapshot: %"PRIsVALUE"\n------\n", heap_recorder_state_snapshot(heap_recorder));

@@ -591,18 +596,19 @@ VALUE heap_recorder_testonly_debug(heap_recorder *heap_recorder) {
  // ==========================
  // Heap Recorder Internal API
  // ==========================
- static int st_heap_record_entry_free(st_data_t key, st_data_t value, DDTRACE_UNUSED st_data_t extra_arg) {
- heap_record_key *record_key = (heap_record_key*) key;
- heap_record_key_free(record_key);
- heap_record_free((heap_record *) value);
+ static int st_heap_record_entry_free(st_data_t key, DDTRACE_UNUSED st_data_t value, st_data_t extra_arg) {
+ heap_recorder *recorder = (heap_recorder *) extra_arg;
+ heap_record_free(recorder, (heap_record *) key);
  return ST_DELETE;
  }

- static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t value, DDTRACE_UNUSED st_data_t extra_arg) {
- object_record_free((object_record *) value);
+ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra_arg) {
+ heap_recorder *recorder = (heap_recorder *) extra_arg;
+ object_record_free(recorder, (object_record *) value);
  return ST_DELETE;
  }

+ // NOTE: Some operations inside this function can cause the GVL to be released! Plan accordingly.
  static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
  long obj_id = (long) key;
  object_record *record = (object_record*) value;
@@ -628,7 +634,7 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
  return ST_CONTINUE;
  }

- if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
+ if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) { // Note: This function call can cause the GVL to be released
  // Id no longer associated with a valid ref. Need to delete this object record!
  on_committed_object_record_cleanup(recorder, record);
  recorder->stats_last_update.objects_dead++;
@@ -644,7 +650,8 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
  ) {
  // if we were asked to update sizes and this object was not already seen as being frozen,
  // update size again.
- record->object_data.size = ruby_obj_memsize_of(ref);
+ record->object_data.size = ruby_obj_memsize_of(ref); // Note: This function call can cause the GVL to be released... maybe?
+ // (With T_DATA for instance, since it can be a custom method supplied by extensions)
  // Check if it's now frozen so we skip a size update next time
  record->object_data.is_frozen = RB_OBJ_FROZEN(ref);
  }
@@ -664,7 +671,7 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
  // WARN: This can get called outside the GVL. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED.
  static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra) {
  object_record *record = (object_record*) value;
- const heap_stack *stack = record->heap_record->stack;
+ const heap_record *stack = record->heap_record;
  iteration_context *context = (iteration_context*) extra;

  const heap_recorder *recorder = context->heap_recorder;
@@ -680,8 +687,10 @@ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t val
  locations[i] = (ddog_prof_Location) {
  .mapping = {.filename = DDOG_CHARSLICE_C(""), .build_id = DDOG_CHARSLICE_C(""), .build_id_id = {}},
  .function = {
- .name = {.ptr = frame->name, .len = strlen(frame->name)},
- .filename = {.ptr = frame->filename, .len = strlen(frame->filename)},
+ .name = DDOG_CHARSLICE_C(""),
+ .name_id = frame->name,
+ .filename = DDOG_CHARSLICE_C(""),
+ .filename_id = frame->filename,
  },
  .line = frame->line,
  };
@@ -700,11 +709,12 @@ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t val
  }

  static int st_object_records_debug(DDTRACE_UNUSED st_data_t key, st_data_t value, st_data_t extra) {
- VALUE debug_str = (VALUE) extra;
+ debug_context *context = (debug_context*) extra;
+ VALUE debug_str = context->debug_str;

  object_record *record = (object_record*) value;

- rb_str_catf(debug_str, "%"PRIsVALUE"\n", object_record_inspect(record));
+ rb_str_catf(debug_str, "%"PRIsVALUE"\n", object_record_inspect(context->recorder, record));

  return ST_CONTINUE;
  }
@@ -733,60 +743,35 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec
  st_lookup(heap_recorder->object_records, active_recording->obj_id, (st_data_t *) &existing_record);
  if (existing_record == NULL) rb_raise(rb_eRuntimeError, "Unexpected NULL when reading existing record");

- VALUE existing_inspect = object_record_inspect(existing_record);
- VALUE new_inspect = object_record_inspect(active_recording);
+ VALUE existing_inspect = object_record_inspect(heap_recorder, existing_record);
+ VALUE new_inspect = object_record_inspect(heap_recorder, active_recording);
  rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
  "the same id. previous={%"PRIsVALUE"} new={%"PRIsVALUE"}", existing_inspect, new_inspect);
  }
  }

- // Struct holding data required for an update operation on heap_records
- typedef struct {
- // [in] The locations we did this update with
- ddog_prof_Slice_Location locations;
- // [out] Pointer that will be updated to the updated heap record to prevent having to do
- // another lookup to access the updated heap record.
- heap_record **record;
- } heap_record_update_data;
-
- // This function assumes ownership of stack_data is passed on to it so it'll either transfer ownership or clean-up.
  static int update_heap_record_entry_with_new_allocation(st_data_t *key, st_data_t *value, st_data_t data, int existing) {
- heap_record_update_data *update_data = (heap_record_update_data*) data;
+ heap_record **new_or_existing_record = (heap_record **) data;
+ (*new_or_existing_record) = (heap_record *) (*key);

  if (!existing) {
- // there was no matching heap record so lets create a new one...
- // we need to initialize a heap_record_key with a new stack and use that for the key storage. We can't use the
- // locations-based key we used for the update call because we don't own its lifecycle. So we create a new
- // heap stack and will pass ownership of it to the heap_record.
- heap_stack *stack = heap_stack_new(update_data->locations);
- (*key) = (st_data_t) heap_record_key_new(stack);
- (*value) = (st_data_t) heap_record_new(stack);
+ (*value) = (st_data_t) true; // We're only using this hash as a set
  }

- heap_record *record = (heap_record*) (*value);
- (*update_data->record) = record;
-
  return ST_CONTINUE;
  }

  static heap_record* get_or_create_heap_record(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
- // For performance reasons we use a stack-allocated location-slice based key. This allows us
- // to do allocation-free lookups and reuse of a matching existing heap record.
- // NOTE: If we end up creating a new record, we'll create a heap-allocated key we own and use that for storage
- // instead of this one.
- heap_record_key lookup_key = (heap_record_key) {
- .type = LOCATION_SLICE,
- .location_slice = &locations,
- };
+ // See note on "heap_records" definition for why we keep this map.
+ heap_record *stack = heap_record_new(heap_recorder, locations);

- heap_record *heap_record = NULL;
- heap_record_update_data update_data = (heap_record_update_data) {
- .locations = locations,
- .record = &heap_record,
- };
- st_update(heap_recorder->heap_records, (st_data_t) &lookup_key, update_heap_record_entry_with_new_allocation, (st_data_t) &update_data);
+ heap_record *new_or_existing_record = NULL; // Will be set inside update_heap_record_entry_with_new_allocation
+ bool existing = st_update(heap_recorder->heap_records, (st_data_t) stack, update_heap_record_entry_with_new_allocation, (st_data_t) &new_or_existing_record);
+ if (existing) {
+ heap_record_free(heap_recorder, stack);
+ }

- return heap_record;
+ return new_or_existing_record;
  }

  static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_record *heap_record) {
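
The rewritten get_or_create_heap_record above leans on the st_update insert-or-reuse idiom: build a candidate key, let st_update insert it when no equal key exists, and free the candidate when the table already had one (st_update returns nonzero in that case, and the callback reports back whichever key ended up in the table). A stripped-down sketch of the same idiom with illustrative names, not the library's code:

// Illustrative: using an st_table as a set via st_update's insert-or-reuse callback.
static int example_set_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing) {
  void **winner = (void **) arg;
  *winner = (void *) *key;                   // report whichever key is (or stays) in the table
  if (!existing) *value = (st_data_t) true;  // value is unused: the table is really a set
  return ST_CONTINUE;                        // keep the entry
}

// Caller side: if st_update reports the key already existed, the freshly built candidate
// key is redundant and the caller must free it, exactly as the diff does above.
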
@@ -795,18 +780,10 @@ static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_rec
795
780
  return;
796
781
  }
797
782
 
798
- heap_record_key heap_key = (heap_record_key) {
799
- .type = HEAP_STACK,
800
- .heap_stack = heap_record->stack,
801
- };
802
- // We need to access the deleted key to free it since we gave ownership of the keys to the hash.
803
- // st_delete will change this pointer to point to the removed key if one is found.
804
- heap_record_key *deleted_key = &heap_key;
805
- if (!st_delete(heap_recorder->heap_records, (st_data_t*) &deleted_key, NULL)) {
783
+ if (!st_delete(heap_recorder->heap_records, (st_data_t*) &heap_record, NULL)) {
806
784
  rb_raise(rb_eRuntimeError, "Attempted to cleanup an untracked heap_record");
807
785
  };
808
- heap_record_key_free(deleted_key);
809
- heap_record_free(heap_record);
786
+ heap_record_free(heap_recorder, heap_record);
810
787
  }
811
788
 
812
789
  static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, object_record *record) {
@@ -822,59 +799,44 @@ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, obj
822
799
  heap_record *heap_record = record->heap_record;
823
800
 
824
801
  if (heap_record == NULL) rb_raise(rb_eRuntimeError, "heap_record was NULL in on_committed_object_record_cleanup");
825
- if (heap_record->stack == NULL) rb_raise(rb_eRuntimeError, "heap_record->stack was NULL in on_committed_object_record_cleanup");
826
802
 
827
803
  heap_record->num_tracked_objects--;
828
804
 
829
805
  // One less object using this heap record, it may have become unused...
830
806
  cleanup_heap_record_if_unused(heap_recorder, heap_record);
831
807
 
832
- object_record_free(record);
833
- }
834
-
835
- // ===============
836
- // Heap Record API
837
- // ===============
838
- heap_record* heap_record_new(heap_stack *stack) {
839
- heap_record *record = ruby_xcalloc(1, sizeof(heap_record));
840
- record->num_tracked_objects = 0;
841
- record->stack = stack;
842
- return record;
843
- }
844
-
845
- void heap_record_free(heap_record *record) {
846
- heap_stack_free(record->stack);
847
- ruby_xfree(record);
808
+ object_record_free(heap_recorder, record);
848
809
  }
849
810
 
850
811
  // =================
851
812
  // Object Record API
852
813
  // =================
853
814
  object_record* object_record_new(long obj_id, heap_record *heap_record, live_object_data object_data) {
854
- object_record *record = ruby_xcalloc(1, sizeof(object_record));
815
+ object_record *record = calloc(1, sizeof(object_record)); // See "note on calloc vs ruby_xcalloc use" above
855
816
  record->obj_id = obj_id;
856
817
  record->heap_record = heap_record;
857
818
  record->object_data = object_data;
858
819
  return record;
859
820
  }
860
821
 
861
- void object_record_free(object_record *record) {
862
- if (record->object_data.class != NULL) {
863
- ruby_xfree(record->object_data.class);
864
- }
865
- ruby_xfree(record);
822
+ void object_record_free(heap_recorder *recorder, object_record *record) {
823
+ unintern_or_raise(recorder, record->object_data.class);
824
+ free(record); // See "note on calloc vs ruby_xcalloc use" above
866
825
  }
867
826
 
868
- VALUE object_record_inspect(object_record *record) {
869
- heap_frame top_frame = record->heap_record->stack->frames[0];
827
+ VALUE object_record_inspect(heap_recorder *recorder, object_record *record) {
828
+ heap_frame top_frame = record->heap_record->frames[0];
829
+ VALUE filename = get_ruby_string_or_raise(recorder, top_frame.filename);
870
830
  live_object_data object_data = record->object_data;
871
- VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu gen_age=%zu frozen=%d ",
872
- record->obj_id, object_data.weight, object_data.size, top_frame.filename,
831
+
832
+ VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%"PRIsVALUE":%d alloc_gen=%zu gen_age=%zu frozen=%d ",
833
+ record->obj_id, object_data.weight, object_data.size, filename,
873
834
  (int) top_frame.line, object_data.alloc_gen, object_data.gen_age, object_data.is_frozen);
874
835
 
875
- const char *class = record->object_data.class;
876
- if (class != NULL) {
877
- rb_str_catf(inspect, "class=%s ", class);
836
+ if (record->object_data.class.value > 0) {
837
+ VALUE class = get_ruby_string_or_raise(recorder, record->object_data.class);
838
+
839
+ rb_str_catf(inspect, "class=%"PRIsVALUE" ", class);
878
840
  }
879
841
  VALUE ref;
880
842
 
@@ -894,202 +856,103 @@ VALUE object_record_inspect(object_record *record) {
894
856
  }
 
 // ==============
-// Heap Frame API
-// ==============
-// WARN: Must be kept in-sync with ::char_slice_hash
-st_index_t string_hash(char *str, st_index_t seed) {
-  return st_hash(str, strlen(str), seed);
-}
-
-// WARN: Must be kept in-sync with ::string_hash
-st_index_t char_slice_hash(ddog_CharSlice char_slice, st_index_t seed) {
-  return st_hash(char_slice.ptr, char_slice.len, seed);
-}
-
-// WARN: Must be kept in-sync with ::ddog_location_hash
-st_index_t heap_frame_hash(heap_frame *frame, st_index_t seed) {
-  st_index_t hash = string_hash(frame->name, seed);
-  hash = string_hash(frame->filename, hash);
-  hash = st_hash(&frame->line, sizeof(frame->line), hash);
-  return hash;
-}
-
-// WARN: Must be kept in-sync with ::heap_frame_hash
-st_index_t ddog_location_hash(ddog_prof_Location location, st_index_t seed) {
-  st_index_t hash = char_slice_hash(location.function.name, seed);
-  hash = char_slice_hash(location.function.filename, hash);
-  // Convert ddog_prof line type to the same type we use for our heap_frames to
-  // ensure we have compatible hashes
-  int32_t line_as_int32 = (int32_t) location.line;
-  hash = st_hash(&line_as_int32, sizeof(line_as_int32), hash);
-  return hash;
-}
-
-// ==============
-// Heap Stack API
+// Heap Record API
 // ==============
-heap_stack* heap_stack_new(ddog_prof_Slice_Location locations) {
+heap_record* heap_record_new(heap_recorder *recorder, ddog_prof_Slice_Location locations) {
   uint16_t frames_len = locations.len;
   if (frames_len > MAX_FRAMES_LIMIT) {
     // This is not expected as MAX_FRAMES_LIMIT is shared with the stacktrace construction mechanism
     rb_raise(rb_eRuntimeError, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len);
   }
-  heap_stack *stack = ruby_xcalloc(1, sizeof(heap_stack) + frames_len * sizeof(heap_frame));
+  heap_record *stack = calloc(1, sizeof(heap_record) + frames_len * sizeof(heap_frame)); // See "note on calloc vs ruby_xcalloc use" above
+  stack->num_tracked_objects = 0;
   stack->frames_len = frames_len;
+
+  // Intern all these strings...
+  ddog_CharSlice *strings = recorder->reusable_char_slices;
+  // Put all the char slices in the same array; we'll pull them out in the same order from the ids array
   for (uint16_t i = 0; i < stack->frames_len; i++) {
     const ddog_prof_Location *location = &locations.ptr[i];
+    strings[i] = location->function.filename;
+    strings[i + stack->frames_len] = location->function.name;
+  }
+  intern_all_or_raise(recorder->string_storage, (ddog_prof_Slice_CharSlice) { .ptr = strings, .len = stack->frames_len * 2 }, recorder->reusable_ids, stack->frames_len * 2);
+
+  // ...and record them for later use
+  for (uint16_t i = 0; i < stack->frames_len; i++) {
     stack->frames[i] = (heap_frame) {
-      .name = string_from_char_slice(location->function.name),
-      .filename = string_from_char_slice(location->function.filename),
+      .filename = recorder->reusable_ids[i],
+      .name = recorder->reusable_ids[i + stack->frames_len],
       // ddog_prof_Location is a int64_t. We don't expect to have to profile files with more than
       // 2M lines so this cast should be fairly safe?
-      .line = (int32_t) location->line,
+      .line = (int32_t) locations.ptr[i].line,
     };
   }
-  return stack;
-}
 
-void heap_stack_free(heap_stack *stack) {
-  for (uint64_t i = 0; i < stack->frames_len; i++) {
-    heap_frame *frame = &stack->frames[i];
-    ruby_xfree(frame->name);
-    ruby_xfree(frame->filename);
-  }
-  ruby_xfree(stack);
+  return stack;
 }
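
// [Illustrative sketch, not part of the released diff] The packing scheme used by heap_record_new
// above, with hypothetical stand-ins for the libdatadog types: filenames go into slots
// [0, frames_len) and names into [frames_len, 2 * frames_len), so a single batched intern call can
// return ids that are read back with the same offsets afterwards.
#include <stddef.h>
#include <stdint.h>

typedef struct { const char *ptr; size_t len; } char_slice; // stand-in for ddog_CharSlice
typedef struct { uint32_t value; } string_id;               // stand-in for ddog_prof_ManagedStringId

// Hypothetical batch intern: assigns one id per input slice, in input order.
static void intern_batch(const char_slice *strings, string_id *ids, size_t count) {
  (void) strings; // a real implementation would deduplicate and store these
  for (size_t i = 0; i < count; i++) ids[i] = (string_id) { .value = (uint32_t) (i + 1) };
}

static void pack_and_intern(
  const char_slice *filenames, const char_slice *names, uint16_t frames_len,
  char_slice *scratch_strings, string_id *scratch_ids
) {
  for (uint16_t i = 0; i < frames_len; i++) {
    scratch_strings[i] = filenames[i];              // first half: filenames
    scratch_strings[i + frames_len] = names[i];     // second half: names
  }
  intern_batch(scratch_strings, scratch_ids, (size_t) frames_len * 2);
  // scratch_ids[i] now holds the filename id and scratch_ids[i + frames_len] the name id for
  // frame i, mirroring how the second loop above fills in each heap_frame.
}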
 
-// WARN: Must be kept in-sync with ::ddog_location_slice_hash
-st_index_t heap_stack_hash(heap_stack *stack, st_index_t seed) {
-  st_index_t hash = seed;
-  for (uint64_t i = 0; i < stack->frames_len; i++) {
-    hash = heap_frame_hash(&stack->frames[i], hash);
-  }
-  return hash;
-}
+void heap_record_free(heap_recorder *recorder, heap_record *stack) {
+  ddog_prof_ManagedStringId *ids = recorder->reusable_ids;
 
-// WARN: Must be kept in-sync with ::heap_stack_hash
-st_index_t ddog_location_slice_hash(ddog_prof_Slice_Location locations, st_index_t seed) {
-  st_index_t hash = seed;
-  for (uint64_t i = 0; i < locations.len; i++) {
-    hash = ddog_location_hash(locations.ptr[i], hash);
+  // Put all the ids in the same array; the order doesn't really matter
+  for (u_int16_t i = 0; i < stack->frames_len; i++) {
+    ids[i] = stack->frames[i].filename;
+    ids[i + stack->frames_len] = stack->frames[i].name;
   }
-  return hash;
-}
+  unintern_all_or_raise(recorder, (ddog_prof_Slice_ManagedStringId) { .ptr = ids, .len = stack->frames_len * 2 });
 
-// ===================
-// Heap Record Key API
-// ===================
-heap_record_key* heap_record_key_new(heap_stack *stack) {
-  heap_record_key *key = ruby_xmalloc(sizeof(heap_record_key));
-  key->type = HEAP_STACK;
-  key->heap_stack = stack;
-  return key;
+  free(stack); // See "note on calloc vs ruby_xcalloc use" above
 }
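
// [Illustrative sketch, not part of the released diff] A rough mental model for the managed string
// storage, assuming it behaves like a refcounted string table (the exact libdatadog behavior is not
// shown in this file): every intern must eventually be balanced by an unintern, which is why
// heap_record_free releases exactly the ids that heap_record_new acquired.
#include <stdlib.h>
#include <string.h>

typedef struct { char *str; int refcount; } table_entry;
typedef struct { table_entry entries[1024]; int len; } toy_string_table; // toy: no bounds checking

static int toy_intern(toy_string_table *table, const char *str) {
  for (int i = 0; i < table->len; i++) {
    if (table->entries[i].str != NULL && strcmp(table->entries[i].str, str) == 0) {
      table->entries[i].refcount++;
      return i + 1; // ids start at 1; 0 is reserved for "empty", as in unintern_or_raise below
    }
  }
  table->entries[table->len] = (table_entry) { .str = strdup(str), .refcount = 1 };
  return ++table->len;
}

static void toy_unintern(toy_string_table *table, int id) {
  if (id == 0) return; // nothing was interned for the empty id
  table_entry *entry = &table->entries[id - 1];
  if (--entry->refcount == 0) { free(entry->str); entry->str = NULL; }
}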
 
-void heap_record_key_free(heap_record_key *key) {
-  ruby_xfree(key);
-}
-
-static inline size_t heap_record_key_len(heap_record_key *key) {
-  if (key->type == HEAP_STACK) {
-    return key->heap_stack->frames_len;
-  } else {
-    return key->location_slice->len;
-  }
-}
+// The entire stack is represented by ids (name, filename) and lines (integers) so we can treat it as just
+// a big string of bytes and compare it all in one go.
+int heap_record_cmp_st(st_data_t key1, st_data_t key2) {
+  heap_record *stack1 = (heap_record*) key1;
+  heap_record *stack2 = (heap_record*) key2;
 
-static inline int64_t heap_record_key_entry_line(heap_record_key *key, size_t entry_i) {
-  if (key->type == HEAP_STACK) {
-    return key->heap_stack->frames[entry_i].line;
+  if (stack1->frames_len != stack2->frames_len) {
+    return ((int) stack1->frames_len) - ((int) stack2->frames_len);
   } else {
-    return key->location_slice->ptr[entry_i].line;
+    return memcmp(stack1->frames, stack2->frames, stack1->frames_len * sizeof(heap_frame));
   }
 }
 
-static inline size_t heap_record_key_entry_name(heap_record_key *key, size_t entry_i, const char **name_ptr) {
-  if (key->type == HEAP_STACK) {
-    char *name = key->heap_stack->frames[entry_i].name;
-    (*name_ptr) = name;
-    return strlen(name);
-  } else {
-    ddog_CharSlice name = key->location_slice->ptr[entry_i].function.name;
-    (*name_ptr) = name.ptr;
-    return name.len;
-  }
-}
+// Initial seed for hash function, same as Ruby uses
+#define FNV1_32A_INIT 0x811c9dc5
 
-static inline size_t heap_record_key_entry_filename(heap_record_key *key, size_t entry_i, const char **filename_ptr) {
-  if (key->type == HEAP_STACK) {
-    char *filename = key->heap_stack->frames[entry_i].filename;
-    (*filename_ptr) = filename;
-    return strlen(filename);
-  } else {
-    ddog_CharSlice filename = key->location_slice->ptr[entry_i].function.filename;
-    (*filename_ptr) = filename.ptr;
-    return filename.len;
-  }
+// The entire stack is represented by ids (name, filename) and lines (integers) so we can treat it as just
+// a big string of bytes and hash it all in one go.
+st_index_t heap_record_hash_st(st_data_t key) {
+  heap_record *stack = (heap_record*) key;
+  return st_hash(stack->frames, stack->frames_len * sizeof(heap_frame), FNV1_32A_INIT);
 }
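
// [Illustrative sketch, not part of the released diff] Why the byte-wise memcmp/st_hash above is
// sound: a heap_frame now holds only two fixed-size string ids plus an int32 line (a layout with no
// padding on common platforms), so equal stacks are byte-for-byte identical. The same idea with a
// toy frame type and a plain FNV-1a hash standing in for Ruby's st_hash:
#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef struct { uint32_t filename_id; uint32_t name_id; int32_t line; } toy_frame;

static uint32_t fnv1a_32(const void *bytes, size_t len) {
  const unsigned char *p = bytes;
  uint32_t hash = 0x811c9dc5; // same initial seed as FNV1_32A_INIT above
  for (size_t i = 0; i < len; i++) { hash ^= p[i]; hash *= 16777619u; }
  return hash;
}

static int toy_stack_equal(const toy_frame *a, const toy_frame *b, uint16_t frames_len) {
  return memcmp(a, b, frames_len * sizeof(toy_frame)) == 0; // one comparison for the whole stack
}

static uint32_t toy_stack_hash(const toy_frame *frames, uint16_t frames_len) {
  return fnv1a_32(frames, frames_len * sizeof(toy_frame)); // one pass over the raw bytes
}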
 
-int heap_record_key_cmp_st(st_data_t key1, st_data_t key2) {
-  heap_record_key *key_record1 = (heap_record_key*) key1;
-  heap_record_key *key_record2 = (heap_record_key*) key2;
+static void unintern_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId id) {
+  if (id.value == 0) return; // Empty string, nothing to do
 
-  // Fast path, check if lengths differ
-  size_t key_record1_len = heap_record_key_len(key_record1);
-  size_t key_record2_len = heap_record_key_len(key_record2);
-
-  if (key_record1_len != key_record2_len) {
-    return ((int) key_record1_len) - ((int) key_record2_len);
+  ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern(recorder->string_storage, id);
+  if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) {
+    rb_raise(rb_eRuntimeError, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some));
   }
+}
 
-  // If we got this far, we have same lengths so need to check item-by-item
-  for (size_t i = 0; i < key_record1_len; i++) {
-    // Lines are faster to compare, lets do that first
-    size_t line1 = heap_record_key_entry_line(key_record1, i);
-    size_t line2 = heap_record_key_entry_line(key_record2, i);
-    if (line1 != line2) {
-      return ((int) line1) - ((int)line2);
-    }
-
-    // Then come names, they are usually smaller than filenames
-    const char *name1, *name2;
-    size_t name1_len = heap_record_key_entry_name(key_record1, i, &name1);
-    size_t name2_len = heap_record_key_entry_name(key_record2, i, &name2);
-    if (name1_len != name2_len) {
-      return ((int) name1_len) - ((int) name2_len);
-    }
-    int name_cmp_result = strncmp(name1, name2, name1_len);
-    if (name_cmp_result != 0) {
-      return name_cmp_result;
-    }
-
-    // Then come filenames
-    const char *filename1, *filename2;
-    int64_t filename1_len = heap_record_key_entry_filename(key_record1, i, &filename1);
-    int64_t filename2_len = heap_record_key_entry_filename(key_record2, i, &filename2);
-    if (filename1_len != filename2_len) {
-      return ((int) filename1_len) - ((int) filename2_len);
-    }
-    int filename_cmp_result = strncmp(filename1, filename2, filename1_len);
-    if (filename_cmp_result != 0) {
-      return filename_cmp_result;
-    }
+static void unintern_all_or_raise(heap_recorder *recorder, ddog_prof_Slice_ManagedStringId ids) {
+  ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern_all(recorder->string_storage, ids);
+  if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) {
+    rb_raise(rb_eRuntimeError, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some));
   }
-
-  // If we survived the above for, then everything matched
-  return 0;
 }
 
-// Initial seed for hash functions
-#define FNV1_32A_INIT 0x811c9dc5
-
-st_index_t heap_record_key_hash_st(st_data_t key) {
-  heap_record_key *record_key = (heap_record_key*) key;
-  if (record_key->type == HEAP_STACK) {
-    return heap_stack_hash(record_key->heap_stack, FNV1_32A_INIT);
-  } else {
-    return ddog_location_slice_hash(*record_key->location_slice, FNV1_32A_INIT);
+static VALUE get_ruby_string_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId id) {
+  ddog_StringWrapperResult get_string_result = ddog_prof_ManagedStringStorage_get_string(recorder->string_storage, id);
+  if (get_string_result.tag == DDOG_STRING_WRAPPER_RESULT_ERR) {
+    rb_raise(rb_eRuntimeError, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err));
   }
+  VALUE ruby_string = ruby_string_from_vec_u8(get_string_result.ok.message);
+  ddog_StringWrapper_drop((ddog_StringWrapper *) &get_string_result.ok);
+
+  return ruby_string;
 }
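
// [Illustrative sketch, not part of the released diff] The general shape of the conversion done by
// ruby_string_from_vec_u8 above, assuming the byte vector exposes ptr/len fields (a hypothetical
// stand-in type is used here): the bytes are copied into a new UTF-8 Ruby String, which is why the
// wrapper can be dropped immediately afterwards.
#include <ruby.h>
#include <stdint.h>

typedef struct { const uint8_t *ptr; uintptr_t len; } byte_vec; // hypothetical stand-in for the vector type

static VALUE ruby_string_from_bytes(byte_vec vec) {
  return rb_utf8_str_new((const char *) vec.ptr, (long) vec.len); // rb_utf8_str_new copies the bytes
}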
 
 static inline double ewma_stat(double previous, double current) {
@@ -1113,3 +976,23 @@ void heap_recorder_testonly_reset_last_update(heap_recorder *heap_recorder) {
 
   heap_recorder->last_update_ns = 0;
 }
+
+void heap_recorder_testonly_benchmark_intern(heap_recorder *heap_recorder, ddog_CharSlice string, int times, bool use_all) {
+  if (heap_recorder == NULL) rb_raise(rb_eArgError, "heap profiling must be enabled");
+  if (times > REUSABLE_FRAME_DETAILS_SIZE) rb_raise(rb_eArgError, "times cannot be > than REUSABLE_FRAME_DETAILS_SIZE");
+
+  if (use_all) {
+    ddog_CharSlice *strings = heap_recorder->reusable_char_slices;
+
+    for (int i = 0; i < times; i++) strings[i] = string;
+
+    intern_all_or_raise(
+      heap_recorder->string_storage,
+      (ddog_prof_Slice_CharSlice) { .ptr = strings, .len = times },
+      heap_recorder->reusable_ids,
+      times
+    );
+  } else {
+    for (int i = 0; i < times; i++) intern_or_raise(heap_recorder->string_storage, string);
+  }
+}
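
// [Illustrative sketch, not part of the released diff] A hypothetical driver for the helper above,
// timing the one-by-one path against the batched path for an already-initialized recorder; it
// assumes the heap_recorder and libdatadog headers are available and only uses clock_gettime plus
// the function added in this diff.
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static double elapsed_ms(struct timespec start, struct timespec end) {
  return (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_nsec - start.tv_nsec) / 1e6;
}

static void benchmark_intern_paths(heap_recorder *recorder) {
  ddog_CharSlice sample = { .ptr = "Object#initialize", .len = sizeof("Object#initialize") - 1 };
  struct timespec start, end;

  clock_gettime(CLOCK_MONOTONIC, &start);
  heap_recorder_testonly_benchmark_intern(recorder, sample, 100, /* use_all: */ false);
  clock_gettime(CLOCK_MONOTONIC, &end);
  printf("intern one-by-one: %.3f ms\n", elapsed_ms(start, end));

  clock_gettime(CLOCK_MONOTONIC, &start);
  heap_recorder_testonly_benchmark_intern(recorder, sample, 100, /* use_all: */ true);
  clock_gettime(CLOCK_MONOTONIC, &end);
  printf("intern batched:    %.3f ms\n", elapsed_ms(start, end));
}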