datadog 2.35.0 → 2.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -1
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +68 -31
  4. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +1 -1
  5. data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +1 -1
  6. data/ext/datadog_profiling_native_extension/collectors_stack.c +37 -18
  7. data/ext/datadog_profiling_native_extension/collectors_stack.h +8 -2
  8. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +434 -300
  9. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +9 -7
  10. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +7 -8
  11. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +0 -12
  12. data/ext/datadog_profiling_native_extension/extconf.rb +2 -2
  13. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +4 -43
  14. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +15 -47
  15. data/ext/datadog_profiling_native_extension/heap_recorder.c +44 -26
  16. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +14 -35
  17. data/ext/datadog_profiling_native_extension/profiling.c +41 -4
  18. data/ext/datadog_profiling_native_extension/ruby_helpers.c +33 -34
  19. data/ext/datadog_profiling_native_extension/stack_recorder.c +24 -3
  20. data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
  21. data/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h +4 -2
  22. data/ext/libdatadog_api/datadog_ruby_common.c +7 -8
  23. data/ext/libdatadog_api/datadog_ruby_common.h +0 -12
  24. data/ext/libdatadog_extconf_helpers.rb +1 -1
  25. data/lib/datadog/appsec/api_security/route_extractor.rb +6 -0
  26. data/lib/datadog/appsec/component.rb +1 -1
  27. data/lib/datadog/appsec/configuration.rb +7 -0
  28. data/lib/datadog/appsec/contrib/aws_lambda/waf_addresses.rb +37 -4
  29. data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +64 -19
  30. data/lib/datadog/appsec/contrib/graphql/integration.rb +1 -0
  31. data/lib/datadog/appsec/contrib/rack/buffered_input.rb +83 -0
  32. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +41 -3
  33. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +20 -7
  34. data/lib/datadog/appsec/contrib/rack/input_peeker.rb +92 -0
  35. data/lib/datadog/appsec/contrib/rails/gateway/request.rb +33 -0
  36. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +17 -1
  37. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +20 -3
  38. data/lib/datadog/appsec/default_header_tags.rb +10 -6
  39. data/lib/datadog/core/configuration/components.rb +1 -0
  40. data/lib/datadog/core/configuration/settings.rb +1 -2
  41. data/lib/datadog/core/configuration/supported_configurations.rb +2 -0
  42. data/lib/datadog/core/remote/component.rb +1 -1
  43. data/lib/datadog/core/telemetry/event/app_started.rb +0 -21
  44. data/lib/datadog/core/utils/at_fork_monkey_patch.rb +1 -1
  45. data/lib/datadog/core/utils/forking.rb +3 -1
  46. data/lib/datadog/core/utils/spawn_monkey_patch.rb +3 -1
  47. data/lib/datadog/core.rb +3 -0
  48. data/lib/datadog/di/base.rb +4 -1
  49. data/lib/datadog/di/component.rb +1 -1
  50. data/lib/datadog/error_tracking/collector.rb +2 -1
  51. data/lib/datadog/error_tracking/component.rb +2 -2
  52. data/lib/datadog/kit/tracing/method_tracer.rb +4 -1
  53. data/lib/datadog/opentelemetry/sdk/propagator.rb +9 -3
  54. data/lib/datadog/opentelemetry/sdk/span_processor.rb +4 -1
  55. data/lib/datadog/profiling/collectors/thread_context.rb +1 -0
  56. data/lib/datadog/profiling/component.rb +13 -15
  57. data/lib/datadog/profiling/ext/dir_monkey_patches.rb +3 -3
  58. data/lib/datadog/ruby_version.rb +25 -0
  59. data/lib/datadog/symbol_database/component.rb +306 -98
  60. data/lib/datadog/symbol_database/extractor.rb +223 -84
  61. data/lib/datadog/tracing/configuration/ext.rb +13 -0
  62. data/lib/datadog/tracing/configuration/settings.rb +17 -0
  63. data/lib/datadog/tracing/contrib/configuration/resolver.rb +7 -0
  64. data/lib/datadog/tracing/contrib/grpc/distributed/propagation.rb +2 -0
  65. data/lib/datadog/tracing/contrib/grpc.rb +1 -0
  66. data/lib/datadog/tracing/contrib/http/distributed/propagation.rb +2 -0
  67. data/lib/datadog/tracing/contrib/http.rb +1 -0
  68. data/lib/datadog/tracing/contrib/karafka/distributed/propagation.rb +2 -0
  69. data/lib/datadog/tracing/contrib/karafka.rb +1 -0
  70. data/lib/datadog/tracing/contrib/rack/middlewares.rb +3 -1
  71. data/lib/datadog/tracing/contrib/rack/route_inference.rb +3 -1
  72. data/lib/datadog/tracing/contrib/sidekiq/distributed/propagation.rb +2 -0
  73. data/lib/datadog/tracing/contrib/sidekiq.rb +1 -0
  74. data/lib/datadog/tracing/contrib/waterdrop/distributed/propagation.rb +2 -0
  75. data/lib/datadog/tracing/contrib/waterdrop.rb +1 -0
  76. data/lib/datadog/tracing/distributed/propagation.rb +33 -1
  77. data/lib/datadog/tracing/distributed/trace_context.rb +11 -2
  78. data/lib/datadog/tracing/trace_digest.rb +7 -0
  79. data/lib/datadog/tracing/trace_operation.rb +4 -1
  80. data/lib/datadog/tracing/tracer.rb +1 -0
  81. data/lib/datadog/version.rb +1 -1
  82. data/lib/datadog.rb +4 -1
  83. metadata +8 -5
@@ -17,16 +17,24 @@ module Datadog
17
17
  # - Debounce: extraction is deferred by EXTRACT_DEBOUNCE_INTERVAL seconds so
18
18
  # reconfigurations during boot coalesce into a single extraction on the
19
19
  # final Component instance.
20
+ # - Hot-load coverage: TracePoint :class hook captures classes loaded after
21
+ # initial extraction, enqueues them on a per-instance buffer; the scheduler
22
+ # drains the buffer on debounce and extracts each one via Extractor#extract,
23
+ # matching Java/Python/.NET continuous coverage.
20
24
  #
21
25
  # Upload flow:
22
26
  # 1. Remote config sends upload_symbols: true (or force_upload mode)
23
27
  # 2. start_upload called — schedules extraction EXTRACT_DEBOUNCE_INTERVAL
24
- # seconds in the future on a per-instance scheduler thread.
28
+ # seconds in the future on a per-instance scheduler thread, and lazily
29
+ # installs the TracePoint :class hook if not already installed.
25
30
  # 3. When the timer fires (no further start_upload calls reset it),
26
- # extract_and_upload runs: ObjectSpace iteration → Extractor → ScopeBatcher.
31
+ # extract_and_upload runs. On the first call: ObjectSpace iteration →
32
+ # Extractor#extract_all. On subsequent calls: drain the hot-load buffer →
33
+ # Extractor#extract per module.
27
34
  # 4. ScopeBatcher batches and triggers Uploader.
28
- # 5. A class-level flag is set so subsequent Component instances created via
29
- # Datadog reconfiguration do not re-upload.
35
+ # 5. As new classes load throughout the process lifetime, the TracePoint hook
36
+ # fires and signals the scheduler — the next debounce window produces an
37
+ # incremental upload of just the new classes.
30
38
  #
31
39
  # Created by: Components#initialize (in Core::Configuration::Components)
32
40
  # Accessed by: Remote config receiver via Datadog.send(:components).symbol_database
@@ -39,41 +47,12 @@ module Datadog
39
47
  # Long enough to absorb reconfiguration cascades during Rails boot.
40
48
  EXTRACT_DEBOUNCE_INTERVAL = 5 # seconds
41
49
 
42
- # Class-level state: tracks whether any Component instance in this process
43
- # has performed an extract+upload. Survives Component replacement during
44
- # Datadog reconfiguration so duplicate uploads are prevented.
45
- @uploaded_this_process = false
46
- @upload_done_mutex = Mutex.new
47
- @upload_done_cv = ConditionVariable.new
48
-
49
- class << self
50
- attr_reader :upload_done_mutex, :upload_done_cv
51
-
52
- # Whether any Component instance in this process has completed an
53
- # upload. Cross-instance flag — used to dedupe uploads across
54
- # Component rebuilds within a single Ruby process.
55
- # @return [Boolean]
56
- def uploaded_this_process?
57
- @upload_done_mutex.synchronize { @uploaded_this_process }
58
- end
59
-
60
- # Mark the current process as having completed a symbol upload.
61
- # Called by the Component instance that successfully completes an
62
- # upload; subsequent start_upload calls on any instance short-circuit.
63
- # @return [void]
64
- def mark_uploaded
65
- @upload_done_mutex.synchronize do
66
- @uploaded_this_process = true
67
- @upload_done_cv.broadcast
68
- end
69
- end
70
-
71
- # Reset class-level upload state. Test-only.
72
- # @api private
73
- def reset_uploaded_this_process_for_tests!
74
- @upload_done_mutex.synchronize { @uploaded_this_process = false }
75
- end
76
- end
50
+ # Cached unbound Module#singleton_class? dispatched explicitly inside the
51
+ # hot-load TracePoint so user code that overrides `singleton_class?` (e.g.
52
+ # `def self.singleton_class?(arg)`) cannot raise inside the :class hook and
53
+ # abort the user's class definition. Mirrors the cache in Extractor.
54
+ MODULE_SINGLETON_CLASS_PRED = Module.instance_method(:singleton_class?)
55
+ private_constant :MODULE_SINGLETON_CLASS_PRED
77
56
 
78
57
  # Build a new Component if feature is enabled and dependencies met.
79
58
  # @param settings [Configuration::Settings] Tracer settings
@@ -130,6 +109,16 @@ module Datadog
130
109
  @upload_in_progress = false
131
110
  @upload_in_progress_cv = ConditionVariable.new
132
111
  @shutdown = false
112
+ # PID of the owning process: set at construction and re-claimed by start_upload in a forked child. Compared against Process.pid in start_upload and shutdown!
113
+ # to detect forked-child callers, whose inherited @upload_in_progress
114
+ # snapshot is stale: the scheduler thread that would clear it lives
115
+ # only in the parent. See shutdown! for details.
116
+ @owner_pid = Process.pid
117
+
118
+ # Signalled when @last_upload_time advances. wait_for_idle blocks on this
119
+ # so short-lived scripts that trigger an upload can wait for an upload
120
+ # attempt to complete without depending on a one-shot flag.
121
+ @last_upload_time_cv = ConditionVariable.new
133
122
 
134
123
  # Per-instance scheduler state. The scheduler thread is started lazily
135
124
  # on the first start_upload call.
@@ -138,6 +127,15 @@ module Datadog
138
127
  @scheduled_at = nil
139
128
  @scheduler_signaled = false
140
129
  @scheduler_thread = nil
130
+
131
+ # Hot-load coverage state. TracePoint :class hook is installed lazily on
132
+ # the first start_upload call; classes defined after that point are
133
+ # enqueued here and drained by the scheduler on debounce. Distinguishes
134
+ # initial extraction (extract_all) from incremental (per-module extract).
135
+ @hot_load_buffer = []
136
+ @hot_load_buffer_mutex = Mutex.new
137
+ @hot_load_tracepoint = nil
138
+ @initial_extraction_done = false
141
139
  end
142
140
 
143
141
  # Schedule a deferred upload that waits for app boot to complete.
@@ -149,9 +147,10 @@ module Datadog
149
147
  # In non-Rails: triggers start_upload immediately.
150
148
  #
151
149
  # Each Component registers its own callback. Old Components that have
152
- # been shut down short-circuit in start_upload via @shutdown.
153
- # Cross-process deduplication is handled by the class-level
154
- # uploaded_this_process? flag, not by guarding registration.
150
+ # been shut down short-circuit in start_upload via @shutdown. The hot-load
151
+ # hook handles classes loaded after this initial trigger, so under
152
+ # eager_load=false an under-extracted initial upload self-corrects as the
153
+ # app exercises code.
155
154
  #
156
155
  # @return [void]
157
156
  def schedule_deferred_upload
@@ -160,18 +159,8 @@ module Datadog
160
159
  # loaded object (Rails::Application), so a bare `start_upload`
161
160
  # would resolve against it.
162
161
  component = self
163
- logger = @logger
164
162
  ::ActiveSupport.on_load(:after_initialize) do
165
- # Only auto-trigger when Rails has eager-loaded application
166
- # classes during initialization. In dev (eager_load=false)
167
- # there is nothing complete to extract; the auto-deferred
168
- # upload would race with explicit triggers and produce
169
- # under-extracted uploads.
170
- if defined?(::Rails) && ::Rails.application&.config&.eager_load # steep:ignore NoMethod
171
- component.start_upload
172
- else
173
- logger.debug { "symdb: skipping auto-deferred upload (eager_load disabled)" }
174
- end
163
+ component.start_upload
175
164
  end
176
165
  else
177
166
  start_upload
@@ -190,11 +179,20 @@ module Datadog
190
179
  # Thread-safe: can be called concurrently from multiple remote config updates.
191
180
  # @return [void]
192
181
  def start_upload
193
- return if Component.uploaded_this_process?
194
-
195
182
  @scheduler_mutex.synchronize do
196
183
  return if @shutdown
197
184
 
185
+ if @owner_pid != Process.pid
186
+ # Forked child: claim ownership and clear inherited
187
+ # @upload_in_progress. The inherited flag was the parent's
188
+ # snapshot; the parent's scheduler thread does not exist in this
189
+ # process. Any upload starting now is child-owned and must be
190
+ # waited on in shutdown! via the PID-match branch.
191
+ @owner_pid = Process.pid
192
+ @mutex.synchronize { @upload_in_progress = false }
193
+ end
194
+
195
+ install_hot_load_hook
198
196
  @scheduled_at = Datadog::Core::Utils::Time.get_time + EXTRACT_DEBOUNCE_INTERVAL
199
197
  @scheduler_signaled = true
200
198
  @scheduler_cv.signal
@@ -205,45 +203,69 @@ module Datadog
205
203
  @telemetry&.report(e, description: 'symdb: error scheduling upload')
206
204
  end
207
205
 
208
- # Stop symbol upload (cancel the scheduler).
206
+ # Stop symbol upload (cancel the scheduler) and suppress further hot-load
207
+ # extraction. Called when remote config sends upload_symbols: false or
208
+ # deletes the config. Disables the TracePoint :class hook so post-stop
209
+ # class loads don't re-arm the scheduler, clears the hot-load buffer, and
210
+ # resets @initial_extraction_done so a future re-enable performs a fresh
211
+ # extract_all instead of draining an empty buffer.
209
212
  # Thread-safe: can be called concurrently from multiple remote config updates.
213
+ # The TracePoint teardown sits inside the same @scheduler_mutex critical
214
+ # section as the @scheduled_at reset, so it is atomic against a concurrent
215
+ # start_upload (which installs the TracePoint under @scheduler_mutex). Without
216
+ # that, a stop interleaved with a start could leave an enabled TracePoint
217
+ # rooted by the VM after stop_upload returned.
210
218
  # @return [void]
211
219
  def stop_upload
212
220
  @scheduler_mutex.synchronize do
221
+ @hot_load_tracepoint&.disable
222
+ @hot_load_tracepoint = nil
213
223
  @scheduled_at = nil
214
224
  @scheduler_signaled = true
215
225
  @scheduler_cv.signal
216
226
  end
227
+ @hot_load_buffer_mutex.synchronize { @hot_load_buffer.clear }
228
+ @initial_extraction_done = false
217
229
  end
218
230
 
219
- # Block until any Component in this process has finished an extract+upload,
231
+ # Block until this Component finishes an extract+upload after this call,
220
232
  # or until the timeout elapses. Used by short-lived scripts that trigger
221
233
  # an upload via force_upload and need to wait before exiting.
234
+ # Tracks @last_upload_time advance — returns true once any upload attempt
235
+ # completes (success or failure), false on timeout.
222
236
  # @param timeout [Numeric] Maximum seconds to wait
223
237
  # @return [Boolean] true if an upload attempt completed (success or failure); false on timeout
224
238
  def wait_for_idle(timeout: 30)
225
239
  deadline = Datadog::Core::Utils::Time.get_time + timeout
226
- Component.upload_done_mutex.synchronize do
227
- # Read @uploaded_this_process directly: we already hold
228
- # Component.upload_done_mutex here, and uploaded_this_process?
229
- # would try to re-acquire it (non-reentrant), deadlocking.
230
- until Component.instance_variable_get(:@uploaded_this_process)
240
+ @mutex.synchronize do
241
+ start_time = @last_upload_time
242
+ while @last_upload_time == start_time
231
243
  remaining = deadline - Datadog::Core::Utils::Time.get_time
232
244
  return false if remaining <= 0
233
- Component.upload_done_cv.wait(Component.upload_done_mutex, remaining)
245
+ @last_upload_time_cv.wait(@mutex, remaining)
234
246
  end
235
247
  end
236
248
  true
237
249
  end
238
250
 
239
251
  # Shutdown component and cleanup resources.
240
- # Cancels the per-instance scheduler so any pending debounced extraction
241
- # is dropped. Waits for an in-flight extraction to complete before
242
- # returning. Does not touch class-level state, so a sibling Component
243
- # built after shutdown can still upload.
252
+ # Disables the hot-load TracePoint so no events queue for a dead
253
+ # scheduler. Cancels the per-instance scheduler so any pending debounced
254
+ # extraction is dropped. Waits for an in-flight extraction to complete
255
+ # before returning. Does not touch any sibling Components, so a sibling
256
+ # Component built after shutdown can still upload.
257
+ # The TracePoint teardown sits inside the same @scheduler_mutex critical
258
+ # section as the @shutdown flag flip, so it is atomic against a concurrent
259
+ # start_upload (which installs the TracePoint under @scheduler_mutex). Without
260
+ # that, a shutdown interleaved with a start could leave an enabled TracePoint
261
+ # rooted by the VM — class loads would keep growing @hot_load_buffer for the
262
+ # rest of the process lifetime (enqueue_hot_load's @shutdown check skips
263
+ # re-scheduling but only after the buffer push).
244
264
  # @return [void]
245
265
  def shutdown!
246
266
  @scheduler_mutex.synchronize do
267
+ @hot_load_tracepoint&.disable
268
+ @hot_load_tracepoint = nil
247
269
  @shutdown = true
248
270
  @scheduler_signaled = true
249
271
  @scheduler_cv.signal
@@ -253,13 +275,103 @@ module Datadog
253
275
 
254
276
  @mutex.synchronize do
255
277
  if @upload_in_progress
256
- @upload_in_progress_cv.wait(@mutex, 5)
278
+ if Process.pid == @owner_pid
279
+ @upload_in_progress_cv.wait(@mutex, 5)
280
+ else
281
+ # We are in a forked child that inherited this Component but
282
+ # never called start_upload here. The scheduler thread (the
283
+ # only writer that clears @upload_in_progress and signals the
284
+ # cv) lives only in the parent — fork carries only the calling
285
+ # thread, so nothing in this process can ever signal us.
286
+ # Waiting would burn the full 5s timeout for no benefit. Treat
287
+ # the inherited @upload_in_progress as a stale snapshot and
288
+ # proceed; the parent's shutdown! (running in the parent) is
289
+ # authoritative. Child-owned uploads (where start_upload was
290
+ # called in this process) take the PID-match branch above,
291
+ # because start_upload claims @owner_pid for the current
292
+ # process.
293
+ @upload_in_progress = false
294
+ end
257
295
  end
258
296
  end
259
297
 
260
298
  @scope_batcher.shutdown
261
299
  end
262
300
 
301
+ # Reinitialize per-instance state in a forked child process.
302
+ #
303
+ # `Process.fork` copies the parent's memory but only the forking thread
304
+ # survives in the child. Background threads (`@scheduler_thread`) are
305
+ # dead, mutexes and condition variables are copied without owner
306
+ # tracking (orphan-lock risk if the parent held a mutex at the fork
307
+ # instant), and the TracePoint hook is bound to the dead scheduler.
308
+ #
309
+ # State reset (the child does its own initial extraction, then hot-load
310
+ # continues from there):
311
+ # - Hot-load buffer cleared — the child will rediscover via extract_all.
312
+ # - `@initial_extraction_done = false` — child has not extracted yet.
313
+ # - `@hot_load_tracepoint = nil` — `start_upload` reinstalls a fresh one
314
+ # bound to the child's component.
315
+ # - `@scheduler_thread = nil`, `@scheduled_at = nil`,
316
+ # `@scheduler_signaled = false` — scheduler restarts on next
317
+ # `start_upload`.
318
+ # - `@upload_in_progress = false` — parent may have been mid-upload at
319
+ # the fork instant; the child has no upload in flight.
320
+ # - `@scope_batcher` replaced with a fresh instance. The inherited batcher
321
+ # carries the parent's `@uploaded_modules` set, which `add_scope` uses
322
+ # to dedup by scope name. Without a fresh batcher, the child's
323
+ # re-extraction silently drops every scope whose name the parent
324
+ # already uploaded — under `preload_app!` that's most of the app.
325
+ #
326
+ # Mutex/CV reinit (orphan-lock guard):
327
+ # - `@scheduler_mutex`, `@scheduler_cv`, `@mutex`,
328
+ # `@upload_in_progress_cv`, `@last_upload_time_cv`,
329
+ # `@hot_load_buffer_mutex`.
330
+ #
331
+ # Force-upload mode: the parent's scheduled extraction is dead in the
332
+ # child, so re-register the deferred-upload callback. In Rails the
333
+ # `:after_initialize` hook has already fired (initialization happened
334
+ # in the parent), so the on_load block runs immediately and the child
335
+ # schedules its own upload. In non-Rails, this calls `start_upload`
336
+ # directly.
337
+ #
338
+ # Cross-process upload deduplication is intentionally not handled here.
339
+ # Each forked Component does its own initial extraction. Workers in
340
+ # `preload_app! + eager_load=true` deployments hold identical code to
341
+ # the parent — backend dedup of identical-content uploads is the
342
+ # backend's responsibility, not the tracer's.
343
+ #
344
+ # @return [void]
345
+ def after_fork!
346
+ # Disable the inherited TracePoint before dropping the reference: fork
347
+ # copies the enabled TP into the child, where it remains rooted by the
348
+ # VM. Without an explicit disable, every subsequent class load in the
349
+ # child would enqueue through the inherited hook in addition to the
350
+ # fresh hook that start_upload installs.
351
+ @hot_load_tracepoint&.disable
352
+ @hot_load_buffer = []
353
+ @hot_load_buffer_mutex = Mutex.new
354
+ @hot_load_tracepoint = nil
355
+ @initial_extraction_done = false
356
+
357
+ @scheduler_mutex = Mutex.new
358
+ @scheduler_cv = ConditionVariable.new
359
+ @scheduled_at = nil
360
+ @scheduler_signaled = false
361
+ @scheduler_thread = nil
362
+
363
+ @mutex = Mutex.new
364
+ @upload_in_progress = false
365
+ @upload_in_progress_cv = ConditionVariable.new
366
+ @last_upload_time_cv = ConditionVariable.new
367
+
368
+ # Fresh ScopeBatcher: the inherited one carries the parent's
369
+ # @uploaded_modules set, against which add_scope dedups by name.
370
+ @scope_batcher = ScopeBatcher.new(@uploader, logger: @logger)
371
+
372
+ schedule_deferred_upload if @settings.symbol_database.internal.force_upload
373
+ end
374
+
263
375
  private
264
376
 
265
377
  # Check whether the runtime environment supports symbol database upload.
@@ -274,7 +386,7 @@ module Datadog
274
386
  logger.debug { "symdb: not supported on #{RUBY_ENGINE}, skipping" }
275
387
  return false
276
388
  end
277
- if RUBY_VERSION < '2.6'
389
+ if RubyVersion.is?('< 2.6')
278
390
  logger.debug { "symdb: requires Ruby 2.6+, running #{RUBY_VERSION}, skipping" }
279
391
  return false
280
392
  end
@@ -291,7 +403,9 @@ module Datadog
291
403
  end
292
404
 
293
405
  # Scheduler thread main loop. Waits for the debounce window to elapse,
294
- # then runs extract_and_upload exactly once for this Component.
406
+ # then runs extract_and_upload. Loops indefinitely so that hot-load
407
+ # signals fired after the initial upload trigger subsequent incremental
408
+ # uploads.
295
409
  # @return [void]
296
410
  def scheduler_loop
297
411
  loop do
@@ -301,27 +415,31 @@ module Datadog
301
415
 
302
416
  @scheduler_mutex.synchronize do
303
417
  return if @shutdown
304
- return if Component.uploaded_this_process?
305
418
 
306
419
  # Copy to local so Steep narrows `Float?` to `Float` in the else branch.
307
420
  # Steep does not track narrowing on instance variables across nil checks.
308
421
  scheduled_at = @scheduled_at
309
422
  if scheduled_at.nil?
310
- # Nothing scheduled (e.g. stop_upload cleared it). Wait
311
- # indefinitely for a signal, then re-evaluate on next loop.
423
+ # Nothing scheduled (e.g. stop_upload cleared it, or no hot-load
424
+ # events since the last upload). Wait indefinitely for a signal,
425
+ # then re-evaluate on next loop.
312
426
  @scheduler_signaled = false
313
427
  @scheduler_cv.wait(@scheduler_mutex)
314
428
  else
315
429
  remaining = scheduled_at - Datadog::Core::Utils::Time.get_time
316
430
  if remaining > 0
317
431
  # Wait until the debounce deadline. Any signal (start_upload,
318
- # stop_upload, shutdown!) wakes us early; we always re-loop
319
- # and recompute rather than firing immediately on wake.
432
+ # stop_upload, shutdown!, hot-load event) wakes us early; we
433
+ # always re-loop and recompute rather than firing immediately
434
+ # on wake.
320
435
  @scheduler_signaled = false
321
436
  @scheduler_cv.wait(@scheduler_mutex, remaining)
322
437
  else
323
- # Deadline elapsed without further signal — fire after releasing the mutex.
438
+ # Deadline elapsed without further signal — fire after releasing
439
+ # the mutex. Clear @scheduled_at so the next loop iteration
440
+ # waits for the next start_upload or hot-load signal.
324
441
  should_fire = true
442
+ @scheduled_at = nil
325
443
  end
326
444
  end
327
445
  end
@@ -333,20 +451,17 @@ module Datadog
333
451
 
334
452
  # Outside the mutex.
335
453
  return if @shutdown
336
- if Component.uploaded_this_process?
337
- return
338
- end
339
454
 
340
455
  extract_and_upload
341
- Component.mark_uploaded
342
- return
343
456
  end
344
457
  rescue => e
345
458
  @logger.debug { "symdb: scheduler error: #{e.class}: #{e.message}" }
346
459
  @telemetry&.report(e, description: 'symdb: scheduler error')
347
460
  end
348
461
 
349
- # Extract symbols from all loaded modules and upload.
462
+ # Extract symbols and upload. First call runs extract_all (full ObjectSpace
463
+ # walk); subsequent calls drain the hot-load buffer and extract just the
464
+ # newly-loaded modules via Extractor#extract.
350
465
  # @return [void]
351
466
  def extract_and_upload
352
467
  @mutex.synchronize { @upload_in_progress = true }
@@ -355,30 +470,50 @@ module Datadog
355
470
  @logger.trace { "symdb: starting extraction and upload" }
356
471
  start_time = Datadog::Core::Utils::Time.get_time
357
472
 
358
- # Extract symbols from all loaded modules grouped by source file.
359
- # extract_all handles ObjectSpace iteration, filtering, and FQN-based nesting.
360
- file_scopes = @extractor.extract_all
361
473
  extracted_count = 0
362
- file_scopes.each do |scope|
474
+ targetable_count = 0
475
+ consume = lambda do |scope|
363
476
  @scope_batcher.add_scope(scope)
364
477
  extracted_count += 1
478
+ targetable_count += count_targetable_methods_in_scope(scope)
365
479
  log_scope_tree(scope, 0)
366
480
  end
367
481
 
368
- @logger.debug do
369
- extraction_duration = Datadog::Core::Utils::Time.get_time - start_time
370
- targetable_count = count_targetable_methods(file_scopes)
371
- "symdb: extracted #{extracted_count} scopes (#{targetable_count} methods with targetable lines) in #{'%.2f' % extraction_duration}s"
482
+ if @initial_extraction_done
483
+ extract_hot_load_buffer.each(&consume)
484
+ mode_label = "hot-load"
485
+ else
486
+ # Discard any TracePoint events captured between hook install and
487
+ # this initial scan — extract_all walks ObjectSpace which already
488
+ # covers everything loaded at this moment. Anything loaded during
489
+ # or after extract_all stays buffered for the next drain.
490
+ @hot_load_buffer_mutex.synchronize { @hot_load_buffer.clear }
491
+ # Stream form of extract_all yields one FILE scope at a time and frees
492
+ # the per-file intermediate tree as it goes — the full Array<Scope> is
493
+ # never materialized, keeping peak memory bounded for large workspaces.
494
+ @extractor.extract_all(&consume)
495
+ @initial_extraction_done = true
496
+ mode_label = "initial"
372
497
  end
373
498
 
499
+ extraction_duration = Datadog::Core::Utils::Time.get_time - start_time
500
+ @logger.debug { "symdb: #{mode_label} extracted #{extracted_count} scopes (#{targetable_count} methods with targetable lines) in #{'%.2f' % extraction_duration}s" }
501
+
374
502
  # Flush any remaining scopes (triggers upload)
375
503
  @scope_batcher.flush
376
504
 
377
- @last_upload_time = Datadog::Core::Utils::Time.now
378
- @last_upload_scope_count = extracted_count
505
+ @mutex.synchronize do
506
+ @last_upload_time = Datadog::Core::Utils::Time.now
507
+ @last_upload_scope_count = extracted_count
508
+ @last_upload_time_cv.broadcast
509
+ end
379
510
  rescue => e
380
511
  @logger.debug { "symdb: extraction error: #{e.class}: #{e.message}" }
381
512
  @telemetry&.report(e, description: 'symdb: extraction error')
513
+ @mutex.synchronize do
514
+ @last_upload_time = Datadog::Core::Utils::Time.now
515
+ @last_upload_time_cv.broadcast
516
+ end
382
517
  ensure
383
518
  @mutex.synchronize do
384
519
  @upload_in_progress = false
@@ -387,19 +522,92 @@ module Datadog
387
522
  end
388
523
  end
389
524
 
525
+ # Drain the hot-load buffer, dedup by object_id, return the array of
526
+ # FILE scopes from per-module extraction.
527
+ # @return [Array<Scope>]
528
+ def extract_hot_load_buffer
529
+ modules = @hot_load_buffer_mutex.synchronize { @hot_load_buffer.shift(@hot_load_buffer.length) }
530
+ return [] if modules.empty?
531
+
532
+ seen = {}
533
+ modules.each { |mod| seen[mod.object_id] = mod }
534
+ seen.values.map { |mod| @extractor.extract(mod) }.compact
535
+ end
536
+
537
+ # Install the TracePoint :class hook if none is installed (lazy — on the first start_upload, and again on the first start_upload after stop_upload or after_fork! drops it).
538
+ # Hook fires for every class/module body open including reopens; pushes
539
+ # the module onto @hot_load_buffer and signals the scheduler. Singleton
540
+ # classes are filtered for the same reason as in Extractor#extract_all.
541
+ # Must be called from within @scheduler_mutex.synchronize.
542
+ # @return [void]
543
+ def install_hot_load_hook
544
+ return if @hot_load_tracepoint
545
+ component = self
546
+ logger = @logger
547
+ telemetry = @telemetry
548
+ @hot_load_tracepoint = TracePoint.new(:class) do |tp|
549
+ # The :class TracePoint fires inside the customer's class body —
550
+ # any exception that escapes this block surfaces at the customer's
551
+ # `class Foo; ... end` line and breaks their class load. The
552
+ # MODULE_SINGLETON_CLASS_PRED dispatch defends against one specific
553
+ # raise source (user-overridden singleton_class?); this rescue
554
+ # closes the general case. Verified: a raise inside the callback
555
+ # backtraces through `<class:CustomerClass>` in Ruby 3.x.
556
+
557
+ mod = tp.self
558
+ next if MODULE_SINGLETON_CLASS_PRED.bind(mod).call
559
+ component.send(:enqueue_hot_load, mod)
560
+ rescue => e
561
+ # Logger or telemetry can themselves raise (custom logger
562
+ # implementation, telemetry worker in an unexpected state). The
563
+ # :class TracePoint fires inside customer class bodies, so the
564
+ # error boundary must hold even when error reporting fails;
565
+ # nothing useful to do if logging is broken.
566
+ begin
567
+ logger.debug { "symdb: hot-load hook error: #{e.class}: #{e.message}" }
568
+ telemetry&.report(e, description: 'symdb: hot-load hook error')
569
+ rescue
570
+ nil
571
+ end
572
+ end
573
+ @hot_load_tracepoint.enable # steep:ignore NoMethod
574
+ end
575
+
576
+ # Enqueue a hot-loaded module and signal the scheduler.
577
+ # Called from the TracePoint :class block — must be cheap.
578
+ # @param mod [Module]
579
+ # @return [void]
580
+ def enqueue_hot_load(mod)
581
+ @hot_load_buffer_mutex.synchronize { @hot_load_buffer << mod }
582
+ @scheduler_mutex.synchronize do
583
+ return if @shutdown
584
+ # TracePoint#disable does not wait for in-flight callbacks: a :class
585
+ # event firing concurrently with stop_upload can reach here after the
586
+ # hook has been torn down. Without this guard the stale event would
587
+ # re-arm the scheduler, contradicting stop_upload's contract. The
588
+ # buffer push above is harmless — after stop_upload, the next extraction
589
+ # takes the initial path in extract_and_upload, which clears the buffer first.
590
+ return unless @hot_load_tracepoint
591
+ @scheduled_at = Datadog::Core::Utils::Time.get_time + EXTRACT_DEBOUNCE_INTERVAL
592
+ @scheduler_signaled = true
593
+ @scheduler_cv.signal
594
+ end
595
+ end
596
+
390
597
  def log_scope_tree(scope, depth)
391
598
  indent = ' ' * depth
392
599
  @logger.trace { "symdb: #{indent}#{scope.scope_type} #{scope.name}" }
393
600
  scope.scopes&.each { |child| log_scope_tree(child, depth + 1) }
394
601
  end
395
602
 
396
- def count_targetable_methods(file_scopes)
603
+ # Count METHOD scopes with targetable lines inside one FILE scope. Used by
604
+ # extract_and_upload to accumulate the count while streaming, without
605
+ # retaining the Array<Scope> just to compute the total at the end.
606
+ def count_targetable_methods_in_scope(file_scope)
397
607
  count = 0
398
- file_scopes.each do |file_scope|
399
- file_scope.scopes&.each do |class_or_module|
400
- class_or_module.scopes&.each do |method_scope|
401
- count += 1 if method_scope.scope_type == 'METHOD' && method_scope.targetable_lines?
402
- end
608
+ file_scope.scopes&.each do |class_or_module|
609
+ class_or_module.scopes&.each do |method_scope|
610
+ count += 1 if method_scope.scope_type == 'METHOD' && method_scope.targetable_lines?
403
611
  end
404
612
  end
405
613
  count