datadog 2.32.0 → 2.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/ext/datadog_profiling_native_extension/clock_id.h +9 -1
  4. data/ext/datadog_profiling_native_extension/clock_id_from_mach.c +73 -0
  5. data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +1 -1
  6. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +20 -0
  7. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +5 -1
  8. data/ext/datadog_profiling_native_extension/extconf.rb +3 -0
  9. data/ext/datadog_profiling_native_extension/macos_sampler_thread.h +55 -0
  10. data/ext/datadog_profiling_native_extension/stack_recorder.c +3 -9
  11. data/ext/datadog_profiling_native_extension/time_helpers.h +1 -0
  12. data/ext/libdatadog_api/crashtracker.c +2 -0
  13. data/ext/libdatadog_extconf_helpers.rb +1 -1
  14. data/lib/datadog/ai_guard/autoload.rb +10 -0
  15. data/lib/datadog/ai_guard/component.rb +1 -1
  16. data/lib/datadog/ai_guard/contrib/auto_instrument.rb +24 -0
  17. data/lib/datadog/ai_guard/contrib/rack/integration.rb +42 -0
  18. data/lib/datadog/ai_guard/contrib/rack/patcher.rb +26 -0
  19. data/lib/datadog/ai_guard/contrib/rack/request_middleware.rb +83 -0
  20. data/lib/datadog/ai_guard/contrib/rails/integration.rb +41 -0
  21. data/lib/datadog/ai_guard/contrib/rails/patcher.rb +97 -0
  22. data/lib/datadog/ai_guard/evaluation.rb +1 -0
  23. data/lib/datadog/ai_guard/ext.rb +1 -0
  24. data/lib/datadog/ai_guard.rb +8 -0
  25. data/lib/datadog/appsec/component.rb +4 -1
  26. data/lib/datadog/appsec/compressed_json.rb +2 -2
  27. data/lib/datadog/appsec/contrib/aws_lambda/gateway/watcher.rb +75 -0
  28. data/lib/datadog/appsec/contrib/aws_lambda/integration.rb +39 -0
  29. data/lib/datadog/appsec/contrib/aws_lambda/patcher.rb +30 -0
  30. data/lib/datadog/appsec/contrib/aws_lambda/waf_addresses.rb +111 -0
  31. data/lib/datadog/appsec/contrib/rack/ext.rb +1 -1
  32. data/lib/datadog/appsec.rb +1 -0
  33. data/lib/datadog/core/configuration/components.rb +8 -1
  34. data/lib/datadog/core/configuration/settings.rb +16 -1
  35. data/lib/datadog/core/configuration/supported_configurations.rb +12 -0
  36. data/lib/datadog/core/environment/ext.rb +5 -0
  37. data/lib/datadog/core/environment/identity.rb +15 -1
  38. data/lib/datadog/core/environment/process.rb +48 -27
  39. data/lib/datadog/core/environment/socket.rb +13 -0
  40. data/lib/datadog/core/remote/client/capabilities.rb +11 -2
  41. data/lib/datadog/core/remote/transport/http/config.rb +5 -5
  42. data/lib/datadog/core/telemetry/request.rb +0 -2
  43. data/lib/datadog/core/transport/response.rb +1 -1
  44. data/lib/datadog/core/utils/{base64.rb → base64_codec.rb} +3 -2
  45. data/lib/datadog/core/utils/hash.rb +0 -23
  46. data/lib/datadog/core/utils/spawn_monkey_patch.rb +46 -16
  47. data/lib/datadog/data_streams/pathway_context.rb +3 -3
  48. data/lib/datadog/di/code_tracker.rb +43 -22
  49. data/lib/datadog/di/contrib/active_record.rb +6 -2
  50. data/lib/datadog/di/instrumenter.rb +24 -4
  51. data/lib/datadog/di/probe_notification_builder.rb +1 -1
  52. data/lib/datadog/di/remote.rb +4 -4
  53. data/lib/datadog/di/serializer.rb +5 -5
  54. data/lib/datadog/di/utils.rb +42 -14
  55. data/lib/datadog/opentelemetry/configuration/settings.rb +65 -0
  56. data/lib/datadog/opentelemetry/ext.rb +9 -0
  57. data/lib/datadog/opentelemetry/logs.rb +98 -0
  58. data/lib/datadog/opentelemetry/metrics.rb +10 -37
  59. data/lib/datadog/opentelemetry/sdk/configurator.rb +40 -0
  60. data/lib/datadog/opentelemetry/sdk/id_generator.rb +16 -10
  61. data/lib/datadog/opentelemetry/sdk/logs_exporter.rb +37 -0
  62. data/lib/datadog/opentelemetry/signal_configuration.rb +53 -0
  63. data/lib/datadog/opentelemetry.rb +1 -0
  64. data/lib/datadog/profiling/component.rb +0 -1
  65. data/lib/datadog/profiling/stack_recorder.rb +0 -4
  66. data/lib/datadog/symbol_database/component.rb +409 -0
  67. data/lib/datadog/symbol_database/configuration.rb +2 -2
  68. data/lib/datadog/symbol_database/extractor.rb +45 -26
  69. data/lib/datadog/symbol_database/remote.rb +175 -0
  70. data/lib/datadog/symbol_database/scope.rb +16 -12
  71. data/lib/datadog/symbol_database/scope_batcher.rb +288 -0
  72. data/lib/datadog/symbol_database/service_version.rb +15 -6
  73. data/lib/datadog/symbol_database/symbol.rb +6 -3
  74. data/lib/datadog/symbol_database/uploader.rb +65 -8
  75. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +8 -0
  76. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +0 -4
  77. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +0 -4
  78. data/lib/datadog/tracing/contrib/active_support/cache/instrumentation.rb +0 -4
  79. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +0 -5
  80. data/lib/datadog/tracing/contrib/dalli/instrumentation.rb +0 -5
  81. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +0 -5
  82. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +0 -5
  83. data/lib/datadog/tracing/contrib/ethon/multi_patch.rb +0 -8
  84. data/lib/datadog/tracing/contrib/excon/middleware.rb +0 -5
  85. data/lib/datadog/tracing/contrib/ext.rb +2 -3
  86. data/lib/datadog/tracing/contrib/faraday/middleware.rb +0 -5
  87. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +0 -5
  88. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +0 -5
  89. data/lib/datadog/tracing/contrib/http/instrumentation.rb +0 -5
  90. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +0 -5
  91. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +0 -5
  92. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +0 -5
  93. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +0 -5
  94. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +0 -5
  95. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +0 -5
  96. data/lib/datadog/tracing/contrib/presto/instrumentation.rb +0 -5
  97. data/lib/datadog/tracing/contrib/racecar/event.rb +0 -5
  98. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +6 -0
  99. data/lib/datadog/tracing/contrib/rack/ext.rb +27 -0
  100. data/lib/datadog/tracing/contrib/rack/trace_proxy_middleware.rb +117 -1
  101. data/lib/datadog/tracing/contrib/redis/tags.rb +0 -5
  102. data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +0 -5
  103. data/lib/datadog/tracing/contrib/sequel/utils.rb +0 -5
  104. data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +0 -5
  105. data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +0 -13
  106. data/lib/datadog/tracing/distributed/trace_context.rb +0 -28
  107. data/lib/datadog/tracing/metadata/ext.rb +3 -0
  108. data/lib/datadog/tracing/span_operation.rb +13 -0
  109. data/lib/datadog/tracing/trace_operation.rb +22 -0
  110. data/lib/datadog/tracing/tracer.rb +7 -3
  111. data/lib/datadog/version.rb +1 -1
  112. metadata +27 -8
  113. data/ext/datadog_profiling_native_extension/clock_id_noop.c +0 -21
@@ -0,0 +1,409 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extractor'
4
+ require_relative 'logger'
5
+ require_relative 'scope_batcher'
6
+ require_relative 'uploader'
7
+ require_relative '../core/utils/time'
8
+
9
+ module Datadog
10
+ module SymbolDatabase
11
+ # Main coordinator for symbol database upload functionality.
12
+ #
13
+ # Responsibilities:
14
+ # - Lifecycle management: Initialization, shutdown, upload triggering
15
+ # - Coordination: Connects Extractor → ScopeBatcher → Uploader
16
+ # - Remote config handling: start_upload called by Remote module on config changes
17
+ # - Debounce: extraction is deferred by EXTRACT_DEBOUNCE_INTERVAL seconds so
18
+ # reconfigurations during boot coalesce into a single extraction on the
19
+ # final Component instance.
20
+ #
21
+ # Upload flow:
22
+ # 1. Remote config sends upload_symbols: true (or force_upload mode)
23
+ # 2. start_upload called — schedules extraction EXTRACT_DEBOUNCE_INTERVAL
24
+ # seconds in the future on a per-instance scheduler thread.
25
+ # 3. When the timer fires (no further start_upload calls reset it),
26
+ # extract_and_upload runs: ObjectSpace iteration → Extractor → ScopeBatcher.
27
+ # 4. ScopeBatcher batches and triggers Uploader.
28
+ # 5. A class-level flag is set so subsequent Component instances created via
29
+ # Datadog reconfiguration do not re-upload.
30
+ #
31
+ # Created by: Components#initialize (in Core::Configuration::Components)
32
+ # Accessed by: Remote config receiver via Datadog.send(:components).symbol_database
33
+ # Requires: Remote config enabled (unless force mode)
34
+ #
35
+ # @api private
36
+ class Component
37
+ # Debounce window for extraction. Multiple start_upload calls within this
38
+ # window coalesce; the timer fires once after the window of inactivity.
39
+ # Long enough to absorb reconfiguration cascades during Rails boot.
40
+ EXTRACT_DEBOUNCE_INTERVAL = 5 # seconds
41
+
42
+ # Class-level state: tracks whether any Component instance in this process
43
+ # has performed an extract+upload. Survives Component replacement during
44
+ # Datadog reconfiguration so duplicate uploads are prevented.
45
+ @uploaded_this_process = false
46
+ @upload_done_mutex = Mutex.new
47
+ @upload_done_cv = ConditionVariable.new
48
+
49
+ class << self
50
+ attr_reader :upload_done_mutex, :upload_done_cv
51
+
52
+ # Whether any Component instance in this process has completed an
53
+ # upload. Cross-instance flag — used to dedupe uploads across
54
+ # Component rebuilds within a single Ruby process.
55
+ # @return [Boolean]
56
+ def uploaded_this_process?
57
+ @upload_done_mutex.synchronize { @uploaded_this_process }
58
+ end
59
+
60
+ # Mark the current process as having completed a symbol upload.
61
+ # Called after extract_and_upload returns (its errors are rescued, so this also follows a
62
+ # failed attempt); subsequent start_upload calls on any instance short-circuit.
63
+ # @return [void]
64
+ def mark_uploaded
65
+ @upload_done_mutex.synchronize do
66
+ @uploaded_this_process = true
67
+ @upload_done_cv.broadcast
68
+ end
69
+ end
70
+
71
+ # Reset class-level upload state. Test-only.
72
+ # @api private
73
+ def reset_uploaded_this_process_for_tests!
74
+ @upload_done_mutex.synchronize { @uploaded_this_process = false }
75
+ end
76
+ end
77
+
78
+ # Build a new Component if feature is enabled and dependencies met.
79
+ # @param settings [Configuration::Settings] Tracer settings
80
+ # @param agent_settings [Configuration::AgentSettings] Agent configuration
81
+ # @param logger [Logger] Logger instance
82
+ # @param telemetry [Core::Telemetry::Component, nil] Telemetry component for error reporting
83
+ # @return [Component, nil] Component instance or nil if not enabled/requirements not met
84
+ def self.build(settings, agent_settings, logger, telemetry: nil)
85
+ symdb_logger = SymbolDatabase::Logger.new(settings, logger)
86
+
87
+ unless settings.respond_to?(:symbol_database) && settings.symbol_database.enabled
88
+ symdb_logger.debug("symdb: symbol database upload not enabled, skipping")
89
+ return
90
+ end
91
+
92
+ # Symbol database requires MRI Ruby 2.6+.
93
+ # Configuration accessors (settings.symbol_database.*) remain available on all
94
+ # platforms — only the component (upload) is disabled on unsupported engines/versions.
95
+ # environment_supported? logs the specific reason (engine or version) internally.
96
+ return nil unless environment_supported?(symdb_logger)
97
+
98
+ # Requires remote config (unless force mode)
99
+ if !settings.remote&.enabled && !settings.symbol_database.internal.force_upload
100
+ symdb_logger.debug("symdb: remote config not available and force_upload not set, skipping")
101
+ return nil
102
+ end
103
+
104
+ new(settings, agent_settings, symdb_logger, telemetry: telemetry).tap do |component|
105
+ # Defer extraction if force upload mode — wait for app boot to complete
106
+ component.schedule_deferred_upload if settings.symbol_database.internal.force_upload
107
+ end
108
+ end
109
+
110
+ attr_reader :settings, :logger, :last_upload_time, :last_upload_scope_count, :upload_in_progress
111
+
112
+ # Initialize component.
113
+ # @param settings [Configuration::Settings] Tracer settings
114
+ # @param agent_settings [Configuration::AgentSettings] Agent configuration
115
+ # @param logger [Logger] Logger instance
116
+ # @param telemetry [Core::Telemetry::Component, nil] Telemetry component for error reporting
117
+ def initialize(settings, agent_settings, logger, telemetry: nil)
118
+ @settings = settings
119
+ @agent_settings = agent_settings
120
+ @logger = logger
121
+ @telemetry = telemetry
122
+
123
+ @extractor = Extractor.new(logger: logger, settings: settings)
124
+ @uploader = Uploader.new(settings: settings, agent_settings: agent_settings, logger: logger, telemetry: telemetry)
125
+ @scope_batcher = ScopeBatcher.new(@uploader, logger: logger)
126
+
127
+ @last_upload_time = nil
128
+ @last_upload_scope_count = nil
129
+ @mutex = Mutex.new
130
+ @upload_in_progress = false
131
+ @upload_in_progress_cv = ConditionVariable.new
132
+ @shutdown = false
133
+
134
+ # Per-instance scheduler state. The scheduler thread is started lazily
135
+ # on the first start_upload call.
136
+ @scheduler_mutex = Mutex.new
137
+ @scheduler_cv = ConditionVariable.new
138
+ @scheduled_at = nil
139
+ @scheduler_signaled = false
140
+ @scheduler_thread = nil
141
+ end
142
+
143
+ # Schedule a deferred upload that waits for app boot to complete.
144
+ #
145
+ # In Rails: registers ActiveSupport.on_load(:after_initialize). When the
146
+ # hook has already fired (e.g., this Component was built by a reconfigure
147
+ # after Rails finished initializing), the callback runs immediately.
148
+ #
149
+ # In non-Rails: triggers start_upload immediately.
150
+ #
151
+ # Each Component registers its own callback. Old Components that have
152
+ # been shut down short-circuit in start_upload via @shutdown.
153
+ # Deduplication across Component instances within the process is handled by the class-level
154
+ # uploaded_this_process? flag, not by guarding registration.
155
+ #
156
+ # @return [void]
157
+ def schedule_deferred_upload
158
+ if defined?(::ActiveSupport) && defined?(::Rails::Railtie)
159
+ # Capture self — on_load runs the block via instance_exec on the
160
+ # loaded object (Rails::Application), so a bare `start_upload`
161
+ # would resolve against it.
162
+ component = self
163
+ logger = @logger
164
+ ::ActiveSupport.on_load(:after_initialize) do
165
+ # Only auto-trigger when Rails has eager-loaded application
166
+ # classes during initialization. In dev (eager_load=false)
167
+ # there is nothing complete to extract; the auto-deferred
168
+ # upload would race with explicit triggers and produce
169
+ # under-extracted uploads.
170
+ if defined?(::Rails) && ::Rails.application&.config&.eager_load # steep:ignore NoMethod
171
+ component.start_upload
172
+ else
173
+ logger.debug { "symdb: skipping auto-deferred upload (eager_load disabled)" }
174
+ end
175
+ end
176
+ else
177
+ start_upload
178
+ end
179
+ end
180
+
181
+ # Whether this component has been shut down.
182
+ # @return [Boolean]
183
+ def shutdown?
184
+ @scheduler_mutex.synchronize { @shutdown }
185
+ end
186
+
187
+ # Schedule symbol upload (triggered by remote config or force mode).
188
+ # The actual extraction is debounced by EXTRACT_DEBOUNCE_INTERVAL seconds —
189
+ # subsequent calls within the window restart the timer.
190
+ # Thread-safe: can be called concurrently from multiple remote config updates.
191
+ # @return [void]
192
+ def start_upload
193
+ return if Component.uploaded_this_process?
194
+
195
+ @scheduler_mutex.synchronize do
196
+ return if @shutdown
197
+
198
+ @scheduled_at = Datadog::Core::Utils::Time.get_time + EXTRACT_DEBOUNCE_INTERVAL
199
+ @scheduler_signaled = true
200
+ @scheduler_cv.signal
201
+ ensure_scheduler_thread
202
+ end
203
+ rescue => e
204
+ @logger.debug { "symdb: error scheduling upload: #{e.class}: #{e.message}" }
205
+ @telemetry&.report(e, description: 'symdb: error scheduling upload')
206
+ end
207
+
208
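+ # Stop symbol upload: clears any pending scheduled extraction and signals the scheduler thread (if one is waiting), which then idles until the next start_upload.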
209
+ # Thread-safe: can be called concurrently from multiple remote config updates.
210
+ # @return [void]
211
+ def stop_upload
212
+ @scheduler_mutex.synchronize do
213
+ @scheduled_at = nil
214
+ @scheduler_signaled = true
215
+ @scheduler_cv.signal
216
+ end
217
+ end
218
+
219
+ # Block until any Component in this process has finished an extract+upload,
220
+ # or until the timeout elapses. Used by short-lived scripts that trigger
221
+ # an upload via force_upload and need to wait before exiting.
222
+ # @param timeout [Numeric] Maximum seconds to wait
223
+ # @return [Boolean] true if an upload completed; false on timeout
224
+ def wait_for_idle(timeout: 30)
225
+ deadline = Datadog::Core::Utils::Time.get_time + timeout
226
+ Component.upload_done_mutex.synchronize do
227
+ # Read @uploaded_this_process directly: we already hold
228
+ # Component.upload_done_mutex here, and uploaded_this_process?
229
+ # would try to re-acquire it (non-reentrant), deadlocking.
230
+ until Component.instance_variable_get(:@uploaded_this_process)
231
+ remaining = deadline - Datadog::Core::Utils::Time.get_time
232
+ return false if remaining <= 0
233
+ Component.upload_done_cv.wait(Component.upload_done_mutex, remaining)
234
+ end
235
+ end
236
+ true
237
+ end
238
+
239
+ # Shutdown component and cleanup resources.
240
+ # Cancels the per-instance scheduler so any pending debounced extraction
241
+ # is dropped. Waits, with 5-second timeouts, for an in-flight extraction to complete before
242
+ # returning. Does not touch class-level state, so a sibling Component
243
+ # built after shutdown can still upload.
244
+ # @return [void]
245
+ def shutdown!
246
+ @scheduler_mutex.synchronize do
247
+ @shutdown = true
248
+ @scheduler_signaled = true
249
+ @scheduler_cv.signal
250
+ end
251
+ @scheduler_thread&.join(5)
252
+ @scheduler_thread = nil
253
+
254
+ @mutex.synchronize do
255
+ if @upload_in_progress
256
+ @upload_in_progress_cv.wait(@mutex, 5)
257
+ end
258
+ end
259
+
260
+ @scope_batcher.shutdown
261
+ end
262
+
263
+ private
264
+
265
+ # Check whether the runtime environment supports symbol database upload.
266
+ # Only MRI Ruby 2.6+ is supported. JRuby and TruffleRuby are not supported
267
+ # because ObjectSpace iteration and Method#source_location behave differently.
268
+ # Configuration accessors remain available on all platforms — this only gates
269
+ # the component (upload) itself.
270
+ # @param logger [Logger]
271
+ # @return [Boolean]
272
+ def self.environment_supported?(logger)
273
+ if RUBY_ENGINE != 'ruby'
274
+ logger.debug { "symdb: not supported on #{RUBY_ENGINE}, skipping" }
275
+ return false
276
+ end
277
+ if RUBY_VERSION < '2.6'
278
+ logger.debug { "symdb: requires Ruby 2.6+, running #{RUBY_VERSION}, skipping" }
279
+ return false
280
+ end
281
+ true
282
+ end
283
+ private_class_method :environment_supported?
284
+
285
+ # Start the scheduler thread if not already running.
286
+ # Must be called from within @scheduler_mutex.synchronize.
287
+ # @return [void]
288
+ def ensure_scheduler_thread
289
+ return if @scheduler_thread&.alive?
290
+ @scheduler_thread = Thread.new { scheduler_loop }
291
+ end
292
+
293
+ # Scheduler thread main loop. Waits for the debounce window to elapse,
294
+ # then runs extract_and_upload at most once for this Component (it exits without running on shutdown or when an upload was already recorded for the process).
295
+ # @return [void]
296
+ def scheduler_loop
297
+ loop do
298
+ # should_fire = true means the debounce deadline elapsed without further
299
+ # signals; extract_and_upload runs once after the mutex is released.
300
+ should_fire = false
301
+
302
+ @scheduler_mutex.synchronize do
303
+ return if @shutdown
304
+ return if Component.uploaded_this_process?
305
+
306
+ # Copy to local so Steep narrows `Float?` to `Float` in the else branch.
307
+ # Steep does not track narrowing on instance variables across nil checks.
308
+ scheduled_at = @scheduled_at
309
+ if scheduled_at.nil?
310
+ # Nothing scheduled (e.g. stop_upload cleared it). Wait
311
+ # indefinitely for a signal, then re-evaluate on next loop.
312
+ @scheduler_signaled = false
313
+ @scheduler_cv.wait(@scheduler_mutex)
314
+ else
315
+ remaining = scheduled_at - Datadog::Core::Utils::Time.get_time
316
+ if remaining > 0
317
+ # Wait until the debounce deadline. Any signal (start_upload,
318
+ # stop_upload, shutdown!) wakes us early; we always re-loop
319
+ # and recompute rather than firing immediately on wake.
320
+ @scheduler_signaled = false
321
+ @scheduler_cv.wait(@scheduler_mutex, remaining)
322
+ else
323
+ # Deadline elapsed without further signal — fire after releasing the mutex.
324
+ should_fire = true
325
+ end
326
+ end
327
+ end
328
+
329
+ # `next` inside `synchronize` only exits the synchronize block — not the
330
+ # surrounding loop. Use an explicit flag so the loop only fires
331
+ # extract_and_upload when the debounce deadline has actually elapsed.
332
+ next unless should_fire
333
+
334
+ # Outside the mutex.
335
+ return if @shutdown
336
+ if Component.uploaded_this_process?
337
+ return
338
+ end
339
+
340
+ extract_and_upload
341
+ Component.mark_uploaded
342
+ return
343
+ end
344
+ rescue => e
345
+ @logger.debug { "symdb: scheduler error: #{e.class}: #{e.message}" }
346
+ @telemetry&.report(e, description: 'symdb: scheduler error')
347
+ end
348
+
349
+ # Extract symbols from all loaded modules and upload.
350
+ # @return [void]
351
+ def extract_and_upload
352
+ @mutex.synchronize { @upload_in_progress = true }
353
+
354
+ begin
355
+ @logger.trace { "symdb: starting extraction and upload" }
356
+ start_time = Datadog::Core::Utils::Time.get_time
357
+
358
+ # Extract symbols from all loaded modules grouped by source file.
359
+ # extract_all handles ObjectSpace iteration, filtering, and FQN-based nesting.
360
+ file_scopes = @extractor.extract_all
361
+ extracted_count = 0
362
+ file_scopes.each do |scope|
363
+ @scope_batcher.add_scope(scope)
364
+ extracted_count += 1
365
+ log_scope_tree(scope, 0)
366
+ end
367
+
368
+ @logger.debug do
369
+ extraction_duration = Datadog::Core::Utils::Time.get_time - start_time
370
+ targetable_count = count_targetable_methods(file_scopes)
371
+ "symdb: extracted #{extracted_count} scopes (#{targetable_count} methods with targetable lines) in #{'%.2f' % extraction_duration}s"
372
+ end
373
+
374
+ # Flush any remaining scopes (triggers upload)
375
+ @scope_batcher.flush
376
+
377
+ @last_upload_time = Datadog::Core::Utils::Time.now
378
+ @last_upload_scope_count = extracted_count
379
+ rescue => e
380
+ @logger.debug { "symdb: extraction error: #{e.class}: #{e.message}" }
381
+ @telemetry&.report(e, description: 'symdb: extraction error')
382
+ ensure
383
+ @mutex.synchronize do
384
+ @upload_in_progress = false
385
+ @upload_in_progress_cv.signal
386
+ end
387
+ end
388
+ end
389
+
390
+ def log_scope_tree(scope, depth)
391
+ indent = ' ' * depth
392
+ @logger.trace { "symdb: #{indent}#{scope.scope_type} #{scope.name}" }
393
+ scope.scopes&.each { |child| log_scope_tree(child, depth + 1) }
394
+ end
395
+
396
+ def count_targetable_methods(file_scopes)
397
+ count = 0
398
+ file_scopes.each do |file_scope|
399
+ file_scope.scopes&.each do |class_or_module|
400
+ class_or_module.scopes&.each do |method_scope|
401
+ count += 1 if method_scope.scope_type == 'METHOD' && method_scope.targetable_lines?
402
+ end
403
+ end
404
+ end
405
+ count
406
+ end
407
+ end
408
+ end
409
+ end
@@ -7,7 +7,7 @@ module Datadog
7
7
  # Configuration settings for symbol database upload feature.
8
8
  #
9
9
  # Public environment variable:
10
- # - DD_SYMBOL_DATABASE_UPLOAD_ENABLED (default: true) - Feature gate
10
+ # - DD_SYMBOL_DATABASE_UPLOAD_ENABLED (default: false) - Feature gate
11
11
  #
12
12
  # Extended into: Core::Configuration::Settings (via extend)
13
13
  # Accessed as: Datadog.configuration.symbol_database.enabled
@@ -31,7 +31,7 @@ module Datadog
31
31
  option :enabled do |o|
32
32
  o.type :bool
33
33
  o.env 'DD_SYMBOL_DATABASE_UPLOAD_ENABLED'
34
- o.default true
34
+ o.default false
35
35
  end
36
36
 
37
37
  # Settings in the 'internal' group are for internal Datadog
@@ -9,9 +9,9 @@ module Datadog
9
9
  module SymbolDatabase
10
10
  # Extracts symbol metadata from loaded Ruby modules and classes via introspection.
11
11
  #
12
- # Instance created by Component with injected dependencies (logger, settings,
13
- # telemetry). All methods are instance methods accessing @logger, @settings,
14
- # @telemetry directly — no parameter threading needed.
12
+ # Instance created by Component with injected dependencies (logger, settings).
13
+ # All methods are instance methods accessing @logger, @settings directly —
14
+ # no parameter threading needed.
15
15
  #
16
16
  # Uses Ruby's reflection APIs (Module#constants, Class#instance_methods, Method#parameters)
17
17
  # to build hierarchical Scope structures representing code organization.
@@ -46,10 +46,9 @@ module Datadog
46
46
  # for post-hoc diagnosis, return nil or empty array. One bad method/module
47
47
  # doesn't kill the entire class extraction.
48
48
  #
49
- # 3. **Top-level entry rescues** (`rescue => e` with logging + telemetry):
49
+ # 3. **Top-level entry rescues** (`rescue => e` with logging):
50
50
  # extract() and extract_all() are the error boundaries. Any exception that
51
- # escapes layers 1-2 is caught here, logged, and tracked via telemetry.
52
- # These are the only rescue blocks that increment telemetry counters.
51
+ # escapes layers 1-2 is caught here and logged.
53
52
  #
54
53
  # @api private
55
54
  class Extractor
@@ -71,11 +70,11 @@ module Datadog
71
70
  EXCLUDED_COMMON_MODULES = ['Kernel', 'PP::', 'JSON::', 'Enumerable', 'Comparable'].freeze
72
71
 
73
72
  # RubyVM::InstructionSequence#trace_points event types included when
74
- # computing injectable lines on METHOD scopes.
73
+ # computing targetable lines on METHOD scopes.
75
74
  # :line — any line with executable bytecode (primary line probe target)
76
75
  # :return — last expression before method returns (DI instruments return events)
77
76
  # :call excluded — method entry is handled by method probes, not line probes
78
- INJECTABLE_LINE_EVENTS = [:line, :return].freeze
77
+ TARGETABLE_LINE_EVENTS = [:line, :return].freeze
79
78
 
80
79
  # Cached unbound Module#singleton_class? — dispatched explicitly so user classes
81
80
  # that define their own `singleton_class?` (e.g. with required arguments) cannot
@@ -85,13 +84,16 @@ module Datadog
85
84
  MODULE_SINGLETON_CLASS_PRED = Module.instance_method(:singleton_class?)
86
85
  private_constant :MODULE_SINGLETON_CLASS_PRED
87
86
 
87
+ # Cached UnboundMethod for Module#name — avoids resolving it on every
88
+ # safe_mod_name call. Some classes override .name (e.g. Faker::Travel::Airport),
89
+ # so we bind the original Module#name to get the real module name safely.
90
+ MODULE_NAME = Module.instance_method(:name)
91
+
88
92
  # @param logger [Logger] Logger instance (SymbolDatabase::Logger facade or compatible)
89
93
  # @param settings [Configuration::Settings] Tracer settings
90
- # @param telemetry [Telemetry, nil] Optional telemetry for metrics
91
- def initialize(logger:, settings:, telemetry: nil)
94
+ def initialize(logger:, settings:)
92
95
  @logger = logger
93
96
  @settings = settings
94
- @telemetry = telemetry
95
97
  end
96
98
 
97
99
  # Extract symbols from a single module or class.
@@ -125,7 +127,6 @@ module Datadog
125
127
  wrap_in_file_scope(source_file, [inner_scope])
126
128
  rescue => e
127
129
  @logger.debug { "symdb: failed to extract #{mod_name || '<unknown>'}: #{e.class}: #{e.message}" }
128
- @telemetry&.inc('tracers', 'symbol_database.extract_error', 1)
129
130
  nil
130
131
  end
131
132
 
@@ -147,7 +148,6 @@ module Datadog
147
148
  convert_trees_to_scopes(file_trees)
148
149
  rescue => e
149
150
  @logger.debug { "symdb: error in extract_all: #{e.class}: #{e.message}" }
150
- @telemetry&.inc('tracers', 'symbol_database.extract_all_error', 1)
151
151
  []
152
152
  end
153
153
 
@@ -159,7 +159,7 @@ module Datadog
159
159
  # @param mod [Module] The module
160
160
  # @return [String, nil] Module name or nil
161
161
  def safe_mod_name(mod)
162
- Module.instance_method(:name).bind(mod).call
162
+ MODULE_NAME.bind(mod).call
163
163
  rescue => e
164
164
  @logger.debug { "symdb: safe_mod_name failed: #{e.class}: #{e.message}" }
165
165
  nil
@@ -399,7 +399,7 @@ module Datadog
399
399
  location = method.source_location
400
400
  next unless location && location[0]
401
401
  starts << location[1]
402
- _ranges, method_end = extract_injectable_lines(method, location[1])
402
+ _ranges, method_end = extract_targetable_lines(method, location[1])
403
403
  ends << method_end
404
404
  end
405
405
 
@@ -491,7 +491,7 @@ module Datadog
491
491
  source_file, line = location
492
492
  return nil unless user_code_path?(source_file) # Skip gem/stdlib methods
493
493
 
494
- injectable_lines, end_line = extract_injectable_lines(method, line)
494
+ targetable_lines, end_line = extract_targetable_lines(method, line)
495
495
 
496
496
  Scope.new(
497
497
  scope_type: 'METHOD',
@@ -499,7 +499,7 @@ module Datadog
499
499
  source_file: source_file,
500
500
  start_line: line,
501
501
  end_line: end_line,
502
- injectible_lines: injectable_lines,
502
+ targetable_lines: targetable_lines,
503
503
  language_specifics: {
504
504
  visibility: method_visibility(klass, method_name),
505
505
  method_type: method_type.to_s,
@@ -526,29 +526,29 @@ module Datadog
526
526
  end
527
527
  end
528
528
 
529
- # Extract injectable lines and end_line from a method's bytecode.
529
+ # Extract targetable lines and end_line from a method's bytecode.
530
530
  # Returns [ranges, end_line] where ranges is an array of {start:, end:} hashes
531
531
  # or nil if iseq is unavailable (C-extension methods).
532
532
  # @param method [Method, UnboundMethod] The method
533
533
  # @param start_line [Integer] Fallback end_line if iseq unavailable
534
534
  # @return [Array(Array<Hash>, Integer), Array(nil, Integer)]
535
- def extract_injectable_lines(method, start_line)
535
+ def extract_targetable_lines(method, start_line)
536
536
  iseq = RubyVM::InstructionSequence.of(method) # steep:ignore
537
537
  unless iseq
538
- @logger.debug { "symdb: no iseq for #{method.name} (C extension or native), skipping injectable lines" }
538
+ @logger.debug { "symdb: no iseq for #{method.name} (C extension or native), skipping targetable lines" }
539
539
  return [nil, start_line]
540
540
  end
541
541
 
542
542
  lines = iseq.trace_points
543
- .select { |_, event| INJECTABLE_LINE_EVENTS.include?(event) }
543
+ .select { |_, event| TARGETABLE_LINE_EVENTS.include?(event) }
544
544
  .map(&:first)
545
545
  .uniq
546
546
  .sort
547
547
 
548
548
  end_line = lines.max || start_line
549
- ranges = build_injectable_ranges(lines)
549
+ ranges = build_targetable_ranges(lines)
550
550
  result = ranges.empty? ? nil : ranges
551
- @logger.debug { "symdb: #{method.name} injectable lines: #{result ? "#{ranges.size} range(s), lines #{lines.first}..#{lines.last}" : 'none (no matching events)'}" }
551
+ @logger.debug { "symdb: #{method.name} targetable lines: #{result ? "#{ranges.size} range(s), lines #{lines.first}..#{lines.last}" : 'none (no matching events)'}" }
552
552
  [result, end_line]
553
553
  end
554
554
 
@@ -556,7 +556,7 @@ module Datadog
556
556
  # [4, 5, 6, 8, 10, 11] => [{start: 4, end: 6}, {start: 8, end: 8}, {start: 10, end: 11}]
557
557
  # @param lines [Array<Integer>] Sorted, deduplicated line numbers
558
558
  # @return [Array<Hash>] Array of {start:, end:} range hashes
559
- def build_injectable_ranges(lines)
559
+ def build_targetable_ranges(lines)
560
560
  return [] if lines.empty?
561
561
 
562
562
  ranges = []
@@ -614,10 +614,26 @@ module Datadog
614
614
 
615
615
  # ── extract_all helpers ──────────────────────────────────────────────
616
616
 
617
+ # Sleep between chunks of modules processed in collect_extractable_modules so
618
+ # request-handling threads have guaranteed CPU time while extraction is in
619
+ # flight. Unlike Thread.pass (which only offers the GVL among runnable
620
+ # threads and leaves the extractor immediately re-runnable), sleep removes
621
+ # the extractor thread from the runnable set for a fixed duration, capping
622
+ # its CPU share at sleep_work_ratio regardless of GVL scheduling.
623
+ #
624
+ # The cadence is measured in modules that pass the singleton-class fast-path
625
+ # skip — singleton classes are discarded in microseconds and counting them
626
+ # would add wall-clock delay disproportionate to the work being done (e.g.
627
+ # on heavily monkey-patched processes that retain large singleton chains).
628
+ SLEEP_EVERY_N_MODULES = 100
629
+ SLEEP_SECONDS = 0.001
630
+ private_constant :SLEEP_EVERY_N_MODULES, :SLEEP_SECONDS
631
+
617
632
  # Pass 1: Collect all extractable modules with methods grouped by source file.
618
633
  # @return [Hash] { mod_name => { mod:, methods_by_file: { path => [{name:, method:, type:}] } } }
619
634
  def collect_extractable_modules
620
635
  entries = {}
636
+ seen = 0
621
637
 
622
638
  ObjectSpace.each_object(Module) do |mod|
623
639
  # Singleton classes (per-object metaclasses) are never user-code classes.
@@ -629,6 +645,9 @@ module Datadog
629
645
  # processes. Ruby 2.7+ optimized this path; the skip is a no-op there.
630
646
  next if MODULE_SINGLETON_CLASS_PRED.bind(mod).call
631
647
 
648
+ seen += 1
649
+ sleep SLEEP_SECONDS if (seen % SLEEP_EVERY_N_MODULES).zero?
650
+
632
651
  mod_name = safe_mod_name(mod)
633
652
  next unless mod_name
634
653
  next unless user_code_module?(mod)
@@ -840,7 +859,7 @@ module Datadog
840
859
 
841
860
  source_file, line = location
842
861
 
843
- injectable_lines, end_line = extract_injectable_lines(method, line)
862
+ targetable_lines, end_line = extract_targetable_lines(method, line)
844
863
 
845
864
  Scope.new(
846
865
  scope_type: 'METHOD',
@@ -848,7 +867,7 @@ module Datadog
848
867
  source_file: source_file,
849
868
  start_line: line,
850
869
  end_line: end_line,
851
- injectible_lines: injectable_lines,
870
+ targetable_lines: targetable_lines,
852
871
  language_specifics: {
853
872
  visibility: klass ? method_visibility(klass, method_name) : 'public', # steep:ignore
854
873
  method_type: 'instance',