datadog 2.32.0 → 2.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/ext/datadog_profiling_native_extension/clock_id.h +9 -1
  4. data/ext/datadog_profiling_native_extension/clock_id_from_mach.c +73 -0
  5. data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +1 -1
  6. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +20 -0
  7. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +5 -1
  8. data/ext/datadog_profiling_native_extension/extconf.rb +3 -0
  9. data/ext/datadog_profiling_native_extension/macos_sampler_thread.h +55 -0
  10. data/ext/datadog_profiling_native_extension/stack_recorder.c +3 -9
  11. data/ext/datadog_profiling_native_extension/time_helpers.h +1 -0
  12. data/ext/libdatadog_api/crashtracker.c +2 -0
  13. data/ext/libdatadog_extconf_helpers.rb +1 -1
  14. data/lib/datadog/ai_guard/autoload.rb +10 -0
  15. data/lib/datadog/ai_guard/component.rb +1 -1
  16. data/lib/datadog/ai_guard/contrib/auto_instrument.rb +24 -0
  17. data/lib/datadog/ai_guard/contrib/rack/integration.rb +42 -0
  18. data/lib/datadog/ai_guard/contrib/rack/patcher.rb +26 -0
  19. data/lib/datadog/ai_guard/contrib/rack/request_middleware.rb +83 -0
  20. data/lib/datadog/ai_guard/contrib/rails/integration.rb +41 -0
  21. data/lib/datadog/ai_guard/contrib/rails/patcher.rb +97 -0
  22. data/lib/datadog/ai_guard/evaluation.rb +1 -0
  23. data/lib/datadog/ai_guard/ext.rb +1 -0
  24. data/lib/datadog/ai_guard.rb +8 -0
  25. data/lib/datadog/appsec/component.rb +4 -1
  26. data/lib/datadog/appsec/compressed_json.rb +2 -2
  27. data/lib/datadog/appsec/contrib/aws_lambda/gateway/watcher.rb +75 -0
  28. data/lib/datadog/appsec/contrib/aws_lambda/integration.rb +39 -0
  29. data/lib/datadog/appsec/contrib/aws_lambda/patcher.rb +30 -0
  30. data/lib/datadog/appsec/contrib/aws_lambda/waf_addresses.rb +111 -0
  31. data/lib/datadog/appsec/contrib/rack/ext.rb +1 -1
  32. data/lib/datadog/appsec.rb +1 -0
  33. data/lib/datadog/core/configuration/components.rb +8 -1
  34. data/lib/datadog/core/configuration/settings.rb +16 -1
  35. data/lib/datadog/core/configuration/supported_configurations.rb +12 -0
  36. data/lib/datadog/core/environment/ext.rb +5 -0
  37. data/lib/datadog/core/environment/identity.rb +15 -1
  38. data/lib/datadog/core/environment/process.rb +48 -27
  39. data/lib/datadog/core/environment/socket.rb +13 -0
  40. data/lib/datadog/core/remote/client/capabilities.rb +11 -2
  41. data/lib/datadog/core/remote/transport/http/config.rb +5 -5
  42. data/lib/datadog/core/telemetry/request.rb +0 -2
  43. data/lib/datadog/core/transport/response.rb +1 -1
  44. data/lib/datadog/core/utils/{base64.rb → base64_codec.rb} +3 -2
  45. data/lib/datadog/core/utils/hash.rb +0 -23
  46. data/lib/datadog/core/utils/spawn_monkey_patch.rb +46 -16
  47. data/lib/datadog/data_streams/pathway_context.rb +3 -3
  48. data/lib/datadog/di/code_tracker.rb +43 -22
  49. data/lib/datadog/di/contrib/active_record.rb +6 -2
  50. data/lib/datadog/di/instrumenter.rb +24 -4
  51. data/lib/datadog/di/probe_notification_builder.rb +1 -1
  52. data/lib/datadog/di/remote.rb +4 -4
  53. data/lib/datadog/di/serializer.rb +5 -5
  54. data/lib/datadog/di/utils.rb +42 -14
  55. data/lib/datadog/opentelemetry/configuration/settings.rb +65 -0
  56. data/lib/datadog/opentelemetry/ext.rb +9 -0
  57. data/lib/datadog/opentelemetry/logs.rb +98 -0
  58. data/lib/datadog/opentelemetry/metrics.rb +10 -37
  59. data/lib/datadog/opentelemetry/sdk/configurator.rb +40 -0
  60. data/lib/datadog/opentelemetry/sdk/id_generator.rb +16 -10
  61. data/lib/datadog/opentelemetry/sdk/logs_exporter.rb +37 -0
  62. data/lib/datadog/opentelemetry/signal_configuration.rb +53 -0
  63. data/lib/datadog/opentelemetry.rb +1 -0
  64. data/lib/datadog/profiling/component.rb +0 -1
  65. data/lib/datadog/profiling/stack_recorder.rb +0 -4
  66. data/lib/datadog/symbol_database/component.rb +409 -0
  67. data/lib/datadog/symbol_database/configuration.rb +2 -2
  68. data/lib/datadog/symbol_database/extractor.rb +45 -26
  69. data/lib/datadog/symbol_database/remote.rb +175 -0
  70. data/lib/datadog/symbol_database/scope.rb +16 -12
  71. data/lib/datadog/symbol_database/scope_batcher.rb +288 -0
  72. data/lib/datadog/symbol_database/service_version.rb +15 -6
  73. data/lib/datadog/symbol_database/symbol.rb +6 -3
  74. data/lib/datadog/symbol_database/uploader.rb +65 -8
  75. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +8 -0
  76. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +0 -4
  77. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +0 -4
  78. data/lib/datadog/tracing/contrib/active_support/cache/instrumentation.rb +0 -4
  79. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +0 -5
  80. data/lib/datadog/tracing/contrib/dalli/instrumentation.rb +0 -5
  81. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +0 -5
  82. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +0 -5
  83. data/lib/datadog/tracing/contrib/ethon/multi_patch.rb +0 -8
  84. data/lib/datadog/tracing/contrib/excon/middleware.rb +0 -5
  85. data/lib/datadog/tracing/contrib/ext.rb +2 -3
  86. data/lib/datadog/tracing/contrib/faraday/middleware.rb +0 -5
  87. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +0 -5
  88. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +0 -5
  89. data/lib/datadog/tracing/contrib/http/instrumentation.rb +0 -5
  90. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +0 -5
  91. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +0 -5
  92. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +0 -5
  93. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +0 -5
  94. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +0 -5
  95. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +0 -5
  96. data/lib/datadog/tracing/contrib/presto/instrumentation.rb +0 -5
  97. data/lib/datadog/tracing/contrib/racecar/event.rb +0 -5
  98. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +6 -0
  99. data/lib/datadog/tracing/contrib/rack/ext.rb +27 -0
  100. data/lib/datadog/tracing/contrib/rack/trace_proxy_middleware.rb +117 -1
  101. data/lib/datadog/tracing/contrib/redis/tags.rb +0 -5
  102. data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +0 -5
  103. data/lib/datadog/tracing/contrib/sequel/utils.rb +0 -5
  104. data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +0 -5
  105. data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +0 -13
  106. data/lib/datadog/tracing/distributed/trace_context.rb +0 -28
  107. data/lib/datadog/tracing/metadata/ext.rb +3 -0
  108. data/lib/datadog/tracing/span_operation.rb +13 -0
  109. data/lib/datadog/tracing/trace_operation.rb +22 -0
  110. data/lib/datadog/tracing/tracer.rb +7 -3
  111. data/lib/datadog/version.rb +1 -1
  112. metadata +27 -8
  113. data/ext/datadog_profiling_native_extension/clock_id_noop.c +0 -21
@@ -0,0 +1,175 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Datadog
4
+ module SymbolDatabase
5
+ # Provides remote configuration integration for symbol database.
6
+ #
7
+ # Responsibilities:
8
+ # - Registers with Core::Remote as a receiver for LIVE_DEBUGGING_SYMBOL_DB product
9
+ # - Processes remote config changes (insert/update/delete)
10
+ # - Calls Component.start_upload when upload_symbols: true
11
+ # - Calls Component.stop_upload when config is deleted or updated (an update stops first, then restarts only if upload_symbols is truthy)
12
+ #
13
+ # Flow:
14
+ # 1. Remote config system calls receiver with repository and changes
15
+ # 2. For each change, process_change called
16
+ # 3. parse_config extracts upload_symbols flag
17
+ # 4. enable_upload or disable_upload calls start_upload / stop_upload on the component
18
+ #
19
+ # Created by: Symbol database initialization
20
+ # Accessed by: Core::Remote system when configurations change
21
+ # Requires: Component must exist (accessed via Datadog.send(:components).symbol_database)
22
+ #
23
+ # @api private
24
+ module Remote
25
+ PRODUCT = 'LIVE_DEBUGGING_SYMBOL_DB'
26
+
27
+ class << self
28
+ # Declare products this receiver handles.
29
+ # @return [Array<String>] Product names (a one-element array holding PRODUCT)
30
+ def products
31
+ [PRODUCT]
32
+ end
33
+
34
+ # Declare capabilities for this receiver.
35
+ # @return [Array] Capabilities (always an empty array: none are declared for symbol database)
36
+ def capabilities
37
+ []
38
+ end
39
+
40
+ # Create receivers for remote configuration. The +_telemetry+ argument is ignored; each callback re-reads telemetry via lookup_telemetry.
41
+ # @return [Array<Receiver>] Array of receivers (whatever #receiver returns for its default product list)
42
+ def receivers(_telemetry)
43
+ receiver do |repository, changes| # repository is not used by this callback
44
+ telemetry = lookup_telemetry
45
+ component = begin
46
+ Datadog.send(:components, allow_initialization: false)&.symbol_database
47
+ rescue => e
48
+ Datadog.logger.debug { "symdb: failed to look up component in RC receiver: #{e.class}: #{e.message}" }
49
+ telemetry&.report(e, description: 'symdb: failed to look up component in RC receiver')
50
+ nil
51
+ end
52
+
53
+ if component # nil component (nothing built yet, or the lookup raised): changes are skipped without being marked applied or errored
54
+ changes.each do |change|
55
+ process_change(component, change, telemetry)
56
+ end
57
+ end
58
+ end
59
+ end
60
+
61
+ # Create a single receiver for the product. The given block becomes the receiver's callback.
62
+ # @param products [Array<String>] Product names to match (defaults to [PRODUCT])
63
+ # @return [Array<Receiver>] One-element array wrapping the Receiver built from the product matcher and the block
64
+ def receiver(products = [PRODUCT], &block)
65
+ matcher = Core::Remote::Dispatcher::Matcher::Product.new(products)
66
+ [Core::Remote::Dispatcher::Receiver.new(matcher, &block)]
67
+ end
68
+
69
+ private
70
+
71
+ # Look up the telemetry component for error reporting. Returns nil if the
71
+ # component tree isn't built yet (very early boot) or the lookup raises.
72
+ # `allow_initialization: false` avoids triggering component-tree construction
73
+ # from inside an RC receiver callback.
74
+ # @return [Core::Telemetry::Component, nil] nil when components are unavailable or the lookup raised
75
+ # @api private
76
+ def lookup_telemetry
77
+ Datadog.send(:components, allow_initialization: false)&.telemetry
78
+ rescue # bare rescue: any StandardError from the lookup is swallowed, without logging
79
+ nil
80
+ end
82
+
83
+ # Process a single configuration change and acknowledge it (applied / errored) on its content.
84
+ # @param component [Component] Symbol database component
85
+ # @param change [Change] Configuration change (:insert, :update, :delete); any other type is logged and errored
86
+ # @param telemetry [Core::Telemetry::Component, nil] Telemetry for error reporting
87
+ # @return [void] Exceptions (StandardError) are logged, reported to telemetry and recorded via errored; they are not re-raised
88
+ # @api private
89
+ def process_change(component, change, telemetry)
90
+ case change.type
91
+ when :insert
92
+ # @type var change: ::Datadog::Core::Remote::Configuration::Repository::Change::Inserted
93
+ enable_upload(component, change.content)
94
+ change.content.applied # also reached when enable_upload rejected the config or upload_symbols was falsy
95
+ when :update
96
+ # @type var change: ::Datadog::Core::Remote::Configuration::Repository::Change::Updated
97
+ disable_upload(component) # stop first; the new content alone decides whether upload restarts
98
+ enable_upload(component, change.content)
99
+ change.content.applied # also reached when enable_upload rejected the config or upload_symbols was falsy
100
+ when :delete
101
+ # @type var change: ::Datadog::Core::Remote::Configuration::Repository::Change::Deleted
102
+ disable_upload(component)
103
+ change.previous&.applied
104
+ else
105
+ component.logger.debug { "symdb: unrecognized change type: #{change.type}" }
106
+ # Steep cannot narrow `change.content` from a respond_to? check — it sees
107
+ # the Repository::Change union type where `Deleted` lacks `content`.
108
+ change.content.errored("Unrecognized change type: #{change.type}") if change.respond_to?(:content) # steep:ignore NoMethod
109
+ end
110
+ rescue => e
111
+ component.logger.debug { "symdb: error processing remote config change: #{e.class}: #{e.message}" }
112
+ telemetry&.report(e, description: 'symdb: error processing remote config change')
113
+ # Rescue runs regardless of which branch raised, so use content when the change exposes it and
114
+ # fall back to previous otherwise. Steep cannot narrow the union type from a respond_to? check.
115
+ content_obj = change.respond_to?(:content) ? change.content : change.previous # steep:ignore NoMethod
116
+ content_obj&.errored(e.to_s)
117
+ end
118
+
119
+ # Enable upload if config has upload_symbols: true; otherwise only log. Never calls stop_upload itself.
120
+ # @param component [Component] Symbol database component
121
+ # @param content [Content] Remote config content
122
+ # @return [void]
123
+ # @api private
124
+ def enable_upload(component, content)
125
+ config = parse_config(content, component.logger)
126
+
127
+ unless config
128
+ return # parse_config has already logged why the config was rejected
129
+ end
130
+
131
+ if config['upload_symbols'] # truthiness check; NOTE(review): a non-boolean such as the string "false" would also start upload — confirm the payload is always a JSON boolean
132
+ component.logger.debug { "symdb: upload enabled via remote config" }
133
+ component.start_upload
134
+ else
135
+ component.logger.debug { "symdb: upload disabled in config" }
136
+ end
137
+ end
138
+
139
+ # Disable upload: logs and calls component.stop_upload unconditionally (used by process_change for :update and :delete).
140
+ # @param component [Component] Symbol database component
141
+ # @return [void]
142
+ # @api private
143
+ def disable_upload(component)
144
+ component.logger.debug { "symdb: upload disabled via remote config" }
145
+ component.stop_upload
146
+ end
147
+
148
+ # Parse and validate remote config content (content.data is parsed as JSON).
149
+ # @param content [Content] Remote config content
150
+ # @param logger [SymbolDatabase::Logger] Logger for invalid-config diagnostics
151
+ # @return [Hash, nil] Parsed config, or nil when the JSON is not a Hash or lacks the 'upload_symbols' key
152
+ # @api private
153
+ #
154
+ # JSON::ParserError is intentionally NOT rescued here — it propagates to
155
+ # process_change's rescue, which logs and reports to telemetry. Catching
156
+ # it locally would swallow the error from telemetry observability.
157
+ def parse_config(content, logger)
158
+ config = JSON.parse(content.data)
159
+
160
+ unless config.is_a?(Hash)
161
+ logger.debug { "symdb: invalid config format: expected Hash, got #{config.class}" }
162
+ return nil
163
+ end
164
+
165
+ unless config.key?('upload_symbols') # only presence is checked here; the value is interpreted by enable_upload
166
+ logger.debug { "symdb: missing 'upload_symbols' key in config" }
167
+ return nil
168
+ end
169
+
170
+ config
171
+ end
172
+ end
173
+ end
174
+ end
175
+ end
@@ -22,9 +22,11 @@ module Datadog
22
22
  # - nil: not computed (source unreadable, native/C-extension method)
23
23
  # - []: computed but no executable lines found (comments/whitespace only)
24
24
  # - non-empty: computed, contains executable line ranges
25
- # nil and [] both serialize as injectible_lines?: false on METHOD
26
- # scopes. Key is absent on non-METHOD scopes.
27
- :injectible_lines,
25
+ # nil and [] both serialize as has_injectible_lines: false on METHOD
26
+ # scopes. Key is absent on non-METHOD scopes. The wire format key
27
+ # name keeps the historical spelling +injectible+ for backend
28
+ # compatibility; the Ruby identifier is +targetable_lines+.
29
+ :targetable_lines,
28
30
  :language_specifics, :symbols, :scopes
29
31
 
30
32
  # Initialize a new Scope
@@ -33,7 +35,7 @@ module Datadog
33
35
  # @param source_file [String, nil] Path to source file
34
36
  # @param start_line [Integer, nil] Starting line number (UNKNOWN_MIN_LINE for unknown)
35
37
  # @param end_line [Integer, nil] Ending line number (UNKNOWN_MAX_LINE for entire file)
36
- # @param injectible_lines [Array<Hash>, nil] Ranges of executable lines [{start:, end:}]
38
+ # @param targetable_lines [Array<Hash>, nil] Ranges of executable lines [{start:, end:}]
37
39
  # @param language_specifics [Hash, nil] Ruby-specific metadata
38
40
  # @param symbols [Array<Symbol>, nil] Symbols defined in this scope
39
41
  # @param scopes [Array<Scope>, nil] Nested child scopes
@@ -43,7 +45,7 @@ module Datadog
43
45
  source_file: nil,
44
46
  start_line: nil,
45
47
  end_line: nil,
46
- injectible_lines: nil,
48
+ targetable_lines: nil,
47
49
  language_specifics: nil,
48
50
  symbols: nil,
49
51
  scopes: nil
@@ -53,15 +55,15 @@ module Datadog
53
55
  @source_file = source_file
54
56
  @start_line = start_line
55
57
  @end_line = end_line
56
- @injectible_lines = injectible_lines
58
+ @targetable_lines = targetable_lines
57
59
  @language_specifics = language_specifics || {}
58
60
  @symbols = symbols || []
59
61
  @scopes = scopes || []
60
62
  end
61
63
 
62
- # @return [Boolean] true when injectible_lines is non-nil and non-empty
63
- def injectible_lines?
64
- !injectible_lines.nil? && !injectible_lines.empty?
64
+ # @return [Boolean] true when targetable_lines is non-nil and non-empty
65
+ def targetable_lines?
66
+ !targetable_lines.nil? && !targetable_lines.empty?
65
67
  end
66
68
 
67
69
  # Convert scope to Hash for JSON serialization.
@@ -79,11 +81,13 @@ module Datadog
79
81
  scopes: scopes.empty? ? nil : scopes.map(&:to_h),
80
82
  }
81
83
  h.compact!
82
- # Injectable lines only on METHOD scopes (per spec — not on CLASS/MODULE/FILE).
84
+ # Targetable lines only on METHOD scopes (per spec — not on CLASS/MODULE/FILE).
83
85
  # Always emit has_injectible_lines (even when false) on METHOD scopes.
86
+ # Wire format keeps the historical spelling +injectible+; Ruby identifier
87
+ # is +targetable_lines+.
84
88
  if scope_type == 'METHOD'
85
- h[:has_injectible_lines] = injectible_lines? # steep:ignore ArgumentTypeMismatch
86
- h[:injectible_lines] = injectible_lines if injectible_lines && !injectible_lines.empty?
89
+ h[:has_injectible_lines] = targetable_lines? # steep:ignore ArgumentTypeMismatch
90
+ h[:injectible_lines] = targetable_lines if targetable_lines && !targetable_lines.empty?
87
91
  end
88
92
  h
89
93
  end
@@ -0,0 +1,288 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
+ module Datadog
6
+ module SymbolDatabase
7
+ # Batches extracted scopes and triggers uploads at appropriate times.
8
+ #
9
+ # Implements two upload triggers:
10
+ # 1. Size-based: Immediate upload when 400 scopes collected (MAX_SCOPES)
11
+ # 2. Time-based: Upload after 1 second of inactivity (debounce timer, not periodic)
12
+ #
13
+ # Also provides:
14
+ # - Deduplication: Tracks uploaded module names to prevent re-uploads
15
+ # - File limiting: Stops accepting new scopes once 10,000 (MAX_FILES) have been accepted, to prevent runaway extraction
16
+ # - Thread safety: Mutex-protected state for concurrent access
17
+ #
18
+ # Timer implementation: A single long-lived thread waits on a ConditionVariable
19
+ # with a timeout. Each add_scope signals the CV to reset the deadline. When the
20
+ # timeout expires without a signal, the timer fires and flushes the batch.
21
+ # This avoids creating/destroying a thread per add_scope call.
22
+ #
23
+ # Flow: Extractor → add_scope → (batch or timer) → Uploader
24
+ # Created by: Component (during initialization)
25
+ # Calls: Uploader.upload_scopes when batch full or timer fires
26
+ #
27
+ # @api private
28
+ class ScopeBatcher
29
+ # Maximum scopes per batch before triggering immediate upload.
30
+ # This matches the batch size used in Java and Python tracers to ensure
31
+ # consistent upload behavior across languages.
32
+ MAX_SCOPES = 400
33
+ INACTIVITY_TIMEOUT = 1.0 # seconds without an accepted add_scope before the timer thread flushes the batch
34
+ # Maximum unique files to track before stopping extraction. Enforced in add_scope,
35
+ # which counts one per accepted (non-duplicate) scope. This prevents runaway memory usage in
36
+ # applications with very large numbers of loaded classes (e.g., heavily modularized Rails apps).
37
+ MAX_FILES = 10_000
38
+ # Seconds to wait for the timer thread to exit when joining during
39
+ # shutdown or reset. Bounded so a misbehaving thread cannot hang the
40
+ # caller indefinitely.
41
+ TIMER_JOIN_TIMEOUT = 5
42
+
43
+ # Initialize batching context. No timer thread is started here; add_scope starts it lazily.
44
+ # @param uploader [Uploader] Uploader instance for triggering uploads (must respond to upload_scopes)
45
+ # @param logger [Logger] Logger for diagnostics
46
+ # @param on_upload [Proc, nil] Optional callback called with the uploaded scopes after upload_scopes returns (for testing)
47
+ # @param timer_enabled [Boolean] Enable async timer (default true, false for tests); when false, batches upload only on MAX_SCOPES, flush or shutdown
48
+ def initialize(uploader, logger:, on_upload: nil, timer_enabled: true)
49
+ @uploader = uploader
50
+ @logger = logger
51
+ @on_upload = on_upload
52
+ @timer_enabled = timer_enabled
53
+ @scopes = []
54
+ @mutex = Mutex.new
55
+ @file_count = 0 # scopes accepted so far; compared against MAX_FILES
56
+ @uploaded_modules = Set.new # names of scopes already accepted, used for dedup
57
+
58
+ # Timer state: single long-lived thread + ConditionVariable for debounce.
59
+ # @timer_signaled is set to true on each accepted add_scope and cleared by the
60
+ # timer thread just before each wait. This flag is needed because ConditionVariable#wait
61
+ # does not distinguish signal vs timeout on Ruby < 3.2 (returns self in both
62
+ # cases). The flag gives a portable way to detect whether the wakeup was a
63
+ # signal (reset deadline) or a timeout (fire the timer).
64
+ @timer_cv = ConditionVariable.new
65
+ @timer_thread = nil
66
+ @timer_stopped = false
67
+ @timer_signaled = false
68
+ end
69
+
70
+ # Add a scope to the batch. Scopes past the MAX_FILES limit or with an already-seen name are ignored.
71
+ # Triggers immediate upload if batch reaches MAX_SCOPES (400) scopes.
72
+ # Signals the inactivity timer on every accepted scope, whether or not the batch was full.
73
+ # @param scope [Scope] The scope to add
74
+ # @return [void] Any StandardError is logged at debug level and swallowed
75
+ def add_scope(scope)
76
+ # @type var scopes_to_upload: ::Array[Scope]?
77
+ scopes_to_upload = nil
78
+
79
+ @mutex.synchronize do
80
+ # Check file limit (counts only unique accepted files; duplicates are
81
+ # filtered by the dedup check below and do not consume the budget).
82
+ if @file_count >= MAX_FILES
83
+ @logger.debug { "symdb: file limit (#{MAX_FILES}) reached, ignoring scope: #{scope.name}" }
84
+ return
85
+ end
86
+
87
+ # Check if already uploaded — duplicates do not count toward MAX_FILES
88
+ # so a re-extraction scenario does not exhaust the budget for unique scopes.
89
+ if @uploaded_modules.include?(scope.name)
90
+ @logger.trace { "symdb: skipping #{scope.name}: already uploaded" }
91
+ return
92
+ end
93
+
94
+ # Marked uploaded before perform_upload runs. This is intentional:
95
+ # symdb extraction is a one-shot operation per process — extract_all
96
+ # walks ObjectSpace once and never revisits a module within the same
97
+ # process. The Set deduplicates within a single extraction run, not
98
+ # across upload attempts. Failed uploads are not retried; symbols
99
+ # from a failed batch are lost until the next process restart, by
100
+ # design (matches Python and Go; Java retries via OkHttp, .NET via
101
+ # exponential backoff — Ruby does neither).
102
+ @uploaded_modules.add(scope.name)
103
+ @file_count += 1
104
+
105
+ # Add the scope
106
+ @scopes << scope
107
+
108
+ # Check if batch size reached (AFTER adding)
109
+ if @scopes.size >= MAX_SCOPES
110
+ # Prepare for upload (copy, then clear within mutex so the batch is handed off exactly once)
111
+ scopes_to_upload = @scopes.dup
112
+ @scopes.clear
113
+ end
114
+
115
+ # Signal the timer thread to reset its inactivity deadline.
116
+ # If batch was full, this is harmless — the timer will just
117
+ # re-check and find an empty batch if it fires.
118
+ ensure_timer_running
119
+ @timer_signaled = true
120
+ @timer_cv.signal
121
+ end
122
+
123
+ # Upload outside mutex (if batch was full)
124
+ perform_upload(scopes_to_upload) if scopes_to_upload
125
+ rescue => e
126
+ @logger.debug { "symdb: failed to add scope: #{e.class}: #{e.message}" }
127
+ # Don't propagate, continue operation
128
+ end
129
+
130
+ # Force upload of current batch immediately. No-op when the batch is empty. Also called by timer_loop on inactivity timeout.
131
+ # @return [void]
132
+ def flush
133
+ # @type var scopes_to_upload: ::Array[Scope]?
134
+ scopes_to_upload = nil
135
+
136
+ @mutex.synchronize do
137
+ return if @scopes.empty?
138
+
139
+ scopes_to_upload = @scopes.dup
140
+ @scopes.clear
141
+ end
142
+
143
+ perform_upload(scopes_to_upload) # runs outside the mutex, as in add_scope and shutdown
144
+ end
145
+
146
+ # Shutdown and upload remaining scopes: stops the timer thread (waiting at most TIMER_JOIN_TIMEOUT seconds), then uploads whatever was still batched.
147
+ # @return [void]
148
+ def shutdown
149
+ # @type var scopes_to_upload: ::Array[Scope]?
150
+ scopes_to_upload = nil
151
+ # @type var thread_to_join: ::Thread?
152
+ thread_to_join = nil
153
+
154
+ @mutex.synchronize do
155
+ @timer_stopped = true
156
+ @timer_cv.signal # Wake the timer thread so it exits
157
+
158
+ # Capture the timer thread under the mutex so a concurrent add_scope
159
+ # cannot create a new thread that we'd accidentally orphan when we
160
+ # nil the field below. NOTE(review): an add_scope arriving after this block would
161
+ # start a fresh timer, because ensure_timer_running resets @timer_stopped — confirm callers stop adding first.
162
+ thread_to_join = @timer_thread
163
+ @timer_thread = nil
164
+
165
+ scopes_to_upload = @scopes.dup
166
+ @scopes.clear
167
+ end
168
+
169
+ # Join the timer thread outside the mutex.
170
+ # The thread checks @timer_stopped and exits when signaled.
171
+ thread_to_join&.join(TIMER_JOIN_TIMEOUT)
172
+
173
+ # Upload outside mutex
174
+ perform_upload(scopes_to_upload) unless scopes_to_upload.nil? || scopes_to_upload.empty?
175
+ end
175
+
176
+ # Check if scopes are pending upload (read under the mutex).
177
+ # @return [Boolean] true if scopes waiting in batch
178
+ def scopes_pending?
179
+ @mutex.synchronize { @scopes.any? }
180
+ end
181
+
182
+ # Get current batch size (read under the mutex).
183
+ # @return [Integer] Number of scopes in current batch, i.e. accepted but not yet handed to perform_upload
184
+ def size
185
+ @mutex.synchronize { @scopes.size }
186
+ end
187
+
188
+ private
189
+
190
+ # Reset state: drops batched scopes WITHOUT uploading them, clears the dedup set and file count, and stops
191
+ # the timer thread. Private so production code cannot accidentally invoke it; tests call via +send(:reset)+.
192
+ # @return [void]
193
+ def reset
194
+ # @type var thread_to_join: ::Thread?
195
+ thread_to_join = nil
196
+
197
+ @mutex.synchronize do
198
+ @scopes.clear
199
+ @timer_stopped = true
200
+ @timer_cv.signal
201
+ @file_count = 0
202
+ @uploaded_modules.clear
203
+
204
+ # Capture under the mutex (see shutdown for rationale).
205
+ thread_to_join = @timer_thread
206
+ @timer_thread = nil
207
+ end
208
+
209
+ thread_to_join&.join(TIMER_JOIN_TIMEOUT)
210
+
211
+ # Allow timer to be restarted after reset (the next accepted add_scope starts a new thread)
212
+ @mutex.synchronize do
213
+ @timer_stopped = false
214
+ @timer_signaled = false
215
+ end
216
+ end
217
+
218
+ # Start the timer thread if not already running. Does nothing when the timer is disabled.
219
+ # Must be called from within @mutex.synchronize (it reads and writes timer state without locking).
220
+ # @return [void]
221
+ def ensure_timer_running
222
+ return unless @timer_enabled # timer disabled (e.g. in tests): never spawn a thread
223
+ return if @timer_thread&.alive? # reuse the live thread; a missing or dead one is replaced below
224
+
225
+ @timer_stopped = false
226
+ @timer_signaled = false
227
+
228
+ @timer_thread = Thread.new do
229
+ timer_loop
230
+ end
231
+ end
232
+
233
+ # Timer thread main loop. Waits on the ConditionVariable with a timeout.
234
+ # Each signal resets the deadline (debounce). When the wait times out
235
+ # (no signal within INACTIVITY_TIMEOUT), the batch is flushed.
236
+ #
237
+ # Uses @timer_signaled flag instead of ConditionVariable#wait return value
238
+ # because Ruby < 3.2 returns self for both signal and timeout (no way to
239
+ # distinguish). The flag is set by add_scope before signaling, and cleared
240
+ # by the timer thread just before each wait.
241
+ # @return [void] Returns (ending the thread) once @timer_stopped is set or a StandardError is raised
242
+ def timer_loop
243
+ loop do
244
+ should_flush = false
245
+
246
+ @mutex.synchronize do
247
+ return if @timer_stopped
248
+
249
+ @timer_signaled = false
250
+ @timer_cv.wait(@mutex, INACTIVITY_TIMEOUT)
251
+
252
+ return if @timer_stopped
253
+
254
+ if @timer_signaled
255
+ # Woke up because add_scope signaled — loop back to re-wait with
256
+ # a fresh timeout. This implements the debounce: the timeout resets
257
+ # on every scope addition. `next` leaves only the synchronize block; should_flush is still false.
258
+ next # steep:ignore BreakTypeMismatch
259
+ end
260
+
261
+ # Timed out (no signal within INACTIVITY_TIMEOUT). If there are
262
+ # scopes pending, flush them. Otherwise, loop back and wait again.
263
+ should_flush = !@scopes.empty?
264
+ end
265
+
266
+ if should_flush
267
+ flush # called after the synchronize block has ended; flush takes @mutex itself
268
+ end
269
+ end
270
+ rescue => e
271
+ @logger.debug { "symdb: timer thread error: #{e.class}: #{e.message}" }
272
+ end
273
+
274
+ # Perform upload via uploader, then notify the optional on_upload callback.
275
+ # @param scopes [Array<Scope>] Scopes to upload (nil or empty is a no-op)
276
+ # @return [void] Any StandardError from the uploader or the callback is logged at debug level and swallowed
277
+ def perform_upload(scopes)
278
+ return if scopes.nil? || scopes.empty?
279
+
280
+ @uploader.upload_scopes(scopes)
281
+ @on_upload&.call(scopes) # Notify tests after upload
282
+ rescue => e
283
+ @logger.debug { "symdb: upload failed: #{e.class}: #{e.message}" }
284
+ # Don't propagate; nothing is retried here. NOTE(review): add_scope documents that failed uploads are not retried — confirm whether Uploader retries internally.
285
+ end
286
+ end
287
+ end
288
+ end
@@ -16,23 +16,29 @@ module Datadog
16
16
  #
17
17
  # @api private
18
18
  class ServiceVersion
19
- attr_reader :service, :env, :version, :language, :scopes
19
+ attr_reader :service, :env, :version, :language, :scopes, :upload_id, :batch_num, :final
20
20
 
21
21
  # Initialize a new ServiceVersion
22
22
  # @param service [String] Service name (required, from DD_SERVICE)
23
- # @param env [String] Environment (from DD_ENV, defaults to "none")
24
- # @param version [String] Version (from DD_VERSION, defaults to "none")
23
+ # @param env [String, nil] Environment (from DD_ENV, passed through unchanged)
24
+ # @param version [String, nil] Version (from DD_VERSION, passed through unchanged)
25
25
  # @param scopes [Array<Scope>] Top-level scopes (required)
26
+ # @param upload_id [String, nil] UUID identifying the logical upload (shared by all batches)
27
+ # @param batch_num [Integer, nil] 1-indexed batch number within the upload
28
+ # @param final [Boolean, nil] true if this is the last batch of the upload
26
29
  # @raise [ArgumentError] if service empty or scopes not an array
27
- def initialize(service:, env:, version:, scopes:)
30
+ def initialize(service:, env:, version:, scopes:, upload_id: nil, batch_num: nil, final: nil)
28
31
  raise ArgumentError, 'service is required' if service.nil? || service.empty?
29
32
  raise ArgumentError, 'scopes must be an array' unless scopes.is_a?(Array)
30
33
 
31
34
  @service = service
32
- @env = env.to_s.empty? ? 'none' : env.to_s
33
- @version = version.to_s.empty? ? 'none' : version.to_s
35
+ @env = env
36
+ @version = version
34
37
  @language = 'ruby'
35
38
  @scopes = scopes
39
+ @upload_id = upload_id
40
+ @batch_num = batch_num
41
+ @final = final
36
42
  end
37
43
 
38
44
  # Convert service version to Hash for JSON serialization.
@@ -44,6 +50,9 @@ module Datadog
44
50
  version: version,
45
51
  language: language,
46
52
  scopes: scopes.map(&:to_h),
53
+ upload_id: upload_id,
54
+ batch_num: batch_num,
55
+ final: final,
47
56
  }
48
57
  end
49
58
 
@@ -42,17 +42,20 @@ module Datadog
42
42
  end
43
43
 
44
44
  # Convert symbol to Hash for JSON serialization.
45
- # Removes nil values to reduce payload size.
45
+ # The `type` key is always present per the symdb JSON schema — emit nil
46
+ # when the type cannot be determined (the common case in Ruby since
47
+ # parameters and instance variables carry no declared type).
48
+ # `language_specifics` is omitted when nil to reduce payload size.
46
49
  # @return [Hash] Symbol as hash with symbol keys
47
50
  def to_h
51
+ # @type var h: Hash[::Symbol, untyped]
48
52
  h = {
49
53
  symbol_type: symbol_type,
50
54
  name: name,
51
55
  line: line,
52
56
  type: type,
53
- language_specifics: language_specifics,
54
57
  }
55
- h.compact!
58
+ h[:language_specifics] = language_specifics if language_specifics
56
59
  h
57
60
  end
58
61