datadog 2.31.0 → 2.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/ext/datadog_profiling_native_extension/clock_id.h +9 -1
  3. data/ext/datadog_profiling_native_extension/clock_id_from_mach.c +73 -0
  4. data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +1 -1
  5. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +17 -7
  6. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +16 -5
  7. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +6 -0
  8. data/ext/datadog_profiling_native_extension/extconf.rb +8 -4
  9. data/ext/datadog_profiling_native_extension/http_transport.c +10 -5
  10. data/ext/datadog_profiling_native_extension/stack_recorder.c +3 -9
  11. data/ext/datadog_profiling_native_extension/time_helpers.h +1 -0
  12. data/ext/libdatadog_api/crashtracker.c +2 -0
  13. data/ext/libdatadog_api/di.c +48 -0
  14. data/ext/libdatadog_api/extconf.rb +7 -4
  15. data/ext/libdatadog_extconf_helpers.rb +38 -1
  16. data/lib/datadog/ai_guard/autoload.rb +10 -0
  17. data/lib/datadog/ai_guard/component.rb +1 -1
  18. data/lib/datadog/ai_guard/configuration.rb +105 -2
  19. data/lib/datadog/ai_guard/contrib/auto_instrument.rb +24 -0
  20. data/lib/datadog/ai_guard/contrib/rack/integration.rb +42 -0
  21. data/lib/datadog/ai_guard/contrib/rack/patcher.rb +26 -0
  22. data/lib/datadog/ai_guard/contrib/rack/request_middleware.rb +83 -0
  23. data/lib/datadog/ai_guard/contrib/rails/integration.rb +41 -0
  24. data/lib/datadog/ai_guard/contrib/rails/patcher.rb +97 -0
  25. data/lib/datadog/ai_guard/evaluation.rb +2 -0
  26. data/lib/datadog/ai_guard/ext.rb +2 -0
  27. data/lib/datadog/ai_guard.rb +8 -0
  28. data/lib/datadog/appsec/autoload.rb +1 -1
  29. data/lib/datadog/appsec/component.rb +1 -1
  30. data/lib/datadog/appsec/configuration.rb +414 -1
  31. data/lib/datadog/appsec/contrib/aws_lambda/gateway/watcher.rb +75 -0
  32. data/lib/datadog/appsec/contrib/aws_lambda/integration.rb +39 -0
  33. data/lib/datadog/appsec/contrib/aws_lambda/patcher.rb +30 -0
  34. data/lib/datadog/appsec/contrib/aws_lambda/waf_addresses.rb +111 -0
  35. data/lib/datadog/appsec/contrib/devise/patches/signin_tracking_patch.rb +2 -1
  36. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +1 -1
  37. data/lib/datadog/appsec/contrib/rails/patcher.rb +2 -2
  38. data/lib/datadog/appsec/metrics/telemetry.rb +13 -1
  39. data/lib/datadog/appsec/security_engine/runner.rb +1 -1
  40. data/lib/datadog/appsec/trace_keeper.rb +18 -6
  41. data/lib/datadog/appsec/utils/http/url_encoded.rb +2 -2
  42. data/lib/datadog/appsec.rb +1 -0
  43. data/lib/datadog/core/configuration/components.rb +1 -1
  44. data/lib/datadog/core/configuration/settings.rb +13 -0
  45. data/lib/datadog/core/configuration/supported_configurations.rb +4 -0
  46. data/lib/datadog/core/configuration.rb +1 -1
  47. data/lib/datadog/core/contrib/rails/utils.rb +1 -1
  48. data/lib/datadog/core/crashtracking/component.rb +3 -3
  49. data/lib/datadog/core/diagnostics/environment_logger.rb +3 -1
  50. data/lib/datadog/core/environment/container.rb +2 -2
  51. data/lib/datadog/core/environment/ext.rb +1 -0
  52. data/lib/datadog/core/environment/socket.rb +13 -0
  53. data/lib/datadog/core/feature_flags.rb +1 -1
  54. data/lib/datadog/core/metrics/client.rb +5 -5
  55. data/lib/datadog/core/remote/client.rb +1 -1
  56. data/lib/datadog/core/remote/component.rb +2 -2
  57. data/lib/datadog/core/runtime/metrics.rb +1 -1
  58. data/lib/datadog/core/telemetry/emitter.rb +1 -1
  59. data/lib/datadog/core/telemetry/event/app_started.rb +2 -2
  60. data/lib/datadog/core/transport/http.rb +2 -0
  61. data/lib/datadog/core/utils.rb +1 -1
  62. data/lib/datadog/core/workers/async.rb +1 -1
  63. data/lib/datadog/core.rb +1 -1
  64. data/lib/datadog/data_streams/configuration.rb +40 -1
  65. data/lib/datadog/data_streams/pathway_context.rb +1 -1
  66. data/lib/datadog/data_streams/processor.rb +1 -1
  67. data/lib/datadog/data_streams.rb +1 -1
  68. data/lib/datadog/di/base.rb +8 -5
  69. data/lib/datadog/di/code_tracker.rb +179 -1
  70. data/lib/datadog/di/component.rb +1 -1
  71. data/lib/datadog/di/configuration.rb +235 -2
  72. data/lib/datadog/di/instrumenter.rb +46 -26
  73. data/lib/datadog/di/probe_builder.rb +1 -1
  74. data/lib/datadog/di/probe_file_loader.rb +2 -2
  75. data/lib/datadog/di/probe_manager.rb +6 -6
  76. data/lib/datadog/di/probe_notification_builder.rb +1 -1
  77. data/lib/datadog/di/probe_notifier_worker.rb +2 -2
  78. data/lib/datadog/di/remote.rb +6 -6
  79. data/lib/datadog/di/serializer.rb +1 -1
  80. data/lib/datadog/di/transport/input.rb +3 -3
  81. data/lib/datadog/error_tracking/configuration.rb +55 -2
  82. data/lib/datadog/kit/enable_core_dumps.rb +1 -1
  83. data/lib/datadog/open_feature/component.rb +18 -1
  84. data/lib/datadog/open_feature/evaluation_engine.rb +3 -3
  85. data/lib/datadog/open_feature/exposures/reporter.rb +1 -1
  86. data/lib/datadog/open_feature/exposures/worker.rb +1 -1
  87. data/lib/datadog/open_feature/hooks/flag_eval_hook.rb +49 -0
  88. data/lib/datadog/open_feature/metrics/flag_eval_metrics.rb +149 -0
  89. data/lib/datadog/open_feature/provider.rb +19 -1
  90. data/lib/datadog/open_feature/remote.rb +1 -1
  91. data/lib/datadog/open_feature/transport.rb +1 -1
  92. data/lib/datadog/opentelemetry/metrics.rb +13 -4
  93. data/lib/datadog/opentelemetry/sdk/configurator.rb +1 -1
  94. data/lib/datadog/opentelemetry/sdk/id_generator.rb +16 -10
  95. data/lib/datadog/opentelemetry/sdk/metrics_exporter.rb +1 -1
  96. data/lib/datadog/profiling/collectors/code_provenance.rb +35 -9
  97. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +31 -2
  98. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +8 -2
  99. data/lib/datadog/profiling/collectors/info.rb +16 -3
  100. data/lib/datadog/profiling/component.rb +3 -6
  101. data/lib/datadog/profiling/exporter.rb +37 -12
  102. data/lib/datadog/profiling/ext.rb +0 -2
  103. data/lib/datadog/profiling/flush.rb +21 -12
  104. data/lib/datadog/profiling/http_transport.rb +12 -1
  105. data/lib/datadog/profiling/load_native_extension.rb +1 -1
  106. data/lib/datadog/profiling/profiler.rb +13 -1
  107. data/lib/datadog/profiling/scheduler.rb +2 -2
  108. data/lib/datadog/profiling/stack_recorder.rb +0 -4
  109. data/lib/datadog/profiling/tasks/exec.rb +8 -3
  110. data/lib/datadog/profiling/tasks/help.rb +1 -0
  111. data/lib/datadog/profiling/tasks/setup.rb +2 -2
  112. data/lib/datadog/single_step_instrument.rb +1 -1
  113. data/lib/datadog/symbol_database/configuration.rb +65 -0
  114. data/lib/datadog/symbol_database/extractor.rb +906 -0
  115. data/lib/datadog/symbol_database/file_hash.rb +46 -0
  116. data/lib/datadog/symbol_database/logger.rb +43 -0
  117. data/lib/datadog/symbol_database/scope.rb +102 -0
  118. data/lib/datadog/symbol_database/scope_batcher.rb +280 -0
  119. data/lib/datadog/symbol_database/service_version.rb +57 -0
  120. data/lib/datadog/symbol_database/symbol.rb +66 -0
  121. data/lib/datadog/symbol_database/transport/http/endpoint.rb +28 -0
  122. data/lib/datadog/symbol_database/transport/http.rb +45 -0
  123. data/lib/datadog/symbol_database/transport.rb +54 -0
  124. data/lib/datadog/symbol_database/uploader.rb +169 -0
  125. data/lib/datadog/symbol_database.rb +49 -0
  126. data/lib/datadog/tracing/buffer.rb +3 -3
  127. data/lib/datadog/tracing/configuration/settings.rb +1 -1
  128. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +5 -3
  129. data/lib/datadog/tracing/contrib/action_view/events/render_template.rb +1 -1
  130. data/lib/datadog/tracing/contrib/active_job/events/discard.rb +1 -1
  131. data/lib/datadog/tracing/contrib/active_job/events/enqueue.rb +1 -1
  132. data/lib/datadog/tracing/contrib/active_job/events/enqueue_at.rb +1 -1
  133. data/lib/datadog/tracing/contrib/active_job/events/enqueue_retry.rb +1 -1
  134. data/lib/datadog/tracing/contrib/active_job/events/perform.rb +1 -1
  135. data/lib/datadog/tracing/contrib/active_job/events/retry_stopped.rb +1 -1
  136. data/lib/datadog/tracing/contrib/active_model_serializers/events/render.rb +1 -1
  137. data/lib/datadog/tracing/contrib/active_model_serializers/events/serialize.rb +1 -1
  138. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +2 -2
  139. data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +1 -1
  140. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +1 -1
  141. data/lib/datadog/tracing/contrib/active_record/utils.rb +1 -1
  142. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +1 -1
  143. data/lib/datadog/tracing/contrib/active_support/notifications/subscription.rb +2 -2
  144. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +1 -1
  145. data/lib/datadog/tracing/contrib/component.rb +1 -1
  146. data/lib/datadog/tracing/contrib/configuration/resolver.rb +7 -4
  147. data/lib/datadog/tracing/contrib/dalli/quantize.rb +1 -1
  148. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +1 -1
  149. data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -2
  150. data/lib/datadog/tracing/contrib/extensions.rb +9 -0
  151. data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -2
  152. data/lib/datadog/tracing/contrib/grape/endpoint.rb +5 -5
  153. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +2 -2
  154. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +2 -2
  155. data/lib/datadog/tracing/contrib/http/instrumentation.rb +2 -2
  156. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -2
  157. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +2 -2
  158. data/lib/datadog/tracing/contrib/kafka/instrumentation/consumer.rb +2 -2
  159. data/lib/datadog/tracing/contrib/kafka/instrumentation/producer.rb +2 -2
  160. data/lib/datadog/tracing/contrib/karafka/patcher.rb +1 -1
  161. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +3 -3
  162. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +1 -1
  163. data/lib/datadog/tracing/contrib/presto/instrumentation.rb +3 -3
  164. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +6 -0
  165. data/lib/datadog/tracing/contrib/rack/ext.rb +27 -0
  166. data/lib/datadog/tracing/contrib/rack/patcher.rb +1 -1
  167. data/lib/datadog/tracing/contrib/rack/request_queue.rb +1 -1
  168. data/lib/datadog/tracing/contrib/rack/trace_proxy_middleware.rb +117 -1
  169. data/lib/datadog/tracing/contrib/rails/log_injection.rb +1 -1
  170. data/lib/datadog/tracing/contrib/rails/runner.rb +1 -1
  171. data/lib/datadog/tracing/contrib/rake/instrumentation.rb +2 -2
  172. data/lib/datadog/tracing/contrib/redis/quantize.rb +1 -1
  173. data/lib/datadog/tracing/contrib/redis/tags.rb +1 -1
  174. data/lib/datadog/tracing/contrib/sidekiq/utils.rb +1 -1
  175. data/lib/datadog/tracing/contrib/stripe/request.rb +1 -1
  176. data/lib/datadog/tracing/contrib.rb +8 -0
  177. data/lib/datadog/tracing/diagnostics/environment_logger.rb +3 -1
  178. data/lib/datadog/tracing/distributed/baggage.rb +59 -5
  179. data/lib/datadog/tracing/distributed/datadog.rb +11 -11
  180. data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +1 -1
  181. data/lib/datadog/tracing/distributed/propagation.rb +2 -2
  182. data/lib/datadog/tracing/distributed/trace_context.rb +74 -32
  183. data/lib/datadog/tracing/event.rb +1 -1
  184. data/lib/datadog/tracing/metadata/tagging.rb +2 -2
  185. data/lib/datadog/tracing/pipeline.rb +1 -1
  186. data/lib/datadog/tracing/remote.rb +1 -1
  187. data/lib/datadog/tracing/sampling/rule.rb +1 -1
  188. data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -2
  189. data/lib/datadog/tracing/sampling/span/rule_parser.rb +2 -2
  190. data/lib/datadog/tracing/span_operation.rb +3 -3
  191. data/lib/datadog/tracing/trace_operation.rb +4 -4
  192. data/lib/datadog/tracing/tracer.rb +6 -8
  193. data/lib/datadog/tracing/transport/io/client.rb +1 -1
  194. data/lib/datadog/tracing/workers.rb +2 -1
  195. data/lib/datadog/version.rb +1 -1
  196. metadata +33 -12
  197. data/ext/datadog_profiling_native_extension/clock_id_noop.c +0 -21
  198. data/lib/datadog/ai_guard/configuration/settings.rb +0 -113
  199. data/lib/datadog/appsec/configuration/settings.rb +0 -423
  200. data/lib/datadog/data_streams/configuration/settings.rb +0 -49
  201. data/lib/datadog/di/configuration/settings.rb +0 -243
  202. data/lib/datadog/error_tracking/configuration/settings.rb +0 -63
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest/sha1'
4
+
5
module Datadog
  module SymbolDatabase
    # Computes Git-style SHA-1 hashes of Ruby source files for backend commit inference.
    #
    # Uses Git's blob hash algorithm: SHA1("blob <size>\0<content>").
    # Hashes enable the backend to correlate runtime code with Git repository history,
    # identifying which commit is actually deployed.
    #
    # Called by: Extractor (when building MODULE scopes)
    # Stores result in: Scope's language_specifics[:file_hash]
    # Returns: 40-character hex string or nil if file unreadable
    #
    # @api private
    module FileHash
      module_function

      # Compute the Git blob SHA-1 of a file.
      #
      # The digest is fed incrementally (header first, then the raw bytes) so the
      # file content is never duplicated into a second "header + content" string.
      # Returns nil on any error (file not found, permission denied, etc.); errors
      # raised while reading or hashing are reported through +logger.debug+.
      #
      # @param file_path [String, nil] Path to the file
      # @param logger [#debug] Logger for error reporting
      # @return [String, nil] 40-character hex-encoded SHA-1 hash, or nil if the
      #   path is nil, is not a regular file, or cannot be read
      def compute(file_path, logger:)
        return nil unless file_path
        # Only regular files are hashed. File.exist? would also accept FIFOs,
        # devices and directories, where File.read can block or raise.
        return nil unless File.file?(file_path)

        # Binary mode: Git hashes the raw bytes, so no newline/encoding conversion.
        content = File.read(file_path, mode: 'rb')

        # SHA-1 is required here to match Git's blob hash format for commit inference.
        # This is not a security vulnerability - we're computing file content hashes
        # to match against Git objects, not using SHA-1 for authentication/integrity.
        digest = Digest::SHA1.new # nosemgrep: ruby.lang.security.weak-hashes-sha1.weak-hashes-sha1
        digest << "blob #{content.bytesize}\0"
        digest << content
        digest.hexdigest
      rescue => e
        logger.debug { "symdb: file hash failed for #{file_path}: #{e.class}: #{e.message}" }
        nil
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'forwardable'
4
+
5
module Datadog
  module SymbolDatabase
    # Thin wrapper around another logger that adds a +trace+ level.
    #
    # Only +debug+ and +warn+ are forwarded to the wrapped logger. +trace+ is a
    # sub-debug level intended for high-frequency log sites: it forwards to
    # +debug+ only when +settings.symbol_database.internal.trace_logging+ is
    # truthy, and otherwise does nothing (the block is never evaluated here).
    #
    # @api private
    class Logger
      extend Forwardable

      # Settings object consulted on every +trace+ call.
      attr_reader :settings

      # Forward exactly two levels to the wrapped logger. Other levels
      # (info, error, ...) are intentionally not exposed by this facade.
      def_delegator :@target, :debug
      def_delegator :@target, :warn

      # @param settings [Object] Settings; must respond to
      #   +symbol_database.internal.trace_logging+
      # @param target [#debug, #warn] Underlying logger that receives the calls
      def initialize(settings, target)
        @target = target
        @settings = settings
      end

      # Log at trace level (below debug).
      #
      # @yield Block that returns the log message string
      # @return [Object, nil] whatever the wrapped +debug+ returns, or nil when
      #   trace logging is disabled
      def trace(&block)
        return unless settings.symbol_database.internal.trace_logging

        debug(&block)
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
module Datadog
  module SymbolDatabase
    # One node of the hierarchical symbol tree (FILE → MODULE/CLASS → METHOD).
    #
    # A scope carries:
    # - Metadata: name, source file, line range, scope type (MODULE/CLASS/METHOD/etc.)
    # - Symbols: variables, constants, parameters defined in the scope
    # - Nested scopes: child scopes (e.g., methods within a class)
    #
    # Created by: Extractor (during symbol extraction)
    # Used by: ScopeBatcher (batching), ServiceVersion (wrapping for upload)
    # Serialized to: JSON via to_h/to_json for upload to agent
    #
    # @api private
    class Scope
      attr_reader :scope_type, :name, :source_file, :start_line, :end_line
      attr_reader :language_specifics, :symbols, :scopes

      # Ranges of executable lines [{start:, end:}]. Three states:
      # - nil: not computed (source unreadable, native/C-extension method)
      # - []: computed but no executable lines found (comments/whitespace only)
      # - non-empty: computed, contains executable line ranges
      # nil and [] both serialize as has_injectible_lines: false on METHOD
      # scopes; the key is absent on non-METHOD scopes. The wire format keeps
      # the historical spelling +injectible+ for backend compatibility, while
      # the Ruby identifier is +targetable_lines+.
      attr_reader :targetable_lines

      # Build a scope. Collections that are not supplied default to empty.
      #
      # @param scope_type [String] Type of scope (FILE, MODULE, CLASS, METHOD)
      # @param name [String, nil] Name of the scope (class name, method name, etc.)
      # @param source_file [String, nil] Path to source file
      # @param start_line [Integer, nil] Starting line number (UNKNOWN_MIN_LINE for unknown)
      # @param end_line [Integer, nil] Ending line number (UNKNOWN_MAX_LINE for entire file)
      # @param targetable_lines [Array<Hash>, nil] Ranges of executable lines [{start:, end:}]
      # @param language_specifics [Hash, nil] Ruby-specific metadata
      # @param symbols [Array<Symbol>, nil] Symbols defined in this scope
      # @param scopes [Array<Scope>, nil] Nested child scopes
      def initialize(
        scope_type:,
        name: nil,
        source_file: nil,
        start_line: nil,
        end_line: nil,
        targetable_lines: nil,
        language_specifics: nil,
        symbols: nil,
        scopes: nil
      )
        @scope_type = scope_type
        @name = name
        @source_file = source_file
        @start_line = start_line
        @end_line = end_line
        @targetable_lines = targetable_lines
        @language_specifics = language_specifics || {}
        @symbols = symbols || []
        @scopes = scopes || []
      end

      # @return [Boolean] true when targetable_lines is non-nil and non-empty
      def targetable_lines?
        !(targetable_lines.nil? || targetable_lines.empty?)
      end

      # Convert the scope (and, recursively, its symbols and child scopes) to a
      # Hash for JSON serialization. Nil-valued and empty-collection entries are
      # left out to keep the payload small.
      #
      # @return [Hash] Scope as hash with symbol keys
      def to_h
        payload = {
          scope_type: scope_type,
          name: name,
          source_file: source_file,
          start_line: start_line,
          end_line: end_line,
        }
        payload[:language_specifics] = language_specifics unless language_specifics.empty?
        payload[:symbols] = symbols.map(&:to_h) unless symbols.empty?
        payload[:scopes] = scopes.map(&:to_h) unless scopes.empty?
        payload.compact!

        # Line-targeting keys exist only on METHOD scopes (not CLASS/MODULE/FILE).
        # has_injectible_lines is always present there, even when false; the
        # ranges themselves are emitted only when there is at least one.
        if scope_type == 'METHOD'
          has_lines = targetable_lines?
          payload[:has_injectible_lines] = has_lines # steep:ignore ArgumentTypeMismatch
          payload[:injectible_lines] = targetable_lines if has_lines
        end

        payload
      end

      # Serialize scope to JSON. The generator state argument is accepted for
      # compatibility with JSON's to_json protocol but is not used.
      #
      # @return [String] JSON string representation
      def to_json(_state = nil)
        JSON.generate(to_h)
      end
    end
  end
end
@@ -0,0 +1,280 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
module Datadog
  module SymbolDatabase
    # Batches extracted scopes and triggers uploads at appropriate times.
    #
    # Implements two upload triggers:
    # 1. Size-based: Immediate upload when MAX_SCOPES scopes are collected
    # 2. Time-based: Upload after INACTIVITY_TIMEOUT seconds without a new scope
    #    (debounce timer, not periodic)
    #
    # Also provides:
    # - Deduplication: Tracks accepted scope names to prevent re-uploads
    # - File limiting: Stops accepting scopes after MAX_FILES unique names
    # - Thread safety: Mutex-protected state for concurrent access
    #
    # Timer implementation: A single long-lived thread waits on a ConditionVariable
    # with a timeout. Each add_scope signals the CV to reset the deadline. When the
    # timeout expires without a signal, the timer fires and flushes the batch.
    # This avoids creating/destroying a thread per add_scope call.
    #
    # Every timer thread is started with its own generation number. Stopping the
    # timer (shutdown/reset) or starting a replacement thread advances the
    # generation, so a thread that was told to stop can never be revived by a
    # later add_scope clearing the shared stop flag.
    #
    # Flow: Extractor → add_scope → (batch or timer) → Uploader
    # Created by: Component (during initialization)
    # Calls: Uploader.upload_scopes when batch full or timer fires
    #
    # @api private
    class ScopeBatcher
      # Maximum scopes per batch before triggering immediate upload.
      # This matches the batch size used in Java and Python tracers to ensure
      # consistent upload behavior across languages.
      MAX_SCOPES = 400
      INACTIVITY_TIMEOUT = 1.0 # seconds
      # Maximum unique files to track before stopping extraction.
      # This prevents runaway memory usage in applications with very large
      # numbers of loaded classes (e.g., heavily modularized Rails apps).
      MAX_FILES = 10_000
      # Seconds to wait for the timer thread to exit when joining during
      # shutdown or reset. Bounded so a misbehaving thread cannot hang the
      # caller indefinitely.
      TIMER_JOIN_TIMEOUT = 5

      # Initialize batching context.
      # @param uploader [Uploader] Uploader instance for triggering uploads
      # @param logger [Logger] Logger for diagnostics (must respond to debug and trace)
      # @param on_upload [Proc, nil] Optional callback called after upload (for testing)
      # @param timer_enabled [Boolean] Enable async timer (default true, false for tests)
      def initialize(uploader, logger:, on_upload: nil, timer_enabled: true)
        @uploader = uploader
        @logger = logger
        @on_upload = on_upload
        @timer_enabled = timer_enabled
        @scopes = []
        @mutex = Mutex.new
        @file_count = 0
        @uploaded_modules = Set.new

        # Timer state: single long-lived thread + ConditionVariable for debounce.
        # @timer_signaled is set to true on each add_scope and cleared by the timer
        # thread right before it starts waiting. The flag is used instead of the
        # return value of ConditionVariable#wait, which does not distinguish
        # signal vs timeout on Ruby < 3.2 (returns self in both cases).
        @timer_cv = ConditionVariable.new
        @timer_thread = nil
        @timer_stopped = false
        @timer_signaled = false
        # Identity of the current timer thread; a thread whose generation no
        # longer matches must exit (see timer_stale?).
        @timer_generation = 0
      end

      # Add a scope to the batch.
      # Triggers immediate upload if batch reaches MAX_SCOPES scopes.
      # Resets inactivity timer on every accepted scope.
      # Errors are logged at debug level and never propagated.
      # @param scope [Scope] The scope to add
      # @return [void]
      def add_scope(scope)
        # @type var scopes_to_upload: ::Array[Scope]?
        scopes_to_upload = nil

        @mutex.synchronize do
          # Check file limit (counts only unique accepted files; duplicates are
          # filtered by the dedup check below and do not consume the budget).
          if @file_count >= MAX_FILES
            @logger.debug { "symdb: file limit (#{MAX_FILES}) reached, ignoring scope: #{scope.name}" }
            return
          end

          # Check if already uploaded — duplicates do not count toward MAX_FILES
          # so a re-extraction scenario does not exhaust the budget for unique scopes.
          if @uploaded_modules.include?(scope.name)
            @logger.trace { "symdb: skipping #{scope.name}: already uploaded" }
            return
          end

          @uploaded_modules.add(scope.name)
          @file_count += 1

          @scopes << scope

          # Check if batch size reached (AFTER adding)
          if @scopes.size >= MAX_SCOPES
            # Take the batch and clear it while still holding the mutex.
            scopes_to_upload = @scopes.dup
            @scopes.clear
          end

          # Signal the timer thread to reset its inactivity deadline.
          # If batch was full, this is harmless — the timer will just
          # re-check and find an empty batch if it fires.
          ensure_timer_running
          @timer_signaled = true
          @timer_cv.signal
        end

        # Upload outside mutex (if batch was full)
        perform_upload(scopes_to_upload) if scopes_to_upload
      rescue => e
        @logger.debug { "symdb: failed to add scope: #{e.class}: #{e.message}" }
        # Don't propagate, continue operation
      end

      # Force upload of current batch immediately. No-op when the batch is empty.
      # @return [void]
      def flush
        # @type var scopes_to_upload: ::Array[Scope]?
        scopes_to_upload = nil

        @mutex.synchronize do
          return if @scopes.empty?

          scopes_to_upload = @scopes.dup
          @scopes.clear
        end

        perform_upload(scopes_to_upload)
      end

      # Stop the timer thread and upload remaining scopes.
      # @return [void]
      def shutdown
        # @type var scopes_to_upload: ::Array[Scope]?
        scopes_to_upload = nil
        # @type var thread_to_join: ::Thread?
        thread_to_join = nil

        @mutex.synchronize do
          # Invalidate and capture the timer thread under the mutex. Because the
          # thread's generation is now stale, a concurrent add_scope that starts
          # a fresh timer cannot keep this one alive.
          thread_to_join = stop_timer

          scopes_to_upload = @scopes.dup
          @scopes.clear
        end

        # Join the timer thread outside the mutex (the thread needs the mutex
        # to observe that it has been stopped).
        thread_to_join&.join(TIMER_JOIN_TIMEOUT)

        # Upload outside mutex; perform_upload ignores nil/empty batches.
        perform_upload(scopes_to_upload)
      end

      # Check if scopes are pending upload.
      # @return [Boolean] true if scopes waiting in batch
      def scopes_pending?
        @mutex.synchronize { @scopes.any? }
      end

      # Get current batch size.
      # @return [Integer] Number of scopes in current batch
      def size
        @mutex.synchronize { @scopes.size }
      end

      private

      # Reset state. Private so production code cannot accidentally invoke it;
      # tests call via +send(:reset)+.
      # @return [void]
      def reset
        # @type var thread_to_join: ::Thread?
        thread_to_join = nil

        @mutex.synchronize do
          @scopes.clear
          @file_count = 0
          @uploaded_modules.clear

          # Invalidate and capture under the mutex (see shutdown for rationale).
          thread_to_join = stop_timer
        end

        thread_to_join&.join(TIMER_JOIN_TIMEOUT)

        # Allow timer to be restarted after reset
        @mutex.synchronize do
          @timer_stopped = false
          @timer_signaled = false
        end
      end

      # Tell the current timer thread (if any) to exit and detach it.
      # Must be called from within @mutex.synchronize.
      # @return [Thread, nil] the detached timer thread, for the caller to join
      def stop_timer
        @timer_stopped = true
        # Advancing the generation makes the stop permanent for the existing
        # thread even if @timer_stopped is cleared again before it wakes up.
        @timer_generation += 1
        @timer_cv.broadcast

        detached = @timer_thread
        @timer_thread = nil
        detached
      end

      # Start the timer thread if not already running.
      # Must be called from within @mutex.synchronize.
      # @return [void]
      def ensure_timer_running
        return unless @timer_enabled
        return if @timer_thread&.alive?

        @timer_stopped = false
        @timer_signaled = false
        @timer_generation += 1
        generation = @timer_generation

        @timer_thread = Thread.new do
          timer_loop(generation)
        end
      end

      # Whether a timer thread started with +generation+ must exit.
      # Must be called from within @mutex.synchronize.
      # @param generation [Integer] generation the calling thread was started with
      # @return [Boolean]
      def timer_stale?(generation)
        @timer_stopped || @timer_generation != generation
      end

      # Timer thread main loop. Waits on the ConditionVariable with a timeout.
      # Each signal resets the deadline (debounce). When the wait ends without
      # a signal having been recorded, the batch is flushed.
      #
      # Uses @timer_signaled flag instead of ConditionVariable#wait return value
      # because Ruby < 3.2 returns self for both signal and timeout (no way to
      # distinguish). The flag is set by add_scope before signaling, and cleared
      # by this loop right before each wait.
      #
      # @param generation [Integer] generation this thread belongs to; defaults
      #   to the current one for direct callers
      # @return [void]
      def timer_loop(generation = @timer_generation)
        loop do
          should_flush = false

          @mutex.synchronize do
            return if timer_stale?(generation)

            @timer_signaled = false
            @timer_cv.wait(@mutex, INACTIVITY_TIMEOUT)

            return if timer_stale?(generation)

            if @timer_signaled
              # Woke up because add_scope signaled — loop back to re-wait with
              # a fresh timeout. This implements the debounce: the timeout resets
              # on every scope addition.
              next # steep:ignore BreakTypeMismatch
            end

            # No signal recorded (timed out). If there are scopes pending,
            # flush them. Otherwise, loop back and wait again.
            should_flush = !@scopes.empty?
          end

          # Flush outside the mutex; flush takes the mutex itself.
          flush if should_flush
        end
      rescue => e
        @logger.debug { "symdb: timer thread error: #{e.class}: #{e.message}" }
      end

      # Perform upload via uploader. Failures are logged at debug level and
      # swallowed.
      # @param scopes [Array<Scope>, nil] Scopes to upload; nil/empty is a no-op
      # @return [void]
      def perform_upload(scopes)
        return if scopes.nil? || scopes.empty?

        @uploader.upload_scopes(scopes)
        @on_upload&.call(scopes) # Notify tests after upload
      rescue => e
        @logger.debug { "symdb: upload failed: #{e.class}: #{e.message}" }
        # Don't propagate, uploader handles retries
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
module Datadog
  module SymbolDatabase
    # Root object of a symbol database upload.
    #
    # Bundles the service metadata (name, env, version) with the extracted
    # top-level scopes. The language field is always 'ruby' and identifies
    # the tracer.
    #
    # Created by: Uploader (wraps scopes array before serialization)
    # Contains: Array of top-level Scope objects (FILE scopes)
    # Serialized to: JSON via to_json, then GZIP compressed for upload
    #
    # @api private
    class ServiceVersion
      attr_reader :service, :env, :version, :language, :scopes

      # Build a ServiceVersion after validating the required arguments.
      #
      # @param service [String] Service name (required, from DD_SERVICE)
      # @param env [String, nil] Environment (from DD_ENV, passed through unchanged)
      # @param version [String, nil] Version (from DD_VERSION, passed through unchanged)
      # @param scopes [Array<Scope>] Top-level scopes (required)
      # @raise [ArgumentError] if service is nil or empty, or scopes is not an Array
      def initialize(service:, env:, version:, scopes:)
        service_missing = service.nil? || service.empty?
        raise ArgumentError, 'service is required' if service_missing
        raise ArgumentError, 'scopes must be an array' unless scopes.kind_of?(Array)

        @language = 'ruby'
        @service = service
        @env = env
        @version = version
        @scopes = scopes
      end

      # Convert service version to Hash for JSON serialization. env and version
      # are included even when nil; each scope is converted through its own to_h.
      #
      # @return [Hash] ServiceVersion as hash with symbol keys
      def to_h
        serialized_scopes = scopes.map { |scope| scope.to_h }

        {
          service: service,
          env: env,
          version: version,
          language: language,
          scopes: serialized_scopes,
        }
      end

      # Serialize service version to JSON. The generator state argument is
      # accepted for compatibility with JSON's to_json protocol but is not used.
      #
      # @return [String] JSON string representation
      def to_json(_state = nil)
        JSON.generate(to_h)
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
module Datadog
  module SymbolDatabase
    # A single identifier (variable, parameter, field, constant) that lives
    # inside a scope.
    #
    # Kinds of identifiers extracted from Ruby code:
    # - Instance variables (@var) - FIELD type
    # - Class variables (@@var) - STATIC_FIELD type
    # - Constants (CONST) - STATIC_FIELD type
    # - Method parameters (arg) - ARG type
    # - Local variables (var) - LOCAL type (not yet implemented)
    #
    # Created by: Extractor (during class/method introspection)
    # Contained in: Scope objects (symbols array)
    # Serialized to: JSON via to_h/to_json
    #
    # NOTE: within Datadog::SymbolDatabase this class shadows Ruby's core
    # Symbol constant; use ::Symbol there to reach the core class.
    #
    # @api private
    class Symbol
      attr_reader :symbol_type, :name, :line, :type, :language_specifics

      # Store the symbol attributes exactly as given; no validation is done.
      #
      # @param symbol_type [String] Type: FIELD, STATIC_FIELD, ARG, LOCAL
      # @param name [String] Symbol name (variable name, parameter name)
      # @param line [Integer] Line number (UNKNOWN_MIN_LINE for entire scope,
      #   UNKNOWN_MAX_LINE for method-level only)
      # @param type [String, nil] Type annotation (optional, Ruby is dynamic)
      # @param language_specifics [Hash, nil] Symbol-specific metadata
      def initialize(symbol_type:, name:, line:, type: nil, language_specifics: nil)
        @symbol_type = symbol_type
        @name = name
        @line = line
        @type = type
        @language_specifics = language_specifics
      end

      # Hash form of the symbol. Entries whose value is nil are dropped to
      # keep the payload small.
      #
      # @return [Hash] Symbol as a hash with symbol keys
      def to_h
        {
          symbol_type: symbol_type,
          name: name,
          line: line,
          type: type,
          language_specifics: language_specifics,
        }.compact
      end

      # JSON form of the symbol. The generator state argument is accepted for
      # compatibility with the to_json protocol but is not used.
      #
      # @return [String] JSON string representation
      def to_json(_state = nil)
        payload = to_h
        JSON.generate(payload)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../../core/transport/http/api/endpoint'
4
+
5
module Datadog
  module SymbolDatabase
    module Transport
      module HTTP
        module API
          # Endpoint used for symbol database uploads; always issued as a POST.
          #
          # Multipart form-data is triggered by setting `env.form` on the
          # request (handled in `Core::Transport::HTTP::Adapters::Net`); the
          # multipart library sets Content-Type itself, so this class only
          # carries the path and the encoder.
          class Endpoint < Datadog::Core::Transport::HTTP::API::Endpoint
            # @param path [String] URL path for the endpoint
            # @param encoder [#encode] Encoder for request data
            def initialize(path, encoder)
              # The HTTP verb is fixed to :post; only the path varies.
              super(:post, path)

              @encoder = encoder
            end

            # @return [#encode] Encoder supplied at construction time
            attr_reader :encoder
          end
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../core/encoding'
4
+ require_relative '../../core/transport/http'
5
+ require_relative 'http/endpoint'
6
+ require_relative '../transport'
7
+
8
module Datadog
  module SymbolDatabase
    module Transport
      # Namespace for HTTP transport components
      module HTTP
        # POST endpoint for the agent's symbol database intake, paired with
        # the JSON encoder. Multipart form-data is dispatched via `env.form`
        # from the `Symbols::Client` subclass.
        SYMBOLS_ENDPOINT = API::Endpoint.new('/symdb/v1/input', Datadog::Core::Encoding::JSONEncoder)

        # Builds a transport for the symbols upload endpoint.
        #
        # Registers SYMBOLS_ENDPOINT as the default 'symbols' API on the
        # builder, then gives an optional caller-supplied block the chance to
        # customize the builder further before it is converted to a transport.
        #
        # @param agent_settings [Core::Configuration::AgentSettingsResolver::AgentSettings]
        #   Agent connection settings (host, port, timeout, etc.)
        # @param logger [Logger] Logger instance
        # @param headers [Hash, nil] Optional additional headers
        # @yieldparam transport [Object] the builder passed by Core::Transport::HTTP.build
        # @return [Symbols::Transport] Transport for the symbols endpoint
        def self.symbols(agent_settings:, logger:, headers: nil)
          builder = Core::Transport::HTTP.build(
            logger: logger,
            agent_settings: agent_settings,
            headers: headers,
          ) do |transport|
            transport.api('symbols', SYMBOLS_ENDPOINT, default: true)

            # `yield` here targets the block given to `symbols` itself.
            yield(transport) if block_given?
          end

          builder.to_transport(SymbolDatabase::Transport::Symbols::Transport)
        end
      end
    end
  end
end