datadog 2.31.0 → 2.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/ext/datadog_profiling_native_extension/clock_id.h +9 -1
  3. data/ext/datadog_profiling_native_extension/clock_id_from_mach.c +73 -0
  4. data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +1 -1
  5. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +17 -7
  6. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +16 -5
  7. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +6 -0
  8. data/ext/datadog_profiling_native_extension/extconf.rb +8 -4
  9. data/ext/datadog_profiling_native_extension/http_transport.c +10 -5
  10. data/ext/datadog_profiling_native_extension/stack_recorder.c +3 -9
  11. data/ext/datadog_profiling_native_extension/time_helpers.h +1 -0
  12. data/ext/libdatadog_api/crashtracker.c +2 -0
  13. data/ext/libdatadog_api/di.c +48 -0
  14. data/ext/libdatadog_api/extconf.rb +7 -4
  15. data/ext/libdatadog_extconf_helpers.rb +38 -1
  16. data/lib/datadog/ai_guard/autoload.rb +10 -0
  17. data/lib/datadog/ai_guard/component.rb +1 -1
  18. data/lib/datadog/ai_guard/configuration.rb +105 -2
  19. data/lib/datadog/ai_guard/contrib/auto_instrument.rb +24 -0
  20. data/lib/datadog/ai_guard/contrib/rack/integration.rb +42 -0
  21. data/lib/datadog/ai_guard/contrib/rack/patcher.rb +26 -0
  22. data/lib/datadog/ai_guard/contrib/rack/request_middleware.rb +83 -0
  23. data/lib/datadog/ai_guard/contrib/rails/integration.rb +41 -0
  24. data/lib/datadog/ai_guard/contrib/rails/patcher.rb +97 -0
  25. data/lib/datadog/ai_guard/evaluation.rb +2 -0
  26. data/lib/datadog/ai_guard/ext.rb +2 -0
  27. data/lib/datadog/ai_guard.rb +8 -0
  28. data/lib/datadog/appsec/autoload.rb +1 -1
  29. data/lib/datadog/appsec/component.rb +1 -1
  30. data/lib/datadog/appsec/configuration.rb +414 -1
  31. data/lib/datadog/appsec/contrib/aws_lambda/gateway/watcher.rb +75 -0
  32. data/lib/datadog/appsec/contrib/aws_lambda/integration.rb +39 -0
  33. data/lib/datadog/appsec/contrib/aws_lambda/patcher.rb +30 -0
  34. data/lib/datadog/appsec/contrib/aws_lambda/waf_addresses.rb +111 -0
  35. data/lib/datadog/appsec/contrib/devise/patches/signin_tracking_patch.rb +2 -1
  36. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +1 -1
  37. data/lib/datadog/appsec/contrib/rails/patcher.rb +2 -2
  38. data/lib/datadog/appsec/metrics/telemetry.rb +13 -1
  39. data/lib/datadog/appsec/security_engine/runner.rb +1 -1
  40. data/lib/datadog/appsec/trace_keeper.rb +18 -6
  41. data/lib/datadog/appsec/utils/http/url_encoded.rb +2 -2
  42. data/lib/datadog/appsec.rb +1 -0
  43. data/lib/datadog/core/configuration/components.rb +1 -1
  44. data/lib/datadog/core/configuration/settings.rb +13 -0
  45. data/lib/datadog/core/configuration/supported_configurations.rb +4 -0
  46. data/lib/datadog/core/configuration.rb +1 -1
  47. data/lib/datadog/core/contrib/rails/utils.rb +1 -1
  48. data/lib/datadog/core/crashtracking/component.rb +3 -3
  49. data/lib/datadog/core/diagnostics/environment_logger.rb +3 -1
  50. data/lib/datadog/core/environment/container.rb +2 -2
  51. data/lib/datadog/core/environment/ext.rb +1 -0
  52. data/lib/datadog/core/environment/socket.rb +13 -0
  53. data/lib/datadog/core/feature_flags.rb +1 -1
  54. data/lib/datadog/core/metrics/client.rb +5 -5
  55. data/lib/datadog/core/remote/client.rb +1 -1
  56. data/lib/datadog/core/remote/component.rb +2 -2
  57. data/lib/datadog/core/runtime/metrics.rb +1 -1
  58. data/lib/datadog/core/telemetry/emitter.rb +1 -1
  59. data/lib/datadog/core/telemetry/event/app_started.rb +2 -2
  60. data/lib/datadog/core/transport/http.rb +2 -0
  61. data/lib/datadog/core/utils.rb +1 -1
  62. data/lib/datadog/core/workers/async.rb +1 -1
  63. data/lib/datadog/core.rb +1 -1
  64. data/lib/datadog/data_streams/configuration.rb +40 -1
  65. data/lib/datadog/data_streams/pathway_context.rb +1 -1
  66. data/lib/datadog/data_streams/processor.rb +1 -1
  67. data/lib/datadog/data_streams.rb +1 -1
  68. data/lib/datadog/di/base.rb +8 -5
  69. data/lib/datadog/di/code_tracker.rb +179 -1
  70. data/lib/datadog/di/component.rb +1 -1
  71. data/lib/datadog/di/configuration.rb +235 -2
  72. data/lib/datadog/di/instrumenter.rb +46 -26
  73. data/lib/datadog/di/probe_builder.rb +1 -1
  74. data/lib/datadog/di/probe_file_loader.rb +2 -2
  75. data/lib/datadog/di/probe_manager.rb +6 -6
  76. data/lib/datadog/di/probe_notification_builder.rb +1 -1
  77. data/lib/datadog/di/probe_notifier_worker.rb +2 -2
  78. data/lib/datadog/di/remote.rb +6 -6
  79. data/lib/datadog/di/serializer.rb +1 -1
  80. data/lib/datadog/di/transport/input.rb +3 -3
  81. data/lib/datadog/error_tracking/configuration.rb +55 -2
  82. data/lib/datadog/kit/enable_core_dumps.rb +1 -1
  83. data/lib/datadog/open_feature/component.rb +18 -1
  84. data/lib/datadog/open_feature/evaluation_engine.rb +3 -3
  85. data/lib/datadog/open_feature/exposures/reporter.rb +1 -1
  86. data/lib/datadog/open_feature/exposures/worker.rb +1 -1
  87. data/lib/datadog/open_feature/hooks/flag_eval_hook.rb +49 -0
  88. data/lib/datadog/open_feature/metrics/flag_eval_metrics.rb +149 -0
  89. data/lib/datadog/open_feature/provider.rb +19 -1
  90. data/lib/datadog/open_feature/remote.rb +1 -1
  91. data/lib/datadog/open_feature/transport.rb +1 -1
  92. data/lib/datadog/opentelemetry/metrics.rb +13 -4
  93. data/lib/datadog/opentelemetry/sdk/configurator.rb +1 -1
  94. data/lib/datadog/opentelemetry/sdk/id_generator.rb +16 -10
  95. data/lib/datadog/opentelemetry/sdk/metrics_exporter.rb +1 -1
  96. data/lib/datadog/profiling/collectors/code_provenance.rb +35 -9
  97. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +31 -2
  98. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +8 -2
  99. data/lib/datadog/profiling/collectors/info.rb +16 -3
  100. data/lib/datadog/profiling/component.rb +3 -6
  101. data/lib/datadog/profiling/exporter.rb +37 -12
  102. data/lib/datadog/profiling/ext.rb +0 -2
  103. data/lib/datadog/profiling/flush.rb +21 -12
  104. data/lib/datadog/profiling/http_transport.rb +12 -1
  105. data/lib/datadog/profiling/load_native_extension.rb +1 -1
  106. data/lib/datadog/profiling/profiler.rb +13 -1
  107. data/lib/datadog/profiling/scheduler.rb +2 -2
  108. data/lib/datadog/profiling/stack_recorder.rb +0 -4
  109. data/lib/datadog/profiling/tasks/exec.rb +8 -3
  110. data/lib/datadog/profiling/tasks/help.rb +1 -0
  111. data/lib/datadog/profiling/tasks/setup.rb +2 -2
  112. data/lib/datadog/single_step_instrument.rb +1 -1
  113. data/lib/datadog/symbol_database/configuration.rb +65 -0
  114. data/lib/datadog/symbol_database/extractor.rb +906 -0
  115. data/lib/datadog/symbol_database/file_hash.rb +46 -0
  116. data/lib/datadog/symbol_database/logger.rb +43 -0
  117. data/lib/datadog/symbol_database/scope.rb +102 -0
  118. data/lib/datadog/symbol_database/scope_batcher.rb +280 -0
  119. data/lib/datadog/symbol_database/service_version.rb +57 -0
  120. data/lib/datadog/symbol_database/symbol.rb +66 -0
  121. data/lib/datadog/symbol_database/transport/http/endpoint.rb +28 -0
  122. data/lib/datadog/symbol_database/transport/http.rb +45 -0
  123. data/lib/datadog/symbol_database/transport.rb +54 -0
  124. data/lib/datadog/symbol_database/uploader.rb +169 -0
  125. data/lib/datadog/symbol_database.rb +49 -0
  126. data/lib/datadog/tracing/buffer.rb +3 -3
  127. data/lib/datadog/tracing/configuration/settings.rb +1 -1
  128. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +5 -3
  129. data/lib/datadog/tracing/contrib/action_view/events/render_template.rb +1 -1
  130. data/lib/datadog/tracing/contrib/active_job/events/discard.rb +1 -1
  131. data/lib/datadog/tracing/contrib/active_job/events/enqueue.rb +1 -1
  132. data/lib/datadog/tracing/contrib/active_job/events/enqueue_at.rb +1 -1
  133. data/lib/datadog/tracing/contrib/active_job/events/enqueue_retry.rb +1 -1
  134. data/lib/datadog/tracing/contrib/active_job/events/perform.rb +1 -1
  135. data/lib/datadog/tracing/contrib/active_job/events/retry_stopped.rb +1 -1
  136. data/lib/datadog/tracing/contrib/active_model_serializers/events/render.rb +1 -1
  137. data/lib/datadog/tracing/contrib/active_model_serializers/events/serialize.rb +1 -1
  138. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +2 -2
  139. data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +1 -1
  140. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +1 -1
  141. data/lib/datadog/tracing/contrib/active_record/utils.rb +1 -1
  142. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +1 -1
  143. data/lib/datadog/tracing/contrib/active_support/notifications/subscription.rb +2 -2
  144. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +1 -1
  145. data/lib/datadog/tracing/contrib/component.rb +1 -1
  146. data/lib/datadog/tracing/contrib/configuration/resolver.rb +7 -4
  147. data/lib/datadog/tracing/contrib/dalli/quantize.rb +1 -1
  148. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +1 -1
  149. data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -2
  150. data/lib/datadog/tracing/contrib/extensions.rb +9 -0
  151. data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -2
  152. data/lib/datadog/tracing/contrib/grape/endpoint.rb +5 -5
  153. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +2 -2
  154. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +2 -2
  155. data/lib/datadog/tracing/contrib/http/instrumentation.rb +2 -2
  156. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -2
  157. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +2 -2
  158. data/lib/datadog/tracing/contrib/kafka/instrumentation/consumer.rb +2 -2
  159. data/lib/datadog/tracing/contrib/kafka/instrumentation/producer.rb +2 -2
  160. data/lib/datadog/tracing/contrib/karafka/patcher.rb +1 -1
  161. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +3 -3
  162. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +1 -1
  163. data/lib/datadog/tracing/contrib/presto/instrumentation.rb +3 -3
  164. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +6 -0
  165. data/lib/datadog/tracing/contrib/rack/ext.rb +27 -0
  166. data/lib/datadog/tracing/contrib/rack/patcher.rb +1 -1
  167. data/lib/datadog/tracing/contrib/rack/request_queue.rb +1 -1
  168. data/lib/datadog/tracing/contrib/rack/trace_proxy_middleware.rb +117 -1
  169. data/lib/datadog/tracing/contrib/rails/log_injection.rb +1 -1
  170. data/lib/datadog/tracing/contrib/rails/runner.rb +1 -1
  171. data/lib/datadog/tracing/contrib/rake/instrumentation.rb +2 -2
  172. data/lib/datadog/tracing/contrib/redis/quantize.rb +1 -1
  173. data/lib/datadog/tracing/contrib/redis/tags.rb +1 -1
  174. data/lib/datadog/tracing/contrib/sidekiq/utils.rb +1 -1
  175. data/lib/datadog/tracing/contrib/stripe/request.rb +1 -1
  176. data/lib/datadog/tracing/contrib.rb +8 -0
  177. data/lib/datadog/tracing/diagnostics/environment_logger.rb +3 -1
  178. data/lib/datadog/tracing/distributed/baggage.rb +59 -5
  179. data/lib/datadog/tracing/distributed/datadog.rb +11 -11
  180. data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +1 -1
  181. data/lib/datadog/tracing/distributed/propagation.rb +2 -2
  182. data/lib/datadog/tracing/distributed/trace_context.rb +74 -32
  183. data/lib/datadog/tracing/event.rb +1 -1
  184. data/lib/datadog/tracing/metadata/tagging.rb +2 -2
  185. data/lib/datadog/tracing/pipeline.rb +1 -1
  186. data/lib/datadog/tracing/remote.rb +1 -1
  187. data/lib/datadog/tracing/sampling/rule.rb +1 -1
  188. data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -2
  189. data/lib/datadog/tracing/sampling/span/rule_parser.rb +2 -2
  190. data/lib/datadog/tracing/span_operation.rb +3 -3
  191. data/lib/datadog/tracing/trace_operation.rb +4 -4
  192. data/lib/datadog/tracing/tracer.rb +6 -8
  193. data/lib/datadog/tracing/transport/io/client.rb +1 -1
  194. data/lib/datadog/tracing/workers.rb +2 -1
  195. data/lib/datadog/version.rb +1 -1
  196. metadata +33 -12
  197. data/ext/datadog_profiling_native_extension/clock_id_noop.c +0 -21
  198. data/lib/datadog/ai_guard/configuration/settings.rb +0 -113
  199. data/lib/datadog/appsec/configuration/settings.rb +0 -423
  200. data/lib/datadog/data_streams/configuration/settings.rb +0 -49
  201. data/lib/datadog/di/configuration/settings.rb +0 -243
  202. data/lib/datadog/error_tracking/configuration/settings.rb +0 -63
@@ -0,0 +1,906 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'scope'
4
+ require_relative 'symbol'
5
+ require_relative 'file_hash'
6
+ require_relative '../core/utils/array'
7
+
8
+ module Datadog
9
+ module SymbolDatabase
10
+ # Extracts symbol metadata from loaded Ruby modules and classes via introspection.
11
+ #
12
+ # Uses Ruby's reflection APIs (Module#constants, Class#instance_methods, Method#parameters)
13
+ # to build hierarchical Scope structures representing code organization.
14
+ # Filters to user code only (excludes gems, stdlib, test files).
15
+ #
16
+ # Extraction flow:
17
+ # 1. ObjectSpace.each_object(Module) - Iterate all loaded modules/classes
18
+ # 2. Filter to user code (user_code_module?)
19
+ # 3. Build MODULE or CLASS scope with nested METHOD scopes
20
+ # 4. Extract symbols: constants, class variables, method parameters
21
+ #
22
+ # Called by: Component.extract_and_upload (during upload trigger)
23
+ # Produces: Scope objects passed to ScopeBatcher for batching
24
+ # File hashing: Calls FileHash.compute for FILE root scopes (see wrap_in_file_scope)
25
+ #
26
+ # Error handling strategy (defense-in-depth):
27
+ #
28
+ # The extractor introspects arbitrary Ruby objects via ObjectSpace. Ruby's
29
+ # reflection APIs (Module#name, #instance_methods, #const_get, #source_location,
30
+ # #parameters) can fail unpredictably on third-party code: NameError from removed
31
+ # constants, LoadError from autoload, ArgumentError from overridden #name methods,
32
+ # SecurityError in restricted contexts, and more.
33
+ #
34
+ # Rescue blocks are organized in three layers:
35
+ #
36
+ # 1. **Inner per-item rescues** (bare `rescue` in const_get loops, method.name):
37
+ # Skip one constant or name lookup without aborting the enclosing collection.
38
+ # These are expected failures — no logging needed.
39
+ #
40
+ # 2. **Method-level rescues** (`rescue => e` with logging):
41
+ # Catch failures in extract_method_scope, find_source_file, etc. Log at debug
42
+ # for post-hoc diagnosis, return nil or empty array. One bad method/module
43
+ # doesn't kill the entire class extraction.
44
+ #
45
+ # 3. **Top-level entry rescues** (`rescue => e` with logging):
46
+ # extract() and extract_all() are the error boundaries. Any exception that
47
+ # escapes layers 1-2 is caught here and logged.
48
+ #
49
+ # @api private
50
+ class Extractor
51
+ # EXCLUDED_COMMON_MODULES (defined further down): common Ruby core modules to exclude from included_modules extraction.
52
+ # These are ubiquitous mix-ins that don't provide meaningful context about the class structure.
53
+ # Kernel: Mixed into Object, appears in nearly all classes
54
+ # PP: Pretty-printing module, loaded by many tools
55
+ # JSON: JSON serialization module, loaded by many tools
56
+ # Enumerable: Core iteration protocol, extremely common
57
+ # Comparable: Core comparison protocol, extremely common
58
+ # Sentinel for unknown minimum line number. 0 means "available throughout the scope."
59
+ # Defined here (the only runtime consumer) so extractor.rb is self-contained.
60
+ # The parent module (lib/datadog/symbol_database.rb) defines the same values for
61
+ # documentation and external reference, but is not required by this file.
62
# Start-line sentinel used when a scope's first line is not known.
UNKNOWN_MIN_LINE = 0
# End-line sentinel: the largest signed 32-bit integer (2^31 - 1).
UNKNOWN_MAX_LINE = (2**31) - 1

# Module-name prefixes (compared with String#start_with?) that are left out
# of the reported included modules.
EXCLUDED_COMMON_MODULES = %w[Kernel PP:: JSON:: Enumerable Comparable].freeze

# Trace-point event types that count as targetable lines on METHOD scopes.
#   :line   - a line with executable bytecode
#   :return - the last expression before the method returns
# :call is left out on purpose; method entry is handled by method probes.
TARGETABLE_LINE_EVENTS = %i[line return].freeze
74
+
75
+ # Cached unbound Module#singleton_class? — dispatched explicitly so user classes
76
+ # that define their own `singleton_class?` (e.g. with required arguments) cannot
77
+ # intercept the predicate and cause the module to be silently dropped from
78
+ # extract_all. Cached at load time because collect_extractable_modules iterates
79
+ # ObjectSpace.each_object(Module) over tens of thousands of modules.
80
# Unbound Module#singleton_class?, looked up once when the class body is evaluated.
MODULE_SINGLETON_CLASS_PRED = Module.instance_method(:singleton_class?)
private_constant(:MODULE_SINGLETON_CLASS_PRED)
82
+
83
+ # @param logger [Logger] Logger instance (SymbolDatabase::Logger facade or compatible)
84
+ # @param settings [Configuration::Settings] Tracer settings
85
def initialize(logger:, settings:)
  # Both collaborators are stored untouched; nothing else happens at construction.
  @logger, @settings = logger, settings
end
89
+
90
+ # Extract symbols from a single module or class.
91
+ # Returns nil if module should be skipped (anonymous, gem code, stdlib).
92
+ #
93
+ # Returns a FILE scope wrapping the extracted CLASS or MODULE scope.
94
+ # The backend requires root-level scopes to be in ROOT_SCOPES (MODULE, JAR,
95
+ # ASSEMBLY, PACKAGE, FILE). FILE is the natural root for Ruby — one per source file.
96
+ #
97
+ # For full extraction with proper FQN-based nesting and per-file method grouping,
98
+ # use extract_all instead. This method is kept for single-module extraction in tests.
99
+ #
100
+ # @param mod [Module, Class] The module or class to extract from
101
+ # @return [Scope, nil] FILE scope wrapping extracted scope, or nil if filtered out
102
def extract(mod)
  # Only Module/Class objects can be introspected.
  return nil unless mod.is_a?(Module)

  # A nil name (anonymous module) or a non-user-code module is skipped.
  mod_name = safe_mod_name(mod)
  return nil if !mod_name || !user_code_module?(mod)

  source_file = find_source_file(mod)
  return nil unless source_file

  # Classes and plain modules are built by different helpers; either result
  # becomes the single child of the FILE scope.
  inner_scope = mod.is_a?(Class) ? extract_class_scope(mod) : extract_module_scope(mod)
  wrap_in_file_scope(source_file, [inner_scope])
rescue => e
  # Error boundary: any failure is logged at debug level and reported as nil.
  @logger.debug { "symdb: failed to extract #{mod_name || '<unknown>'}: #{e.class}: #{e.message}" }
  nil
end
123
+
124
+ # Extract symbols from all loaded modules and classes.
125
+ # Returns an array of FILE scopes with proper FQN-based nesting.
126
+ #
127
+ # Two-pass algorithm:
128
+ # Pass 1: Iterate ObjectSpace, collect all extractable modules with methods grouped by file
129
+ # Pass 2: Build FILE scope trees with nested MODULE/CLASS hierarchy from FQN splitting
130
+ #
131
+ # This is the production path used by Component. Methods are split by source file,
132
+ # so a class reopened across two files produces two FILE scopes, each with only
133
+ # the methods defined in that file.
134
+ #
135
+ # @return [Array<Scope>] Array of FILE scopes
136
def extract_all
  # Collect the extractable modules, group them into per-file trees, then
  # turn the trees into scopes.
  convert_trees_to_scopes(build_file_trees(collect_extractable_modules))
rescue => e
  # Error boundary: log at debug level and hand back an empty result.
  @logger.debug { "symdb: error in extract_all: #{e.class}: #{e.message}" }
  []
end
144
+
145
+ private
146
+
147
+ # Safe Module#name lookup — some classes override the singleton `name` method
148
+ # (e.g. Faker::Travel::Airport defines `def name(size:, region:)` in class << self,
149
+ # which shadows Module#name and raises ArgumentError when called without args).
150
+ # @param mod [Module] The module
151
+ # @return [String, nil] Module name or nil
152
def safe_mod_name(mod)
  # Bind Module's own #name to the receiver so a `name` method defined on the
  # module itself is never the one that gets called.
  name_reader = Module.instance_method(:name)
  name_reader.bind(mod).call
rescue => e
  @logger.debug { "symdb: safe_mod_name failed: #{e.class}: #{e.message}" }
  nil
end
158
+
159
+ # Check if module is from user code (not gems or stdlib)
160
+ # @param mod [Module] The module to check
161
+ # @return [Boolean] true if user code
162
def user_code_module?(mod)
  mod_name = safe_mod_name(mod)
  return false unless mod_name

  # The whole Datadog namespace is excluded. The bare root module needs its
  # own comparison because start_with?('Datadog::') does not match 'Datadog'.
  return false if mod_name == 'Datadog' || mod_name.start_with?('Datadog::')

  # Ruby's root classes are never user code, even though find_source_file can
  # resolve them to a user file through their constants.
  return false if [Object, BasicObject, Kernel, Module, Class].any? { |root| mod.equal?(root) }

  source_file = find_source_file(mod)
  source_file ? user_code_path?(source_file) : false
end
186
+
187
+ # Check if path is user code
188
+ # @param path [String] File path
189
+ # @return [Boolean] true if user code
190
def user_code_path?(path)
  # A missing or non-String path can never be user code. Answering false here
  # keeps callers from tripping over NoMethodError on nil.
  return false unless path.is_a?(String)

  # Only absolute paths are real source files. This also rejects every
  # pseudo-path such as '<main>', '<internal:...>' and a bare '(eval)', so no
  # separate '<internal:' check is needed afterwards.
  return false unless path.start_with?('/')
  # Only .rb files are Ruby source; this drops the Ruby binary, C extensions
  # (.so/.bundle) and other non-source files.
  return false unless path.end_with?('.rb')
  # Gem paths
  return false if path.include?('/gems/')
  # Ruby stdlib
  return false if path.include?('/ruby/')
  # Absolute paths that still embed an eval marker
  return false if path.include?('(eval)')
  # Test code (not application code)
  return false if path.include?('/spec/')
  return false if path.include?('/test/')
  # Datadog's own library code, e.g. monkey-patched methods whose source points
  # into lib/datadog/tracing/contrib/.
  return false if path.include?('/lib/datadog/')

  true
end
214
+
215
+ # Find source file for a module.
216
+ # Prefers user code paths over gem/stdlib paths. ActiveRecord models have
217
+ # generated methods (autosave callbacks) whose source is in the gem, but
218
+ # user-defined methods point to app/models/. Without this preference,
219
+ # AR models get filtered out as gem code.
220
+ #
221
+ # For namespace-only modules (no instance or singleton methods), falls back to
222
+ # Module#const_source_location (Ruby 2.7+) to locate the module via its constants.
223
+ # This handles patterns like `module ApplicationCable; class Channel...; end; end`
224
+ # where the namespace module itself has no methods but defines user-code classes.
225
+ #
226
+ # On Ruby 2.6 (where const_source_location is unavailable), namespace-only modules
227
+ # and classes whose only methods are generated (e.g., AR models with only associations)
228
+ # may not be found — the extraction silently omits them. This is a graceful degradation:
229
+ # fewer symbols uploaded, no errors.
230
+ #
231
+ # @param mod [Module] The module
232
+ # @return [String, nil] Source file path or nil
233
def find_source_file(mod)
  # First non-user-code path seen; returned only if no user-code path turns up.
  fallback = nil

  # Try instance methods first
  mod.instance_methods(false).each do |method_name|
    location = mod.instance_method(method_name).source_location
    next unless location

    path = location[0]
    next unless path
    return path if user_code_path?(path)

    fallback ||= path # steep:ignore
  end

  # Try singleton methods
  mod.singleton_methods(false).each do |method_name|
    location = mod.method(method_name).source_location
    next unless location

    path = location[0]
    next unless path
    return path if user_code_path?(path)

    fallback ||= path # steep:ignore
  end

  # Try const_source_location (Ruby 2.7+) to find where this class/module is declared.
  # The name is read through safe_mod_name: calling mod.name directly raises when
  # the module shadows `name` with required arguments, and the rescue below would
  # then throw away a fallback path that had already been found.
  mod_name = safe_mod_name(mod) if Module.method_defined?(:const_source_location)
  if mod_name
    # Look up the class/module by its last name component in its enclosing namespace.
    parts = mod_name.split('::')
    const_name = parts.last
    namespace = if parts.length > 1
      begin
        Object.const_get(parts[0..-2].join('::')) # steep:ignore
      rescue NameError
        nil
      end
    else
      Object
    end

    if namespace
      location = begin
        namespace.const_source_location(const_name)
      rescue => e
        @logger.debug { "symdb: const_source_location(#{const_name}) failed: #{e.class}: #{e.message}" }
        nil
      end

      if location && !location.empty?
        path = location[0]
        return path if path && !path.empty? && user_code_path?(path)
        fallback ||= ((path && !path.empty?) ? path : nil)
      end
    end

    # Also scan constants defined by mod itself (namespace-only modules).
    mod.constants(false).each do |child_const_name|
      location = begin
        mod.const_source_location(child_const_name)
      rescue => e
        @logger.debug { "symdb: const_source_location(#{child_const_name}) failed: #{e.class}: #{e.message}" }
        nil
      end
      next unless location && !location.empty?

      path = location[0]
      next unless path && !path.empty?

      return path if user_code_path?(path)

      fallback ||= path
    end
  end

  fallback
rescue => e
  @logger.debug { "symdb: error finding source file for #{safe_mod_name(mod) || '<unknown>'}: #{e.class}: #{e.message}" }
  nil
end
318
+
319
+ # Wrap inner scopes in a FILE root scope.
320
+ # FILE is the per-source-file root scope for Ruby uploads, analogous to
321
+ # Python's MODULE-per-file or Java's JAR.
322
+ #
323
+ # @param file_path [String] Source file path
324
+ # @param inner_scopes [Array<Scope>] Child scopes to nest under FILE
325
+ # @return [Scope] FILE scope wrapping the inner scopes
326
def wrap_in_file_scope(file_path, inner_scopes)
  digest = FileHash.compute(file_path, logger: @logger)
  # The file_hash key is attached only when a hash was actually produced.
  specifics = digest ? {file_hash: digest} : {}

  # The FILE scope is named after its path and uses the unknown-line sentinels.
  Scope.new(
    scope_type: 'FILE',
    name: file_path,
    source_file: file_path,
    start_line: UNKNOWN_MIN_LINE,
    end_line: UNKNOWN_MAX_LINE,
    language_specifics: specifics,
    scopes: inner_scopes
  )
end
341
+
342
+ # Extract MODULE scope (without file_hash — that belongs on the FILE root scope).
343
+ # Does not include nested classes — nesting is handled by extract_all via FQN splitting.
344
+ # @param mod [Module] The module
345
+ # @return [Scope] The module scope
346
def extract_module_scope(mod)
  source_file = find_source_file(mod)

  Scope.new(
    scope_type: 'MODULE',
    # Read the name through safe_mod_name so a module that shadows `name`
    # cannot raise here or report something other than its real constant path.
    name: safe_mod_name(mod),
    source_file: source_file,
    # A module scope carries no line information of its own.
    start_line: UNKNOWN_MIN_LINE,
    end_line: UNKNOWN_MAX_LINE,
    symbols: extract_scope_symbols(mod)
  )
end
358
+
359
+ # Extract CLASS scope
360
+ # @param klass [Class] The class
361
+ # @return [Scope] The class scope
362
def extract_class_scope(klass)
  # The line range is derived from the class's own instance methods.
  method_names = klass.instance_methods(false)
  start_line, end_line = calculate_class_line_range(klass, method_names)
  source_file = find_source_file(klass)

  Scope.new(
    scope_type: 'CLASS',
    # Read the name through safe_mod_name so a class that shadows `name`
    # cannot raise here or report something other than its real constant path.
    name: safe_mod_name(klass),
    source_file: source_file,
    start_line: start_line,
    end_line: end_line,
    language_specifics: build_class_language_specifics(klass),
    scopes: extract_method_scopes(klass),
    symbols: extract_scope_symbols(klass)
  )
end
378
+
379
+ # Calculate class line range from method locations.
380
+ # Start from the earliest method start, end at the latest method end (derived
381
+ # from iseq trace_points so methods spanning multiple lines aren't truncated).
382
+ # @param klass [Class] The class
383
+ # @param methods [Array<Symbol>] Method names
384
+ # @return [Array<Integer, Integer>] [start_line, end_line]
385
def calculate_class_line_range(klass, methods)
  starts = []
  ends = []
  methods.each do |method_name|
    method = klass.instance_method(method_name)
    location = method.source_location
    # Methods without a source location or file contribute nothing to the range.
    next unless location && location[0]

    starts << location[1]
    # Only the second element (the method's end line) is needed here.
    _ranges, method_end = extract_targetable_lines(method, location[1])
    ends << method_end
  end

  return [UNKNOWN_MIN_LINE, UNKNOWN_MAX_LINE] if starts.empty?

  [starts.min, ends.max]
rescue => e
  # The name is read through safe_mod_name: interpolating klass.name could
  # itself raise for a class that shadows `name`, and that exception would
  # escape this rescue.
  @logger.debug { "symdb: error calculating line range for #{safe_mod_name(klass) || '<unknown>'}: #{e.class}: #{e.message}" }
  [UNKNOWN_MIN_LINE, UNKNOWN_MAX_LINE]
end
404
+
405
+ # Build language specifics for CLASS
406
+ # @param klass [Class] The class
407
+ # @return [Hash] Language-specific metadata
408
def build_class_language_specifics(klass)
  specifics = {}

  # Superclass (Object and BasicObject are not reported). Emitted as a
  # one-element array under :super_classes. An anonymous superclass has a nil
  # name and is skipped. Identity checks are used so a class-level `==`
  # defined by user code is never invoked.
  superclass = klass.superclass
  if superclass && !superclass.equal?(Object) && !superclass.equal?(BasicObject)
    super_name = safe_mod_name(superclass)
    specifics[:super_classes] = [super_name] if super_name
  end

  # Included modules, minus anonymous ones and the common exclusions.
  # included_modules covers the whole ancestor chain, not only direct includes.
  # Names are read through safe_mod_name so that one mixin shadowing `name`
  # does not raise and discard every field collected by this method.
  included = klass.included_modules.map { |mixin| safe_mod_name(mixin) }.reject do |name|
    name.nil? || EXCLUDED_COMMON_MODULES.any? { |prefix| name.start_with?(prefix) }
  end
  specifics[:included_modules] = included unless included.empty?

  # Prepended modules: every named ancestor that comes before the class itself.
  prepended = []
  klass.ancestors.each do |ancestor|
    break if ancestor.equal?(klass)

    name = safe_mod_name(ancestor)
    prepended << name if name
  end
  specifics[:prepended_modules] = prepended unless prepended.empty?

  specifics
rescue => e
  @logger.debug { "symdb: error building language specifics for #{safe_mod_name(klass) || '<unknown>'}: #{e.class}: #{e.message}" }
  {}
end
447
+
448
+ # Extract method scopes from a class
449
+ # @param klass [Class] The class
450
+ # @return [Array<Scope>] Method scopes
451
def extract_method_scopes(klass)
  # Names of methods defined directly on klass, across all visibilities.
  # instance_methods(false) already reports public and protected methods,
  # so the combined list is de-duplicated.
  method_names = (
    klass.instance_methods(false) +
    klass.protected_instance_methods(false) +
    klass.private_instance_methods(false)
  ).uniq

  # extract_method_scope returns nil for methods it skips; keep only real scopes.
  method_names.each_with_object([]) do |method_name, scopes|
    method_scope = extract_method_scope(klass, method_name, :instance)
    scopes << method_scope if method_scope
  end
rescue => e
  # Use safe_mod_name for the label: raw klass.name can raise for classes that
  # override their singleton `name`, which would escape this best-effort handler.
  @logger.debug { "symdb: failed to extract methods from #{safe_mod_name(klass) || '<unknown>'}: #{e.class}: #{e.message}" }
  []
end
470
+
471
+ # Extract a single method scope
472
+ # @param klass [Class] The class
473
+ # @param method_name [Symbol] Method name
474
+ # @param method_type [Symbol] :instance or :class
475
+ # @return [Scope, nil] Method scope or nil
476
+ def extract_method_scope(klass, method_name, method_type)
477
+ method = klass.instance_method(method_name)
478
+ location = method.source_location
479
+
480
+ return nil unless location # Skip methods without source location
481
+
482
+ source_file, line = location
483
+ return nil unless user_code_path?(source_file) # Skip gem/stdlib methods
484
+
485
+ targetable_lines, end_line = extract_targetable_lines(method, line)
486
+
487
+ Scope.new(
488
+ scope_type: 'METHOD',
489
+ name: method_name.to_s,
490
+ source_file: source_file,
491
+ start_line: line,
492
+ end_line: end_line,
493
+ targetable_lines: targetable_lines,
494
+ language_specifics: {
495
+ visibility: method_visibility(klass, method_name),
496
+ method_type: method_type.to_s,
497
+ arity: method.arity
498
+ },
499
+ symbols: extract_method_parameters(method)
500
+ )
501
+ rescue => e
502
+ @logger.debug { "symdb: failed to extract method #{klass.name}##{method_name}: #{e.class}: #{e.message}" }
503
+ nil
504
+ end
505
+
506
+ # Get method visibility
507
+ # @param klass [Class] The class
508
+ # @param method_name [Symbol] Method name
509
+ # @return [String] 'public', 'private', or 'protected'
510
def method_visibility(klass, method_name)
  # Check the own-method lists from most to least restrictive; a name found in
  # neither list is reported as public.
  return 'private' if klass.private_instance_methods(false).include?(method_name)
  return 'protected' if klass.protected_instance_methods(false).include?(method_name)

  'public'
end
519
+
520
+ # Extract targetable lines and end_line from a method's bytecode.
521
+ # Returns [ranges, end_line] where ranges is an array of {start:, end:} hashes
522
+ # or nil if iseq is unavailable (C-extension methods).
523
+ # @param method [Method, UnboundMethod] The method
524
+ # @param start_line [Integer] Fallback end_line if iseq unavailable
525
+ # @return [Array(Array<Hash>, Integer), Array(nil, Integer)]
526
def extract_targetable_lines(method, start_line)
  iseq = RubyVM::InstructionSequence.of(method) # steep:ignore

  if iseq
    # Keep the line of every trace point whose event is targetable, then
    # normalise to a sorted, duplicate-free list.
    event_lines = iseq.trace_points.each_with_object([]) do |(line_no, event), acc|
      acc << line_no if TARGETABLE_LINE_EVENTS.include?(event)
    end
    event_lines = event_lines.uniq.sort

    # The list is sorted, so its last element is the highest line; fall back to
    # start_line when no event matched.
    end_line = event_lines.last || start_line
    ranges = build_targetable_ranges(event_lines)
    result = ranges.empty? ? nil : ranges

    @logger.debug do
      summary = result ? "#{ranges.size} range(s), lines #{event_lines.first}..#{event_lines.last}" : 'none (no matching events)'
      "symdb: #{method.name} targetable lines: #{summary}"
    end
    [result, end_line]
  else
    # Without an instruction sequence there are no lines to report; start_line
    # doubles as the end line.
    @logger.debug { "symdb: no iseq for #{method.name} (C extension or native), skipping targetable lines" }
    [nil, start_line]
  end
end
545
+
546
+ # Compress sorted line numbers into consecutive ranges.
547
+ # [4, 5, 6, 8, 10, 11] => [{start: 4, end: 6}, {start: 8, end: 8}, {start: 10, end: 11}]
548
+ # @param lines [Array<Integer>] Sorted, deduplicated line numbers
549
+ # @return [Array<Hash>] Array of {start:, end:} range hashes
550
def build_targetable_ranges(lines)
  # A new run begins wherever a line is not the immediate successor of the one
  # before it; each run collapses to its first and last line.
  # An empty input produces no runs and therefore an empty array.
  runs = lines.slice_when { |before, after| after != before + 1 }
  runs.map { |run| {start: run.first, end: run.last} }
end
569
+
570
+ # Extract method parameters as symbols.
571
+ # Does NOT include `self` — Ruby's implicit receiver is not a declared parameter.
572
+ # Java skips slot 0 (this) for the same reason. .NET uploads `this` but the web-ui
573
+ # filters it for dotnet. Ruby follows Java's approach: don't upload it.
574
+ # @param method [UnboundMethod] The method
575
+ # @return [Array<Symbol>] Parameter symbols
576
def extract_method_parameters(method)
  # Resolve the name up front so the outer rescue can still label its log line
  # even when reading the name itself fails.
  method_name =
    begin
      method.name.to_s
    rescue => e
      @logger.debug { "symdb: method.name failed: #{e.class}: #{e.message}" }
      'unknown'
    end

  params = method.parameters
  return [] if params.nil? || params.empty?

  Core::Utils::Array.filter_map(params) do |param_type, param_name|
    # Block parameters are not reported, and neither are parameters without a
    # name (the latter is normal for generated methods such as attr writers).
    next if param_type == :block || param_name.nil?

    # UNKNOWN_MIN_LINE: a parameter is available throughout the method.
    Symbol.new(symbol_type: 'ARG', name: param_name.to_s, line: UNKNOWN_MIN_LINE)
  end
rescue => e
  @logger.debug { "symdb: failed to extract parameters from #{method_name}: #{e.class}: #{e.message}" }
  []
end
605
+
606
+ # ── extract_all helpers ──────────────────────────────────────────────
607
+
608
+ # Pass 1: Collect all extractable modules with methods grouped by source file.
609
+ # @return [Hash] { mod_name => { mod:, methods_by_file: { path => [{name:, method:, type:}] } } }
610
def collect_extractable_modules
  collected = {}

  ObjectSpace.each_object(Module) do |mod|
    # Singleton classes are skipped before anything else is asked of them,
    # in particular before their name is resolved.
    next if MODULE_SINGLETON_CLASS_PRED.bind(mod).call

    mod_name = safe_mod_name(mod)
    next unless mod_name && user_code_module?(mod)

    by_file = group_methods_by_file(mod)

    # A module with no extractable methods (e.g. a pure namespace, or a class
    # holding only constants) is still recorded when a source file can be found.
    if by_file.empty?
      fallback_file = find_source_file(mod)
      by_file[fallback_file] = [] if fallback_file
    end

    collected[mod_name] = {mod: mod, methods_by_file: by_file} unless by_file.empty?
  rescue => e
    # mod_name is still nil here when the failure happened before it was resolved.
    @logger.debug { "symdb: error collecting #{mod_name || '<unknown>'}: #{e.class}: #{e.message}" }
  end

  collected
end
645
+
646
+ # Group a module's methods by their source file path.
647
+ # @param mod [Module] The module
648
+ # @return [Hash] { file_path => [{name:, method:, type:}] }
649
def group_methods_by_file(mod)
  # Each file path lazily gets its own list on first access.
  grouped = Hash.new { |hash, path| hash[path] = [] } # steep:ignore

  # Own instance methods of every visibility; the union drops the overlap
  # between the first two lists while preserving order.
  method_names = mod.instance_methods(false) |
    mod.protected_instance_methods(false) |
    mod.private_instance_methods(false)

  method_names.each do |method_name|
    unbound = mod.instance_method(method_name)
    location = unbound.source_location
    # Skip methods without a source location and those outside user code.
    next unless location
    path = location[0]
    next unless user_code_path?(path)

    grouped[path] << {name: method_name, method: unbound, type: :instance}
  rescue => e
    # A failure on one method must not abort the rest of the module.
    @logger.debug { "symdb: error grouping method #{method_name}: #{e.class}: #{e.message}" }
  end

  grouped
rescue => e
  @logger.debug { "symdb: error grouping methods: #{e.class}: #{e.message}" }
  {}
end
674
+
675
+ # Pass 2: Build per-file trees from collected entries.
676
+ # Uses hash nodes during construction, converted to Scope objects at the end.
677
+ #
678
+ # Node structure: { name:, type:, children: {name => node}, methods: [], mod:, source_file:, fqn: }
679
+ #
680
+ # @param entries [Hash] Output from collect_extractable_modules
681
+ # @return [Hash] { file_path => root_node }
682
def build_file_trees(entries)
  trees = {}

  # Shallow names first (fewer ':' characters), so a parent is placed before
  # anything nested inside it.
  by_depth = entries.sort_by { |name, _entry| name.count(':') }

  by_depth.each do |mod_name, entry|
    entry[:methods_by_file].each do |file_path, methods|
      # One FILE root per path, created the first time the path is seen.
      trees[file_path] ||= {
        name: file_path, type: 'FILE', children: {},
        methods: [], mod: nil, source_file: file_path, fqn: nil
      }
      place_in_tree(trees[file_path], mod_name.split('::'), entry[:mod], mod_name, methods, file_path)
    end
  rescue => e
    # A failure for one module leaves the other modules' trees intact.
    @logger.debug { "symdb: error building tree for #{mod_name}: #{e.class}: #{e.message}" }
  end

  trees
end
704
+
705
+ # Place a module/class in the file tree at the correct nesting depth.
706
+ # Creates intermediate namespace nodes as needed.
707
+ # mod_name is the safe name (resolved via Module#instance_method bind) —
708
+ # callers must not pass raw mod.name, since classes that override singleton
709
+ # name (e.g. Faker::Travel::Airport) will raise.
710
def place_in_tree(root, name_parts, mod, mod_name, methods, file_path)
  *namespace_parts, leaf_name = name_parts

  # Walk down from the root, reusing or creating one namespace node per
  # segment. The scope type of a new namespace node is looked up by its
  # fully-qualified name, since its module object is not at hand here.
  seen = []
  parent = namespace_parts.reduce(root) do |node, part|
    seen << part
    fqn = seen.join('::') # steep:ignore
    node[:children][part] ||= {
      name: fqn, type: resolve_scope_type(fqn),
      children: {}, methods: [], mod: nil,
      source_file: file_path, fqn: fqn
    }
  end

  scope_type = mod.is_a?(Class) ? 'CLASS' : 'MODULE'
  leaf = parent[:children][leaf_name]

  if leaf.nil?
    leaf = {
      name: mod_name,
      type: scope_type,
      children: {}, methods: [],
      mod: mod, source_file: file_path,
      fqn: mod_name
    }
    parent[:children][leaf_name] = leaf
  else
    # The node already exists (as an earlier namespace placeholder or from
    # another entry); the real module object overrides its type and mod.
    leaf[:type] = scope_type
    leaf[:mod] = mod
  end

  # Methods accumulate on the node across calls.
  leaf[:methods].concat(methods)
end
746
+
747
+ # Determine scope type (CLASS or MODULE) for a fully-qualified name.
748
+ # Looks up the actual Ruby constant to check if it's a Class.
749
+ # @param fqn [String] Fully-qualified name (e.g. "Authentication::Strategies")
750
+ # @return [String] 'CLASS' or 'MODULE'
751
def resolve_scope_type(fqn)
  # Resolve the name to the live constant; only a Class counts as 'CLASS'.
  return 'CLASS' if Object.const_get(fqn).is_a?(Class)

  'MODULE'
rescue => e
  # A name that cannot be resolved is reported as a module.
  @logger.debug { "symdb: resolve_scope_type(#{fqn}) failed: #{e.class}: #{e.message}, defaulting to MODULE" }
  'MODULE'
end
758
+
759
+ # Convert hash-based file trees to Scope objects.
760
+ # @param file_trees [Hash] { file_path => root_node }
761
+ # @return [Array<Scope>] Array of FILE scopes
762
def convert_trees_to_scopes(file_trees)
  file_trees.map do |file_path, root|
    # The hash is optional: it is only attached when FileHash.compute returns one.
    digest = FileHash.compute(file_path, logger: @logger)
    language_specifics = digest ? {file_hash: digest} : {}

    # A FILE scope has no meaningful line range of its own, hence the
    # UNKNOWN_* bounds; its content is the converted top-level nodes.
    top_level_scopes = root[:children].values.map { |child| convert_node_to_scope(child) }

    Scope.new(
      scope_type: 'FILE',
      name: file_path,
      source_file: file_path,
      start_line: UNKNOWN_MIN_LINE,
      end_line: UNKNOWN_MAX_LINE,
      language_specifics: language_specifics,
      scopes: top_level_scopes
    )
  end
end
779
+
780
+ # Convert a single hash node to a Scope object (recursive).
781
+ # @param node [Hash] Tree node
782
+ # @return [Scope] Scope object
783
def convert_node_to_scope(node)
  mod = node[:mod]

  # Method scopes come from the entries collected for this node; entries whose
  # scope cannot be built (nil) are dropped.
  method_scopes = Core::Utils::Array.filter_map(node[:methods]) do |info|
    build_instance_method_scope(mod, info[:name], info[:method])
  end

  # Nested modules/classes are converted recursively.
  nested_scopes = node[:children].values.map { |child| convert_node_to_scope(child) }

  # The node spans from the earliest method start to the latest method end;
  # sentinel values are ignored, and the sentinels are used when nothing is known.
  known_starts = method_scopes.map(&:start_line).select { |l| l != UNKNOWN_MIN_LINE } # steep:ignore
  known_ends = method_scopes.map(&:end_line).select { |l| l != UNKNOWN_MAX_LINE } # steep:ignore
  start_line = known_starts.empty? ? UNKNOWN_MIN_LINE : known_starts.min
  end_line = known_ends.empty? ? UNKNOWN_MAX_LINE : known_ends.max

  # Symbols and class metadata need the live module object; namespace
  # placeholders without one get empty values.
  symbols = mod ? extract_scope_symbols(mod) : []
  language_specifics = (node[:type] == 'CLASS' && mod) ? build_class_language_specifics(mod) : {}

  Scope.new(
    scope_type: node[:type],
    name: node[:name],
    source_file: node[:source_file],
    start_line: start_line,
    end_line: end_line,
    language_specifics: language_specifics,
    scopes: method_scopes + nested_scopes,
    symbols: symbols
  )
end
821
+
822
+ # Build a METHOD scope from a pre-resolved instance method.
823
+ # Used by extract_all path where methods are collected in Pass 1.
824
+ # @param klass [Module] The class/module (for visibility lookup)
825
+ # @param method_name [Symbol] Method name
826
+ # @param method [UnboundMethod] The method object
827
+ # @return [Scope, nil] Method scope or nil
828
def build_instance_method_scope(klass, method_name, method)
  location = method.source_location
  # Nothing to report for a method without a source location.
  return nil unless location

  source_file = location[0]
  line = location[1]
  targetable_lines, end_line = extract_targetable_lines(method, line)

  name = method_name.to_s
  language_specifics = {
    # Without a class/module to ask, visibility defaults to public.
    visibility: klass ? method_visibility(klass, method_name) : 'public', # steep:ignore
    method_type: 'instance',
    arity: method.arity
  }

  Scope.new(
    scope_type: 'METHOD',
    name: name,
    source_file: source_file,
    start_line: line,
    end_line: end_line,
    targetable_lines: targetable_lines,
    language_specifics: language_specifics,
    symbols: extract_method_parameters(method)
  )
rescue => e
  # The label goes through safe_mod_name and falls back to a placeholder when
  # there is no class or its name cannot be resolved.
  klass_name = klass ? (safe_mod_name(klass) || '<unknown>') : '<unknown>'
  @logger.debug { "symdb: failed to build method scope #{klass_name}##{method_name}: #{e.class}: #{e.message}" }
  nil
end
855
+
856
+ # Extract symbols (constants, class variables) from a module or class.
857
+ # Class variables are emitted only for classes; constants for both.
858
+ # @param mod [Module] The module or class
859
+ # @return [Array<Symbol>] Symbols
860
def extract_scope_symbols(mod)
  symbols = []

  # Class variables declared directly on the class (no inherited ones);
  # plain modules contribute none.
  if mod.is_a?(Class)
    own_class_vars = mod.class_variables(false).map do |var_name|
      Symbol.new(symbol_type: 'STATIC_FIELD', name: var_name.to_s, line: UNKNOWN_MIN_LINE)
    end
    symbols.concat(own_class_vars)
  end

  # Own constants. Constants still registered for autoload are skipped before
  # they are read, and constants that hold a module/class are not fields.
  mod.constants(false).each do |const_name|
    next if mod.autoload?(const_name)

    value = mod.const_get(const_name)
    next if value.is_a?(Module)

    symbols << Symbol.new(
      symbol_type: 'STATIC_FIELD',
      name: const_name.to_s,
      line: UNKNOWN_MIN_LINE,
      type: value.class.name
    )
  rescue NameError, LoadError, NoMethodError => e # standard:disable Lint/ShadowedException
    # Anticipated failures get their own message so that anything landing in
    # the generic rescue below stands out. Rescue clauses are tried in order,
    # so the bare rescue only sees what was not matched here.
    @logger.debug { "symdb: skipping module constant #{const_name}: #{e.class}: #{e.message}" }
  rescue => e
    @logger.debug { "symdb: unexpected error reading module constant #{const_name}: #{e.class}: #{e.message}" }
  end

  symbols
rescue => e
  mod_name = safe_mod_name(mod) || '<unknown>'
  @logger.debug { "symdb: failed to extract symbols from #{mod_name}: #{e.class}: #{e.message}" }
  []
end
904
+ end
905
+ end
906
+ end