datadog 2.31.0 → 2.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +17 -7
- data/ext/datadog_profiling_native_extension/collectors_thread_context.c +11 -4
- data/ext/datadog_profiling_native_extension/collectors_thread_context.h +6 -0
- data/ext/datadog_profiling_native_extension/extconf.rb +5 -4
- data/ext/datadog_profiling_native_extension/http_transport.c +10 -5
- data/ext/libdatadog_api/di.c +48 -0
- data/ext/libdatadog_api/extconf.rb +7 -4
- data/ext/libdatadog_extconf_helpers.rb +37 -0
- data/lib/datadog/ai_guard/configuration.rb +105 -2
- data/lib/datadog/ai_guard/evaluation.rb +1 -0
- data/lib/datadog/ai_guard/ext.rb +1 -0
- data/lib/datadog/appsec/autoload.rb +1 -1
- data/lib/datadog/appsec/component.rb +1 -1
- data/lib/datadog/appsec/configuration.rb +414 -1
- data/lib/datadog/appsec/contrib/devise/patches/signin_tracking_patch.rb +2 -1
- data/lib/datadog/appsec/contrib/rack/gateway/request.rb +1 -1
- data/lib/datadog/appsec/contrib/rails/patcher.rb +2 -2
- data/lib/datadog/appsec/metrics/telemetry.rb +13 -1
- data/lib/datadog/appsec/security_engine/runner.rb +1 -1
- data/lib/datadog/appsec/trace_keeper.rb +18 -6
- data/lib/datadog/appsec/utils/http/url_encoded.rb +2 -2
- data/lib/datadog/core/configuration/components.rb +1 -1
- data/lib/datadog/core/configuration/settings.rb +3 -0
- data/lib/datadog/core/configuration/supported_configurations.rb +2 -0
- data/lib/datadog/core/configuration.rb +1 -1
- data/lib/datadog/core/contrib/rails/utils.rb +1 -1
- data/lib/datadog/core/crashtracking/component.rb +3 -3
- data/lib/datadog/core/diagnostics/environment_logger.rb +3 -1
- data/lib/datadog/core/environment/container.rb +2 -2
- data/lib/datadog/core/feature_flags.rb +1 -1
- data/lib/datadog/core/metrics/client.rb +5 -5
- data/lib/datadog/core/remote/client.rb +1 -1
- data/lib/datadog/core/remote/component.rb +2 -2
- data/lib/datadog/core/runtime/metrics.rb +1 -1
- data/lib/datadog/core/telemetry/emitter.rb +1 -1
- data/lib/datadog/core/telemetry/event/app_started.rb +2 -2
- data/lib/datadog/core/transport/http.rb +2 -0
- data/lib/datadog/core/utils.rb +1 -1
- data/lib/datadog/core/workers/async.rb +1 -1
- data/lib/datadog/core.rb +1 -1
- data/lib/datadog/data_streams/configuration.rb +40 -1
- data/lib/datadog/data_streams/pathway_context.rb +1 -1
- data/lib/datadog/data_streams/processor.rb +1 -1
- data/lib/datadog/data_streams.rb +1 -1
- data/lib/datadog/di/base.rb +8 -5
- data/lib/datadog/di/code_tracker.rb +179 -1
- data/lib/datadog/di/component.rb +1 -1
- data/lib/datadog/di/configuration.rb +235 -2
- data/lib/datadog/di/instrumenter.rb +46 -26
- data/lib/datadog/di/probe_builder.rb +1 -1
- data/lib/datadog/di/probe_file_loader.rb +2 -2
- data/lib/datadog/di/probe_manager.rb +6 -6
- data/lib/datadog/di/probe_notification_builder.rb +1 -1
- data/lib/datadog/di/probe_notifier_worker.rb +2 -2
- data/lib/datadog/di/remote.rb +6 -6
- data/lib/datadog/di/serializer.rb +1 -1
- data/lib/datadog/di/transport/input.rb +3 -3
- data/lib/datadog/error_tracking/configuration.rb +55 -2
- data/lib/datadog/kit/enable_core_dumps.rb +1 -1
- data/lib/datadog/open_feature/component.rb +18 -1
- data/lib/datadog/open_feature/evaluation_engine.rb +3 -3
- data/lib/datadog/open_feature/exposures/reporter.rb +1 -1
- data/lib/datadog/open_feature/exposures/worker.rb +1 -1
- data/lib/datadog/open_feature/hooks/flag_eval_hook.rb +49 -0
- data/lib/datadog/open_feature/metrics/flag_eval_metrics.rb +149 -0
- data/lib/datadog/open_feature/provider.rb +19 -1
- data/lib/datadog/open_feature/remote.rb +1 -1
- data/lib/datadog/open_feature/transport.rb +1 -1
- data/lib/datadog/opentelemetry/metrics.rb +3 -3
- data/lib/datadog/opentelemetry/sdk/configurator.rb +1 -1
- data/lib/datadog/opentelemetry/sdk/metrics_exporter.rb +1 -1
- data/lib/datadog/profiling/collectors/code_provenance.rb +35 -9
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +31 -2
- data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +8 -2
- data/lib/datadog/profiling/collectors/info.rb +16 -3
- data/lib/datadog/profiling/component.rb +3 -5
- data/lib/datadog/profiling/exporter.rb +37 -12
- data/lib/datadog/profiling/ext.rb +0 -2
- data/lib/datadog/profiling/flush.rb +21 -12
- data/lib/datadog/profiling/http_transport.rb +12 -1
- data/lib/datadog/profiling/load_native_extension.rb +1 -1
- data/lib/datadog/profiling/profiler.rb +13 -1
- data/lib/datadog/profiling/scheduler.rb +2 -2
- data/lib/datadog/profiling/tasks/exec.rb +8 -3
- data/lib/datadog/profiling/tasks/help.rb +1 -0
- data/lib/datadog/profiling/tasks/setup.rb +2 -2
- data/lib/datadog/single_step_instrument.rb +1 -1
- data/lib/datadog/symbol_database/configuration.rb +65 -0
- data/lib/datadog/symbol_database/extractor.rb +915 -0
- data/lib/datadog/symbol_database/file_hash.rb +46 -0
- data/lib/datadog/symbol_database/logger.rb +43 -0
- data/lib/datadog/symbol_database/scope.rb +98 -0
- data/lib/datadog/symbol_database/service_version.rb +57 -0
- data/lib/datadog/symbol_database/symbol.rb +66 -0
- data/lib/datadog/symbol_database/transport/http/endpoint.rb +28 -0
- data/lib/datadog/symbol_database/transport/http.rb +45 -0
- data/lib/datadog/symbol_database/transport.rb +54 -0
- data/lib/datadog/symbol_database/uploader.rb +166 -0
- data/lib/datadog/symbol_database.rb +49 -0
- data/lib/datadog/tracing/buffer.rb +3 -3
- data/lib/datadog/tracing/configuration/settings.rb +1 -1
- data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +5 -3
- data/lib/datadog/tracing/contrib/action_view/events/render_template.rb +1 -1
- data/lib/datadog/tracing/contrib/active_job/events/discard.rb +1 -1
- data/lib/datadog/tracing/contrib/active_job/events/enqueue.rb +1 -1
- data/lib/datadog/tracing/contrib/active_job/events/enqueue_at.rb +1 -1
- data/lib/datadog/tracing/contrib/active_job/events/enqueue_retry.rb +1 -1
- data/lib/datadog/tracing/contrib/active_job/events/perform.rb +1 -1
- data/lib/datadog/tracing/contrib/active_job/events/retry_stopped.rb +1 -1
- data/lib/datadog/tracing/contrib/active_model_serializers/events/render.rb +1 -1
- data/lib/datadog/tracing/contrib/active_model_serializers/events/serialize.rb +1 -1
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +2 -2
- data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +1 -1
- data/lib/datadog/tracing/contrib/active_record/events/sql.rb +1 -1
- data/lib/datadog/tracing/contrib/active_record/utils.rb +1 -1
- data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +1 -1
- data/lib/datadog/tracing/contrib/active_support/notifications/subscription.rb +2 -2
- data/lib/datadog/tracing/contrib/aws/instrumentation.rb +1 -1
- data/lib/datadog/tracing/contrib/component.rb +1 -1
- data/lib/datadog/tracing/contrib/configuration/resolver.rb +7 -4
- data/lib/datadog/tracing/contrib/dalli/quantize.rb +1 -1
- data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +1 -1
- data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -2
- data/lib/datadog/tracing/contrib/extensions.rb +9 -0
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -2
- data/lib/datadog/tracing/contrib/grape/endpoint.rb +5 -5
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +2 -2
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +2 -2
- data/lib/datadog/tracing/contrib/http/instrumentation.rb +2 -2
- data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -2
- data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +2 -2
- data/lib/datadog/tracing/contrib/kafka/instrumentation/consumer.rb +2 -2
- data/lib/datadog/tracing/contrib/kafka/instrumentation/producer.rb +2 -2
- data/lib/datadog/tracing/contrib/karafka/patcher.rb +1 -1
- data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +3 -3
- data/lib/datadog/tracing/contrib/opensearch/patcher.rb +1 -1
- data/lib/datadog/tracing/contrib/presto/instrumentation.rb +3 -3
- data/lib/datadog/tracing/contrib/rack/patcher.rb +1 -1
- data/lib/datadog/tracing/contrib/rack/request_queue.rb +1 -1
- data/lib/datadog/tracing/contrib/rails/log_injection.rb +1 -1
- data/lib/datadog/tracing/contrib/rails/runner.rb +1 -1
- data/lib/datadog/tracing/contrib/rake/instrumentation.rb +2 -2
- data/lib/datadog/tracing/contrib/redis/quantize.rb +1 -1
- data/lib/datadog/tracing/contrib/redis/tags.rb +1 -1
- data/lib/datadog/tracing/contrib/sidekiq/utils.rb +1 -1
- data/lib/datadog/tracing/contrib/stripe/request.rb +1 -1
- data/lib/datadog/tracing/contrib.rb +8 -0
- data/lib/datadog/tracing/diagnostics/environment_logger.rb +3 -1
- data/lib/datadog/tracing/distributed/baggage.rb +59 -5
- data/lib/datadog/tracing/distributed/datadog.rb +11 -11
- data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +1 -1
- data/lib/datadog/tracing/distributed/propagation.rb +2 -2
- data/lib/datadog/tracing/distributed/trace_context.rb +74 -32
- data/lib/datadog/tracing/event.rb +1 -1
- data/lib/datadog/tracing/metadata/tagging.rb +2 -2
- data/lib/datadog/tracing/pipeline.rb +1 -1
- data/lib/datadog/tracing/remote.rb +1 -1
- data/lib/datadog/tracing/sampling/rule.rb +1 -1
- data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -2
- data/lib/datadog/tracing/sampling/span/rule_parser.rb +2 -2
- data/lib/datadog/tracing/span_operation.rb +3 -3
- data/lib/datadog/tracing/trace_operation.rb +4 -4
- data/lib/datadog/tracing/tracer.rb +5 -5
- data/lib/datadog/tracing/transport/io/client.rb +1 -1
- data/lib/datadog/tracing/workers.rb +2 -1
- data/lib/datadog/version.rb +1 -1
- metadata +18 -9
- data/lib/datadog/ai_guard/configuration/settings.rb +0 -113
- data/lib/datadog/appsec/configuration/settings.rb +0 -423
- data/lib/datadog/data_streams/configuration/settings.rb +0 -49
- data/lib/datadog/di/configuration/settings.rb +0 -243
- data/lib/datadog/error_tracking/configuration/settings.rb +0 -63
|
@@ -0,0 +1,915 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'scope'
|
|
4
|
+
require_relative 'symbol'
|
|
5
|
+
require_relative 'file_hash'
|
|
6
|
+
require_relative '../core/utils/array'
|
|
7
|
+
|
|
8
|
+
module Datadog
|
|
9
|
+
module SymbolDatabase
|
|
10
|
+
# Extracts symbol metadata from loaded Ruby modules and classes via introspection.
|
|
11
|
+
#
|
|
12
|
+
# Instance created by Component with injected dependencies (logger, settings,
|
|
13
|
+
# telemetry). All methods are instance methods accessing @logger, @settings,
|
|
14
|
+
# @telemetry directly — no parameter threading needed.
|
|
15
|
+
#
|
|
16
|
+
# Uses Ruby's reflection APIs (Module#constants, Class#instance_methods, Method#parameters)
|
|
17
|
+
# to build hierarchical Scope structures representing code organization.
|
|
18
|
+
# Filters to user code only (excludes gems, stdlib, test files).
|
|
19
|
+
#
|
|
20
|
+
# Extraction flow:
|
|
21
|
+
# 1. ObjectSpace.each_object(Module) - Iterate all loaded modules/classes
|
|
22
|
+
# 2. Filter to user code (user_code_module?)
|
|
23
|
+
# 3. Build MODULE or CLASS scope with nested METHOD scopes
|
|
24
|
+
# 4. Extract symbols: constants, class variables, method parameters
|
|
25
|
+
#
|
|
26
|
+
# Called by: Component.extract_and_upload (during upload trigger)
|
|
27
|
+
# Produces: Scope objects passed to ScopeBatcher for batching
|
|
28
|
+
# File hashing: Calls FileHash.compute for MODULE scopes
|
|
29
|
+
#
|
|
30
|
+
# Error handling strategy (defense-in-depth):
|
|
31
|
+
#
|
|
32
|
+
# The extractor introspects arbitrary Ruby objects via ObjectSpace. Ruby's
|
|
33
|
+
# reflection APIs (Module#name, #instance_methods, #const_get, #source_location,
|
|
34
|
+
# #parameters) can fail unpredictably on third-party code: NameError from removed
|
|
35
|
+
# constants, LoadError from autoload, ArgumentError from overridden #name methods,
|
|
36
|
+
# SecurityError in restricted contexts, and more.
|
|
37
|
+
#
|
|
38
|
+
# Rescue blocks are organized in three layers:
|
|
39
|
+
#
|
|
40
|
+
# 1. **Inner per-item rescues** (bare `rescue` in const_get loops, method.name):
|
|
41
|
+
# Skip one constant or name lookup without aborting the enclosing collection.
|
|
42
|
+
# These are expected failures — no logging needed.
|
|
43
|
+
#
|
|
44
|
+
# 2. **Method-level rescues** (`rescue => e` with logging):
|
|
45
|
+
# Catch failures in extract_method_scope, find_source_file, etc. Log at debug
|
|
46
|
+
# for post-hoc diagnosis, return nil or empty array. One bad method/module
|
|
47
|
+
# doesn't kill the entire class extraction.
|
|
48
|
+
#
|
|
49
|
+
# 3. **Top-level entry rescues** (`rescue => e` with logging + telemetry):
|
|
50
|
+
# extract() and extract_all() are the error boundaries. Any exception that
|
|
51
|
+
# escapes layers 1-2 is caught here, logged, and tracked via telemetry.
|
|
52
|
+
# These are the only rescue blocks that increment telemetry counters.
|
|
53
|
+
#
|
|
54
|
+
# @api private
|
|
55
|
+
class Extractor
|
|
56
|
+
# Sentinel for unknown minimum line number. 0 means "available throughout the scope."
# Defined here (the only runtime consumer) so extractor.rb is self-contained.
# The parent module (lib/datadog/symbol_database.rb) defines the same values for
# documentation and external reference, but is not required by this file.
UNKNOWN_MIN_LINE = 0
# PostgreSQL signed INT_MAX (2^31 - 1). Means "entire file" or "unknown end."
UNKNOWN_MAX_LINE = 2_147_483_647

# Common Ruby core modules to exclude from included_modules extraction.
# These are ubiquitous mix-ins that don't provide meaningful context about the class structure.
# Entries ending in '::' denote a whole namespace; bare entries name a single core module.
# Kernel: Mixed into Object, appears in nearly all classes
# PP: Pretty-printing module, loaded by many tools
# JSON: JSON serialization module, loaded by many tools
# Enumerable: Core iteration protocol, extremely common
# Comparable: Core comparison protocol, extremely common
EXCLUDED_COMMON_MODULES = ['Kernel', 'PP::', 'JSON::', 'Enumerable', 'Comparable'].freeze
|
|
72
|
+
|
|
73
|
+
# Event types from RubyVM::InstructionSequence#trace_points that count as
# injectable lines on METHOD scopes:
#   :line   — any line with executable bytecode (primary line probe target)
#   :return — last expression before method returns (DI instruments return events)
# :call is left out on purpose — method entry is handled by method probes, not line probes.
INJECTABLE_LINE_EVENTS = %i[line return].freeze
|
|
79
|
+
|
|
80
|
+
# Cached unbound Module#singleton_class? — dispatched explicitly so user classes
|
|
81
|
+
# that define their own `singleton_class?` (e.g. with required arguments) cannot
|
|
82
|
+
# intercept the predicate and cause the module to be silently dropped from
|
|
83
|
+
# extract_all. Cached at load time because collect_extractable_modules iterates
|
|
84
|
+
# ObjectSpace.each_object(Module) over tens of thousands of modules.
|
|
85
|
+
MODULE_SINGLETON_CLASS_PRED = Module.instance_method(:singleton_class?)
|
|
86
|
+
private_constant :MODULE_SINGLETON_CLASS_PRED
|
|
87
|
+
|
|
88
|
+
# @param logger [Logger] Logger instance (SymbolDatabase::Logger facade or compatible)
|
|
89
|
+
# @param settings [Configuration::Settings] Tracer settings
|
|
90
|
+
# @param telemetry [Telemetry, nil] Optional telemetry for metrics
|
|
91
|
+
def initialize(logger:, settings:, telemetry: nil)
  # Collaborators are injected once and stored as-is; telemetry may stay nil.
  @telemetry = telemetry
  @settings = settings
  @logger = logger
end
|
|
96
|
+
|
|
97
|
+
# Extract symbols from a single module or class.
|
|
98
|
+
# Returns nil if module should be skipped (anonymous, gem code, stdlib).
|
|
99
|
+
#
|
|
100
|
+
# Returns a FILE scope wrapping the extracted CLASS or MODULE scope.
|
|
101
|
+
# The backend requires root-level scopes to be in ROOT_SCOPES (MODULE, JAR,
|
|
102
|
+
# ASSEMBLY, PACKAGE, FILE). FILE is the natural root for Ruby — one per source file.
|
|
103
|
+
#
|
|
104
|
+
# For full extraction with proper FQN-based nesting and per-file method grouping,
|
|
105
|
+
# use extract_all instead. This method is kept for single-module extraction in tests.
|
|
106
|
+
#
|
|
107
|
+
# @param mod [Module, Class] The module or class to extract from
|
|
108
|
+
# @return [Scope, nil] FILE scope wrapping extracted scope, or nil if filtered out
|
|
109
|
+
def extract(mod)
  # Only modules and classes carry symbols.
  return unless mod.is_a?(Module)

  mod_name = safe_mod_name(mod)
  # Anonymous modules and anything that is not user code are skipped.
  return unless mod_name && user_code_module?(mod)

  file_path = find_source_file(mod)
  return unless file_path

  body_scope = mod.is_a?(Class) ? extract_class_scope(mod) : extract_module_scope(mod)

  # The extracted CLASS/MODULE scope is returned wrapped in a FILE scope.
  wrap_in_file_scope(file_path, [body_scope])
rescue => err
  # Error boundary: log, count the failure in telemetry, and report "nothing extracted".
  @logger.debug { "symdb: failed to extract #{mod_name || '<unknown>'}: #{err.class}: #{err.message}" }
  @telemetry&.inc('tracers', 'symbol_database.extract_error', 1)
  nil
end
|
|
131
|
+
|
|
132
|
+
# Extract symbols from all loaded modules and classes.
|
|
133
|
+
# Returns an array of FILE scopes with proper FQN-based nesting.
|
|
134
|
+
#
|
|
135
|
+
# Two-pass algorithm:
|
|
136
|
+
# Pass 1: Iterate ObjectSpace, collect all extractable modules with methods grouped by file
|
|
137
|
+
# Pass 2: Build FILE scope trees with nested MODULE/CLASS hierarchy from FQN splitting
|
|
138
|
+
#
|
|
139
|
+
# This is the production path used by Component. Methods are split by source file,
|
|
140
|
+
# so a class reopened across two files produces two FILE scopes, each with only
|
|
141
|
+
# the methods defined in that file.
|
|
142
|
+
#
|
|
143
|
+
# @return [Array<Scope>] Array of FILE scopes
|
|
144
|
+
def extract_all
  # Collect extractable modules, group them into per-file trees, then convert
  # those trees into FILE scopes.
  convert_trees_to_scopes(build_file_trees(collect_extractable_modules))
rescue => err
  # Error boundary: log, count the failure in telemetry, and return no scopes.
  @logger.debug { "symdb: error in extract_all: #{err.class}: #{err.message}" }
  @telemetry&.inc('tracers', 'symbol_database.extract_all_error', 1)
  []
end
|
|
153
|
+
|
|
154
|
+
private
|
|
155
|
+
|
|
156
|
+
# Safe Module#name lookup — some classes override the singleton `name` method
|
|
157
|
+
# (e.g. Faker::Travel::Airport defines `def name(size:, region:)` in class << self,
|
|
158
|
+
# which shadows Module#name and raises ArgumentError when called without args).
|
|
159
|
+
# @param mod [Module] The module
|
|
160
|
+
# @return [String, nil] Module name or nil
|
|
161
|
+
def safe_mod_name(mod)
  # Bind the original Module#name to mod so that a singleton `name` defined
  # on mod itself is never the method that gets invoked.
  builtin_name = Module.instance_method(:name)
  builtin_name.bind(mod).call
rescue => err
  @logger.debug { "symdb: safe_mod_name failed: #{err.class}: #{err.message}" }
  nil
end
|
|
167
|
+
|
|
168
|
+
# Check if module is from user code (not gems or stdlib)
|
|
169
|
+
# @param mod [Module] The module to check
|
|
170
|
+
# @return [Boolean] true if user code
|
|
171
|
+
def user_code_module?(mod)
  fq_name = safe_mod_name(mod)
  # Modules without a name are never treated as user code.
  return false unless fq_name

  # The whole Datadog namespace is excluded. The root module is compared
  # separately because start_with?('Datadog::') does not match bare 'Datadog'.
  return false if fq_name == 'Datadog' || fq_name.start_with?('Datadog::')

  # Ruby's root classes are rejected by identity before any source lookup.
  return false if [Object, BasicObject, Kernel, Module, Class].any? { |root| mod.equal?(root) }

  path = find_source_file(mod)
  path ? user_code_path?(path) : false
end
|
|
195
|
+
|
|
196
|
+
# Check if path is user code
|
|
197
|
+
# @param path [String] File path
|
|
198
|
+
# @return [Boolean] true if user code
|
|
199
|
+
def user_code_path?(path)
  # A nil or otherwise non-String path is simply "not user code". Raising
  # NoMethodError here would abort the caller's whole source-file lookup.
  return false unless path.is_a?(String)

  # Only absolute paths are real source files. Pseudo-paths like '<main>',
  # '<internal:...>' and '(eval)' never start with '/', so this guard rejects
  # them and no separate '<internal:' check is needed.
  return false unless path.start_with?('/')
  # Only .rb files are Ruby source. Excludes the Ruby binary
  # (/usr/local/bin/ruby), C extensions (.so/.bundle), and other
  # non-source files that appear in method source_location.
  return false unless path.end_with?('.rb')
  # Exclude gem paths
  return false if path.include?('/gems/')
  # Exclude Ruby stdlib
  return false if path.include?('/ruby/')
  # Exclude absolute paths that still carry an eval marker
  return false if path.include?('(eval)')
  # Exclude test code (not application code)
  return false if path.include?('/spec/')
  return false if path.include?('/test/')
  # Exclude Datadog's own library code (e.g., monkey-patched methods from tracing contrib).
  # Without this, stdlib classes like Net::HTTP appear as user code when dd-trace-rb
  # instruments them, because the patched method source points to lib/datadog/tracing/contrib/.
  return false if path.include?('/lib/datadog/')

  true
end
|
|
223
|
+
|
|
224
|
+
# Find source file for a module.
|
|
225
|
+
# Prefers user code paths over gem/stdlib paths. ActiveRecord models have
|
|
226
|
+
# generated methods (autosave callbacks) whose source is in the gem, but
|
|
227
|
+
# user-defined methods point to app/models/. Without this preference,
|
|
228
|
+
# AR models get filtered out as gem code.
|
|
229
|
+
#
|
|
230
|
+
# For namespace-only modules (no instance or singleton methods), falls back to
|
|
231
|
+
# Module#const_source_location (Ruby 2.7+) to locate the module via its constants.
|
|
232
|
+
# This handles patterns like `module ApplicationCable; class Channel...; end; end`
|
|
233
|
+
# where the namespace module itself has no methods but defines user-code classes.
|
|
234
|
+
#
|
|
235
|
+
# On Ruby 2.6 (where const_source_location is unavailable), namespace-only modules
|
|
236
|
+
# and classes whose only methods are generated (e.g., AR models with only associations)
|
|
237
|
+
# may not be found — the extraction silently omits them. This is a graceful degradation:
|
|
238
|
+
# fewer symbols uploaded, no errors.
|
|
239
|
+
#
|
|
240
|
+
# @param mod [Module] The module
|
|
241
|
+
# @return [String, nil] Source file path or nil
|
|
242
|
+
def find_source_file(mod)
  # First non-user-code path seen; returned only when no user-code path exists.
  fallback = nil

  # Try instance methods first
  mod.instance_methods(false).each do |method_name|
    location = mod.instance_method(method_name).source_location
    next unless location

    path = location[0]
    return path if user_code_path?(path)

    fallback ||= path # steep:ignore
  end

  # Try singleton methods
  mod.singleton_methods(false).each do |method_name|
    location = mod.method(method_name).source_location
    next unless location

    path = location[0]
    return path if user_code_path?(path)

    fallback ||= path # steep:ignore
  end

  # The remaining lookups need Module#const_source_location (Ruby 2.7+).
  return fallback unless Module.method_defined?(:const_source_location)

  # Resolve the name through safe_mod_name, not mod.name: a singleton `name`
  # override could raise (discarding the fallback found above) or return a
  # string that is not the constant path.
  mod_name = safe_mod_name(mod)
  return fallback unless mod_name

  # Locate where this class/module is declared. This handles two cases:
  # 1. Classes with no user-defined methods (e.g. AR models with only associations) whose
  #    generated methods point to gem code — we find the `class Foo` declaration instead.
  # 2. Namespace-only modules (`module Foo; class Bar; end; end`) with no methods at all.
  # The lookup uses the last name component inside its enclosing namespace.
  parts = mod_name.split('::')
  const_name = parts.last
  namespace =
    if parts.length > 1
      begin
        Object.const_get(parts[0..-2].join('::')) # steep:ignore
      rescue NameError
        nil
      end
    else
      Object
    end

  if namespace
    location = begin
      namespace.const_source_location(const_name)
    rescue => lookup_error
      @logger.debug { "symdb: const_source_location(#{const_name}) failed: #{lookup_error.class}: #{lookup_error.message}" }
      nil
    end

    if location && !location.empty?
      path = location[0]
      return path if path && !path.empty? && user_code_path?(path)

      fallback ||= ((path && !path.empty?) ? path : nil)
    end
  end

  # Also scan constants defined by mod itself (namespace-only modules).
  mod.constants(false).each do |child_const_name|
    location = begin
      mod.const_source_location(child_const_name)
    rescue => lookup_error
      @logger.debug { "symdb: const_source_location(#{child_const_name}) failed: #{lookup_error.class}: #{lookup_error.message}" }
      nil
    end
    next unless location && !location.empty?

    path = location[0]
    next unless path && !path.empty?

    return path if user_code_path?(path)

    fallback ||= path
  end

  fallback
rescue => e
  @logger.debug { "symdb: error finding source file for #{safe_mod_name(mod) || '<unknown>'}: #{e.class}: #{e.message}" }
  nil
end
|
|
327
|
+
|
|
328
|
+
# Wrap inner scopes in a FILE root scope.
|
|
329
|
+
# FILE is the per-source-file root scope for Ruby uploads, analogous to
|
|
330
|
+
# Python's MODULE-per-file or Java's JAR.
|
|
331
|
+
#
|
|
332
|
+
# @param file_path [String] Source file path
|
|
333
|
+
# @param inner_scopes [Array<Scope>] Child scopes to nest under FILE
|
|
334
|
+
# @return [Scope] FILE scope wrapping the inner scopes
|
|
335
|
+
def wrap_in_file_scope(file_path, inner_scopes)
  digest = FileHash.compute(file_path, logger: @logger)
  # file_hash is reported only when FileHash.compute returned a value.
  specifics = digest ? {file_hash: digest} : {}

  # The FILE scope is named after its path and uses the "unknown" line sentinels.
  Scope.new(
    scope_type: 'FILE',
    name: file_path,
    source_file: file_path,
    start_line: UNKNOWN_MIN_LINE,
    end_line: UNKNOWN_MAX_LINE,
    language_specifics: specifics,
    scopes: inner_scopes
  )
end
|
|
350
|
+
|
|
351
|
+
# Extract MODULE scope (without file_hash — that belongs on the FILE root scope).
|
|
352
|
+
# Does not include nested classes — nesting is handled by extract_all via FQN splitting.
|
|
353
|
+
# @param mod [Module] The module
|
|
354
|
+
# @return [Scope] The module scope
|
|
355
|
+
def extract_module_scope(mod)
  source_file = find_source_file(mod)

  Scope.new(
    scope_type: 'MODULE',
    # safe_mod_name bypasses any singleton `name` defined on mod, which could
    # raise or return something other than the constant path.
    name: safe_mod_name(mod),
    source_file: source_file,
    # Module scopes always use the "unknown" line sentinels.
    start_line: UNKNOWN_MIN_LINE,
    end_line: UNKNOWN_MAX_LINE,
    symbols: extract_scope_symbols(mod)
  )
end
|
|
367
|
+
|
|
368
|
+
# Extract CLASS scope
|
|
369
|
+
# @param klass [Class] The class
|
|
370
|
+
# @return [Scope] The class scope
|
|
371
|
+
def extract_class_scope(klass)
  # instance_methods(false) lists only public and protected methods. Private
  # ones (e.g. `initialize`) are added so the class line range is computed
  # from every method defined on the class.
  methods = klass.instance_methods(false) | klass.private_instance_methods(false)
  start_line, end_line = calculate_class_line_range(klass, methods)
  source_file = find_source_file(klass)

  Scope.new(
    scope_type: 'CLASS',
    # safe_mod_name bypasses any singleton `name` defined on klass, which could
    # raise or return something other than the constant path.
    name: safe_mod_name(klass),
    source_file: source_file,
    start_line: start_line,
    end_line: end_line,
    language_specifics: build_class_language_specifics(klass),
    scopes: extract_method_scopes(klass),
    symbols: extract_scope_symbols(klass)
  )
end
|
|
387
|
+
|
|
388
|
+
# Calculate class line range from method locations.
|
|
389
|
+
# Start from the earliest method start, end at the latest method end (derived
|
|
390
|
+
# from iseq trace_points so methods spanning multiple lines aren't truncated).
|
|
391
|
+
# @param klass [Class] The class
|
|
392
|
+
# @param methods [Array<Symbol>] Method names
|
|
393
|
+
# @return [Array<Integer, Integer>] [start_line, end_line]
|
|
394
|
+
def calculate_class_line_range(klass, methods)
  # Line numbers are only comparable within one file. Methods located in a
  # different file than the class's own source file (gem-generated methods,
  # reopenings in other files) are left out of the range. When the source
  # file cannot be determined, every located method is used.
  class_file = find_source_file(klass)

  starts = []
  ends = []
  methods.each do |method_name|
    method = klass.instance_method(method_name)
    path, line = method.source_location
    next unless path
    next if class_file && path != class_file

    starts << line
    # Only the end line (second element) of the helper's result is needed here.
    _ranges, method_end = extract_injectable_lines(method, line)
    ends << method_end
  end

  return [UNKNOWN_MIN_LINE, UNKNOWN_MAX_LINE] if starts.empty?

  [starts.min, ends.max]
rescue => e
  # safe_mod_name keeps this handler from raising when klass shadows `name`.
  @logger.debug { "symdb: error calculating line range for #{safe_mod_name(klass) || '<unknown>'}: #{e.class}: #{e.message}" }
  [UNKNOWN_MIN_LINE, UNKNOWN_MAX_LINE]
end
|
|
413
|
+
|
|
414
|
+
# Build language specifics for CLASS
|
|
415
|
+
# @param klass [Class] The class
|
|
416
|
+
# @return [Hash] Language-specific metadata
|
|
417
|
+
def build_class_language_specifics(klass)
  specifics = {}

  # Superclass (exclude Object and BasicObject).
  # Emitted as an array named super_classes — consistent with Java, .NET, and Python.
  # Anonymous superclasses (class Foo < Class.new { ... }) have a nil name and are skipped.
  # Names are resolved with safe_mod_name throughout this method so that a
  # module shadowing `name` cannot raise and discard all specifics.
  parent = klass.superclass
  if parent && parent != Object && parent != BasicObject
    super_name = safe_mod_name(parent)
    specifics[:super_classes] = [super_name] if super_name
  end

  # Included modules (exclude common ones).
  # included_modules returns the entire ancestor chain's mixins, not only directly
  # included ones. This is intentional: the field reports "modules this class
  # responds to," which is what the consumer (UI navigation, probe context) needs.
  included = []
  klass.included_modules.each do |included_mod|
    name = safe_mod_name(included_mod)
    next unless name

    # Entries ending in '::' exclude a whole namespace. Bare entries exclude
    # that exact module (and anything nested under it), so a user module such
    # as `EnumerableHelpers` is not dropped by a plain prefix match.
    excluded = EXCLUDED_COMMON_MODULES.any? do |entry|
      if entry.end_with?('::')
        name.start_with?(entry)
      else
        name == entry || name.start_with?("#{entry}::")
      end
    end
    included << name unless excluded
  end
  specifics[:included_modules] = included unless included.empty?

  # Prepended modules: every named ancestor that comes before the class itself
  # (prepending inserts modules ahead of the class in the ancestor chain).
  prepended = []
  klass.ancestors.each do |ancestor|
    break if ancestor == klass

    name = safe_mod_name(ancestor)
    prepended << name if name
  end
  specifics[:prepended_modules] = prepended unless prepended.empty?

  specifics
rescue => e
  # safe_mod_name keeps this handler from raising when klass shadows `name`.
  @logger.debug { "symdb: error building language specifics for #{safe_mod_name(klass) || '<unknown>'}: #{e.class}: #{e.message}" }
  {}
end
|
|
456
|
+
|
|
457
|
+
# Build METHOD scopes for the instance methods defined directly on a class.
# Reflection is called with +false+, so inherited methods are not listed.
# Any StandardError is logged at debug level and an empty list is returned.
# @param klass [Class] The class
# @return [Array<Scope>] Method scopes (methods yielding no scope are omitted)
def extract_method_scopes(klass)
  # Union of public, protected and private names; Array#| keeps first-seen
  # order and drops duplicates.
  method_names = klass.instance_methods(false) |
    klass.protected_instance_methods(false) |
    klass.private_instance_methods(false)

  method_names.each_with_object([]) do |method_name, collected|
    built = extract_method_scope(klass, method_name, :instance)
    collected << built if built
  end
rescue => e
  @logger.debug { "symdb: failed to extract methods from #{klass.name}: #{e.class}: #{e.message}" }
  []
end
|
|
479
|
+
|
|
480
|
+
# Build the METHOD scope for one instance method of a class.
# Returns nil when the method has no source location, when its file is not
# accepted by user_code_path?, or when any StandardError occurs (logged at debug).
# @param klass [Class] The class
# @param method_name [Symbol] Method name
# @param method_type [Symbol] :instance or :class (reported as a string in language_specifics)
# @return [Scope, nil] Method scope or nil
def extract_method_scope(klass, method_name, method_type)
  unbound = klass.instance_method(method_name)

  location = unbound.source_location
  return unless location # no source location available

  path, first_line = location
  return unless user_code_path?(path) # only user code is reported

  line_ranges, last_line = extract_injectable_lines(unbound, first_line)

  Scope.new(
    scope_type: 'METHOD',
    name: method_name.to_s,
    source_file: path,
    start_line: first_line,
    end_line: last_line,
    injectible_lines: line_ranges,
    language_specifics: {
      visibility: method_visibility(klass, method_name),
      method_type: method_type.to_s,
      arity: unbound.arity
    },
    symbols: extract_method_parameters(unbound)
  )
rescue => e
  @logger.debug { "symdb: failed to extract method #{klass.name}##{method_name}: #{e.class}: #{e.message}" }
  nil
end
|
|
514
|
+
|
|
515
|
+
# Report the visibility of an instance method defined directly on a class.
# Private is checked first, then protected; anything else is reported as public.
# @param klass [Class] The class
# @param method_name [Symbol] Method name
# @return [String] 'public', 'private', or 'protected'
def method_visibility(klass, method_name)
  return 'private' if klass.private_instance_methods(false).include?(method_name)
  return 'protected' if klass.protected_instance_methods(false).include?(method_name)

  'public'
end
|
|
528
|
+
|
|
529
|
+
# Derive the injectable line ranges and the end line of a method from its
# instruction sequence.
# When no instruction sequence is available (e.g. C-extension methods), the
# ranges are nil and start_line is used as the end line.
# @param method [Method, UnboundMethod] The method
# @param start_line [Integer] Fallback end_line if iseq unavailable
# @return [Array(Array<Hash>, Integer), Array(nil, Integer)] [ranges, end_line],
#   where ranges is an array of {start:, end:} hashes or nil
def extract_injectable_lines(method, start_line)
  iseq = RubyVM::InstructionSequence.of(method) # steep:ignore

  if iseq
    # Keep only the line numbers whose trace event is in INJECTABLE_LINE_EVENTS.
    event_lines = iseq.trace_points.each_with_object([]) do |(line_no, event), acc|
      acc << line_no if INJECTABLE_LINE_EVENTS.include?(event)
    end
    event_lines.uniq!
    event_lines.sort!

    last_line = event_lines.max || start_line
    spans = build_injectable_ranges(event_lines)
    outcome = spans.empty? ? nil : spans
    @logger.debug { "symdb: #{method.name} injectable lines: #{outcome ? "#{spans.size} range(s), lines #{event_lines.first}..#{event_lines.last}" : 'none (no matching events)'}" }
    [outcome, last_line]
  else
    @logger.debug { "symdb: no iseq for #{method.name} (C extension or native), skipping injectable lines" }
    [nil, start_line]
  end
end
|
|
554
|
+
|
|
555
|
+
# Collapse line numbers into runs of consecutive values.
# A new run starts whenever a line is not exactly one greater than its predecessor.
# [4, 5, 6, 8, 10, 11] => [{start: 4, end: 6}, {start: 8, end: 8}, {start: 10, end: 11}]
# @param lines [Array<Integer>] Sorted, deduplicated line numbers
# @return [Array<Hash>] Array of {start:, end:} range hashes (empty for empty input)
def build_injectable_ranges(lines)
  lines
    .slice_when { |earlier, later| later != earlier + 1 }
    .map { |run| {start: run.first, end: run.last} }
end
|
|
578
|
+
|
|
579
|
+
# Turn a method's declared parameters into ARG symbols.
# The implicit receiver (`self`) is not a declared parameter and is never emitted.
# Block parameters and parameters without a name are left out.
# Any StandardError is logged at debug level and an empty list is returned.
# @param method [UnboundMethod] The method
# @return [Array<Symbol>] Parameter symbols
def extract_method_parameters(method)
  # Resolve a printable name up front so the error log below can always use it.
  method_name = begin
    method.name.to_s
  rescue => e
    @logger.debug { "symdb: method.name failed: #{e.class}: #{e.message}" }
    'unknown'
  end

  declared = method.parameters
  return [] if declared.nil? || declared.empty?

  Core::Utils::Array.filter_map(declared) do |kind, identifier|
    # Block parameters are not reported; a nil name is normal for generated
    # methods (attr_writer, attr_accessor).
    next if kind == :block || identifier.nil?

    Symbol.new(
      symbol_type: 'ARG',
      name: identifier.to_s,
      line: UNKNOWN_MIN_LINE # parameters are available in the entire method
    )
  end
rescue => e
  @logger.debug { "symdb: failed to extract parameters from #{method_name}: #{e.class}: #{e.message}" }
  []
end
|
|
614
|
+
|
|
615
|
+
# ── extract_all helpers ──────────────────────────────────────────────
|
|
616
|
+
|
|
617
|
+
# Pass 1: walk every Module in ObjectSpace and keep the extractable ones,
# with their methods grouped by source file.
# A failure on one module is logged at debug level and does not stop the walk.
# @return [Hash] { mod_name => { mod:, methods_by_file: { path => [{name:, method:, type:}] } } }
def collect_extractable_modules
  collected = {}

  ObjectSpace.each_object(Module) do |mod|
    # Singleton classes (per-object metaclasses) are never user-code classes and
    # cannot be instrumented per instance. Skipping them also avoids Module#name
    # on unnamed singleton classes, which on Ruby 2.6 is O(ancestors) and was
    # measured at ~20ms per call; Ruby 2.7+ optimized that path.
    next if MODULE_SINGLETON_CLASS_PRED.bind(mod).call

    mod_name = safe_mod_name(mod)
    next unless mod_name && user_code_module?(mod)

    by_file = group_methods_by_file(mod)

    # Namespace modules and classes holding only constants have no methods;
    # fall back to find_source_file so they are still attached to a file.
    if by_file.empty?
      fallback_file = find_source_file(mod)
      next unless fallback_file

      by_file[fallback_file] = []
    end

    collected[mod_name] = {mod: mod, methods_by_file: by_file}
  rescue => e
    @logger.debug { "symdb: error collecting #{mod_name || '<unknown>'}: #{e.class}: #{e.message}" }
  end

  collected
end
|
|
654
|
+
|
|
655
|
+
# Bucket the instance methods defined directly on a module by source file.
# Methods without a source location, or whose file is rejected by
# user_code_path?, are left out. A failure on a single method is logged and
# skipped; a failure of the whole operation is logged and yields {}.
# @param mod [Module] The module
# @return [Hash] { file_path => [{name:, method:, type:}] }
def group_methods_by_file(mod)
  grouped = Hash.new { |hash, path| hash[path] = [] } # steep:ignore

  # Union of public, protected and private names, de-duplicated in first-seen order.
  method_names = mod.instance_methods(false) |
    mod.protected_instance_methods(false) |
    mod.private_instance_methods(false)

  method_names.each do |method_name|
    unbound = mod.instance_method(method_name)
    location = unbound.source_location
    next unless location && user_code_path?(location[0])

    grouped[location[0]] << {name: method_name, method: unbound, type: :instance}
  rescue => e
    @logger.debug { "symdb: error grouping method #{method_name}: #{e.class}: #{e.message}" }
  end

  grouped
rescue => e
  @logger.debug { "symdb: error grouping methods: #{e.class}: #{e.message}" }
  {}
end
|
|
683
|
+
|
|
684
|
+
# Pass 2: arrange the collected modules into one tree per source file.
# Trees are made of plain hash nodes here; conversion to Scope objects is done
# separately.
#
# Node structure: { name:, type:, children: {name => node}, methods: [], mod:, source_file:, fqn: }
#
# A failure on one module is logged at debug level and the remaining modules
# are still processed.
# @param entries [Hash] Output from collect_extractable_modules
# @return [Hash] { file_path => root_node }
def build_file_trees(entries)
  trees = {}

  # Shallower names first, so parents are placed before their children and
  # intermediate nodes end up with the correct scope type.
  by_depth = entries.sort_by { |fqn, _| fqn.count(':') }

  by_depth.each do |mod_name, entry|
    entry[:methods_by_file].each do |file_path, methods|
      trees[file_path] ||= {
        name: file_path, type: 'FILE', children: {},
        methods: [], mod: nil, source_file: file_path, fqn: nil
      }
      place_in_tree(trees[file_path], mod_name.split('::'), entry[:mod], mod_name, methods, file_path)
    end
  rescue => e
    @logger.debug { "symdb: error building tree for #{mod_name}: #{e.class}: #{e.message}" }
  end

  trees
end
|
|
713
|
+
|
|
714
|
+
# Insert a module/class into a file tree at the depth given by its name parts,
# creating any missing namespace nodes on the way down, and append the given
# methods to the module's node.
# mod_name must be the safe name (resolved via Module#instance_method bind):
# callers must not pass raw mod.name, because classes that override the
# singleton name method (e.g. Faker::Travel::Airport) will raise.
def place_in_tree(root, name_parts, mod, mod_name, methods, file_path)
  *namespaces, leaf_key = name_parts

  # Walk (or create) one node per namespace segment; the last segment is the leaf.
  parent = namespaces.each_with_index.reduce(root) do |node, (segment, depth)|
    qualified = name_parts[0..depth].join('::') # steep:ignore
    node[:children][segment] ||= {
      name: qualified, type: resolve_scope_type(qualified),
      children: {}, methods: [], mod: nil,
      source_file: file_path, fqn: qualified
    }
  end

  kind = mod.is_a?(Class) ? 'CLASS' : 'MODULE'
  siblings = parent[:children]
  target = siblings[leaf_key]

  if target
    # The node already exists (created as an intermediate or by another entry);
    # the actual module object is authoritative for type and mod.
    target[:type] = kind
    target[:mod] = mod
  else
    target = siblings[leaf_key] = {
      name: mod_name,
      type: kind,
      children: {}, methods: [],
      mod: mod, source_file: file_path,
      fqn: mod_name
    }
  end

  # Attach the methods found in this file.
  target[:methods].concat(methods)
end
|
|
755
|
+
|
|
756
|
+
# Determine scope type (CLASS or MODULE) for a fully-qualified name by looking
# up the actual Ruby constant and checking whether it is a Class.
#
# Falls back to 'MODULE' (with a debug log) when the lookup fails. LoadError is
# rescued explicitly: a constant lookup can trigger an autoload, and a failed
# autoload raises LoadError, which is a ScriptError and is therefore not covered
# by a bare `rescue`.
# @param fqn [String] Fully-qualified name (e.g. "Authentication::Strategies")
# @return [String] 'CLASS' or 'MODULE'
def resolve_scope_type(fqn)
  Object.const_get(fqn).is_a?(Class) ? 'CLASS' : 'MODULE'
rescue StandardError, LoadError => e
  @logger.debug { "symdb: resolve_scope_type(#{fqn}) failed: #{e.class}: #{e.message}, defaulting to MODULE" }
  'MODULE'
end
|
|
767
|
+
|
|
768
|
+
# Turn each per-file hash tree into a FILE Scope whose child scopes are the
# converted top-level nodes of that tree.
# The file hash is added to language_specifics only when FileHash.compute
# returns a value.
# @param file_trees [Hash] { file_path => root_node }
# @return [Array<Scope>] Array of FILE scopes
def convert_trees_to_scopes(file_trees)
  file_trees.map do |file_path, root|
    digest = FileHash.compute(file_path, logger: @logger)
    nested = root[:children].values.map { |child| convert_node_to_scope(child) }

    Scope.new(
      scope_type: 'FILE',
      name: file_path,
      source_file: file_path,
      start_line: UNKNOWN_MIN_LINE,
      end_line: UNKNOWN_MAX_LINE,
      language_specifics: digest ? {file_hash: digest} : {},
      scopes: nested
    )
  end
end
|
|
788
|
+
|
|
789
|
+
# Recursively convert one hash tree node (and its descendants) into a Scope.
# @param node [Hash] Tree node
# @return [Scope] Scope object
def convert_node_to_scope(node)
  mod = node[:mod]

  # METHOD scopes for the methods collected on this node.
  method_scopes = Core::Utils::Array.filter_map(node[:methods]) do |info|
    build_instance_method_scope(mod, info[:name], info[:method])
  end

  # Nested modules/classes.
  child_scopes = node[:children].values.map { |child| convert_node_to_scope(child) }

  # The node spans from the earliest method start to the latest method end.
  # Taking the latest start instead would underreport end_line when the last
  # method covers several lines.
  known_starts = method_scopes.map(&:start_line).select { |l| l != UNKNOWN_MIN_LINE } # steep:ignore
  known_ends = method_scopes.map(&:end_line).select { |l| l != UNKNOWN_MAX_LINE } # steep:ignore
  first_line = known_starts.empty? ? UNKNOWN_MIN_LINE : known_starts.min
  last_line = known_ends.empty? ? UNKNOWN_MAX_LINE : known_ends.max

  # Constants / class variables need the real module object.
  symbols = mod ? extract_scope_symbols(mod) : []

  # Class-specific details are only built for CLASS nodes backed by a module object.
  specifics = (node[:type] == 'CLASS' && mod) ? build_class_language_specifics(mod) : {}

  Scope.new(
    scope_type: node[:type],
    name: node[:name],
    source_file: node[:source_file],
    start_line: first_line,
    end_line: last_line,
    language_specifics: specifics,
    scopes: method_scopes + child_scopes,
    symbols: symbols
  )
end
|
|
830
|
+
|
|
831
|
+
# Build a METHOD scope from an instance method that has already been resolved.
# Used by the extract_all path, where methods are collected in Pass 1.
# Returns nil when the method has no source location or when any StandardError
# occurs (logged at debug level).
# @param klass [Module] The class/module (for visibility lookup); when nil the
#   visibility is reported as 'public'
# @param method_name [Symbol] Method name
# @param method [UnboundMethod] The method object
# @return [Scope, nil] Method scope or nil
def build_instance_method_scope(klass, method_name, method)
  location = method.source_location
  return unless location

  path, first_line = location
  line_ranges, last_line = extract_injectable_lines(method, first_line)

  Scope.new(
    scope_type: 'METHOD',
    name: method_name.to_s,
    source_file: path,
    start_line: first_line,
    end_line: last_line,
    injectible_lines: line_ranges,
    language_specifics: {
      visibility: klass ? method_visibility(klass, method_name) : 'public', # steep:ignore
      method_type: 'instance',
      arity: method.arity
    },
    symbols: extract_method_parameters(method)
  )
rescue => e
  owner_name = (klass && safe_mod_name(klass)) || '<unknown>'
  @logger.debug { "symdb: failed to build method scope #{owner_name}##{method_name}: #{e.class}: #{e.message}" }
  nil
end
|
|
864
|
+
|
|
865
|
+
# Collect STATIC_FIELD symbols for a module or class: class variables (classes
# only) followed by constants whose value is not itself a module/class.
# A failure on a single constant is logged and skipped; a failure of the whole
# operation is logged and yields an empty list.
# @param mod [Module] The module or class
# @return [Array<Symbol>] Symbols
def extract_scope_symbols(mod)
  # Class variables exist only on classes.
  class_var_names = mod.is_a?(Class) ? mod.class_variables(false) : []
  symbols = class_var_names.map do |var_name|
    Symbol.new(
      symbol_type: 'STATIC_FIELD',
      name: var_name.to_s,
      line: UNKNOWN_MIN_LINE
    )
  end

  mod.constants(false).each do |const_name|
    # Reading an autoloaded constant would trigger loading as a side effect.
    next if mod.autoload?(const_name)

    value = mod.const_get(const_name)
    # Nested modules/classes are not reported as fields.
    next if value.is_a?(Module)

    symbols << Symbol.new(
      symbol_type: 'STATIC_FIELD',
      name: const_name.to_s,
      line: UNKNOWN_MIN_LINE,
      type: value.class.name
    )
  rescue NameError, LoadError, NoMethodError => e # standard:disable Lint/ShadowedException
    # Expected failures: constant removed/undefined, autoload failure, or a value
    # that does not respond to #class. They get their own message so unexpected
    # errors stand out in triage. The lint is disabled because NameError and
    # NoMethodError descend from StandardError; clause order guarantees the bare
    # rescue below only sees exceptions not matched here.
    @logger.debug { "symdb: skipping module constant #{const_name}: #{e.class}: #{e.message}" }
  rescue => e
    @logger.debug { "symdb: unexpected error reading module constant #{const_name}: #{e.class}: #{e.message}" }
  end

  symbols
rescue => e
  mod_name = safe_mod_name(mod) || '<unknown>'
  @logger.debug { "symdb: failed to extract symbols from #{mod_name}: #{e.class}: #{e.message}" }
  []
end
|
|
913
|
+
end
|
|
914
|
+
end
|
|
915
|
+
end
|