codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'shared_utility_methods'
|
|
4
|
+
require_relative 'shared_dependency_scanner'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
  module Extractors
    # CachingExtractor detects caching usage across controllers, models, and views.
    #
    # Scans `app/controllers/**/*.rb`, `app/models/**/*.rb`, and
    # `app/views/**/*.erb` for cache-related patterns: Rails.cache.*,
    # caches_action, fragment cache blocks, cache_key, cache_version,
    # and expires_in. Produces one unit per file that contains any
    # cache calls, identifying the strategy and TTL patterns.
    #
    # @example
    #   extractor = CachingExtractor.new
    #   units = extractor.extract_all
    #   ctrl = units.find { |u| u.identifier == "app/controllers/products_controller.rb" }
    #   ctrl.metadata[:cache_strategy] # => :low_level
    #   ctrl.metadata[:cache_calls].size # => 3
    #
    class CachingExtractor
      include SharedUtilityMethods
      include SharedDependencyScanner

      # File glob patterns to scan, keyed by the file type they represent
      SCAN_PATTERNS = {
        controller: 'app/controllers/**/*.rb',
        model: 'app/models/**/*.rb',
        view: 'app/views/**/*.erb'
      }.freeze

      # Patterns that indicate cache usage, grouped by type
      CACHE_PATTERNS = {
        fetch: /Rails\.cache\.fetch\s*[(\[]/,
        read: /Rails\.cache\.read\s*[(\[]/,
        write: /Rails\.cache\.write\s*[(\[]/,
        delete: /Rails\.cache\.delete\s*[(\[]/,
        exist: /Rails\.cache\.exist\?\s*[(\[]/,
        caches_action: /\bcaches_action\b/,
        fragment: /\bcache\s+.*?\bdo\b|\bcache\s+do\b|\bcache\s*\(/,
        cache_key: /\bcache_key\b/,
        cache_version: /\bcache_version\b/
      }.freeze

      # Patterns for extracting TTL values
      TTL_PATTERN = /expires_in:\s*([^,\n)]+)/

      # Key-pattern regex (first argument to Rails.cache.*)
      KEY_PATTERN = /Rails\.cache\.(?:fetch|read|write|delete|exist\?)\s*[(\[]?\s*([^,\n)\]]+)/

      # Maximum number of characters inspected after a match when isolating
      # the text of a single cache call. Bounds the work done per match.
      CALL_CONTEXT_LIMIT = 2_000

      def initialize
        @rails_root = Rails.root
      end

      # Extract caching units from all scanned files.
      #
      # @return [Array<ExtractedUnit>] One unit per file with cache calls
      def extract_all
        units = []

        SCAN_PATTERNS.each do |file_type, pattern|
          Dir[@rails_root.join(pattern)].each do |file|
            unit = extract_caching_file(file, file_type)
            units << unit if unit
          end
        end

        units
      end

      # Extract a single file for caching patterns.
      #
      # Returns nil if the file contains no cache calls. Any StandardError
      # raised while reading or analysing the file is logged through
      # Rails.logger and converted into a nil result, so one bad file does
      # not abort a full scan.
      #
      # @param file_path [String] Absolute path to the file
      # @param file_type [Symbol, nil] :controller, :model, or :view; inferred
      #   from the path when omitted
      # @return [ExtractedUnit, nil] The unit or nil if no cache usage
      def extract_caching_file(file_path, file_type = nil)
        source = File.read(file_path)

        return nil unless cache_usage?(source)

        file_type ||= infer_file_type(file_path)
        identifier = relative_path(file_path)

        unit = ExtractedUnit.new(
          type: :caching,
          identifier: identifier,
          file_path: file_path
        )

        unit.namespace = nil
        unit.source_code = annotate_source(source, identifier, file_type)
        unit.metadata = extract_metadata(source, file_type)
        unit.dependencies = extract_dependencies(source)

        unit
      rescue StandardError => e
        Rails.logger.error("Failed to extract caching info from #{file_path}: #{e.message}")
        nil
      end

      private

      # ──────────────────────────────────────────────────────────────────────
      # Detection
      # ──────────────────────────────────────────────────────────────────────

      # Check whether the source contains any cache calls.
      #
      # @param source [String] Ruby or ERB source
      # @return [Boolean]
      def cache_usage?(source)
        CACHE_PATTERNS.values.any? { |pattern| source.match?(pattern) }
      end

      # ──────────────────────────────────────────────────────────────────────
      # Source Annotation
      # ──────────────────────────────────────────────────────────────────────

      # Prepend a summary annotation header to the source.
      #
      # @param source [String] Source code
      # @param identifier [String] Relative file path identifier
      # @param file_type [Symbol] :controller, :model, or :view
      # @return [String] Annotated source
      def annotate_source(source, identifier, file_type)
        annotation = <<~ANNOTATION
          # ╔═══════════════════════════════════════════════════════════════════════╗
          # ║ Caching: #{identifier.ljust(59)}║
          # ║ File type: #{file_type.to_s.ljust(57)}║
          # ╚═══════════════════════════════════════════════════════════════════════╝

        ANNOTATION

        annotation + source
      end

      # ──────────────────────────────────────────────────────────────────────
      # Metadata Extraction
      # ──────────────────────────────────────────────────────────────────────

      # Build the metadata hash for a caching unit.
      #
      # :loc counts lines that are neither blank nor start with '#'.
      #
      # @param source [String] Source code
      # @param file_type [Symbol] :controller, :model, or :view
      # @return [Hash] Caching metadata
      def extract_metadata(source, file_type)
        cache_calls = extract_cache_calls(source)
        loc = source.each_line.count do |line|
          stripped = line.strip
          !stripped.empty? && !stripped.start_with?('#')
        end

        {
          cache_calls: cache_calls,
          cache_strategy: infer_cache_strategy(source, cache_calls),
          file_type: file_type,
          loc: loc
        }
      end

      # Extract individual cache call entries from source.
      #
      # Each entry has :type, :key_pattern, and :ttl. The key and TTL are
      # looked up in the text of the matched call only (see #call_context),
      # so two calls in one file no longer both report the first key and
      # first expires_in found anywhere in the file.
      #
      # @param source [String] Source code
      # @return [Array<Hash>] Cache call descriptors, grouped by type in
      #   CACHE_PATTERNS order, then in order of appearance
      def extract_cache_calls(source)
        calls = []

        CACHE_PATTERNS.each do |type, pattern|
          source.scan(pattern) do
            match = Regexp.last_match
            context = call_context(source, match.begin(0), match.end(0))
            calls << {
              type: type,
              key_pattern: extract_key_pattern(context, type),
              ttl: extract_ttl(context)
            }
          end
        end

        calls
      end

      # Isolate the source text belonging to one matched cache call.
      #
      # When the match ends on an opening "(" or "[" (every Rails.cache.*
      # pattern, and the `cache(` fragment form), the result runs from the
      # start of the match through the balancing closer. Otherwise, or when
      # no balancing closer is found within CALL_CONTEXT_LIMIT characters,
      # the result is the remainder of the line the match starts on.
      #
      # @param source [String] Full source code
      # @param start [Integer] Character offset where the match begins
      # @param stop [Integer] Character offset just past the end of the match
      # @return [String] Text of the call
      def call_context(source, start, stop)
        rest_of_line = source[start, CALL_CONTEXT_LIMIT][/\A[^\n]*/]
        opener_index = stop - 1
        return rest_of_line unless ['(', '['].include?(source[opener_index])

        depth = 0
        source[opener_index, CALL_CONTEXT_LIMIT].each_char.with_index do |char, index|
          case char
          when '(', '[' then depth += 1
          when ')', ']'
            depth -= 1
            return source[start..(opener_index + index)] if depth.zero?
          end
        end

        rest_of_line
      end

      # Extract the key pattern for a Rails.cache call.
      #
      # Returns a simplified string representation of the first argument,
      # truncated to 60 characters.
      #
      # @param source [String] Source text to search (the first
      #   Rails.cache.* call found in it is used)
      # @param type [Symbol] The cache call type
      # @return [String, nil] The key pattern, or nil for types that take no
      #   key or when no key is found
      def extract_key_pattern(source, type)
        return nil unless %i[fetch read write delete exist].include?(type)

        match = source.match(KEY_PATTERN)
        match ? match[1].strip[0, 60] : nil
      end

      # Extract TTL value from expires_in option.
      #
      # @param source [String] Source text to search (the first expires_in:
      #   found in it is used)
      # @return [String, nil] The TTL expression or nil
      def extract_ttl(source)
        match = source.match(TTL_PATTERN)
        match ? match[1].strip : nil
      end

      # Infer the caching strategy from the call types present.
      #
      # Only fetch/read/write count as low-level usage here; a file whose
      # only cache references are delete, exist?, cache_key or cache_version
      # yields :unknown.
      #
      # @param source [String] Source code
      # @param _cache_calls [Array<Hash>] Extracted cache calls (unused)
      # @return [Symbol] :fragment, :action, :low_level, :mixed, or :unknown
      def infer_cache_strategy(source, _cache_calls)
        has_action = source.match?(CACHE_PATTERNS[:caches_action])
        has_fragment = source.match?(CACHE_PATTERNS[:fragment])
        has_low_level = source.match?(/Rails\.cache\.(?:fetch|read|write)/)

        active_strategies = [has_action, has_fragment, has_low_level].count(true)

        return :mixed if active_strategies > 1
        return :action if has_action
        return :fragment if has_fragment
        return :low_level if has_low_level

        :unknown
      end

      # ──────────────────────────────────────────────────────────────────────
      # Helpers
      # ──────────────────────────────────────────────────────────────────────

      # Infer the file type from the file path.
      #
      # @param file_path [String] Absolute path to the file
      # @return [Symbol] :controller, :model, :view, or :unknown
      def infer_file_type(file_path)
        case file_path
        when %r{app/controllers/} then :controller
        when %r{app/models/} then :model
        when %r{app/views/} then :view
        else :unknown
        end
      end

      # Compute the relative path from Rails root.
      #
      # Only a leading "<rails_root>/" is removed; a path that does not start
      # with the root is returned unchanged.
      #
      # @param file_path [String] Absolute path
      # @return [String] Relative path (e.g., "app/controllers/products_controller.rb")
      def relative_path(file_path)
        path = file_path.to_s
        prefix = "#{@rails_root}/"
        path.start_with?(prefix) ? path[prefix.length..-1] : path
      end

      # ──────────────────────────────────────────────────────────────────────
      # Dependency Extraction
      # ──────────────────────────────────────────────────────────────────────

      # Build the dependency array by scanning source for common references.
      #
      # Delegates to scan_common_dependencies, which is not defined in this
      # class (presumably provided by one of the included modules).
      #
      # @param source [String] Source code
      # @return [Array<Hash>] Dependency hashes with :type, :target, :via
      def extract_dependencies(source)
        scan_common_dependencies(source)
      end
    end
  end
end
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
require_relative '../ast/parser'
|
|
5
|
+
require_relative '../flow_analysis/operation_extractor'
|
|
6
|
+
|
|
7
|
+
module CodebaseIndex
  module Extractors
    # Analyzes callback method bodies to detect side effects.
    #
    # Given a model's composite source code (with inlined concerns) and its
    # callback metadata, this analyzer finds each callback method body and
    # classifies its side effects: column writes, job enqueues, service calls,
    # mailer triggers, and database reads.
    #
    # Detection is regex-based over the method's source text, so it is a
    # heuristic: text inside comments or string literals can also match.
    #
    # @example
    #   analyzer = CallbackAnalyzer.new(
    #     source_code: model_source,
    #     column_names: %w[email status name]
    #   )
    #   enriched = analyzer.analyze(callback_hash)
    #   enriched[:side_effects][:columns_written] #=> ["email"]
    #
    class CallbackAnalyzer
      # Database query methods that indicate a read operation.
      DB_READ_METHODS = %w[find where pluck first last].freeze

      # Methods that write a single column, taking column name as first argument.
      SINGLE_COLUMN_WRITERS = %w[update_column write_attribute].freeze

      # Methods that write multiple columns via keyword arguments.
      MULTI_COLUMN_WRITERS = %w[update_columns assign_attributes].freeze

      # Async enqueue methods that indicate a job is being dispatched.
      ASYNC_METHODS = %w[perform_later perform_async perform_in perform_at].freeze

      # `self.col = v` and compound forms (`||=`, `&&=`, `+=`, `<<=`, ...).
      # The trailing lookahead rejects `==`, `=~` and `=>`, none of which
      # assign to the attribute.
      SELF_ASSIGNMENT = /self\.(\w+)\s*(?:\|\||&&|\*\*|<<|>>|[-+*\/%|&^])?=(?![=~>])/

      # `col:` keyword-style hash key (the lookahead skips `Foo::Bar`).
      KEYWORD_KEY = /\b(\w+)\s*:(?!:)/

      # `:col =>` / `'col' =>` / `"col" =>` hash-rocket key.
      ROCKET_KEY = /[:'"](\w+)['"]?\s*=>/

      # @param source_code [String] Composite model source (with inlined concerns)
      # @param column_names [Array<String>] Model's database column names
      def initialize(source_code:, column_names: [])
        @source_code = source_code
        @column_names = column_names.map(&:to_s)
        @parser = Ast::Parser.new
        @operation_extractor = FlowAnalysis::OperationExtractor.new
        @parsed_root = safe_parse
      end

      # Analyze a single callback and enrich it with side-effect data.
      #
      # Finds the callback's method body in the source, scans it for
      # side effects, and returns the original callback hash with an
      # added :side_effects key. When the method cannot be located (parse
      # failure, non-method filter, missing definition) the side effects
      # are all empty.
      #
      # @param callback_hash [Hash] Callback metadata from ModelExtractor:
      #   { type:, filter:, kind:, conditions: }
      # @return [Hash] The callback hash with an added :side_effects key
      def analyze(callback_hash)
        filter = callback_hash[:filter].to_s
        method_node = find_method_node(filter)

        return callback_hash.merge(side_effects: empty_side_effects) if method_node.nil?

        method_source = method_source_from_node(method_node)
        return callback_hash.merge(side_effects: empty_side_effects) if method_source.nil?

        callback_hash.merge(
          side_effects: {
            columns_written: detect_columns_written(method_source),
            jobs_enqueued: detect_jobs_enqueued(method_source),
            services_called: detect_services_called(method_source),
            mailers_triggered: detect_mailers_triggered(method_source),
            database_reads: detect_database_reads(method_source),
            operations: extract_operations(method_node)
          }
        )
      end

      private

      # Parse source code safely, returning nil on failure.
      #
      # @return [Ast::Node, nil]
      def safe_parse
        @parser.parse(@source_code)
      rescue StandardError
        nil
      end

      # Find a method definition node by name in the cached AST.
      #
      # @param method_name [String]
      # @return [Ast::Node, nil] The first matching :def node, or nil when the
      #   source did not parse, the name is not a plain method name, or no
      #   definition matches
      def find_method_node(method_name)
        return nil unless @parsed_root
        return nil if method_name.empty? || !valid_method_name?(method_name)

        @parsed_root.find_all(:def).find do |node|
          node.method_name == method_name
        end
      end

      # Extract the raw source text of a method from its AST node.
      #
      # Prefers the node's own source; otherwise slices the composite source
      # by the node's 1-based line range.
      #
      # @param node [Ast::Node]
      # @return [String, nil] nil when the node has no usable line range
      def method_source_from_node(node)
        return node.source if node.source

        return nil unless node.line && node.end_line

        lines = @source_code.lines
        start_idx = node.line - 1
        end_idx = node.end_line - 1
        return nil if start_idx.negative? || end_idx >= lines.length

        lines[start_idx..end_idx].join
      end

      # Check if a filter string looks like a valid Ruby method name.
      # Rejects proc/lambda string representations and other non-method filters.
      #
      # @param name [String]
      # @return [Boolean]
      def valid_method_name?(name)
        name.match?(/\A[a-z_]\w*[!?=]?\z/i)
      end

      # Detect columns written by the callback method.
      #
      # Scans for self.col= assignments (including compound assignments such
      # as `||=`), update_column, update_columns, write_attribute, and
      # assign_attributes calls, cross-referencing against the model's known
      # column_names.
      #
      # @param method_source [String]
      # @return [Array<String>] Sorted, de-duplicated column names
      def detect_columns_written(method_source)
        candidates = method_source.scan(SELF_ASSIGNMENT).flatten

        # Pattern: update_column(:col, ...) / write_attribute(:col, ...)
        SINGLE_COLUMN_WRITERS.each do |writer|
          candidates.concat(
            method_source.scan(/\b#{Regexp.escape(writer)}\s*\(?\s*[:'"](\w+)/).flatten
          )
        end

        # Pattern: update_columns(col: ...) / assign_attributes(col: ...)
        MULTI_COLUMN_WRITERS.each do |writer|
          multi_writer_arguments(method_source, writer).each do |arguments|
            candidates.concat(arguments.scan(KEYWORD_KEY).flatten)
            candidates.concat(arguments.scan(ROCKET_KEY).flatten)
          end
        end

        columns = Set.new(candidates.select { |col| @column_names.include?(col) })
        columns.to_a.sort
      end

      # Collect the argument text of every call to a multi-column writer.
      #
      # Parenthesised calls yield the text up to the balancing ")", so nested
      # calls in a value do not hide later keys. Paren-less calls yield the
      # rest of the line.
      #
      # @param method_source [String]
      # @param writer [String] Method name, e.g. "update_columns"
      # @return [Array<String>] One argument string per call found
      def multi_writer_arguments(method_source, writer)
        arguments = []

        method_source.scan(/\b#{Regexp.escape(writer)}\b[ \t]*(\()?/) do
          match = Regexp.last_match
          rest = match.post_match
          arguments << (match[1] ? balanced_arguments(rest) : rest[/\A[^\n]*/])
        end

        arguments
      end

      # Return the text preceding the ")" that balances an already-consumed "(".
      #
      # @param text [String] Source text starting just after the opening paren
      # @return [String] The argument text; the whole input when no balancing
      #   closer is found
      def balanced_arguments(text)
        depth = 1

        text.each_char.with_index do |char, index|
          case char
          when '(' then depth += 1
          when ')'
            depth -= 1
            return text[0, index] if depth.zero?
          end
        end

        text
      end

      # Detect jobs enqueued by the callback method.
      #
      # Matches Job/Worker classes calling async dispatch methods, either
      # directly or through a `.set(...)` options call. Only the final
      # constant segment is reported (e.g. "SendJob" for `Foo::SendJob`).
      #
      # @param method_source [String]
      # @return [Array<String>] Sorted, de-duplicated class names
      def detect_jobs_enqueued(method_source)
        async_pattern = ASYNC_METHODS.map { |m| Regexp.escape(m) }.join('|')
        enqueue = /(\w+(?:Job|Worker))(?:\.set\([^)]*\))?\.(?:#{async_pattern})/

        method_source.scan(enqueue).flatten.uniq.sort
      end

      # Detect service objects called by the callback method.
      #
      # Matches names ending in Service followed by `.` or `::`.
      #
      # @param method_source [String]
      # @return [Array<String>] Sorted, de-duplicated class names
      def detect_services_called(method_source)
        method_source.scan(/(\w+Service)(?:\.|::)/).flatten.uniq.sort
      end

      # Detect mailers triggered by the callback method.
      #
      # Matches names ending in Mailer followed by a method call.
      #
      # @param method_source [String]
      # @return [Array<String>] Sorted, de-duplicated class names
      def detect_mailers_triggered(method_source)
        method_source.scan(/(\w+Mailer)\./).flatten.uniq.sort
      end

      # Detect database read operations in the callback method.
      #
      # Checks for common ActiveRecord query methods called via dot notation.
      # The receiver is not inspected, so e.g. `.first` on an Array also counts.
      #
      # @param method_source [String]
      # @return [Array<String>] Matching names, in DB_READ_METHODS order
      def detect_database_reads(method_source)
        DB_READ_METHODS.select do |method|
          method_source.match?(/\.#{Regexp.escape(method)}\b/)
        end
      end

      # Extract operations using OperationExtractor from the method's AST node.
      #
      # @param method_node [Ast::Node, nil]
      # @return [Array<Hash>] Empty when the node is nil or extraction raises
      def extract_operations(method_node)
        return [] unless method_node

        @operation_extractor.extract(method_node)
      rescue StandardError
        []
      end

      # Return an empty side-effects structure.
      #
      # @return [Hash]
      def empty_side_effects
        {
          columns_written: [],
          jobs_enqueued: [],
          services_called: [],
          mailers_triggered: [],
          database_reads: [],
          operations: []
        }
      end
    end
  end
end
|