codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
|
|
4
|
+
module Extractors
|
|
5
|
+
# RailsSourceExtractor indexes selected parts of the Rails framework
|
|
6
|
+
# and key gems for version-specific accuracy.
|
|
7
|
+
#
|
|
8
|
+
# This enables queries like "what options does has_many support" or
|
|
9
|
+
# "how does Rails implement callbacks" to return accurate answers
|
|
10
|
+
# for the exact versions in use.
|
|
11
|
+
#
|
|
12
|
+
# Only high-value, frequently-referenced code is indexed to avoid bloat.
|
|
13
|
+
#
|
|
14
|
+
# @example
|
|
15
|
+
# extractor = RailsSourceExtractor.new
|
|
16
|
+
# units = extractor.extract_all
|
|
17
|
+
# # Returns units for ActiveRecord associations, callbacks, etc.
|
|
18
|
+
#
|
|
19
|
+
class RailsSourceExtractor
|
|
20
|
+
# Framework gems mapped to the paths (relative to each gem's root) that are
# indexed. Entries may be directories, which are searched recursively for
# `.rb` files, or individual files.
RAILS_PATHS = {
  # ORM: associations, validations, querying and persistence
  'activerecord' => [
    'lib/active_record/associations', 'lib/active_record/callbacks.rb',
    'lib/active_record/validations', 'lib/active_record/relation',
    'lib/active_record/querying.rb', 'lib/active_record/scoping',
    'lib/active_record/transactions.rb', 'lib/active_record/persistence.rb',
    'lib/active_record/attribute_methods', 'lib/active_record/enum.rb',
    'lib/active_record/store.rb', 'lib/active_record/nested_attributes.rb'
  ],

  # Controllers: metal modules, callbacks, rendering and redirects
  'actionpack' => [
    'lib/action_controller/metal', 'lib/action_controller/callbacks.rb',
    'lib/abstract_controller/callbacks.rb', 'lib/action_controller/rendering.rb',
    'lib/action_controller/redirecting.rb', 'lib/action_controller/params_wrapper.rb'
  ],

  # Core extensions and building blocks shared by the other components
  'activesupport' => [
    'lib/active_support/callbacks.rb', 'lib/active_support/concern.rb',
    'lib/active_support/configurable.rb',
    'lib/active_support/core_ext/module/delegation.rb',
    'lib/active_support/core_ext/object/inclusion.rb'
  ],

  # Background jobs
  'activejob' => [
    'lib/active_job/callbacks.rb', 'lib/active_job/enqueuing.rb',
    'lib/active_job/execution.rb', 'lib/active_job/exceptions.rb'
  ],

  # Mailers
  'actionmailer' => [
    'lib/action_mailer/base.rb', 'lib/action_mailer/delivery_methods.rb',
    'lib/action_mailer/callbacks.rb'
  ]
}.freeze
|
|
63
|
+
|
|
64
|
+
# Third-party gems that are indexed when installed (adjust per project).
# Each entry gives the paths to read, relative to the gem root, and a
# :priority that is copied into the metadata of every unit from that gem.
GEM_CONFIGS = {
  # Authentication / authorization
  'devise' => {
    paths: [
      'lib/devise/models',
      'lib/devise/controllers',
      'lib/devise/strategies'
    ],
    priority: :high
  },
  'pundit' => { paths: ['lib/pundit.rb', 'lib/pundit'], priority: :high },

  # Background processing
  'sidekiq' => {
    paths: [
      'lib/sidekiq/worker.rb',
      'lib/sidekiq/job.rb',
      'lib/sidekiq/client.rb'
    ],
    priority: :high
  },

  # Admin and permissions
  'activeadmin' => {
    paths: [
      'lib/active_admin/dsl.rb',
      'lib/active_admin/resource_dsl.rb'
    ],
    priority: :medium
  },
  'cancancan' => {
    paths: [
      'lib/cancan/ability.rb',
      'lib/cancan/controller_additions.rb'
    ],
    priority: :high
  },

  # Model add-ons
  'friendly_id' => { paths: ['lib/friendly_id'], priority: :medium },
  'paper_trail' => {
    paths: [
      'lib/paper_trail/has_paper_trail.rb',
      'lib/paper_trail/model_config.rb'
    ],
    priority: :medium
  },
  'aasm' => { paths: ['lib/aasm'], priority: :high },

  # Views and functional helpers
  'phlex' => { paths: ['lib/phlex'], priority: :high },
  'dry-monads' => { paths: ['lib/dry/monads'], priority: :medium }
}.freeze
|
|
107
|
+
|
|
108
|
+
# Records the Rails version reported by `Rails.version` and starts with an
# empty gem-name => version map, which is filled in by #extract_gem_sources.
def initialize
  @gem_versions = {}
  @rails_version = Rails.version
end
|
|
112
|
+
|
|
113
|
+
# Extract both the Rails framework sources and the configured gem sources.
#
# Framework units come first, followed by gem units; nil entries are dropped.
#
# @return [Array<ExtractedUnit>] List of framework/gem source units
def extract_all
  framework_units = extract_rails_sources
  gem_units = extract_gem_sources

  (framework_units + gem_units).compact
end
|
|
127
|
+
|
|
128
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
129
|
+
# Rails Framework Extraction
|
|
130
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
131
|
+
|
|
132
|
+
# Extract only the Rails framework sources listed in RAILS_PATHS.
#
# Components whose gem cannot be located are skipped. Directory entries are
# expanded to every `.rb` file beneath them; entries that do not exist are
# ignored. Files for which extraction returns nil are left out.
#
# @return [Array<ExtractedUnit>] units for every framework file extracted
def extract_rails_sources
  RAILS_PATHS.each_with_object([]) do |(component, relative_paths), collected|
    root = find_gem_path(component)
    next unless root

    relative_paths.each do |relative_path|
      target = root.join(relative_path)

      files =
        if target.directory?
          Dir[target.join('**/*.rb')]
        elsif target.exist?
          [target.to_s]
        else
          []
        end

      files.each do |file|
        extracted = extract_framework_file(component, file)
        collected << extracted if extracted
      end
    end
  end
end
|
|
157
|
+
|
|
158
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
159
|
+
# Gem Source Extraction
|
|
160
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
# Extract sources for every gem in GEM_CONFIGS that is installed.
#
# For each gem found, its version is recorded in @gem_versions before any
# file is read. Directory entries are expanded to every `.rb` file beneath
# them; entries that do not exist are ignored. Files for which extraction
# returns nil are left out.
#
# @return [Array<ExtractedUnit>] units for every gem file extracted
def extract_gem_sources
  GEM_CONFIGS.each_with_object([]) do |(gem_name, config), collected|
    root = find_gem_path(gem_name)
    next unless root

    @gem_versions[gem_name] = gem_version(gem_name)

    config[:paths].each do |relative_path|
      target = root.join(relative_path)

      files =
        if target.directory?
          Dir[target.join('**/*.rb')]
        elsif target.exist?
          [target.to_s]
        else
          []
        end

      files.each do |file|
        extracted = extract_gem_file(gem_name, config[:priority], file)
        collected << extracted if extracted
      end
    end
  end
end
|
|
189
|
+
|
|
190
|
+
private
|
|
191
|
+
|
|
192
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
193
|
+
# Gem Discovery
|
|
194
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
195
|
+
|
|
196
|
+
# Locate the installation directory of a gem.
#
# @param gem_name [String] name of the gem to look up
# @return [Pathname, nil] the gem's root directory, or nil when RubyGems
#   has no specification for that name
def find_gem_path(gem_name)
  install_dir = Gem::Specification.find_by_name(gem_name).gem_dir
  Pathname.new(install_dir)
rescue Gem::MissingSpecError
  nil
end
|
|
202
|
+
|
|
203
|
+
# Look up the installed version of a gem.
#
# Gem::MissingSpecError descends from LoadError (a ScriptError), so it is
# not covered by `rescue StandardError` and has to be named explicitly for
# the 'unknown' fallback to apply to gems that are not installed.
#
# @param gem_name [String] name of the gem to look up
# @return [String] the version string, or 'unknown' when it cannot be determined
def gem_version(gem_name)
  Gem::Specification.find_by_name(gem_name).version.to_s
rescue Gem::MissingSpecError, StandardError
  'unknown'
end
|
|
208
|
+
|
|
209
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
210
|
+
# File Extraction
|
|
211
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
212
|
+
|
|
213
|
+
# Build the :rails_source unit for a single framework file.
#
# @param component [String] framework gem the file belongs to (e.g. 'activerecord')
# @param file_path [String] path of the file to read
# @return [ExtractedUnit, nil] the populated unit, or nil if anything raised
#   a StandardError (the failure is logged through Rails.logger)
def extract_framework_file(component, file_path)
  source = File.read(file_path)
  # Strip everything up to and including ".../gems/<dir>/" to get a gem-relative path.
  relative = file_path.sub(%r{.*/gems/[^/]+/}, '')

  ExtractedUnit.new(
    type: :rails_source,
    identifier: "rails/#{component}/#{relative}",
    file_path: file_path
  ).tap do |unit|
    unit.source_code = annotate_framework_source(source, component, relative)

    # Computed once and shared with rate_importance below.
    api = extract_public_api(source)
    dsl = extract_dsl_methods(source)

    unit.metadata = {
      rails_version: @rails_version,
      component: component,
      relative_path: relative,

      # API surface, used for retrieval
      defined_modules: extract_module_names(source),
      defined_classes: extract_class_names(source),
      public_methods: api,
      dsl_methods: dsl,

      # Option constants and documented options
      option_definitions: extract_option_definitions(source),

      # Ranking signals
      is_public_api: public_api_file?(relative),
      importance: rate_importance(relative, source, public_methods: api, dsl_methods: dsl)
    }
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract Rails source #{file_path}: #{e.message}")
  nil
end
|
|
255
|
+
|
|
256
|
+
# Build the :gem_source unit for a single gem file.
#
# @param gem_name [String] name of the gem the file belongs to
# @param priority [Symbol] priority from the gem's GEM_CONFIGS entry
# @param file_path [String] path of the file to read
# @return [ExtractedUnit, nil] the populated unit, or nil if anything raised
#   a StandardError (the failure is logged through Rails.logger)
def extract_gem_file(gem_name, priority, file_path)
  source = File.read(file_path)
  # Strip everything up to and including ".../gems/<dir>/" to get a gem-relative path.
  relative = file_path.sub(%r{.*/gems/[^/]+/}, '')

  ExtractedUnit.new(
    type: :gem_source,
    identifier: "gems/#{gem_name}/#{relative}",
    file_path: file_path
  ).tap do |unit|
    unit.source_code = annotate_gem_source(source, gem_name, relative)
    unit.metadata = {
      gem_name: gem_name,
      gem_version: @gem_versions[gem_name],
      relative_path: relative,
      priority: priority,

      # API surface
      defined_modules: extract_module_names(source),
      defined_classes: extract_class_names(source),
      public_methods: extract_public_api(source),

      # Patterns specific to gems
      mixins_provided: extract_mixins(source),
      configuration_options: extract_configuration(source)
    }
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract gem source #{file_path}: #{e.message}")
  nil
end
|
|
289
|
+
|
|
290
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
291
|
+
# Source Annotation
|
|
292
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
293
|
+
|
|
294
|
+
# Prefix framework source with a comment banner naming the Rails version,
# the component and the file.
#
# @param source [String] file contents
# @param component [String] framework gem name, left-justified to 55 columns
# @param relative [String] gem-relative path, left-justified to 62 columns
# @return [String] banner, a blank line, the source, and a trailing newline
def annotate_framework_source(source, component, relative)
  banner = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ Rails #{@rails_version} - #{component.ljust(55)}║",
    "# ║ File: #{relative.ljust(62)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝'
  ].join("\n")

  "#{banner}\n\n#{source}\n"
end
|
|
304
|
+
|
|
305
|
+
# Prefix gem source with a comment banner naming the gem, its version and
# the file.
#
# The version is read from @gem_versions and falls back to 'unknown'. It is
# padded so that the gem name and version together fill 55 columns.
#
# @param source [String] file contents
# @param gem_name [String] name of the gem
# @param relative [String] gem-relative path, left-justified to 62 columns
# @return [String] banner, a blank line, the source, and a trailing newline
def annotate_gem_source(source, gem_name, relative)
  version = @gem_versions.fetch(gem_name, nil) || 'unknown'
  padded_version = version.ljust(55 - gem_name.length)

  banner = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ Gem: #{gem_name} v#{padded_version}║",
    "# ║ File: #{relative.ljust(62)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝'
  ].join("\n")

  "#{banner}\n\n#{source}\n"
end
|
|
317
|
+
|
|
318
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
319
|
+
# Code Analysis
|
|
320
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
321
|
+
|
|
322
|
+
# List the names that follow a `module` keyword at the start of a line.
#
# @param source [String] Ruby source text
# @return [Array<String>] unique module names in order of first appearance
def extract_module_names(source)
  names = []
  source.scan(/^\s*module\s+([\w:]+)/) { |(name)| names << name }
  names.uniq
end
|
|
325
|
+
|
|
326
|
+
# List the names that follow a `class` keyword at the start of a line.
# `class << self` is not picked up because `<<` is not a name character.
#
# @param source [String] Ruby source text
# @return [Array<String>] unique class names in order of first appearance
def extract_class_names(source)
  names = []
  source.scan(/^\s*class\s+([\w:]+)/) { |(name)| names << name }
  names.uniq
end
|
|
329
|
+
|
|
330
|
+
# Collect the publicly visible method definitions in a source file.
#
# This is a line-based heuristic, not a parser. A bare `private` or
# `protected` line hides every following definition until a bare `public`
# line. Comment lines are ignored so that `def` examples inside
# documentation are not reported, and definitions wrapped inline in
# `private`, `protected` or `private_class_method` are skipped. Methods
# whose name starts with an underscore are treated as internal.
#
# @param source [String] Ruby source text
# @return [Array<Hash>] one hash per method with :name (String, including a
#   leading "self." for class methods), :signature (String or nil — the
#   parenthesised parameter list up to the first closing paren) and
#   :class_method (Boolean)
def extract_public_api(source)
  methods = []
  hidden = false

  source.each_line do |line|
    stripped = line.strip
    next if stripped.empty? || stripped.start_with?('#')

    case stripped
    when 'private', 'protected' then hidden = true
    when 'public' then hidden = false
    end

    next if hidden
    next if stripped.match?(/\A(?:private|protected|private_class_method)\s+def\s/)

    # \b keeps `undef foo` from being read as a definition.
    match = stripped.match(/\bdef\s+((?:self\.)?\w+[?!=]?)(\(.*?\))?/)
    next unless match

    method_name = match[1]
    next if method_name.start_with?('_')

    methods << {
      name: method_name,
      signature: match[2],
      class_method: method_name.start_with?('self.')
    }
  end

  methods
end
|
|
355
|
+
|
|
356
|
+
# Find method names that look like DSL entry points (has_many, validates, ...).
#
# Three textual cues are checked, in this order:
#   1. a `def self.name` line with a trailing comment mentioning "DSL" (any case)
#   2. a `def name(...)` line followed by a `# :call-seq:` comment
#   3. the first `def` after each `class_methods do`
#
# @param source [String] Ruby source text
# @return [Array<String>] unique method names, grouped by cue in the order above
def extract_dsl_methods(source)
  cues = [
    /def\s+self\.(\w+).*?#.*?DSL/i,
    /def\s+(\w+)\(.*?\)\s*#\s*:call-seq:/,
    /class_methods\s+do.*?def\s+(\w+)/m
  ]

  cues.flat_map { |cue| source.scan(cue).flatten }.uniq
end
|
|
371
|
+
|
|
372
|
+
# Collect option definitions from a source file.
#
# Two kinds of entries are returned, constants first:
#   - array constants named *_OPTIONS or VALID_*, as
#     { constant: name, values: [symbol names found inside the brackets] }
#   - comment lines of the form "# name - description", as
#     { name: name, description: description }
#
# @param source [String] Ruby source text
# @return [Array<Hash>] constant entries followed by documented-option entries
def extract_option_definitions(source)
  from_constants = source.scan(/(\w+_OPTIONS|VALID_\w+)\s*=\s*\[(.*?)\]/m).map do |constant, body|
    { constant: constant, values: body.scan(/:(\w+)/).map(&:first) }
  end

  from_comments = source.scan(/# (\w+) - (.+)$/).map do |name, description|
    { name: name, description: description }
  end

  from_constants + from_comments
end
|
|
391
|
+
|
|
392
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
393
|
+
# Importance Rating
|
|
394
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
395
|
+
|
|
396
|
+
# Decide whether a file is a public API file worth prioritizing.
#
# A path qualifies when it contains "associations/builder", ends in
# callbacks.rb, validations.rb or base.rb, or is a `.rb` file directly
# inside a "metal" directory.
#
# @param relative_path [String] gem-relative path of the file
# @return [Boolean]
def public_api_file?(relative_path)
  public_pattern = Regexp.union(
    %r{associations/builder},
    /callbacks\.rb$/,
    /validations\.rb$/,
    /base\.rb$/,
    %r{/metal/[^/]+\.rb$}
  )

  relative_path.match?(public_pattern)
end
|
|
408
|
+
|
|
409
|
+
# Rate a file's importance for retrieval ranking.
#
# Points: 3 when the path mentions associations, callbacks or validations;
# 2 for more than ten public methods; 2 for any DSL method; 1 when the source
# contains "# Options:". A total of 0-2 is :low, 3-5 is :medium, above is :high.
#
# @param relative_path [String] gem-relative path of the file
# @param source [String] file contents
# @param public_methods [Array, nil] precomputed public API; derived from
#   source when nil
# @param dsl_methods [Array, nil] precomputed DSL methods; derived from
#   source when nil
# @return [Symbol] :low, :medium or :high
def rate_importance(relative_path, source, public_methods: nil, dsl_methods: nil)
  points = [
    relative_path.match?(/associations|callbacks|validations/) ? 3 : 0,
    (public_methods || extract_public_api(source)).size > 10 ? 2 : 0,
    (dsl_methods || extract_dsl_methods(source)).any? ? 2 : 0,
    source.include?('# Options:') ? 1 : 0
  ].sum

  if points <= 2
    :low
  elsif points <= 5
    :medium
  else
    :high
  end
end
|
|
433
|
+
|
|
434
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
435
|
+
# Gem-Specific Analysis
|
|
436
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
437
|
+
|
|
438
|
+
# Extract the mixin modules provided by a gem file.
#
# A module is reported when, before any other module declaration begins, it
# either
#   - defines `def self.included`, or
#   - calls `extend ActiveSupport::Concern` and a `module ClassMethods`
#     appears somewhere later in the file.
#
# Module declarations are only recognised at the start of a line, so the
# word "module" inside a comment is never taken for a declaration, and the
# name reported is the nearest declaration above the hook rather than the
# outermost namespace in the file. This is a textual heuristic: "nearest
# declaration above" is not guaranteed to be the lexically enclosing module.
#
# @param source [String] Ruby source text
# @return [Array<String>] unique module names, included-hook matches first
def extract_mixins(source)
  # `(?:(?!^[ \t]*module[ \t]).)*?` consumes text lazily but refuses to step
  # over the start of another module declaration.
  included_hook =
    /^[ \t]*module[ \t]+([\w:]+)(?:(?!^[ \t]*module[ \t]).)*?def\s+self\.included/m
  concern =
    /^[ \t]*module[ \t]+([\w:]+)(?:(?!^[ \t]*module[ \t]).)*?extend\s+ActiveSupport::Concern(?=.*?module\s+ClassMethods)/m

  [included_hook, concern].flat_map { |pattern| source.scan(pattern).flatten }.uniq
end
|
|
454
|
+
|
|
455
|
+
# Extract the configuration option names a gem file exposes.
#
# Names come from `config.<name> =` assignments first, then from
# `mattr_accessor :<name>` / `cattr_accessor :<name>` declarations (only the
# first symbol after the accessor keyword is captured).
#
# @param source [String] Ruby source text
# @return [Array<String>] unique option names
def extract_configuration(source)
  railtie_settings = source.scan(/config\.(\w+)\s*=/).flatten
  class_settings = source.scan(/(?:mattr|cattr)_accessor\s+:(\w+)/).flatten

  (railtie_settings + class_settings).uniq
end
|
|
471
|
+
end
|
|
472
|
+
end
|
|
473
|
+
end
|