codebase_index 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/README.md +95 -300
- data/exe/codebase-index-mcp +3 -31
- data/exe/codebase-index-mcp-http +3 -31
- data/lib/codebase_index/ast/method_extractor.rb +3 -8
- data/lib/codebase_index/ast/node.rb +28 -0
- data/lib/codebase_index/ast/parser.rb +53 -92
- data/lib/codebase_index/builder.rb +67 -4
- data/lib/codebase_index/cache/cache_middleware.rb +199 -0
- data/lib/codebase_index/cache/cache_store.rb +264 -0
- data/lib/codebase_index/cache/redis_cache_store.rb +116 -0
- data/lib/codebase_index/cache/solid_cache_store.rb +111 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +29 -24
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +7 -40
- data/lib/codebase_index/console/adapters/job_adapter.rb +68 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +7 -40
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +7 -40
- data/lib/codebase_index/console/bridge.rb +7 -0
- data/lib/codebase_index/console/console_response_renderer.rb +3 -7
- data/lib/codebase_index/console/embedded_executor.rb +2 -1
- data/lib/codebase_index/console/server.rb +1 -4
- data/lib/codebase_index/dependency_graph.rb +28 -19
- data/lib/codebase_index/embedding/indexer.rb +18 -8
- data/lib/codebase_index/embedding/openai.rb +27 -6
- data/lib/codebase_index/embedding/provider.rb +29 -2
- data/lib/codebase_index/evaluation/evaluator.rb +5 -12
- data/lib/codebase_index/extractor.rb +40 -44
- data/lib/codebase_index/extractors/action_cable_extractor.rb +9 -36
- data/lib/codebase_index/extractors/callback_analyzer.rb +22 -8
- data/lib/codebase_index/extractors/controller_extractor.rb +3 -93
- data/lib/codebase_index/extractors/decorator_extractor.rb +7 -14
- data/lib/codebase_index/extractors/engine_extractor.rb +20 -1
- data/lib/codebase_index/extractors/graphql_extractor.rb +4 -29
- data/lib/codebase_index/extractors/job_extractor.rb +11 -6
- data/lib/codebase_index/extractors/lib_extractor.rb +0 -31
- data/lib/codebase_index/extractors/mailer_extractor.rb +15 -85
- data/lib/codebase_index/extractors/manager_extractor.rb +1 -15
- data/lib/codebase_index/extractors/model_extractor.rb +20 -53
- data/lib/codebase_index/extractors/phlex_extractor.rb +8 -8
- data/lib/codebase_index/extractors/policy_extractor.rb +1 -24
- data/lib/codebase_index/extractors/poro_extractor.rb +0 -17
- data/lib/codebase_index/extractors/serializer_extractor.rb +12 -7
- data/lib/codebase_index/extractors/service_extractor.rb +1 -38
- data/lib/codebase_index/extractors/shared_utility_methods.rb +183 -1
- data/lib/codebase_index/extractors/validator_extractor.rb +3 -17
- data/lib/codebase_index/extractors/view_component_extractor.rb +10 -9
- data/lib/codebase_index/filename_utils.rb +32 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +1 -4
- data/lib/codebase_index/formatting/base.rb +0 -10
- data/lib/codebase_index/graph_analyzer.rb +1 -1
- data/lib/codebase_index/mcp/bootstrapper.rb +58 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +35 -34
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +29 -29
- data/lib/codebase_index/mcp/server.rb +59 -68
- data/lib/codebase_index/mcp/tool_response_renderer.rb +23 -0
- data/lib/codebase_index/notion/client.rb +2 -2
- data/lib/codebase_index/notion/mapper.rb +1 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +3 -11
- data/lib/codebase_index/notion/mappers/model_mapper.rb +20 -23
- data/lib/codebase_index/notion/mappers/shared.rb +22 -0
- data/lib/codebase_index/observability/health_check.rb +0 -2
- data/lib/codebase_index/observability/structured_logger.rb +12 -30
- data/lib/codebase_index/operator/pipeline_guard.rb +0 -7
- data/lib/codebase_index/resilience/index_validator.rb +3 -21
- data/lib/codebase_index/retrieval/context_assembler.rb +19 -7
- data/lib/codebase_index/retrieval/query_classifier.rb +14 -12
- data/lib/codebase_index/retrieval/ranker.rb +6 -2
- data/lib/codebase_index/retrieval/search_executor.rb +8 -19
- data/lib/codebase_index/retriever.rb +1 -9
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +5 -25
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +6 -7
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +58 -53
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +11 -7
- data/lib/codebase_index/session_tracer/file_store.rb +1 -8
- data/lib/codebase_index/session_tracer/redis_store.rb +1 -7
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +4 -13
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +1 -7
- data/lib/codebase_index/session_tracer/store.rb +14 -0
- data/lib/codebase_index/storage/metadata_store.rb +37 -10
- data/lib/codebase_index/storage/pgvector.rb +37 -5
- data/lib/codebase_index/storage/qdrant.rb +39 -6
- data/lib/codebase_index/storage/vector_store.rb +11 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +14 -10
- data/lib/codebase_index/token_utils.rb +19 -0
- data/lib/codebase_index/version.rb +1 -1
- data/lib/codebase_index.rb +25 -6
- data/lib/tasks/codebase_index.rake +2 -2
- metadata +11 -2
|
@@ -20,6 +20,184 @@ module CodebaseIndex
|
|
|
20
20
|
# end
|
|
21
21
|
#
|
|
22
22
|
module SharedUtilityMethods
|
|
23
|
+
# Check whether a path points to application source (under app_root, but
|
|
24
|
+
# not inside vendor/ or node_modules/ directories).
|
|
25
|
+
#
|
|
26
|
+
# In Docker environments where Rails.root is `/app`, a naive
|
|
27
|
+
# `start_with?(app_root)` also matches vendor bundle paths like
|
|
28
|
+
# `/app/vendor/bundle/ruby/…`. This helper rejects those.
|
|
29
|
+
#
|
|
30
|
+
# @param path [String, nil] Absolute file path
|
|
31
|
+
# @param app_root [String] Rails.root.to_s
|
|
32
|
+
# @return [Boolean]
|
|
33
|
+
def app_source?(path, app_root)
|
|
34
|
+
return false unless path
|
|
35
|
+
|
|
36
|
+
path.start_with?(app_root) && !path.include?('/vendor/') && !path.include?('/node_modules/')
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Resolve the source file for a class using reliable introspection,
|
|
40
|
+
# filtered through {#app_source?} to reject vendor/gem paths.
|
|
41
|
+
#
|
|
42
|
+
# Tier order:
|
|
43
|
+
# 1. +const_source_location+ (returns the class definition site)
|
|
44
|
+
# 2. Instance method source locations (first match wins)
|
|
45
|
+
# 3. Class/singleton method source locations (first match wins)
|
|
46
|
+
#
|
|
47
|
+
# @param klass [Class, Module] The class to resolve
|
|
48
|
+
# @param app_root [String] Rails.root.to_s
|
|
49
|
+
# @param fallback [String] Path to return when resolution fails
|
|
50
|
+
# @return [String] Resolved source path or fallback
|
|
51
|
+
def resolve_source_location(klass, app_root:, fallback:)
|
|
52
|
+
# Tier 1: const_source_location (most reliable — returns class definition site)
|
|
53
|
+
if Object.respond_to?(:const_source_location) && klass.name
|
|
54
|
+
loc = Object.const_source_location(klass.name)&.first
|
|
55
|
+
return loc if app_source?(loc, app_root)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Tier 2: Instance methods defined directly on this class
|
|
59
|
+
klass.instance_methods(false).each do |method_name|
|
|
60
|
+
loc = klass.instance_method(method_name).source_location&.first
|
|
61
|
+
return loc if app_source?(loc, app_root)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Tier 3: Class/singleton methods defined on this class
|
|
65
|
+
klass.methods(false).each do |method_name|
|
|
66
|
+
loc = klass.method(method_name).source_location&.first
|
|
67
|
+
return loc if app_source?(loc, app_root)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
fallback
|
|
71
|
+
rescue StandardError
|
|
72
|
+
fallback
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Extract the primary class name from source or fall back to a file path convention.
|
|
76
|
+
#
|
|
77
|
+
# @param file_path [String] Absolute path to the Ruby file
|
|
78
|
+
# @param source [String] Ruby source code
|
|
79
|
+
# @param dir_prefix [String] Regex fragment matching the app/ subdirectory to strip
|
|
80
|
+
# (e.g., "policies", "validators", "(?:services|interactors|operations|commands|use_cases)")
|
|
81
|
+
# @return [String] The class name
|
|
82
|
+
def extract_class_name(file_path, source, dir_prefix)
|
|
83
|
+
return ::Regexp.last_match(1) if source =~ /^\s*class\s+([\w:]+)/
|
|
84
|
+
|
|
85
|
+
file_path.sub("#{Rails.root}/", '').sub(%r{^app/#{dir_prefix}/}, '').sub('.rb', '').camelize
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Extract the parent class name from a class definition.
|
|
89
|
+
#
|
|
90
|
+
# @param source [String] Ruby source code
|
|
91
|
+
# @return [String, nil] Parent class name or nil
|
|
92
|
+
def extract_parent_class(source)
|
|
93
|
+
match = source.match(/^\s*class\s+[\w:]+\s*<\s*([\w:]+)/)
|
|
94
|
+
match ? match[1] : nil
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Count non-blank, non-comment lines of code.
|
|
98
|
+
#
|
|
99
|
+
# @param source [String] Ruby source code
|
|
100
|
+
# @return [Integer] LOC count
|
|
101
|
+
def count_loc(source)
|
|
102
|
+
source.lines.count { |l| l.strip.length.positive? && !l.strip.start_with?('#') }
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Skip module-only files (concerns, base modules without a class).
|
|
106
|
+
#
|
|
107
|
+
# @param source [String] Ruby source code
|
|
108
|
+
# @return [Boolean]
|
|
109
|
+
def skip_file?(source)
|
|
110
|
+
source.match?(/^\s*module\s+[\w:]+\s*$/) && !source.match?(/^\s*class\s+/)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Extract custom error/exception class names defined inline.
|
|
114
|
+
#
|
|
115
|
+
# @param source [String] Ruby source code
|
|
116
|
+
# @return [Array<String>] Custom error class names
|
|
117
|
+
def extract_custom_errors(source)
|
|
118
|
+
source.scan(/class\s+(\w+(?:Error|Exception))\s*</).flatten
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Detect common entry point methods in a source file.
|
|
122
|
+
#
|
|
123
|
+
# @param source [String] Ruby source code
|
|
124
|
+
# @return [Array<String>] Entry point method names
|
|
125
|
+
def detect_entry_points(source)
|
|
126
|
+
points = []
|
|
127
|
+
points << 'call' if source.match?(/def (self\.)?call\b/)
|
|
128
|
+
points << 'perform' if source.match?(/def (self\.)?perform\b/)
|
|
129
|
+
points << 'execute' if source.match?(/def (self\.)?execute\b/)
|
|
130
|
+
points << 'run' if source.match?(/def (self\.)?run\b/)
|
|
131
|
+
points << 'process' if source.match?(/def (self\.)?process\b/)
|
|
132
|
+
points.empty? ? ['unknown'] : points
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Extract :only/:except action lists and :if/:unless conditions from a callback.
|
|
136
|
+
#
|
|
137
|
+
# Modern Rails (4.2+) stores conditions in @if/@unless ivar arrays.
|
|
138
|
+
# ActionFilter objects hold action Sets; other conditions are procs/symbols.
|
|
139
|
+
#
|
|
140
|
+
# @param callback [ActiveSupport::Callbacks::Callback]
|
|
141
|
+
# @return [Array(Array<String>, Array<String>, Array<String>, Array<String>)]
|
|
142
|
+
# [only_actions, except_actions, if_labels, unless_labels]
|
|
143
|
+
def extract_callback_conditions(callback)
|
|
144
|
+
if_conditions = callback.instance_variable_get(:@if) || []
|
|
145
|
+
unless_conditions = callback.instance_variable_get(:@unless) || []
|
|
146
|
+
|
|
147
|
+
only = []
|
|
148
|
+
except = []
|
|
149
|
+
if_labels = []
|
|
150
|
+
unless_labels = []
|
|
151
|
+
|
|
152
|
+
if_conditions.each do |cond|
|
|
153
|
+
actions = extract_action_filter_actions(cond)
|
|
154
|
+
if actions
|
|
155
|
+
only.concat(actions)
|
|
156
|
+
else
|
|
157
|
+
if_labels << condition_label(cond)
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
unless_conditions.each do |cond|
|
|
162
|
+
actions = extract_action_filter_actions(cond)
|
|
163
|
+
if actions
|
|
164
|
+
except.concat(actions)
|
|
165
|
+
else
|
|
166
|
+
unless_labels << condition_label(cond)
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
[only, except, if_labels, unless_labels]
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
# Extract action names from an ActionFilter-like condition object.
|
|
174
|
+
# Duck-types on the @actions ivar being a Set, avoiding dependence
|
|
175
|
+
# on private class names across Rails versions.
|
|
176
|
+
#
|
|
177
|
+
# @param condition [Object] A condition from the callback's @if/@unless array
|
|
178
|
+
# @return [Array<String>, nil] Action names, or nil if not an ActionFilter
|
|
179
|
+
def extract_action_filter_actions(condition)
|
|
180
|
+
return nil unless condition.instance_variable_defined?(:@actions)
|
|
181
|
+
|
|
182
|
+
actions = condition.instance_variable_get(:@actions)
|
|
183
|
+
return nil unless actions.is_a?(Set)
|
|
184
|
+
|
|
185
|
+
actions.to_a
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
# Human-readable label for a non-ActionFilter condition.
|
|
189
|
+
#
|
|
190
|
+
# @param condition [Object] A proc, symbol, or other condition
|
|
191
|
+
# @return [String]
|
|
192
|
+
def condition_label(condition)
|
|
193
|
+
case condition
|
|
194
|
+
when Symbol then ":#{condition}"
|
|
195
|
+
when Proc then 'Proc'
|
|
196
|
+
when String then condition
|
|
197
|
+
else condition.class.name
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
|
|
23
201
|
# Extract namespace from a class name string or class object.
|
|
24
202
|
#
|
|
25
203
|
# Handles both string input (e.g., "Payments::StripeService")
|
|
@@ -70,11 +248,15 @@ module CodebaseIndex
|
|
|
70
248
|
source.scan(/def\s+self\.(\w+[?!=]?)/).flatten
|
|
71
249
|
end
|
|
72
250
|
|
|
73
|
-
# Extract initialize parameters from source code.
|
|
251
|
+
# Extract initialize parameters from source code via regex.
|
|
74
252
|
#
|
|
75
253
|
# Parses the parameter list of the initialize method to determine
|
|
76
254
|
# parameter names, defaults, and whether they are keyword arguments.
|
|
77
255
|
#
|
|
256
|
+
# Note: PhlexExtractor and ViewComponentExtractor override this with a
|
|
257
|
+
# runtime-introspection version that takes a Class object instead of source
|
|
258
|
+
# text, providing richer type information (:req, :opt, :keyreq, :rest, etc.).
|
|
259
|
+
#
|
|
78
260
|
# @param source [String] Ruby source code
|
|
79
261
|
# @return [Array<Hash>] Parameter info hashes with :name, :has_default, :keyword
|
|
80
262
|
def extract_initialize_params(source)
|
|
@@ -54,7 +54,7 @@ module CodebaseIndex
|
|
|
54
54
|
# @return [ExtractedUnit, nil] The extracted unit or nil if not a validator
|
|
55
55
|
def extract_validator_file(file_path)
|
|
56
56
|
source = File.read(file_path)
|
|
57
|
-
class_name = extract_class_name(file_path, source)
|
|
57
|
+
class_name = extract_class_name(file_path, source, 'validators')
|
|
58
58
|
|
|
59
59
|
return nil unless class_name
|
|
60
60
|
return nil unless validator_file?(source)
|
|
@@ -82,16 +82,6 @@ module CodebaseIndex
|
|
|
82
82
|
# Class Discovery
|
|
83
83
|
# ──────────────────────────────────────────────────────────────────────
|
|
84
84
|
|
|
85
|
-
def extract_class_name(file_path, source)
|
|
86
|
-
return ::Regexp.last_match(1) if source =~ /^\s*class\s+([\w:]+)/
|
|
87
|
-
|
|
88
|
-
file_path
|
|
89
|
-
.sub("#{Rails.root}/", '')
|
|
90
|
-
.sub(%r{^app/validators/}, '')
|
|
91
|
-
.sub('.rb', '')
|
|
92
|
-
.camelize
|
|
93
|
-
end
|
|
94
|
-
|
|
95
85
|
def validator_file?(source)
|
|
96
86
|
source.match?(/< ActiveModel::Validator/) ||
|
|
97
87
|
source.match?(/< ActiveModel::EachValidator/) ||
|
|
@@ -176,7 +166,7 @@ module CodebaseIndex
|
|
|
176
166
|
|
|
177
167
|
def extract_error_messages(source)
|
|
178
168
|
# errors.add(:attr, "message") or errors.add(variable, "message")
|
|
179
|
-
messages = source.scan(/errors\.add\s*\(\s*:?\w+\s*,\s*["']([^"']+)["']/).flatten
|
|
169
|
+
messages = source.scan(/errors\.add\s*\(\s*:?\w+\s*,\s*["']([^"']+)["']/).flatten
|
|
180
170
|
|
|
181
171
|
# errors.add(:attr, :symbol) or errors.add(variable, :symbol)
|
|
182
172
|
source.scan(/errors\.add\s*\(\s*:?\w+\s*,\s*:(\w+)/).flatten.each { |m| messages << ":#{m}" }
|
|
@@ -186,7 +176,7 @@ module CodebaseIndex
|
|
|
186
176
|
|
|
187
177
|
def extract_options(source)
|
|
188
178
|
# options[:key] access
|
|
189
|
-
options = source.scan(/options\[:(\w+)\]/).flatten
|
|
179
|
+
options = source.scan(/options\[:(\w+)\]/).flatten
|
|
190
180
|
|
|
191
181
|
options.uniq
|
|
192
182
|
end
|
|
@@ -200,10 +190,6 @@ module CodebaseIndex
|
|
|
200
190
|
inferred.empty? ? [] : [inferred]
|
|
201
191
|
end
|
|
202
192
|
|
|
203
|
-
def extract_custom_errors(source)
|
|
204
|
-
source.scan(/class\s+(\w+(?:Error|Exception))\s*</).flatten
|
|
205
|
-
end
|
|
206
|
-
|
|
207
193
|
# ──────────────────────────────────────────────────────────────────────
|
|
208
194
|
# Dependency Extraction
|
|
209
195
|
# ──────────────────────────────────────────────────────────────────────
|
|
@@ -56,8 +56,12 @@ module CodebaseIndex
|
|
|
56
56
|
file_path: source_file_for(component)
|
|
57
57
|
)
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
# Skip components with no resolvable source file (framework/internal)
|
|
60
|
+
return nil unless unit.file_path
|
|
61
|
+
|
|
60
62
|
unit.source_code = read_source(unit.file_path)
|
|
63
|
+
|
|
64
|
+
unit.namespace = extract_namespace(component)
|
|
61
65
|
unit.metadata = extract_metadata(component, unit.source_code)
|
|
62
66
|
unit.dependencies = extract_dependencies(component, unit.source_code)
|
|
63
67
|
|
|
@@ -86,7 +90,10 @@ module CodebaseIndex
|
|
|
86
90
|
defined?(ViewComponent::Preview) && klass < ViewComponent::Preview
|
|
87
91
|
end
|
|
88
92
|
|
|
89
|
-
# Locate the source file for a component class
|
|
93
|
+
# Locate the source file for a component class.
|
|
94
|
+
#
|
|
95
|
+
# Convention paths first, then introspection via {#resolve_source_location}
|
|
96
|
+
# which filters out vendor/node_modules paths.
|
|
90
97
|
#
|
|
91
98
|
# @param component [Class]
|
|
92
99
|
# @return [String, nil]
|
|
@@ -99,13 +106,7 @@ module CodebaseIndex
|
|
|
99
106
|
found = possible_paths.find { |p| File.exist?(p) }
|
|
100
107
|
return found.to_s if found
|
|
101
108
|
|
|
102
|
-
|
|
103
|
-
if component.instance_methods(false).any?
|
|
104
|
-
method = component.instance_methods(false).first
|
|
105
|
-
component.instance_method(method).source_location&.first
|
|
106
|
-
end
|
|
107
|
-
rescue StandardError
|
|
108
|
-
nil
|
|
109
|
+
resolve_source_location(component, app_root: Rails.root.to_s, fallback: nil)
|
|
109
110
|
end
|
|
110
111
|
|
|
111
112
|
# @param file_path [String, nil]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
|
|
6
|
+
# Shared filename helpers for converting unit identifiers to safe filenames.
|
|
7
|
+
#
|
|
8
|
+
# Used by Extractor (writing) and IndexValidator (reading) to ensure
|
|
9
|
+
# filename generation is consistent across both sides.
|
|
10
|
+
module FilenameUtils
|
|
11
|
+
# Convert an identifier to a safe filename (legacy format).
|
|
12
|
+
#
|
|
13
|
+
# @param identifier [String] The unit identifier (e.g., "Admin::UsersController")
|
|
14
|
+
# @return [String] A filesystem-safe filename (e.g., "Admin__UsersController.json")
|
|
15
|
+
def safe_filename(identifier)
|
|
16
|
+
"#{identifier.gsub('::', '__').gsub(/[^a-zA-Z0-9_-]/, '_')}.json"
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Convert an identifier to a collision-safe filename (current format).
|
|
20
|
+
#
|
|
21
|
+
# Appends a short SHA256 digest to disambiguate identifiers that normalize
|
|
22
|
+
# to the same safe_filename.
|
|
23
|
+
#
|
|
24
|
+
# @param identifier [String] The unit identifier
|
|
25
|
+
# @return [String] Collision-safe filename (e.g., "Admin__UsersController_a1b2c3d4.json")
|
|
26
|
+
def collision_safe_filename(identifier)
|
|
27
|
+
base = identifier.gsub('::', '__').gsub(/[^a-zA-Z0-9_-]/, '_')
|
|
28
|
+
digest = Digest::SHA256.hexdigest(identifier)[0, 8]
|
|
29
|
+
"#{base}_#{digest}.json"
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -185,10 +185,7 @@ module CodebaseIndex
|
|
|
185
185
|
|
|
186
186
|
# Detect response calls (render, redirect_to, head, render_*).
|
|
187
187
|
def response_call?(node)
|
|
188
|
-
|
|
189
|
-
return true if node.method_name&.start_with?('render')
|
|
190
|
-
|
|
191
|
-
false
|
|
188
|
+
RESPONSE_METHODS.include?(node.method_name) || node.method_name&.start_with?('render')
|
|
192
189
|
end
|
|
193
190
|
|
|
194
191
|
# Detect dynamic dispatch (send, public_send).
|
|
@@ -25,16 +25,6 @@ module CodebaseIndex
|
|
|
25
25
|
def format(_assembled_context)
|
|
26
26
|
raise NotImplementedError, "#{self.class}#format must be implemented"
|
|
27
27
|
end
|
|
28
|
-
|
|
29
|
-
private
|
|
30
|
-
|
|
31
|
-
# Estimate token count using the project convention.
|
|
32
|
-
#
|
|
33
|
-
# @param text [String]
|
|
34
|
-
# @return [Integer]
|
|
35
|
-
def estimate_tokens(text)
|
|
36
|
-
(text.length / 4.0).ceil
|
|
37
|
-
end
|
|
38
28
|
end
|
|
39
29
|
end
|
|
40
30
|
end
|
|
@@ -89,7 +89,7 @@ module CodebaseIndex
|
|
|
89
89
|
dependents: dependents
|
|
90
90
|
}
|
|
91
91
|
end
|
|
92
|
-
identifiers_with_dependents.
|
|
92
|
+
identifiers_with_dependents.max_by(limit) { |h| h[:dependent_count] }
|
|
93
93
|
end
|
|
94
94
|
|
|
95
95
|
# Detect circular dependency chains in the graph.
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
|
|
4
|
+
module MCP
|
|
5
|
+
# Shared setup logic for MCP server executables.
|
|
6
|
+
#
|
|
7
|
+
# Validates the index directory, checks for a manifest, and builds
|
|
8
|
+
# an optional retriever for semantic search — all duplicated between
|
|
9
|
+
# the stdio and HTTP server entry points.
|
|
10
|
+
#
|
|
11
|
+
module Bootstrapper
|
|
12
|
+
# Resolve and validate the index directory from CLI args or environment.
|
|
13
|
+
#
|
|
14
|
+
# @param argv [Array<String>] Command-line arguments
|
|
15
|
+
# @return [String] Validated index directory path
|
|
16
|
+
def self.resolve_index_dir(argv)
|
|
17
|
+
dir = argv[0] || ENV['CODEBASE_INDEX_DIR'] || Dir.pwd
|
|
18
|
+
|
|
19
|
+
unless Dir.exist?(dir)
|
|
20
|
+
warn "Error: Index directory does not exist: #{dir}"
|
|
21
|
+
exit 1
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
unless File.exist?(File.join(dir, 'manifest.json'))
|
|
25
|
+
warn "Error: No manifest.json found in: #{dir}"
|
|
26
|
+
warn 'Run `bundle exec rake codebase_index:extract` in your Rails app first.'
|
|
27
|
+
exit 1
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
dir
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Attempt to build a retriever for semantic search.
|
|
34
|
+
#
|
|
35
|
+
# Auto-configures from environment variables when no explicit configuration
|
|
36
|
+
# exists. Returns nil if embedding is unavailable or setup fails.
|
|
37
|
+
#
|
|
38
|
+
# @return [CodebaseIndex::Retriever, nil]
|
|
39
|
+
def self.build_retriever
|
|
40
|
+
config = CodebaseIndex.configuration
|
|
41
|
+
|
|
42
|
+
openai_key = ENV.fetch('OPENAI_API_KEY', nil)
|
|
43
|
+
if !config.embedding_provider && openai_key
|
|
44
|
+
config.vector_store = :in_memory
|
|
45
|
+
config.metadata_store = :in_memory
|
|
46
|
+
config.graph_store = :in_memory
|
|
47
|
+
config.embedding_provider = :openai
|
|
48
|
+
config.embedding_options = { api_key: openai_key }
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
CodebaseIndex::Builder.new(config).build_retriever if config.embedding_provider
|
|
52
|
+
rescue StandardError => e
|
|
53
|
+
warn "Note: Semantic search unavailable (#{e.message}). Using pattern-based search only."
|
|
54
|
+
nil
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
@@ -53,18 +53,20 @@ module CodebaseIndex
|
|
|
53
53
|
# @param data [Hash] Search results with :query, :result_count, :results
|
|
54
54
|
# @return [String] Markdown search results
|
|
55
55
|
def render_search(data, **)
|
|
56
|
+
query = fetch_key(data, :query)
|
|
57
|
+
count = fetch_key(data, :result_count, 0)
|
|
58
|
+
results = fetch_key(data, :results, [])
|
|
59
|
+
|
|
56
60
|
lines = []
|
|
57
|
-
lines << "## Search: \"#{
|
|
58
|
-
count = data[:result_count] || data['result_count'] || 0
|
|
61
|
+
lines << "## Search: \"#{query}\""
|
|
59
62
|
lines << ''
|
|
60
63
|
lines << "#{count} result#{'s' unless count == 1} found."
|
|
61
64
|
lines << ''
|
|
62
65
|
|
|
63
|
-
results = data[:results] || data['results'] || []
|
|
64
66
|
results.each do |r|
|
|
65
|
-
ident = r
|
|
66
|
-
type = r
|
|
67
|
-
match = r
|
|
67
|
+
ident = fetch_key(r, :identifier)
|
|
68
|
+
type = fetch_key(r, :type)
|
|
69
|
+
match = fetch_key(r, :match_field)
|
|
68
70
|
line = "- **#{ident}** (#{type})"
|
|
69
71
|
line += " — matched in #{match}" if match
|
|
70
72
|
lines << line
|
|
@@ -92,7 +94,7 @@ module CodebaseIndex
|
|
|
92
94
|
# @param data [Hash] Structure data with :manifest and optional :summary
|
|
93
95
|
# @return [String] Markdown structure overview
|
|
94
96
|
def render_structure(data, **)
|
|
95
|
-
manifest = data
|
|
97
|
+
manifest = fetch_key(data, :manifest, {})
|
|
96
98
|
lines = []
|
|
97
99
|
lines << '## Codebase Structure'
|
|
98
100
|
lines << ''
|
|
@@ -113,7 +115,7 @@ module CodebaseIndex
|
|
|
113
115
|
lines << ''
|
|
114
116
|
end
|
|
115
117
|
|
|
116
|
-
summary = data
|
|
118
|
+
summary = fetch_key(data, :summary)
|
|
117
119
|
if summary
|
|
118
120
|
lines << '### Summary'
|
|
119
121
|
lines << ''
|
|
@@ -132,14 +134,14 @@ module CodebaseIndex
|
|
|
132
134
|
lines << '## Graph Analysis'
|
|
133
135
|
lines << ''
|
|
134
136
|
|
|
135
|
-
stats = data
|
|
137
|
+
stats = fetch_key(data, :stats)
|
|
136
138
|
if stats.is_a?(Hash)
|
|
137
139
|
stats.each { |k, v| lines << "- **#{k}:** #{v}" }
|
|
138
140
|
lines << ''
|
|
139
141
|
end
|
|
140
142
|
|
|
141
143
|
%w[orphans dead_ends hubs cycles bridges].each do |section|
|
|
142
|
-
items = data
|
|
144
|
+
items = fetch_key(data, section)
|
|
143
145
|
next unless items.is_a?(Array) && items.any?
|
|
144
146
|
|
|
145
147
|
lines << "### #{section.tr('_', ' ').capitalize}"
|
|
@@ -171,17 +173,14 @@ module CodebaseIndex
|
|
|
171
173
|
lines = []
|
|
172
174
|
lines << '## PageRank Scores'
|
|
173
175
|
lines << ''
|
|
174
|
-
lines << "#{data
|
|
176
|
+
lines << "#{fetch_key(data, :total_nodes)} nodes in graph."
|
|
175
177
|
lines << ''
|
|
176
178
|
lines << '| Rank | Identifier | Type | Score |'
|
|
177
179
|
lines << '|------|-----------|------|-------|'
|
|
178
180
|
|
|
179
|
-
results = data
|
|
181
|
+
results = fetch_key(data, :results, [])
|
|
180
182
|
results.each_with_index do |r, i|
|
|
181
|
-
|
|
182
|
-
type = r[:type] || r['type']
|
|
183
|
-
score = r[:score] || r['score']
|
|
184
|
-
lines << "| #{i + 1} | #{ident} | #{type} | #{score} |"
|
|
183
|
+
lines << "| #{i + 1} | #{fetch_key(r, :identifier)} | #{fetch_key(r, :type)} | #{fetch_key(r, :score)} |"
|
|
185
184
|
end
|
|
186
185
|
|
|
187
186
|
lines.join("\n").rstrip
|
|
@@ -192,19 +191,20 @@ module CodebaseIndex
|
|
|
192
191
|
# @param data [Hash] Framework search results
|
|
193
192
|
# @return [String] Markdown framework results
|
|
194
193
|
def render_framework(data, **)
|
|
194
|
+
keyword = fetch_key(data, :keyword)
|
|
195
|
+
count = fetch_key(data, :result_count, 0)
|
|
196
|
+
results = fetch_key(data, :results, [])
|
|
197
|
+
|
|
195
198
|
lines = []
|
|
196
|
-
keyword = data[:keyword] || data['keyword']
|
|
197
|
-
count = data[:result_count] || data['result_count'] || 0
|
|
198
199
|
lines << "## Framework: \"#{keyword}\""
|
|
199
200
|
lines << ''
|
|
200
201
|
lines << "#{count} result#{'s' unless count == 1} found."
|
|
201
202
|
lines << ''
|
|
202
203
|
|
|
203
|
-
results = data[:results] || data['results'] || []
|
|
204
204
|
results.each do |r|
|
|
205
|
-
ident = r
|
|
206
|
-
type = r
|
|
207
|
-
file = r
|
|
205
|
+
ident = fetch_key(r, :identifier)
|
|
206
|
+
type = fetch_key(r, :type)
|
|
207
|
+
file = fetch_key(r, :file_path)
|
|
208
208
|
line = "- **#{ident}** (#{type})"
|
|
209
209
|
line += " — `#{file}`" if file
|
|
210
210
|
lines << line
|
|
@@ -218,8 +218,10 @@ module CodebaseIndex
|
|
|
218
218
|
# @param data [Hash] Recent changes with :result_count and :results
|
|
219
219
|
# @return [String] Markdown table of recent changes
|
|
220
220
|
def render_recent_changes(data, **)
|
|
221
|
+
count = fetch_key(data, :result_count, 0)
|
|
222
|
+
results = fetch_key(data, :results, [])
|
|
223
|
+
|
|
221
224
|
lines = []
|
|
222
|
-
count = data[:result_count] || data['result_count'] || 0
|
|
223
225
|
lines << '## Recent Changes'
|
|
224
226
|
lines << ''
|
|
225
227
|
lines << "#{count} recently modified unit#{'s' unless count == 1}."
|
|
@@ -227,12 +229,11 @@ module CodebaseIndex
|
|
|
227
229
|
lines << '| Identifier | Type | Last Modified | Author |'
|
|
228
230
|
lines << '|-----------|------|---------------|--------|'
|
|
229
231
|
|
|
230
|
-
results = data[:results] || data['results'] || []
|
|
231
232
|
results.each do |r|
|
|
232
|
-
ident = r
|
|
233
|
-
type = r
|
|
234
|
-
modified = r
|
|
235
|
-
author = r
|
|
233
|
+
ident = fetch_key(r, :identifier)
|
|
234
|
+
type = fetch_key(r, :type)
|
|
235
|
+
modified = fetch_key(r, :last_modified) || '-'
|
|
236
|
+
author = fetch_key(r, :author) || '-'
|
|
236
237
|
lines << "| #{ident} | #{type} | #{modified} | #{author} |"
|
|
237
238
|
end
|
|
238
239
|
|
|
@@ -257,23 +258,23 @@ module CodebaseIndex
|
|
|
257
258
|
private
|
|
258
259
|
|
|
259
260
|
def render_traversal(label, data)
|
|
260
|
-
root = data
|
|
261
|
+
root = fetch_key(data, :root)
|
|
261
262
|
found = data[:found] || data['found']
|
|
262
|
-
nodes = data
|
|
263
|
-
message = data
|
|
263
|
+
nodes = fetch_key(data, :nodes, {})
|
|
264
|
+
message = fetch_key(data, :message)
|
|
264
265
|
|
|
265
266
|
lines = []
|
|
266
267
|
lines << "## #{label} of #{root}"
|
|
267
268
|
lines << ''
|
|
268
269
|
|
|
269
270
|
if found == false
|
|
270
|
-
|
|
271
|
+
lines << (message || "Identifier '#{root}' not found in the index.")
|
|
271
272
|
return lines.join("\n").rstrip
|
|
272
273
|
end
|
|
273
274
|
|
|
274
275
|
nodes.each do |id, info|
|
|
275
|
-
depth = info
|
|
276
|
-
deps = info
|
|
276
|
+
depth = fetch_key(info, :depth) || 0
|
|
277
|
+
deps = fetch_key(info, :deps, [])
|
|
277
278
|
indent = ' ' * depth
|
|
278
279
|
lines << "#{indent}- **#{id}**"
|
|
279
280
|
deps.each { |d| lines << "#{indent} - #{d}" }
|