codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/core_ext/string/inflections'
|
|
4
|
+
require 'digest'
|
|
5
|
+
require 'json'
|
|
6
|
+
require 'pathname'
|
|
7
|
+
require 'set'
|
|
8
|
+
|
|
9
|
+
module CodebaseIndex
|
|
10
|
+
module MCP
|
|
11
|
+
# Reads extraction output from disk for the MCP server.
|
|
12
|
+
#
|
|
13
|
+
# Lazy-loads unit JSON files on demand with an LRU-ish cache cap.
|
|
14
|
+
# Builds an identifier index from _index.json files for fast lookups.
|
|
15
|
+
#
|
|
16
|
+
# @example
|
|
17
|
+
# reader = IndexReader.new("/path/to/codebase_index")
|
|
18
|
+
# reader.find_unit("Post") # => Hash (full unit data)
|
|
19
|
+
# reader.list_units(type: "model") # => Array<Hash>
|
|
20
|
+
#
|
|
21
|
+
class IndexReader
|
|
22
|
+
# Output sub-directories, one per extractor type.
# Must stay in sync with Extractor::EXTRACTORS keys.
TYPE_DIRS = %w[
  models controllers graphql components view_components
  services jobs mailers serializers managers policies validators
  concerns routes middleware i18n pundit_policies configurations
  engines view_templates migrations action_cable_channels
  scheduled_jobs rake_tasks state_machines events decorators
  database_views caching factories test_mappings rails_source
  poros libs
].freeze

# Directory name => singular type name (used in search filtering).
# Computed from TYPE_DIRS with ActiveSupport's String#singularize, so the two never drift apart.
DIR_TO_TYPE = TYPE_DIRS.each_with_object({}) { |dir, mapping| mapping[dir] = dir.singularize }.freeze

# Singular type name => directory name (the inverse of DIR_TO_TYPE).
TYPE_TO_DIR = DIR_TO_TYPE.invert.freeze

# Maximum number of loaded unit files to cache in memory.
MAX_UNIT_CACHE = 50
|
|
42
|
+
|
|
43
|
+
# Validate the extraction output directory and set up empty caches.
#
# Only existence checks touch the disk here; the manifest, indexes and units
# are read lazily by their accessor methods.
#
# @param index_dir [String] Path to extraction output directory
# @raise [ArgumentError] if directory doesn't exist or has no manifest.json
def initialize(index_dir)
  @index_dir = Pathname.new(index_dir)
  raise ArgumentError, "Index directory does not exist: #{index_dir}" unless @index_dir.directory?
  raise ArgumentError, "No manifest.json found in: #{index_dir}" unless @index_dir.join('manifest.json').file?

  # Start from the same blank state that #reload! restores, so every cache and
  # memoized ivar is defined from construction onward.
  @unit_cache = {}
  @unit_cache_order = []
  @identifier_map = nil
  @index_cache = {}
  @manifest = nil
  @summary = nil
  @dependency_graph = nil
  @graph_analysis = nil
  @raw_graph_data = nil
end
|
|
54
|
+
|
|
55
|
+
# Drop every in-memory cache and memoized file so the next access re-reads from disk.
#
# @return [void]
def reload!
  @unit_cache = {}
  @index_cache = {}
  @unit_cache_order = []

  # Memoized parse results go back to nil; their readers rebuild them lazily.
  %i[@identifier_map @manifest @summary @dependency_graph @graph_analysis @raw_graph_data].each do |ivar|
    instance_variable_set(ivar, nil)
  end
  nil
end
|
|
69
|
+
|
|
70
|
+
# @return [Hash] Parsed manifest.json (parsed on first call, then reused until #reload!)
def manifest
  return @manifest if @manifest

  @manifest = parse_json('manifest.json')
end
|
|
74
|
+
|
|
75
|
+
# Content of SUMMARY.md, remembered after the first successful read.
# While the file is absent nothing is stored, so each call checks the disk again.
#
# @return [String, nil] SUMMARY.md content, or nil if not present
def summary
  return @summary if @summary

  summary_path = @index_dir.join('SUMMARY.md')
  @summary = summary_path.read if summary_path.file?
  @summary
end
|
|
82
|
+
|
|
83
|
+
# Graph object built from dependency_graph.json, memoized after the first call.
#
# @return [CodebaseIndex::DependencyGraph] Graph loaded from disk
def dependency_graph
  return @dependency_graph if @dependency_graph

  graph_hash = parse_json('dependency_graph.json')
  @dependency_graph = CodebaseIndex::DependencyGraph.from_h(graph_hash)
end
|
|
90
|
+
|
|
91
|
+
# @return [Hash] Parsed graph_analysis.json (parsed on first call, then reused until #reload!)
def graph_analysis
  return @graph_analysis if @graph_analysis

  @graph_analysis = parse_json('graph_analysis.json')
end
|
|
95
|
+
|
|
96
|
+
# Look up one unit by identifier and load its full JSON document.
#
# @param identifier [String] Unit identifier (e.g. "Post", "Api::V1::HealthController")
# @return [Hash, nil] Full unit data or nil if not found
def find_unit(identifier)
  entry = identifier_map[identifier]
  entry && load_unit(entry[:type_dir], entry[:filename])
end
|
|
106
|
+
|
|
107
|
+
# List index entries for every type directory, or for a single type.
# An unrecognised type name yields an empty list.
#
# @param type [String, nil] Singular type name (e.g. "model", "controller")
# @return [Array<Hash>] Index entries for matching units
def list_units(type: nil)
  selected = type ? [TYPE_TO_DIR[type]].compact : TYPE_DIRS
  selected.flat_map { |dir| read_index(dir) }
end
|
|
121
|
+
|
|
122
|
+
# Search units by case-insensitive substring.
#
# The query is escaped before it is compiled, so regex metacharacters in it
# are matched literally rather than interpreted.
#
# Phase 1: match identifiers from index files (cheap).
# Phase 2: lazy-load unit files for metadata/source_code matching.
#
# @param query [String] Text to look for (matched literally, ignoring case)
# @param types [Array<String>, nil] Filter to these singular type names
# @param fields [Array<String>] Fields to search: "identifier", "metadata", "source_code"
# @param limit [Integer] Maximum results to return
# @return [Array<Hash>] Matches with :identifier, :type, :match_field keys
def search(query, types: nil, fields: %w[identifier], limit: 20)
  pattern = Regexp.new(Regexp.escape(query), Regexp::IGNORECASE)
  by_identifier = fields.include?('identifier')
  by_source = fields.include?('source_code')
  by_metadata = fields.include?('metadata')
  dirs = types ? types.filter_map { |t| TYPE_TO_DIR[t] } : TYPE_DIRS
  results = []

  dirs.each do |dir|
    # Stop scanning altogether once the limit is reached, so the index files
    # of the remaining directories are never read.
    break if results.size >= limit

    type_name = DIR_TO_TYPE[dir]

    read_index(dir).each do |entry|
      break if results.size >= limit

      id = entry['identifier']

      # Phase 1: identifier matching
      if by_identifier && pattern.match?(id)
        results << { identifier: id, type: type_name, match_field: 'identifier' }
        next
      end

      # Phase 2: metadata/source_code matching (requires loading full unit)
      next unless by_source || by_metadata

      unit = find_unit(id)
      next unless unit

      # source_code takes precedence over metadata when both would match.
      if by_source && unit['source_code'] && pattern.match?(unit['source_code'])
        results << { identifier: id, type: type_name, match_field: 'source_code' }
      elsif by_metadata && unit['metadata'] && pattern.match?(unit['metadata'].to_json)
        results << { identifier: id, type: type_name, match_field: 'metadata' }
      end
    end
  end

  results.first(limit)
end
|
|
173
|
+
|
|
174
|
+
# Breadth-first walk along forward dependency edges.
#
# @param identifier [String] Starting unit identifier
# @param depth [Integer] Maximum traversal depth
# @param types [Array<String>, nil] Filter to these singular type names
# @return [Hash] { root:, found:, nodes: { id => { type:, depth:, deps: [] } } }
def traverse_dependencies(identifier, depth: 2, types: nil)
  options = { depth: depth, types: types, direction: :forward }
  traverse(identifier, **options)
end
|
|
183
|
+
|
|
184
|
+
# Breadth-first walk along reverse dependency edges (the units that depend on this one).
#
# @param identifier [String] Starting unit identifier
# @param depth [Integer] Maximum traversal depth
# @param types [Array<String>, nil] Filter to these singular type names
# @return [Hash] { root:, found:, nodes: { id => { type:, depth:, deps: [] } } }
def traverse_dependents(identifier, depth: 2, types: nil)
  options = { depth: depth, types: types, direction: :reverse }
  traverse(identifier, **options)
end
|
|
193
|
+
|
|
194
|
+
# Search rails_source units by concept keyword.
#
# The keyword is matched literally and case-insensitively against each unit's
# identifier, source_code and JSON-serialized metadata. Every indexed unit is
# loaded in full, because the result rows carry file_path and metadata.
#
# @param keyword [String] Concept keyword to match (e.g. "ActiveRecord", "routing", "persistence")
# @param limit [Integer] Maximum results to return
# @return [Array<Hash>] Matching rails_source unit summaries
def framework_sources(keyword, limit: 20)
  matcher = Regexp.new(Regexp.escape(keyword), Regexp::IGNORECASE)
  hits = []

  read_index('rails_source').each do |entry|
    break if hits.size >= limit

    id = entry['identifier']
    unit = find_unit(id)
    next unless unit

    source = unit['source_code']
    metadata = unit['metadata']
    next unless matcher.match?(id) ||
                (source && matcher.match?(source)) ||
                (metadata && matcher.match?(metadata.to_json))

    hits << { identifier: id, type: 'rails_source', file_path: unit['file_path'], metadata: metadata }
  end

  hits
end
|
|
230
|
+
|
|
231
|
+
# Return units sorted by most recent git modification.
#
# Loads every indexed unit of the selected types, keeps those that carry
# metadata.git.last_modified, and orders them newest first.
#
# @param limit [Integer] Maximum results to return
# @param types [Array<String>, nil] Filter to these singular type names
# @return [Array<Hash>] Units sorted by last_modified descending
def recent_changes(limit: 10, types: nil)
  dirs = types ? types.filter_map { |t| TYPE_TO_DIR[t] } : TYPE_DIRS

  dated = dirs.flat_map do |dir|
    read_index(dir).filter_map do |entry|
      id = entry['identifier']
      unit = find_unit(id)
      stamp = unit&.dig('metadata', 'git', 'last_modified')
      next unless stamp

      { identifier: id, type: DIR_TO_TYPE[dir], file_path: unit['file_path'], last_modified: stamp }
    end
  end

  dated.sort_by { |row| row[:last_modified] }.reverse.first(limit)
end
|
|
272
|
+
|
|
273
|
+
# Unprocessed contents of dependency_graph.json, memoized after the first call.
#
# @return [Hash] Raw dependency graph data from JSON
def raw_graph_data
  return @raw_graph_data if @raw_graph_data

  @raw_graph_data = parse_json('dependency_graph.json')
end
|
|
277
|
+
|
|
278
|
+
private
|
|
279
|
+
|
|
280
|
+
# Memoized identifier => { type_dir:, filename: } map covering all _index.json files.
def identifier_map
  return @identifier_map if @identifier_map

  @identifier_map = build_identifier_map
end
|
|
284
|
+
|
|
285
|
+
# Scan every type directory's index and derive each unit's on-disk filename.
#
# The filename is the identifier with "::" turned into "__" and every other
# character outside [a-zA-Z0-9_-] turned into "_", followed by "_" and the
# first 8 hex characters of the identifier's SHA-256 digest.
# When an identifier appears in more than one directory, the later directory wins.
#
# @return [Hash{String => Hash}] identifier => { type_dir:, filename: }
def build_identifier_map
  TYPE_DIRS.each_with_object({}) do |dir, lookup|
    read_index(dir).each do |entry|
      id = entry['identifier']
      safe_name = id.gsub('::', '__').gsub(/[^a-zA-Z0-9_-]/, '_')
      short_hash = Digest::SHA256.hexdigest(id)[0, 8]
      lookup[id] = { type_dir: dir, filename: "#{safe_name}_#{short_hash}.json" }
    end
  end
end
|
|
299
|
+
|
|
300
|
+
# Read and cache an _index.json file for a type directory.
#
# @param dir [String] Type directory name (e.g. "models")
# @return [Object] Parsed _index.json content, or [] when the file does not exist
def read_index(dir)
  @index_cache ||= {}
  cached = @index_cache[dir]
  return cached if cached

  index_path = @index_dir.join(dir, '_index.json')
  @index_cache[dir] = index_path.file? ? JSON.parse(index_path.read) : []
end
|
|
308
|
+
|
|
309
|
+
# Load a unit JSON file with LRU cache eviction.
#
# @unit_cache holds the parsed documents and @unit_cache_order lists their keys
# from least to most recently used.
#
# @param type_dir [String] Type directory name
# @param filename [String] Unit file name inside that directory
# @return [Object, nil] Parsed unit JSON, or nil when the file does not exist
def load_unit(type_dir, filename)
  key = "#{type_dir}/#{filename}"

  unless @unit_cache.key?(key)
    unit_path = @index_dir.join(type_dir, filename)
    return nil unless unit_path.file?

    parsed = JSON.parse(unit_path.read)

    # Make room first: drop the least recently used entry when the cache is full.
    @unit_cache.delete(@unit_cache_order.shift) if @unit_cache.size >= MAX_UNIT_CACHE

    @unit_cache[key] = parsed
    @unit_cache_order.push(key)
    return parsed
  end

  # Cache hit: move the key to the most-recently-used end.
  @unit_cache_order.delete(key)
  @unit_cache_order << key
  @unit_cache[key]
end
|
|
335
|
+
|
|
336
|
+
# Parse a JSON file relative to the index directory.
# No existence check is made here, so a missing file surfaces as the read error.
#
# @param filename [String] File name relative to the index directory
# @return [Object] Parsed JSON content
def parse_json(filename)
  raw = @index_dir.join(filename).read
  JSON.parse(raw)
end
|
|
341
|
+
|
|
342
|
+
# BFS traversal in either direction over the raw dependency graph data.
#
# The root is always reported, even when a type filter is given; the filter
# only restricts which neighbours are listed and followed.
#
# @param identifier [String] Starting unit identifier
# @param depth [Integer] Maximum traversal depth (the root is depth 0)
# @param types [Array<String>, nil] Keep only neighbours whose node type is listed
# @param direction [Symbol] :forward follows 'edges'; any other value follows 'reverse'
# @return [Hash] { root:, found:, nodes: { id => { type:, depth:, deps: [] } } };
#   found is false and nodes is empty when the identifier is not a graph node
def traverse(identifier, depth:, types:, direction:)
  graph = raw_graph_data
  node_info = graph['nodes'] || {}
  return { root: identifier, found: false, nodes: {} } unless node_info.key?(identifier)

  adjacency = graph[direction == :forward ? 'edges' : 'reverse'] || {}
  wanted_types = types&.to_set
  seen = Set.new([identifier])
  pending = [[identifier, 0]]
  collected = {}

  until pending.empty?
    current, level = pending.shift

    linked = adjacency[current] || []
    if wanted_types
      # Neighbours without node metadata are dropped when filtering by type.
      linked = linked.select do |name|
        info = node_info[name]
        info && wanted_types.include?(info['type'])
      end
    end

    collected[current] = { type: node_info[current]&.dig('type'), depth: level, deps: linked }

    # Nodes at the depth limit are recorded but not expanded.
    next if level >= depth

    linked.each do |name|
      # Set#add? returns nil for members already present, so each node is queued once.
      pending.push([name, level + 1]) if seen.add?(name)
    end
  end

  { root: identifier, found: true, nodes: collected }
end
|
|
392
|
+
end
|
|
393
|
+
end
|
|
394
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
|
|
4
|
+
module MCP
|
|
5
|
+
module Renderers
|
|
6
|
+
# Renders MCP tool responses as Markdown wrapped in XML boundary tags.
|
|
7
|
+
# Matches Anthropic's recommended format: XML tags for section boundaries,
|
|
8
|
+
# Markdown for content.
|
|
9
|
+
class ClaudeRenderer < MarkdownRenderer
|
|
10
|
+
# Render a single unit inside a <lookup_result> tag carrying its identifier and type.
#
# @param data [Hash] Unit data
# @return [String] XML-wrapped Markdown, or the plain text 'Unit not found'
#   when data is not a Hash with an 'identifier'
def render_lookup(data, **)
  found = data.is_a?(Hash) && data['identifier']
  return 'Unit not found' unless found

  wrap_xml('lookup_result', super, identifier: data['identifier'], type: data['type'])
end
|
|
19
|
+
|
|
20
|
+
# Render search results inside a <search_results> tag that echoes the query.
#
# @param data [Hash] Search results; the query is read from :query or 'query'
# @return [String] XML-wrapped Markdown
def render_search(data, **)
  wrap_xml('search_results', super, query: data[:query] || data['query'])
end
|
|
27
|
+
|
|
28
|
+
# Render a dependency traversal inside a <dependencies> tag that names the root.
#
# @param data [Hash] Traversal result; the root is read from :root or 'root'
# @return [String] XML-wrapped output of the superclass renderer
def render_dependencies(data, **)
  wrap_xml('dependencies', super, root: data[:root] || data['root'])
end
|
|
33
|
+
|
|
34
|
+
# Render a dependents traversal inside a <dependents> tag that names the root.
#
# @param data [Hash] Traversal result; the root is read from :root or 'root'
# @return [String] XML-wrapped output of the superclass renderer
def render_dependents(data, **)
  wrap_xml('dependents', super, root: data[:root] || data['root'])
end
|
|
39
|
+
|
|
40
|
+
# Wrap the superclass structure rendering in a <structure> tag.
#
# @param data [Object] Payload forwarded unchanged to the superclass renderer
# @return [String] XML-wrapped output of the superclass renderer
def render_structure(data, **)
  rendered = super
  wrap_xml('structure', rendered)
end
|
|
43
|
+
|
|
44
|
+
# Wrap the superclass graph-analysis rendering in a <graph_analysis> tag.
#
# @param data [Object] Payload forwarded unchanged to the superclass renderer
# @return [String] XML-wrapped output of the superclass renderer
def render_graph_analysis(data, **)
  rendered = super
  wrap_xml('graph_analysis', rendered)
end
|
|
47
|
+
|
|
48
|
+
# Wrap the superclass pagerank rendering in a <pagerank> tag.
#
# @param data [Object] Payload forwarded unchanged to the superclass renderer
# @return [String] XML-wrapped output of the superclass renderer
def render_pagerank(data, **)
  rendered = super
  wrap_xml('pagerank', rendered)
end
|
|
51
|
+
|
|
52
|
+
# Render framework-source results inside a <framework_results> tag that echoes the keyword.
#
# @param data [Hash] Results; the keyword is read from :keyword or 'keyword'
# @return [String] XML-wrapped output of the superclass renderer
def render_framework(data, **)
  wrap_xml('framework_results', super, keyword: data[:keyword] || data['keyword'])
end
|
|
57
|
+
|
|
58
|
+
# Wrap the superclass recent-changes rendering in a <recent_changes> tag.
#
# @param data [Object] Payload forwarded unchanged to the superclass renderer
# @return [String] XML-wrapped output of the superclass renderer
def render_recent_changes(data, **)
  rendered = super
  wrap_xml('recent_changes', rendered)
end
|
|
61
|
+
|
|
62
|
+
# Fallback rendering: wrap the superclass default output in a generic <result> tag.
#
# @param data [Object] Payload forwarded unchanged to the superclass renderer
# @return [String] XML-wrapped output of the superclass renderer
def render_default(data)
  rendered = super
  wrap_xml('result', rendered)
end
|
|
65
|
+
|
|
66
|
+
private
|
|
67
|
+
|
|
68
|
+
# Wrap content in an XML tag with optional attributes.
#
# Attribute values are converted with #to_s and have &, <, > and " replaced by
# XML entities, so arbitrary text (for example a search query) cannot end the
# attribute or the opening tag early. The content itself is inserted verbatim.
#
# @param tag [String] XML tag name
# @param content [String] Inner content
# @param attrs [Hash] XML attributes
# @return [String] XML-wrapped content
def wrap_xml(tag, content, **attrs)
  escapes = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' }
  attr_str = attrs.map { |name, value| " #{name}=\"#{value.to_s.gsub(/[&<>"]/, escapes)}\"" }.join
  "<#{tag}#{attr_str}>\n#{content}\n</#{tag}>"
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
|
|
4
|
+
module MCP
|
|
5
|
+
module Renderers
|
|
6
|
+
# Passthrough renderer that returns JSON.pretty_generate output.
|
|
7
|
+
# Preserves backward-compatible behavior.
|
|
8
|
+
class JsonRenderer < ToolResponseRenderer
|
|
9
|
+
# Serialize any tool payload as indented, human-readable JSON.
#
# @param data [Object] Any JSON-serializable data
# @return [String] Pretty-printed JSON
def render_default(data)
  ::JSON.pretty_generate(data)
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|