codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,394 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/core_ext/string/inflections'
4
+ require 'digest'
5
+ require 'json'
6
+ require 'pathname'
7
+ require 'set'
8
+
9
+ module CodebaseIndex
10
+ module MCP
11
+ # Reads extraction output from disk for the MCP server.
12
+ #
13
+ # Lazy-loads unit JSON files on demand with an LRU-ish cache cap.
14
+ # Builds an identifier index from _index.json files for fast lookups.
15
+ #
16
+ # @example
17
+ # reader = IndexReader.new("/path/to/codebase_index")
18
+ # reader.find_unit("Post") # => Hash (full unit data)
19
+ # reader.list_units(type: "model") # => Array<Hash>
20
+ #
21
+ class IndexReader
22
# Output subdirectories, one per extractor type.
# Must stay in sync with Extractor::EXTRACTORS keys.
TYPE_DIRS = %w[
  models controllers graphql components view_components
  services jobs mailers serializers managers policies validators
  concerns routes middleware i18n pundit_policies configurations
  engines view_templates migrations action_cable_channels
  scheduled_jobs rake_tasks state_machines events decorators
  database_views caching factories test_mappings rails_source
  poros libs
].freeze

# Directory name => singular type name (used in search filtering).
# Built with ActiveSupport's String#singularize, so there is no hand-kept table.
DIR_TO_TYPE = TYPE_DIRS.each_with_object({}) { |dir, table| table[dir] = dir.singularize }.freeze

# Singular type name => directory name (the inverse of DIR_TO_TYPE).
TYPE_TO_DIR = DIR_TO_TYPE.each_with_object({}) { |(dir, type), table| table[type] = dir }.freeze

# Maximum number of loaded unit files to cache in memory.
MAX_UNIT_CACHE = 50
42
+
43
# Validate the extraction output directory and start with empty caches.
#
# @param index_dir [String] Path to extraction output directory
# @raise [ArgumentError] if directory doesn't exist or has no manifest.json
def initialize(index_dir)
  root = Pathname.new(index_dir)
  @index_dir = root

  # The directory check must come first so the more specific message wins.
  raise ArgumentError, "Index directory does not exist: #{index_dir}" unless root.directory?

  manifest_path = root.join('manifest.json')
  raise ArgumentError, "No manifest.json found in: #{index_dir}" unless manifest_path.file?

  @unit_cache_order = []
  @unit_cache = {}
  @identifier_map = nil
end
54
+
55
# Drop every cached value so the next access re-reads from disk.
#
# Container caches are replaced with fresh empty objects; every memoized
# single value is reset to nil.
#
# @return [void]
def reload!
  @unit_cache = {}
  @unit_cache_order = []
  @index_cache = {}

  %i[@identifier_map @manifest @summary @dependency_graph @graph_analysis @raw_graph_data].each do |ivar|
    instance_variable_set(ivar, nil)
  end

  nil
end
69
+
70
# Parsed manifest.json, memoized after the first successful read.
#
# @return [Hash] Parsed manifest.json
def manifest
  return @manifest if @manifest

  @manifest = parse_json('manifest.json')
end
74
+
75
# Contents of SUMMARY.md in the index directory.
#
# The text is memoized once read; while the file is absent nothing is
# cached, so each call checks the filesystem again.
#
# @return [String, nil] SUMMARY.md content, or nil if not present
def summary
  return @summary if @summary

  summary_file = @index_dir.join('SUMMARY.md')
  @summary = summary_file.file? ? summary_file.read : nil
end
82
+
83
# Dependency graph object built from dependency_graph.json, memoized.
#
# @return [CodebaseIndex::DependencyGraph] Graph loaded from disk
def dependency_graph
  return @dependency_graph if @dependency_graph

  @dependency_graph = CodebaseIndex::DependencyGraph.from_h(parse_json('dependency_graph.json'))
end
90
+
91
# Parsed graph_analysis.json, memoized after the first successful read.
#
# @return [Hash] Parsed graph_analysis.json
def graph_analysis
  return @graph_analysis if @graph_analysis

  @graph_analysis = parse_json('graph_analysis.json')
end
95
+
96
# Look up one unit by its identifier and load its full JSON document.
#
# @param identifier [String] Unit identifier (e.g. "Post", "Api::V1::HealthController")
# @return [Hash, nil] Full unit data or nil if not found
def find_unit(identifier)
  entry = identifier_map[identifier]

  entry ? load_unit(entry[:type_dir], entry[:filename]) : nil
end
106
+
107
# Return index entries for every unit, or only for units of one type.
#
# An unrecognized type name yields an empty list.
#
# @param type [String, nil] Singular type name (e.g. "model", "controller")
# @return [Array<Hash>] Index entries for matching units
def list_units(type: nil)
  selected = type ? [TYPE_TO_DIR[type]].compact : TYPE_DIRS

  selected.flat_map { |type_dir| read_index(type_dir) }
end
121
+
122
# Search units by case-insensitive literal substring.
#
# The query is escaped before matching, so regex metacharacters in it are
# matched literally rather than interpreted.
#
# Phase 1: match identifiers from index files (cheap).
# Phase 2: lazy-load unit files for metadata/source_code matching.
# Within phase 2, source_code is checked before metadata.
#
# Scanning stops as soon as +limit+ results have been collected, so index
# files for the remaining type directories are not read.
#
# @param query [String] Text to look for (case-insensitive, matched literally)
# @param types [Array<String>, nil] Filter to these singular type names
# @param fields [Array<String>] Fields to search: "identifier", "metadata", "source_code"
# @param limit [Integer] Maximum results to return
# @return [Array<Hash>] Matches with :identifier, :type, :match_field keys
def search(query, types: nil, fields: %w[identifier], limit: 20)
  pattern = Regexp.new(Regexp.escape(query), Regexp::IGNORECASE)

  # The requested fields do not change while scanning; resolve them once.
  match_identifier = fields.include?('identifier')
  match_source = fields.include?('source_code')
  match_metadata = fields.include?('metadata')

  dirs = types ? types.filter_map { |t| TYPE_TO_DIR[t] } : TYPE_DIRS
  results = []

  dirs.each do |dir|
    # Leave the outer loop too: otherwise every remaining index is still read.
    break if results.size >= limit

    type_name = DIR_TO_TYPE[dir]

    read_index(dir).each do |entry|
      break if results.size >= limit

      id = entry['identifier']

      # Phase 1: identifier matching
      if match_identifier && pattern.match?(id)
        results << { identifier: id, type: type_name, match_field: 'identifier' }
        next
      end

      # Phase 2: metadata/source_code matching (requires loading full unit)
      next unless match_source || match_metadata

      unit = find_unit(id)
      next unless unit

      if match_source && unit['source_code'] && pattern.match?(unit['source_code'])
        results << { identifier: id, type: type_name, match_field: 'source_code' }
      elsif match_metadata && unit['metadata'] && pattern.match?(unit['metadata'].to_json)
        results << { identifier: id, type: type_name, match_field: 'metadata' }
      end
    end
  end

  results.first(limit)
end
173
+
174
# Walk outgoing dependency edges breadth-first from a starting unit.
#
# @param identifier [String] Starting unit identifier
# @param depth [Integer] Maximum traversal depth
# @param types [Array<String>, nil] Filter to these singular type names
# @return [Hash] { root:, nodes: { id => { type:, depth:, deps: [] } } }
def traverse_dependencies(identifier, depth: 2, types: nil)
  options = { depth: depth, types: types, direction: :forward }
  traverse(identifier, **options)
end
183
+
184
# Walk incoming (reverse) dependency edges breadth-first from a starting unit.
#
# @param identifier [String] Starting unit identifier
# @param depth [Integer] Maximum traversal depth
# @param types [Array<String>, nil] Filter to these singular type names
# @return [Hash] { root:, nodes: { id => { type:, depth:, deps: [] } } }
def traverse_dependents(identifier, depth: 2, types: nil)
  options = { depth: depth, types: types, direction: :reverse }
  traverse(identifier, **options)
end
193
+
194
# Find rails_source units related to a concept keyword.
#
# The keyword is matched literally and case-insensitively against each
# unit's identifier, then its source_code, then its metadata serialized
# as JSON. Every candidate unit is loaded through find_unit because the
# result carries fields from the full unit document.
#
# @param keyword [String] Concept keyword to match (e.g. "ActiveRecord", "routing", "persistence")
# @param limit [Integer] Maximum results to return
# @return [Array<Hash>] Matching rails_source unit summaries
def framework_sources(keyword, limit: 20)
  matcher = Regexp.new(Regexp.escape(keyword), Regexp::IGNORECASE)
  found = []

  read_index('rails_source').each do |entry|
    break if found.size >= limit

    name = entry['identifier']
    unit = find_unit(name)
    next unless unit

    relevant = matcher.match?(name) ||
               (unit['source_code'] && matcher.match?(unit['source_code'])) ||
               (unit['metadata'] && matcher.match?(unit['metadata'].to_json))

    if relevant
      found << {
        identifier: name,
        type: 'rails_source',
        file_path: unit['file_path'],
        metadata: unit['metadata']
      }
    end
  end

  found
end
230
+
231
# List units ordered by most recent git modification first.
#
# Every unit in the selected type directories is loaded; units that cannot
# be loaded or that lack metadata.git.last_modified are left out. Ordering
# compares the stored last_modified values directly.
#
# @param limit [Integer] Maximum results to return
# @param types [Array<String>, nil] Filter to these singular type names
# @return [Array<Hash>] Units sorted by last_modified descending
def recent_changes(limit: 10, types: nil)
  selected = types ? types.filter_map { |name| TYPE_TO_DIR[name] } : TYPE_DIRS

  dated = selected.flat_map do |type_dir|
    read_index(type_dir).filter_map do |entry|
      name = entry['identifier']
      unit = find_unit(name)
      stamp = unit&.dig('metadata', 'git', 'last_modified')
      next unless unit && stamp

      { identifier: name, type: DIR_TO_TYPE[type_dir], file_path: unit['file_path'], last_modified: stamp }
    end
  end

  ordered = dated.sort_by { |row| row[:last_modified] }
  ordered.reverse!
  ordered.take(limit)
end
272
+
273
# Unprocessed contents of dependency_graph.json, memoized after first read.
#
# @return [Hash] Raw dependency graph data from JSON
def raw_graph_data
  return @raw_graph_data if @raw_graph_data

  @raw_graph_data = parse_json('dependency_graph.json')
end
277
+
278
+ private
279
+
280
# Memoized lookup table: identifier => { type_dir:, filename: }, built
# from all _index.json files on first use.
def identifier_map
  @identifier_map = build_identifier_map unless @identifier_map
  @identifier_map
end
284
+
285
# Scan every type directory's index and derive the unit file name for
# each identifier.
#
# File names are the identifier with "::" turned into "__" and any other
# character outside [a-zA-Z0-9_-] turned into "_", followed by the first
# 8 hex characters of the identifier's SHA-256 digest. When an identifier
# appears in more than one type directory, the later directory wins.
#
# @return [Hash{String => Hash}] identifier => { type_dir:, filename: }
def build_identifier_map
  TYPE_DIRS.each_with_object({}) do |type_dir, lookup|
    read_index(type_dir).each do |entry|
      identifier = entry['identifier']
      sanitized = identifier.gsub('::', '__').gsub(/[^a-zA-Z0-9_-]/, '_')
      short_hash = Digest::SHA256.hexdigest(identifier).slice(0, 8)

      lookup[identifier] = { type_dir: type_dir, filename: "#{sanitized}_#{short_hash}.json" }
    end
  end
end
299
+
300
# Entries of a type directory's _index.json, cached per directory.
#
# A directory with no _index.json yields (and caches) an empty array.
#
# @param dir [String] Type directory name relative to the index root
# @return [Array<Hash>] Parsed index entries
def read_index(dir)
  cache = (@index_cache ||= {})
  return cache[dir] if cache[dir]

  index_file = @index_dir.join(dir, '_index.json')
  cache[dir] = index_file.file? ? JSON.parse(index_file.read) : []
end
308
+
309
# Load one unit JSON file through a bounded least-recently-used cache.
#
# @unit_cache holds the parsed documents; @unit_cache_order lists cache
# keys from least to most recently used. When the cache already holds
# MAX_UNIT_CACHE entries, the least recently used one is dropped before
# the new document is stored.
#
# @param type_dir [String] Type directory name relative to the index root
# @param filename [String] Unit file name inside that directory
# @return [Hash, nil] Parsed unit data, or nil if the file does not exist
def load_unit(type_dir, filename)
  key = "#{type_dir}/#{filename}"

  unless @unit_cache.key?(key)
    unit_file = @index_dir.join(type_dir, filename)
    return nil unless unit_file.file?

    parsed = JSON.parse(unit_file.read)

    # Make room only after the file parsed successfully.
    @unit_cache.delete(@unit_cache_order.shift) if @unit_cache.size >= MAX_UNIT_CACHE

    @unit_cache[key] = parsed
    @unit_cache_order << key
    return parsed
  end

  # Cache hit: re-append the key so it counts as most recently used.
  @unit_cache_order.delete(key)
  @unit_cache_order << key
  @unit_cache[key]
end
335
+
336
# Read and parse a JSON file located under the index directory.
#
# @param filename [String] File name relative to the index directory
# @return [Object] Parsed JSON content
def parse_json(filename)
  JSON.parse(File.read(@index_dir.join(filename)))
end
341
+
342
# Breadth-first walk over the raw dependency graph in either direction.
#
# Neighbors come from the graph's 'edges' table when direction is
# :forward and from its 'reverse' table otherwise. When a type filter is
# given, neighbors of other types (or unknown to the 'nodes' table) are
# dropped both from the reported deps and from further expansion. Nodes
# at the maximum depth are reported but not expanded.
#
# @param identifier [String] Starting unit identifier
# @param depth [Integer] Maximum traversal depth
# @param types [Array<String>, nil] Filter to these singular type names
# @param direction [Symbol] :forward, or any other value for reverse
# @return [Hash] { root:, found:, nodes: { id => { type:, depth:, deps: [] } } }
def traverse(identifier, depth:, types:, direction:)
  graph = raw_graph_data
  node_index = graph['nodes'] || {}

  return { root: identifier, found: false, nodes: {} } unless node_index.key?(identifier)

  adjacency_key = direction == :forward ? 'edges' : 'reverse'
  allowed_types = types&.to_set
  seen = Set[identifier]
  pending = [[identifier, 0]]
  collected = {}

  until pending.empty?
    name, level = pending.shift

    adjacent = (graph[adjacency_key] || {})[name] || []

    # Filter by type if requested
    if allowed_types
      adjacent = adjacent.select do |candidate|
        meta = node_index[candidate]
        meta && allowed_types.include?(meta['type'])
      end
    end

    collected[name] = {
      type: node_index[name]&.dig('type'),
      depth: level,
      deps: adjacent
    }

    next if level >= depth

    adjacent.each do |candidate|
      # Set#add? returns nil when the node was already queued or visited.
      pending << [candidate, level + 1] if seen.add?(candidate)
    end
  end

  { root: identifier, found: true, nodes: collected }
end
392
+ end
393
+ end
394
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CodebaseIndex
4
+ module MCP
5
+ module Renderers
6
# Renders MCP tool responses as Markdown wrapped in XML boundary tags.
# Matches Anthropic's recommended format: XML tags for section boundaries,
# Markdown for content.
#
# Each render_* method takes the parent renderer's output (via super) as
# the tag body and adds a tag-specific set of attributes. Attribute values
# are XML-escaped; the body is inserted as-is.
class ClaudeRenderer < MarkdownRenderer
  # @param data [Hash] Unit data
  # @return [String] XML-wrapped Markdown, or 'Unit not found' when data
  #   is not a Hash with an 'identifier' key
  def render_lookup(data, **)
    return 'Unit not found' unless data.is_a?(Hash) && data['identifier']

    content = super
    wrap_xml('lookup_result', content,
             identifier: data['identifier'], type: data['type'])
  end

  # @param data [Hash] Search results; the query is read from :query or 'query'
  # @return [String] XML-wrapped Markdown
  def render_search(data, **)
    content = super
    query = data[:query] || data['query']
    wrap_xml('search_results', content, query: query)
  end

  # @param data [Hash] Traversal result; the root is read from :root or 'root'
  # @return [String] XML-wrapped Markdown
  def render_dependencies(data, **)
    content = super
    root = data[:root] || data['root']
    wrap_xml('dependencies', content, root: root)
  end

  # @param data [Hash] Traversal result; the root is read from :root or 'root'
  # @return [String] XML-wrapped Markdown
  def render_dependents(data, **)
    content = super
    root = data[:root] || data['root']
    wrap_xml('dependents', content, root: root)
  end

  # @param data [Object] Passed through to the parent renderer
  # @return [String] XML-wrapped Markdown
  def render_structure(data, **)
    wrap_xml('structure', super)
  end

  # @param data [Object] Passed through to the parent renderer
  # @return [String] XML-wrapped Markdown
  def render_graph_analysis(data, **)
    wrap_xml('graph_analysis', super)
  end

  # @param data [Object] Passed through to the parent renderer
  # @return [String] XML-wrapped Markdown
  def render_pagerank(data, **)
    wrap_xml('pagerank', super)
  end

  # @param data [Hash] Framework search results; the keyword is read from
  #   :keyword or 'keyword'
  # @return [String] XML-wrapped Markdown
  def render_framework(data, **)
    content = super
    keyword = data[:keyword] || data['keyword']
    wrap_xml('framework_results', content, keyword: keyword)
  end

  # @param data [Object] Passed through to the parent renderer
  # @return [String] XML-wrapped Markdown
  def render_recent_changes(data, **)
    wrap_xml('recent_changes', super)
  end

  # @param data [Object] Passed through to the parent renderer
  # @return [String] XML-wrapped output
  def render_default(data)
    wrap_xml('result', super)
  end

  private

  # Wrap content in an XML tag with optional attributes.
  #
  # Attribute values are converted with to_s and XML-escaped, so a value
  # containing a double quote, "<", ">" or "&" (e.g. a user-supplied
  # search query) cannot terminate the attribute or the tag early.
  # A nil value renders as an empty attribute.
  #
  # @param tag [String] XML tag name
  # @param content [String] Inner content (inserted unescaped)
  # @param attrs [Hash] XML attributes
  # @return [String] XML-wrapped content
  def wrap_xml(tag, content, **attrs)
    # String#encode(xml: :attr) escapes the value and adds the surrounding quotes.
    attr_str = attrs.map { |name, value| " #{name}=#{value.to_s.encode(xml: :attr)}" }.join
    "<#{tag}#{attr_str}>\n#{content}\n</#{tag}>"
  end
end
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CodebaseIndex
4
+ module MCP
5
+ module Renderers
6
# Renderer that emits data as pretty-printed JSON with no further formatting.
# Preserves backward-compatible behavior.
#
# Only render_default is overridden here.
# NOTE(review): presumably ToolResponseRenderer routes the other render_*
# calls to render_default — confirm in the base class.
class JsonRenderer < ToolResponseRenderer
  # Serialize the response with JSON.pretty_generate.
  #
  # @param data [Object] Any JSON-serializable data
  # @return [String] Pretty-printed JSON
  def render_default(data)
    JSON.pretty_generate(data)
  end
end
15
+ end
16
+ end
17
+ end