codebase_index 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/README.md +95 -300
- data/exe/codebase-index-mcp +3 -31
- data/exe/codebase-index-mcp-http +3 -31
- data/lib/codebase_index/ast/method_extractor.rb +3 -8
- data/lib/codebase_index/ast/node.rb +28 -0
- data/lib/codebase_index/ast/parser.rb +53 -92
- data/lib/codebase_index/builder.rb +67 -4
- data/lib/codebase_index/cache/cache_middleware.rb +199 -0
- data/lib/codebase_index/cache/cache_store.rb +264 -0
- data/lib/codebase_index/cache/redis_cache_store.rb +116 -0
- data/lib/codebase_index/cache/solid_cache_store.rb +111 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +29 -24
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +7 -40
- data/lib/codebase_index/console/adapters/job_adapter.rb +68 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +7 -40
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +7 -40
- data/lib/codebase_index/console/bridge.rb +7 -0
- data/lib/codebase_index/console/console_response_renderer.rb +3 -7
- data/lib/codebase_index/console/embedded_executor.rb +2 -1
- data/lib/codebase_index/console/server.rb +1 -4
- data/lib/codebase_index/dependency_graph.rb +28 -19
- data/lib/codebase_index/embedding/indexer.rb +18 -8
- data/lib/codebase_index/embedding/openai.rb +27 -6
- data/lib/codebase_index/embedding/provider.rb +29 -2
- data/lib/codebase_index/evaluation/evaluator.rb +5 -12
- data/lib/codebase_index/extractor.rb +40 -44
- data/lib/codebase_index/extractors/action_cable_extractor.rb +9 -36
- data/lib/codebase_index/extractors/callback_analyzer.rb +22 -8
- data/lib/codebase_index/extractors/controller_extractor.rb +3 -93
- data/lib/codebase_index/extractors/decorator_extractor.rb +7 -14
- data/lib/codebase_index/extractors/engine_extractor.rb +20 -1
- data/lib/codebase_index/extractors/graphql_extractor.rb +4 -29
- data/lib/codebase_index/extractors/job_extractor.rb +11 -6
- data/lib/codebase_index/extractors/lib_extractor.rb +0 -31
- data/lib/codebase_index/extractors/mailer_extractor.rb +15 -85
- data/lib/codebase_index/extractors/manager_extractor.rb +1 -15
- data/lib/codebase_index/extractors/model_extractor.rb +20 -53
- data/lib/codebase_index/extractors/phlex_extractor.rb +8 -8
- data/lib/codebase_index/extractors/policy_extractor.rb +1 -24
- data/lib/codebase_index/extractors/poro_extractor.rb +0 -17
- data/lib/codebase_index/extractors/serializer_extractor.rb +12 -7
- data/lib/codebase_index/extractors/service_extractor.rb +1 -38
- data/lib/codebase_index/extractors/shared_utility_methods.rb +183 -1
- data/lib/codebase_index/extractors/validator_extractor.rb +3 -17
- data/lib/codebase_index/extractors/view_component_extractor.rb +10 -9
- data/lib/codebase_index/filename_utils.rb +32 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +1 -4
- data/lib/codebase_index/formatting/base.rb +0 -10
- data/lib/codebase_index/graph_analyzer.rb +1 -1
- data/lib/codebase_index/mcp/bootstrapper.rb +58 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +35 -34
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +29 -29
- data/lib/codebase_index/mcp/server.rb +59 -68
- data/lib/codebase_index/mcp/tool_response_renderer.rb +23 -0
- data/lib/codebase_index/notion/client.rb +2 -2
- data/lib/codebase_index/notion/mapper.rb +1 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +3 -11
- data/lib/codebase_index/notion/mappers/model_mapper.rb +20 -23
- data/lib/codebase_index/notion/mappers/shared.rb +22 -0
- data/lib/codebase_index/observability/health_check.rb +0 -2
- data/lib/codebase_index/observability/structured_logger.rb +12 -30
- data/lib/codebase_index/operator/pipeline_guard.rb +0 -7
- data/lib/codebase_index/resilience/index_validator.rb +3 -21
- data/lib/codebase_index/retrieval/context_assembler.rb +19 -7
- data/lib/codebase_index/retrieval/query_classifier.rb +14 -12
- data/lib/codebase_index/retrieval/ranker.rb +6 -2
- data/lib/codebase_index/retrieval/search_executor.rb +8 -19
- data/lib/codebase_index/retriever.rb +1 -9
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +5 -25
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +6 -7
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +58 -53
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +11 -7
- data/lib/codebase_index/session_tracer/file_store.rb +1 -8
- data/lib/codebase_index/session_tracer/redis_store.rb +1 -7
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +4 -13
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +1 -7
- data/lib/codebase_index/session_tracer/store.rb +14 -0
- data/lib/codebase_index/storage/metadata_store.rb +37 -10
- data/lib/codebase_index/storage/pgvector.rb +37 -5
- data/lib/codebase_index/storage/qdrant.rb +39 -6
- data/lib/codebase_index/storage/vector_store.rb +11 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +14 -10
- data/lib/codebase_index/token_utils.rb +19 -0
- data/lib/codebase_index/version.rb +1 -1
- data/lib/codebase_index.rb +25 -6
- data/lib/tasks/codebase_index.rake +2 -2
- metadata +11 -2
data/exe/codebase-index-mcp-http
CHANGED
|
@@ -16,40 +16,12 @@ require_relative '../lib/codebase_index'
|
|
|
16
16
|
require_relative '../lib/codebase_index/dependency_graph'
|
|
17
17
|
require_relative '../lib/codebase_index/graph_analyzer'
|
|
18
18
|
require_relative '../lib/codebase_index/mcp/server'
|
|
19
|
+
require_relative '../lib/codebase_index/mcp/bootstrapper'
|
|
19
20
|
require_relative '../lib/codebase_index/embedding/text_preparer'
|
|
20
21
|
require_relative '../lib/codebase_index/embedding/indexer'
|
|
21
22
|
|
|
22
|
-
index_dir = ARGV
|
|
23
|
-
|
|
24
|
-
unless Dir.exist?(index_dir)
|
|
25
|
-
warn "Error: Index directory does not exist: #{index_dir}"
|
|
26
|
-
exit 1
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
unless File.exist?(File.join(index_dir, 'manifest.json'))
|
|
30
|
-
warn "Error: No manifest.json found in: #{index_dir}"
|
|
31
|
-
warn 'Run `bundle exec rake codebase_index:extract` in your Rails app first.'
|
|
32
|
-
exit 1
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# Attempt to build a retriever for semantic search.
|
|
36
|
-
# Auto-configures from environment variables when no explicit configuration exists.
|
|
37
|
-
retriever = begin
|
|
38
|
-
config = CodebaseIndex.configuration
|
|
39
|
-
|
|
40
|
-
if !config.embedding_provider && ENV.fetch('OPENAI_API_KEY', nil)
|
|
41
|
-
config.vector_store = :in_memory
|
|
42
|
-
config.metadata_store = :in_memory
|
|
43
|
-
config.graph_store = :in_memory
|
|
44
|
-
config.embedding_provider = :openai
|
|
45
|
-
config.embedding_options = { api_key: ENV.fetch('OPENAI_API_KEY', nil) }
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
CodebaseIndex::Builder.new(config).build_retriever if config.embedding_provider
|
|
49
|
-
rescue StandardError => e
|
|
50
|
-
warn "Note: Semantic search unavailable (#{e.message}). Using pattern-based search only."
|
|
51
|
-
nil
|
|
52
|
-
end
|
|
23
|
+
index_dir = CodebaseIndex::MCP::Bootstrapper.resolve_index_dir(ARGV)
|
|
24
|
+
retriever = CodebaseIndex::MCP::Bootstrapper.build_retriever
|
|
53
25
|
|
|
54
26
|
port = (ENV['PORT'] || 9292).to_i
|
|
55
27
|
host = ENV['HOST'] || 'localhost'
|
|
@@ -16,6 +16,8 @@ module CodebaseIndex
|
|
|
16
16
|
# # => "def create\n @user = User.find(params[:id])\nend\n"
|
|
17
17
|
#
|
|
18
18
|
class MethodExtractor
|
|
19
|
+
include SourceSpan
|
|
20
|
+
|
|
19
21
|
# @param parser [Ast::Parser, nil] Parser instance (creates default if nil)
|
|
20
22
|
def initialize(parser: nil)
|
|
21
23
|
@parser = parser || Parser.new
|
|
@@ -62,14 +64,7 @@ module CodebaseIndex
|
|
|
62
64
|
return node.source if node.source
|
|
63
65
|
|
|
64
66
|
# Fallback: extract by line range
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
lines = source.lines
|
|
68
|
-
start_idx = node.line - 1
|
|
69
|
-
end_idx = node.end_line - 1
|
|
70
|
-
return nil if start_idx.negative? || end_idx >= lines.length
|
|
71
|
-
|
|
72
|
-
lines[start_idx..end_idx].join
|
|
67
|
+
extract_source_span(source, node.line, node.end_line)
|
|
73
68
|
end
|
|
74
69
|
end
|
|
75
70
|
end
|
|
@@ -84,5 +84,33 @@ module CodebaseIndex
|
|
|
84
84
|
end
|
|
85
85
|
end
|
|
86
86
|
end
|
|
87
|
+
|
|
88
|
+
# Mixin for line-range source extraction, shared across Parser, MethodExtractor,
|
|
89
|
+
# and ClassAnalyzer.
|
|
90
|
+
#
|
|
91
|
+
# @example
|
|
92
|
+
# include Ast::SourceSpan
|
|
93
|
+
# extract_source_span(source, node.line, node.end_line)
|
|
94
|
+
#
|
|
95
|
+
module SourceSpan
|
|
96
|
+
private
|
|
97
|
+
|
|
98
|
+
# Extract source lines for a 1-based start/end line range.
|
|
99
|
+
#
|
|
100
|
+
# @param source [String] Full source text
|
|
101
|
+
# @param start_line [Integer, nil] 1-based start line
|
|
102
|
+
# @param end_line [Integer, nil] 1-based end line
|
|
103
|
+
# @return [String, nil] Extracted lines joined, or nil if out of range
|
|
104
|
+
def extract_source_span(source, start_line, end_line)
|
|
105
|
+
return nil unless start_line && end_line
|
|
106
|
+
|
|
107
|
+
lines = source.lines
|
|
108
|
+
start_idx = start_line - 1
|
|
109
|
+
end_idx = end_line - 1
|
|
110
|
+
return nil if start_idx.negative? || end_idx >= lines.length
|
|
111
|
+
|
|
112
|
+
lines[start_idx..end_idx].join
|
|
113
|
+
end
|
|
114
|
+
end
|
|
87
115
|
end
|
|
88
116
|
end
|
|
@@ -15,6 +15,8 @@ module CodebaseIndex
|
|
|
15
15
|
# root.find_all(:def).first.method_name #=> "bar"
|
|
16
16
|
#
|
|
17
17
|
class Parser
|
|
18
|
+
include SourceSpan
|
|
19
|
+
|
|
18
20
|
# Parse Ruby source into a normalized AST.
|
|
19
21
|
#
|
|
20
22
|
# @param source [String] Ruby source code
|
|
@@ -26,8 +28,6 @@ module CodebaseIndex
|
|
|
26
28
|
else
|
|
27
29
|
parse_with_parser_gem(source)
|
|
28
30
|
end
|
|
29
|
-
rescue CodebaseIndex::ExtractionError
|
|
30
|
-
raise
|
|
31
31
|
rescue StandardError => e
|
|
32
32
|
raise CodebaseIndex::ExtractionError, "Failed to parse source: #{e.message}"
|
|
33
33
|
end
|
|
@@ -36,15 +36,12 @@ module CodebaseIndex
|
|
|
36
36
|
#
|
|
37
37
|
# @return [Boolean]
|
|
38
38
|
def prism_available?
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
@prism_available
|
|
39
|
+
return @prism_available unless @prism_available.nil?
|
|
40
|
+
|
|
41
|
+
require 'prism'
|
|
42
|
+
@prism_available = true
|
|
43
|
+
rescue LoadError
|
|
44
|
+
@prism_available = false
|
|
48
45
|
end
|
|
49
46
|
|
|
50
47
|
private
|
|
@@ -105,10 +102,8 @@ module CodebaseIndex
|
|
|
105
102
|
)
|
|
106
103
|
when Prism::ConstantPathNode
|
|
107
104
|
convert_prism_constant_path(prism_node, source)
|
|
108
|
-
when Prism::IfNode
|
|
105
|
+
when Prism::IfNode, Prism::UnlessNode
|
|
109
106
|
convert_prism_if(prism_node, source)
|
|
110
|
-
when Prism::UnlessNode
|
|
111
|
-
convert_prism_unless(prism_node, source)
|
|
112
107
|
when Prism::CaseNode
|
|
113
108
|
convert_prism_case(prism_node, source)
|
|
114
109
|
when Prism::BeginNode
|
|
@@ -223,17 +218,7 @@ module CodebaseIndex
|
|
|
223
218
|
def convert_prism_class(prism_node, source)
|
|
224
219
|
name_node = convert_prism_node(prism_node.constant_path, source)
|
|
225
220
|
superclass = prism_node.superclass ? convert_prism_node(prism_node.superclass, source) : nil
|
|
226
|
-
body_children =
|
|
227
|
-
if prism_node.body.is_a?(Prism::StatementsNode)
|
|
228
|
-
prism_node.body.body.map do |c|
|
|
229
|
-
convert_prism_node(c, source)
|
|
230
|
-
end
|
|
231
|
-
else
|
|
232
|
-
[convert_prism_node(prism_node.body, source)]
|
|
233
|
-
end
|
|
234
|
-
else
|
|
235
|
-
[]
|
|
236
|
-
end
|
|
221
|
+
body_children = extract_prism_body_children(prism_node, source)
|
|
237
222
|
|
|
238
223
|
children = [name_node, superclass] + body_children
|
|
239
224
|
|
|
@@ -248,17 +233,7 @@ module CodebaseIndex
|
|
|
248
233
|
|
|
249
234
|
def convert_prism_module(prism_node, source)
|
|
250
235
|
name_node = convert_prism_node(prism_node.constant_path, source)
|
|
251
|
-
body_children =
|
|
252
|
-
if prism_node.body.is_a?(Prism::StatementsNode)
|
|
253
|
-
prism_node.body.body.map do |c|
|
|
254
|
-
convert_prism_node(c, source)
|
|
255
|
-
end
|
|
256
|
-
else
|
|
257
|
-
[convert_prism_node(prism_node.body, source)]
|
|
258
|
-
end
|
|
259
|
-
else
|
|
260
|
-
[]
|
|
261
|
-
end
|
|
236
|
+
body_children = extract_prism_body_children(prism_node, source)
|
|
262
237
|
|
|
263
238
|
children = [name_node] + body_children
|
|
264
239
|
|
|
@@ -272,15 +247,7 @@ module CodebaseIndex
|
|
|
272
247
|
end
|
|
273
248
|
|
|
274
249
|
def convert_prism_def(prism_node, source)
|
|
275
|
-
body_children =
|
|
276
|
-
if prism_node.body.is_a?(Prism::StatementsNode)
|
|
277
|
-
prism_node.body.body.map { |c| convert_prism_node(c, source) }
|
|
278
|
-
else
|
|
279
|
-
[convert_prism_node(prism_node.body, source)]
|
|
280
|
-
end
|
|
281
|
-
else
|
|
282
|
-
[]
|
|
283
|
-
end
|
|
250
|
+
body_children = extract_prism_body_children(prism_node, source)
|
|
284
251
|
|
|
285
252
|
is_class_method = prism_node.respond_to?(:receiver) && prism_node.receiver
|
|
286
253
|
receiver_text = if is_class_method
|
|
@@ -346,13 +313,14 @@ module CodebaseIndex
|
|
|
346
313
|
|
|
347
314
|
def convert_prism_constant_path(prism_node, _source)
|
|
348
315
|
parent_text = (extract_const_path_text(prism_node.parent) if prism_node.parent)
|
|
316
|
+
const_name = prism_node.respond_to?(:name) ? prism_node.name.to_s : prism_node.child.name.to_s
|
|
349
317
|
|
|
350
318
|
Node.new(
|
|
351
319
|
type: :const,
|
|
352
320
|
children: [],
|
|
353
321
|
line: line_for_prism(prism_node),
|
|
354
322
|
receiver: parent_text,
|
|
355
|
-
method_name:
|
|
323
|
+
method_name: const_name
|
|
356
324
|
)
|
|
357
325
|
end
|
|
358
326
|
|
|
@@ -364,23 +332,8 @@ module CodebaseIndex
|
|
|
364
332
|
end
|
|
365
333
|
|
|
366
334
|
then_body = prism_node.statements ? convert_prism_node(prism_node.statements, source) : nil
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
Node.new(
|
|
370
|
-
type: :if,
|
|
371
|
-
children: [condition, then_body, else_body].compact,
|
|
372
|
-
line: line_for_prism(prism_node),
|
|
373
|
-
end_line: end_line_for_prism(prism_node),
|
|
374
|
-
source: condition_source
|
|
375
|
-
)
|
|
376
|
-
end
|
|
377
|
-
|
|
378
|
-
def convert_prism_unless(prism_node, source)
|
|
379
|
-
condition = convert_prism_node(prism_node.predicate, source)
|
|
380
|
-
condition_source = extract_prism_source_text(prism_node.predicate, source)
|
|
381
|
-
|
|
382
|
-
then_body = prism_node.statements ? convert_prism_node(prism_node.statements, source) : nil
|
|
383
|
-
else_body = prism_node.else_clause ? convert_prism_node(prism_node.else_clause, source) : nil
|
|
335
|
+
else_clause = prism_else_clause(prism_node)
|
|
336
|
+
else_body = else_clause ? convert_prism_node(else_clause, source) : nil
|
|
384
337
|
|
|
385
338
|
Node.new(
|
|
386
339
|
type: :if,
|
|
@@ -395,10 +348,21 @@ module CodebaseIndex
|
|
|
395
348
|
children = []
|
|
396
349
|
children << convert_prism_node(prism_node.predicate, source) if prism_node.predicate
|
|
397
350
|
prism_node.conditions.each { |c| children << convert_prism_node(c, source) }
|
|
398
|
-
|
|
351
|
+
else_clause = prism_else_clause(prism_node)
|
|
352
|
+
children << convert_prism_node(else_clause, source) if else_clause
|
|
399
353
|
Node.new(type: :case, children: children, line: line_for_prism(prism_node))
|
|
400
354
|
end
|
|
401
355
|
|
|
356
|
+
def extract_prism_body_children(prism_node, source)
|
|
357
|
+
return [] unless prism_node.body
|
|
358
|
+
|
|
359
|
+
if prism_node.body.is_a?(Prism::StatementsNode)
|
|
360
|
+
prism_node.body.body.map { |c| convert_prism_node(c, source) }
|
|
361
|
+
else
|
|
362
|
+
[convert_prism_node(prism_node.body, source)]
|
|
363
|
+
end
|
|
364
|
+
end
|
|
365
|
+
|
|
402
366
|
def convert_prism_children(statements_node, source)
|
|
403
367
|
return [] unless statements_node
|
|
404
368
|
|
|
@@ -410,12 +374,19 @@ module CodebaseIndex
|
|
|
410
374
|
end
|
|
411
375
|
|
|
412
376
|
def extract_prism_generic_children(prism_node, source)
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
377
|
+
prism_node.child_nodes.compact.filter_map { |child| convert_prism_node(child, source) }
|
|
378
|
+
end
|
|
379
|
+
|
|
380
|
+
# Portable accessor for the else/consequent clause of if/unless/case nodes.
|
|
381
|
+
# Prism < 1.0 uses :consequent, Prism >= 1.0 uses :else_clause/:subsequent.
|
|
382
|
+
def prism_else_clause(node)
|
|
383
|
+
if node.respond_to?(:consequent)
|
|
384
|
+
node.consequent
|
|
385
|
+
elsif node.respond_to?(:else_clause)
|
|
386
|
+
node.else_clause
|
|
387
|
+
elsif node.respond_to?(:subsequent)
|
|
388
|
+
node.subsequent
|
|
417
389
|
end
|
|
418
|
-
children
|
|
419
390
|
end
|
|
420
391
|
|
|
421
392
|
def line_for_prism(node)
|
|
@@ -427,12 +398,7 @@ module CodebaseIndex
|
|
|
427
398
|
end
|
|
428
399
|
|
|
429
400
|
def extract_prism_source_span(node, source)
|
|
430
|
-
|
|
431
|
-
start_idx = node.location.start_line - 1
|
|
432
|
-
end_idx = node.location.end_line - 1
|
|
433
|
-
return nil if start_idx.negative? || end_idx >= lines.length
|
|
434
|
-
|
|
435
|
-
lines[start_idx..end_idx].join
|
|
401
|
+
extract_source_span(source, node.location.start_line, node.location.end_line)
|
|
436
402
|
end
|
|
437
403
|
|
|
438
404
|
def extract_prism_source_text(node, source)
|
|
@@ -463,17 +429,13 @@ module CodebaseIndex
|
|
|
463
429
|
node.name.to_s
|
|
464
430
|
when Prism::ConstantPathNode
|
|
465
431
|
parent = node.parent ? extract_const_path_text(node.parent) : nil
|
|
466
|
-
|
|
432
|
+
const_name = node.respond_to?(:name) ? node.name.to_s : node.child.name.to_s
|
|
433
|
+
[parent, const_name].compact.join('::')
|
|
467
434
|
end
|
|
468
435
|
end
|
|
469
436
|
|
|
470
437
|
def extract_const_name(node)
|
|
471
|
-
|
|
472
|
-
when Prism::ConstantReadNode
|
|
473
|
-
node.name.to_s
|
|
474
|
-
when Prism::ConstantPathNode
|
|
475
|
-
extract_const_path_text(node)
|
|
476
|
-
end
|
|
438
|
+
extract_const_path_text(node)
|
|
477
439
|
end
|
|
478
440
|
|
|
479
441
|
# ── Parser gem fallback ──────────────────────────────────────────────
|
|
@@ -490,7 +452,7 @@ module CodebaseIndex
|
|
|
490
452
|
name_node = convert_parser_node(parser_node.children[0], source)
|
|
491
453
|
superclass = parser_node.children[1] ? convert_parser_node(parser_node.children[1], source) : nil
|
|
492
454
|
body = parser_node.children[2] ? convert_parser_node(parser_node.children[2], source) : nil
|
|
493
|
-
body_children = body
|
|
455
|
+
body_children = parser_body_children(body)
|
|
494
456
|
children = [name_node, superclass] + body_children
|
|
495
457
|
Node.new(
|
|
496
458
|
type: :class,
|
|
@@ -502,7 +464,7 @@ module CodebaseIndex
|
|
|
502
464
|
when :module
|
|
503
465
|
name_node = convert_parser_node(parser_node.children[0], source)
|
|
504
466
|
body = parser_node.children[1] ? convert_parser_node(parser_node.children[1], source) : nil
|
|
505
|
-
body_children = body
|
|
467
|
+
body_children = parser_body_children(body)
|
|
506
468
|
children = [name_node] + body_children
|
|
507
469
|
Node.new(
|
|
508
470
|
type: :module,
|
|
@@ -513,7 +475,7 @@ module CodebaseIndex
|
|
|
513
475
|
)
|
|
514
476
|
when :def
|
|
515
477
|
body = parser_node.children[2] ? convert_parser_node(parser_node.children[2], source) : nil
|
|
516
|
-
body_children = body
|
|
478
|
+
body_children = parser_body_children(body)
|
|
517
479
|
Node.new(
|
|
518
480
|
type: :def,
|
|
519
481
|
children: body_children,
|
|
@@ -524,7 +486,7 @@ module CodebaseIndex
|
|
|
524
486
|
)
|
|
525
487
|
when :defs
|
|
526
488
|
body = parser_node.children[3] ? convert_parser_node(parser_node.children[3], source) : nil
|
|
527
|
-
body_children = body
|
|
489
|
+
body_children = parser_body_children(body)
|
|
528
490
|
receiver = parser_node.children[0].type == :self ? 'self' : parser_node.children[0].to_s
|
|
529
491
|
Node.new(
|
|
530
492
|
type: :defs,
|
|
@@ -605,13 +567,12 @@ module CodebaseIndex
|
|
|
605
567
|
end
|
|
606
568
|
end
|
|
607
569
|
|
|
608
|
-
def
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
end_idx = node.loc.expression.last_line - 1
|
|
612
|
-
return nil if start_idx.negative? || end_idx >= lines.length
|
|
570
|
+
def parser_body_children(body_node)
|
|
571
|
+
body_node&.type == :begin ? body_node.children : [body_node].compact
|
|
572
|
+
end
|
|
613
573
|
|
|
614
|
-
|
|
574
|
+
def extract_parser_source_span(node, source)
|
|
575
|
+
extract_source_span(source, node.loc.line, node.loc.expression.last_line)
|
|
615
576
|
end
|
|
616
577
|
|
|
617
578
|
def extract_parser_source_text(node, source)
|
|
@@ -26,7 +26,7 @@ module CodebaseIndex
|
|
|
26
26
|
# config.vector_store_options = { url: ENV['QDRANT_URL'], collection: 'myapp' }
|
|
27
27
|
# end
|
|
28
28
|
#
|
|
29
|
-
class Builder
|
|
29
|
+
class Builder # rubocop:disable Metrics/ClassLength
|
|
30
30
|
# Named presets mapping to default adapter types.
|
|
31
31
|
#
|
|
32
32
|
# :local — fully local, no external services required
|
|
@@ -74,14 +74,25 @@ module CodebaseIndex
|
|
|
74
74
|
|
|
75
75
|
# Build a {Retriever} wired with adapters from the configuration.
|
|
76
76
|
#
|
|
77
|
-
#
|
|
77
|
+
# When `cache_enabled` is true, the embedding provider is wrapped with
|
|
78
|
+
# {Cache::CachedEmbeddingProvider} and the retriever is wrapped with
|
|
79
|
+
# {Cache::CachedRetriever} for transparent caching of expensive operations.
|
|
80
|
+
#
|
|
81
|
+
# @return [Retriever, Cache::CachedRetriever] A fully wired retriever
|
|
78
82
|
def build_retriever
|
|
79
|
-
|
|
83
|
+
provider = build_embedding_provider
|
|
84
|
+
cache = build_cache_store
|
|
85
|
+
|
|
86
|
+
provider = wrap_with_embedding_cache(provider, cache) if cache
|
|
87
|
+
|
|
88
|
+
retriever = Retriever.new(
|
|
80
89
|
vector_store: build_vector_store,
|
|
81
90
|
metadata_store: build_metadata_store,
|
|
82
91
|
graph_store: build_graph_store,
|
|
83
|
-
embedding_provider:
|
|
92
|
+
embedding_provider: provider
|
|
84
93
|
)
|
|
94
|
+
|
|
95
|
+
cache ? wrap_with_retriever_cache(retriever, cache) : retriever
|
|
85
96
|
end
|
|
86
97
|
|
|
87
98
|
# Instantiate the vector store adapter specified by the configuration.
|
|
@@ -133,5 +144,57 @@ module CodebaseIndex
|
|
|
133
144
|
else raise ArgumentError, "Unknown graph_store: #{@config.graph_store}"
|
|
134
145
|
end
|
|
135
146
|
end
|
|
147
|
+
|
|
148
|
+
# Build a cache store from configuration, or nil if caching is disabled.
|
|
149
|
+
#
|
|
150
|
+
# @return [Cache::CacheStore, nil]
|
|
151
|
+
def build_cache_store
|
|
152
|
+
return nil unless @config.cache_enabled
|
|
153
|
+
|
|
154
|
+
opts = @config.cache_options || {}
|
|
155
|
+
|
|
156
|
+
case @config.cache_store
|
|
157
|
+
when :memory
|
|
158
|
+
Cache::InMemory.new(max_entries: opts.fetch(:max_entries, 500))
|
|
159
|
+
when :redis
|
|
160
|
+
require_relative 'cache/redis_cache_store'
|
|
161
|
+
Cache::RedisCacheStore.new(redis: opts.fetch(:redis), default_ttl: opts[:default_ttl])
|
|
162
|
+
when :solid_cache
|
|
163
|
+
require_relative 'cache/solid_cache_store'
|
|
164
|
+
Cache::SolidCacheStore.new(cache: opts.fetch(:cache), default_ttl: opts[:default_ttl])
|
|
165
|
+
when Cache::CacheStore
|
|
166
|
+
@config.cache_store
|
|
167
|
+
else
|
|
168
|
+
raise ArgumentError, "Unknown cache_store: #{@config.cache_store}"
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
# Wrap an embedding provider with caching.
|
|
173
|
+
#
|
|
174
|
+
# @param provider [Embedding::Provider::Interface]
|
|
175
|
+
# @param cache [Cache::CacheStore]
|
|
176
|
+
# @return [Cache::CachedEmbeddingProvider]
|
|
177
|
+
def wrap_with_embedding_cache(provider, cache)
|
|
178
|
+
ttls = (@config.cache_options || {}).fetch(:ttl, {})
|
|
179
|
+
Cache::CachedEmbeddingProvider.new(
|
|
180
|
+
provider: provider,
|
|
181
|
+
cache_store: cache,
|
|
182
|
+
ttl: ttls.fetch(:embeddings, Cache::DEFAULT_TTLS[:embeddings])
|
|
183
|
+
)
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
# Wrap a retriever with caching.
|
|
187
|
+
#
|
|
188
|
+
# @param retriever [Retriever]
|
|
189
|
+
# @param cache [Cache::CacheStore]
|
|
190
|
+
# @return [Cache::CachedRetriever]
|
|
191
|
+
def wrap_with_retriever_cache(retriever, cache)
|
|
192
|
+
ttls = (@config.cache_options || {}).fetch(:ttl, {})
|
|
193
|
+
Cache::CachedRetriever.new(
|
|
194
|
+
retriever: retriever,
|
|
195
|
+
cache_store: cache,
|
|
196
|
+
context_ttl: ttls.fetch(:context, Cache::DEFAULT_TTLS[:context])
|
|
197
|
+
)
|
|
198
|
+
end
|
|
136
199
|
end
|
|
137
200
|
end
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require_relative 'cache_store'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module Cache
|
|
8
|
+
# Decorator that wraps an embedding provider with cache-through logic.
|
|
9
|
+
#
|
|
10
|
+
# Implements the same {Embedding::Provider::Interface} so it can be
|
|
11
|
+
# injected transparently in place of the real provider. On cache hit,
|
|
12
|
+
# the expensive API call (OpenAI, Ollama) is skipped entirely.
|
|
13
|
+
#
|
|
14
|
+
# @example
|
|
15
|
+
# real_provider = Embedding::Provider::OpenAI.new(api_key: key)
|
|
16
|
+
# cached = CachedEmbeddingProvider.new(provider: real_provider, cache_store: store)
|
|
17
|
+
# cached.embed("How does User work?") # API call + cache write
|
|
18
|
+
# cached.embed("How does User work?") # cache hit, no API call
|
|
19
|
+
#
|
|
20
|
+
class CachedEmbeddingProvider
|
|
21
|
+
include Embedding::Provider::Interface
|
|
22
|
+
|
|
23
|
+
# @param provider [Embedding::Provider::Interface] The real embedding provider
|
|
24
|
+
# @param cache_store [CacheStore] Cache backend instance
|
|
25
|
+
# @param ttl [Integer] TTL for cached embeddings in seconds
|
|
26
|
+
def initialize(provider:, cache_store:, ttl: DEFAULT_TTLS[:embeddings])
|
|
27
|
+
@provider = provider
|
|
28
|
+
@cache_store = cache_store
|
|
29
|
+
@ttl = ttl
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Embed a single text, returning a cached vector when available.
|
|
33
|
+
#
|
|
34
|
+
# @param text [String] Text to embed
|
|
35
|
+
# @return [Array<Float>] Embedding vector
|
|
36
|
+
def embed(text)
|
|
37
|
+
key = embedding_key(text)
|
|
38
|
+
@cache_store.fetch(key, ttl: @ttl) { @provider.embed(text) }
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Embed a batch of texts, using cached vectors for any previously seen texts.
|
|
42
|
+
#
|
|
43
|
+
# Only texts that are not already cached are sent to the real provider.
|
|
44
|
+
# Results are merged back in original order.
|
|
45
|
+
#
|
|
46
|
+
# @param texts [Array<String>] Texts to embed
|
|
47
|
+
# @return [Array<Array<Float>>] Embedding vectors (same order as input)
|
|
48
|
+
def embed_batch(texts)
|
|
49
|
+
results, misses, miss_indices = partition_cached(texts)
|
|
50
|
+
|
|
51
|
+
if misses.any?
|
|
52
|
+
fresh_vectors = @provider.embed_batch(misses)
|
|
53
|
+
misses.each_with_index do |text, i|
|
|
54
|
+
results[miss_indices[i]] = fresh_vectors[i]
|
|
55
|
+
begin
|
|
56
|
+
@cache_store.write(embedding_key(text), fresh_vectors[i], ttl: @ttl)
|
|
57
|
+
rescue StandardError => e
|
|
58
|
+
warn("[CodebaseIndex] CachedEmbeddingProvider cache write failed: #{e.message}")
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
results
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Delegate dimensions to the underlying provider.
|
|
67
|
+
#
|
|
68
|
+
# @return [Integer]
|
|
69
|
+
def dimensions
|
|
70
|
+
@provider.dimensions
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Delegate model_name to the underlying provider.
|
|
74
|
+
#
|
|
75
|
+
# @return [String]
|
|
76
|
+
def model_name
|
|
77
|
+
@provider.model_name
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
# Split texts into cached hits and uncached misses.
|
|
83
|
+
#
|
|
84
|
+
# @param texts [Array<String>]
|
|
85
|
+
# @return [Array(Array, Array<String>, Array<Integer>)]
|
|
86
|
+
def partition_cached(texts)
|
|
87
|
+
results = Array.new(texts.size)
|
|
88
|
+
misses = []
|
|
89
|
+
miss_indices = []
|
|
90
|
+
|
|
91
|
+
texts.each_with_index do |text, idx|
|
|
92
|
+
cached = @cache_store.read(embedding_key(text))
|
|
93
|
+
if cached
|
|
94
|
+
results[idx] = cached
|
|
95
|
+
else
|
|
96
|
+
misses << text
|
|
97
|
+
miss_indices << idx
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
[results, misses, miss_indices]
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Build a cache key for an embedding text.
|
|
105
|
+
#
|
|
106
|
+
# @param text [String]
|
|
107
|
+
# @return [String]
|
|
108
|
+
def embedding_key(text)
|
|
109
|
+
Cache.cache_key(:embeddings, Digest::SHA256.hexdigest(text))
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Decorator that wraps a {Retriever} with result caching.
|
|
114
|
+
#
|
|
115
|
+
# Caches the full formatted context output (the most token-expensive artifact)
|
|
116
|
+
# keyed by query + budget. Also caches the structural context overview
|
|
117
|
+
# separately with a longer TTL.
|
|
118
|
+
#
|
|
119
|
+
# @example
|
|
120
|
+
# retriever = CodebaseIndex::Retriever.new(...)
|
|
121
|
+
# cached = CachedRetriever.new(retriever: retriever, cache_store: store)
|
|
122
|
+
# cached.retrieve("How does User work?") # full pipeline + cache
|
|
123
|
+
# cached.retrieve("How does User work?") # instant cache hit
|
|
124
|
+
#
|
|
125
|
+
class CachedRetriever
|
|
126
|
+
# @param retriever [Retriever] The real retriever instance
|
|
127
|
+
# @param cache_store [CacheStore] Cache backend instance
|
|
128
|
+
# @param context_ttl [Integer] TTL for formatted context results
|
|
129
|
+
def initialize(retriever:, cache_store:, context_ttl: DEFAULT_TTLS[:context])
|
|
130
|
+
@retriever = retriever
|
|
131
|
+
@cache_store = cache_store
|
|
132
|
+
@context_ttl = context_ttl
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Execute the retrieval pipeline with context-level caching.
|
|
136
|
+
#
|
|
137
|
+
# On cache hit, returns a RetrievalResult reconstructed from cached data
|
|
138
|
+
# without running any pipeline stages. On miss, delegates to the real
|
|
139
|
+
# retriever and caches the serializable parts of the result.
|
|
140
|
+
#
|
|
141
|
+
# @param query [String] Natural language query
|
|
142
|
+
# @param budget [Integer] Token budget
|
|
143
|
+
# @return [Retriever::RetrievalResult]
|
|
144
|
+
def retrieve(query, budget: 8000)
|
|
145
|
+
key = context_key(query, budget)
|
|
146
|
+
cached = @cache_store.read(key)
|
|
147
|
+
|
|
148
|
+
if cached
|
|
149
|
+
return Retriever::RetrievalResult.new(
|
|
150
|
+
context: cached['context'],
|
|
151
|
+
sources: cached['sources'],
|
|
152
|
+
classification: nil,
|
|
153
|
+
strategy: cached['strategy']&.to_sym,
|
|
154
|
+
tokens_used: cached['tokens_used'],
|
|
155
|
+
budget: budget,
|
|
156
|
+
trace: nil
|
|
157
|
+
)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
result = @retriever.retrieve(query, budget: budget)
|
|
161
|
+
|
|
162
|
+
begin
|
|
163
|
+
@cache_store.write(key, serialize_result(result), ttl: @context_ttl)
|
|
164
|
+
rescue StandardError => e
|
|
165
|
+
warn("[CodebaseIndex] CachedRetriever cache write failed: #{e.message}")
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
result
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
private
|
|
172
|
+
|
|
173
|
+
# Build a cache key for a context result.
|
|
174
|
+
#
|
|
175
|
+
# @param query [String]
|
|
176
|
+
# @param budget [Integer]
|
|
177
|
+
# @return [String]
|
|
178
|
+
def context_key(query, budget)
|
|
179
|
+
Cache.cache_key(:context, query, budget.to_s)
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Serialize a RetrievalResult to a JSON-safe hash.
|
|
183
|
+
#
|
|
184
|
+
# Only caches the fields needed to reconstruct a useful result:
|
|
185
|
+
# context string, sources list, strategy, and token count.
|
|
186
|
+
#
|
|
187
|
+
# @param result [Retriever::RetrievalResult]
|
|
188
|
+
# @return [Hash]
|
|
189
|
+
def serialize_result(result)
|
|
190
|
+
{
|
|
191
|
+
'context' => result.context,
|
|
192
|
+
'sources' => result.sources,
|
|
193
|
+
'strategy' => result.strategy&.to_s,
|
|
194
|
+
'tokens_used' => result.tokens_used
|
|
195
|
+
}
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|