codebase_index 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/codebase-index-mcp +2 -1
- data/exe/codebase-index-mcp-http +2 -1
- data/lib/codebase_index/console/embedded_executor.rb +89 -8
- data/lib/codebase_index/console/rack_middleware.rb +5 -2
- data/lib/codebase_index/console/server.rb +25 -2
- data/lib/codebase_index/extractor.rb +80 -4
- data/lib/codebase_index/extractors/concern_extractor.rb +48 -9
- data/lib/codebase_index/extractors/model_extractor.rb +64 -3
- data/lib/codebase_index/mcp/bootstrapper.rb +38 -0
- data/lib/codebase_index/mcp/server.rb +36 -1
- data/lib/codebase_index/temporal/json_snapshot_store.rb +245 -0
- data/lib/codebase_index/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a55ea6a46950c68c6cc1b39f9008bbff466dea9e20d8b646564d4cc489060552
|
|
4
|
+
data.tar.gz: ca3f117e515fb14f2ec6069c4e6ef541b661e90f0e3d33daa9f9fab43afb746a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e9ff9f423902e1651540b9f933e9a1afab31b3b4ebb0347a368769a83aaf8c1a493e7a33ac68d14a7733753da085dc830e9509911d0bc49205b21e2e07d82de3
|
|
7
|
+
data.tar.gz: ab8630e12cd123cfeeeee6796d0529cdbaa0f8e6e8c0fb9d7d95dadf753b8a8fbb0d5e2136ff2353cc3aac8b0acb5f525f75f903bc777e86326468c6a759ae8a
|
data/exe/codebase-index-mcp
CHANGED
|
@@ -21,8 +21,9 @@ require_relative '../lib/codebase_index/embedding/indexer'
|
|
|
21
21
|
|
|
22
22
|
index_dir = CodebaseIndex::MCP::Bootstrapper.resolve_index_dir(ARGV)
|
|
23
23
|
retriever = CodebaseIndex::MCP::Bootstrapper.build_retriever
|
|
24
|
+
snapshot_store = CodebaseIndex::MCP::Bootstrapper.build_snapshot_store(index_dir)
|
|
24
25
|
|
|
25
|
-
server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever)
|
|
26
|
+
server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store)
|
|
26
27
|
|
|
27
28
|
# Pin protocol version for broad client compatibility (Claude Code, Cursor, etc.)
|
|
28
29
|
if ENV['MCP_PROTOCOL_VERSION']
|
data/exe/codebase-index-mcp-http
CHANGED
|
@@ -22,11 +22,12 @@ require_relative '../lib/codebase_index/embedding/indexer'
|
|
|
22
22
|
|
|
23
23
|
index_dir = CodebaseIndex::MCP::Bootstrapper.resolve_index_dir(ARGV)
|
|
24
24
|
retriever = CodebaseIndex::MCP::Bootstrapper.build_retriever
|
|
25
|
+
snapshot_store = CodebaseIndex::MCP::Bootstrapper.build_snapshot_store(index_dir)
|
|
25
26
|
|
|
26
27
|
port = (ENV['PORT'] || 9292).to_i
|
|
27
28
|
host = ENV['HOST'] || 'localhost'
|
|
28
29
|
|
|
29
|
-
server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever)
|
|
30
|
+
server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store)
|
|
30
31
|
transport = MCP::Server::Transports::StreamableHTTPTransport.new(server)
|
|
31
32
|
server.transport = transport
|
|
32
33
|
|
|
@@ -23,13 +23,23 @@ module CodebaseIndex
|
|
|
23
23
|
|
|
24
24
|
TIER1_TOOLS = Bridge::TIER1_TOOLS
|
|
25
25
|
|
|
26
|
+
# Tools gated behind the read_tools_enabled flag.
|
|
27
|
+
# sql/query have existing safety gates (SqlValidator, SafeContext rollback)
|
|
28
|
+
# but require explicit opt-in for embedded mode.
|
|
29
|
+
EMBEDDED_READ_TOOLS = %w[sql query].freeze
|
|
30
|
+
|
|
31
|
+
MAX_SQL_LIMIT = 10_000
|
|
32
|
+
MAX_QUERY_LIMIT = 10_000
|
|
33
|
+
|
|
26
34
|
# @param model_validator [ModelValidator] Validates model/column names
|
|
27
35
|
# @param safe_context [SafeContext] Wraps execution in rolled-back transaction
|
|
28
36
|
# @param connection [Object, nil] Database connection for adapter detection
|
|
29
|
-
|
|
37
|
+
# @param read_tools_enabled [Boolean] Enable sql/query tools in embedded mode (default: false)
|
|
38
|
+
def initialize(model_validator:, safe_context:, connection: nil, read_tools_enabled: false)
|
|
30
39
|
@model_validator = model_validator
|
|
31
40
|
@safe_context = safe_context
|
|
32
41
|
@connection = connection
|
|
42
|
+
@read_tools_enabled = read_tools_enabled
|
|
33
43
|
end
|
|
34
44
|
|
|
35
45
|
# Execute a tool request and return a response hash.
|
|
@@ -46,7 +56,7 @@ module CodebaseIndex
|
|
|
46
56
|
tool = request['tool']
|
|
47
57
|
params = request['params'] || {}
|
|
48
58
|
|
|
49
|
-
unless TIER1_TOOLS.include?(tool)
|
|
59
|
+
unless TIER1_TOOLS.include?(tool) || (@read_tools_enabled && EMBEDDED_READ_TOOLS.include?(tool))
|
|
50
60
|
return { 'ok' => false,
|
|
51
61
|
'error' => 'Not yet implemented in embedded mode',
|
|
52
62
|
'error_type' => 'unsupported' }
|
|
@@ -72,8 +82,10 @@ module CodebaseIndex
|
|
|
72
82
|
# @return [Hash] Tool result
|
|
73
83
|
def dispatch(tool, params)
|
|
74
84
|
case tool
|
|
75
|
-
when 'status'
|
|
76
|
-
when 'schema'
|
|
85
|
+
when 'status' then handle_status
|
|
86
|
+
when 'schema' then handle_schema(params)
|
|
87
|
+
when 'sql' then handle_sql(params)
|
|
88
|
+
when 'query' then handle_query(params)
|
|
77
89
|
else
|
|
78
90
|
validate_model!(params)
|
|
79
91
|
send(:"handle_#{tool}", params)
|
|
@@ -211,17 +223,86 @@ module CodebaseIndex
|
|
|
211
223
|
{ 'status' => 'ok', 'models' => @model_validator.model_names, 'adapter' => adapter }
|
|
212
224
|
end
|
|
213
225
|
|
|
226
|
+
# ── Read tools (sql/query, gated by read_tools_enabled) ────────────
|
|
227
|
+
|
|
228
|
+
# Execute validated read-only SQL via ActiveRecord's select_all.
|
|
229
|
+
#
|
|
230
|
+
# @param params [Hash] Must contain 'sql'; optional 'limit'
|
|
231
|
+
# @return [Hash] Columns and rows
|
|
232
|
+
def handle_sql(params)
|
|
233
|
+
sql = params['sql']
|
|
234
|
+
raise ValidationError, 'Missing required parameter: sql' unless sql
|
|
235
|
+
|
|
236
|
+
require_relative 'sql_validator'
|
|
237
|
+
SqlValidator.new.validate!(sql)
|
|
238
|
+
|
|
239
|
+
limit = params['limit'] ? [params['limit'].to_i, MAX_SQL_LIMIT].min : nil
|
|
240
|
+
query_sql = limit ? "SELECT * FROM (#{sql}) AS _limited LIMIT #{limit}" : sql
|
|
241
|
+
result = active_connection.select_all(query_sql)
|
|
242
|
+
|
|
243
|
+
{ 'columns' => result.columns, 'rows' => result.rows, 'count' => result.rows.size }
|
|
244
|
+
rescue SqlValidationError => e
|
|
245
|
+
raise ValidationError, e.message
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
# Build and execute a structured ActiveRecord query.
|
|
249
|
+
#
|
|
250
|
+
# @param params [Hash] Must contain 'model' and 'select'
|
|
251
|
+
# @return [Hash] Columns and rows
|
|
252
|
+
def handle_query(params)
|
|
253
|
+
validate_model!(params)
|
|
254
|
+
model = resolve_model(params['model'])
|
|
255
|
+
relation = build_query_relation(model, params)
|
|
256
|
+
result = active_connection.select_all(relation.to_sql)
|
|
257
|
+
{ 'columns' => result.columns, 'rows' => result.rows, 'count' => result.rows.size }
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
# Build an ActiveRecord relation from structured query parameters.
|
|
261
|
+
#
|
|
262
|
+
# @param model [Class] ActiveRecord model class
|
|
263
|
+
# @param params [Hash] Query parameters (select, joins, scope, group_by, having, order, limit)
|
|
264
|
+
# @return [ActiveRecord::Relation]
|
|
265
|
+
def build_query_relation(model, params)
|
|
266
|
+
relation = apply_query_clauses(model.all, params)
|
|
267
|
+
limit = params['limit'] ? [params['limit'].to_i, MAX_QUERY_LIMIT].min : MAX_QUERY_LIMIT
|
|
268
|
+
relation.limit(limit)
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
# Apply select/joins/scope/group/having/order clauses to a relation.
|
|
272
|
+
#
|
|
273
|
+
# @param relation [ActiveRecord::Relation]
|
|
274
|
+
# @param params [Hash]
|
|
275
|
+
# @return [ActiveRecord::Relation]
|
|
276
|
+
def apply_query_clauses(relation, params) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
277
|
+
relation = relation.select(params['select']) if params['select']
|
|
278
|
+
relation = relation.joins(params['joins'].map(&:to_sym)) if params['joins']&.any?
|
|
279
|
+
relation = apply_scope(relation, params['scope'])
|
|
280
|
+
relation = relation.group(params['group_by']) if params['group_by']&.any?
|
|
281
|
+
relation = relation.having(params['having']) if params['having']
|
|
282
|
+
relation = relation.order(params['order']) if params['order']
|
|
283
|
+
relation
|
|
284
|
+
end
|
|
285
|
+
|
|
214
286
|
# ── Helpers ──────────────────────────────────────────────────────────
|
|
215
287
|
|
|
216
288
|
# Apply scope conditions (WHERE clauses) to a relation.
|
|
217
289
|
#
|
|
290
|
+
# Accepts Hash form for simple equality conditions, or Array form
|
|
291
|
+
# for parameterized SQL (e.g., JSON column queries like
|
|
292
|
+
# ["preferences->>'theme' = ?", "dark"]).
|
|
293
|
+
#
|
|
218
294
|
# @param relation [ActiveRecord::Relation, Class] Model or relation
|
|
219
|
-
# @param scope [Hash, nil] Filter conditions
|
|
295
|
+
# @param scope [Hash, Array, nil] Filter conditions
|
|
220
296
|
# @return [ActiveRecord::Relation]
|
|
221
297
|
def apply_scope(relation, scope)
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
298
|
+
case scope
|
|
299
|
+
when Hash
|
|
300
|
+
scope.any? ? relation.where(scope) : relation
|
|
301
|
+
when Array
|
|
302
|
+
scope.any? ? relation.where(*scope) : relation
|
|
303
|
+
else
|
|
304
|
+
relation
|
|
305
|
+
end
|
|
225
306
|
end
|
|
226
307
|
|
|
227
308
|
# Apply column selection to a relation.
|
|
@@ -16,9 +16,11 @@ module CodebaseIndex
|
|
|
16
16
|
class RackMiddleware
|
|
17
17
|
# @param app [#call] The next Rack app in the middleware stack
|
|
18
18
|
# @param path [String] URL path to mount the MCP endpoint (default: '/mcp/console')
|
|
19
|
-
|
|
19
|
+
# @param embedded_read_tools [Boolean] Enable sql/query tools in embedded mode (default: false)
|
|
20
|
+
def initialize(app, path: '/mcp/console', embedded_read_tools: false)
|
|
20
21
|
@app = app
|
|
21
22
|
@path = path
|
|
23
|
+
@embedded_read_tools = embedded_read_tools
|
|
22
24
|
@mutex = Mutex.new
|
|
23
25
|
@transport = nil
|
|
24
26
|
end
|
|
@@ -71,7 +73,8 @@ module CodebaseIndex
|
|
|
71
73
|
server = Server.build_embedded(
|
|
72
74
|
model_validator: validator,
|
|
73
75
|
safe_context: safe_context,
|
|
74
|
-
redacted_columns: redacted
|
|
76
|
+
redacted_columns: redacted,
|
|
77
|
+
read_tools_enabled: @embedded_read_tools
|
|
75
78
|
)
|
|
76
79
|
|
|
77
80
|
@transport = MCP::Server::Transports::StreamableHTTPTransport.new(server)
|
|
@@ -56,12 +56,15 @@ module CodebaseIndex
|
|
|
56
56
|
# @param safe_context [SafeContext] Wraps queries in rolled-back transactions
|
|
57
57
|
# @param redacted_columns [Array<String>] Column names to redact from output
|
|
58
58
|
# @param connection [Object, nil] Database connection for adapter detection
|
|
59
|
+
# @param read_tools_enabled [Boolean] Enable sql/query tools in embedded mode (default: false)
|
|
59
60
|
# @return [MCP::Server] Configured server ready for transport
|
|
60
|
-
def build_embedded(model_validator:, safe_context:, redacted_columns: [], connection: nil
|
|
61
|
+
def build_embedded(model_validator:, safe_context:, redacted_columns: [], connection: nil,
|
|
62
|
+
read_tools_enabled: false)
|
|
61
63
|
require_relative 'embedded_executor'
|
|
62
64
|
|
|
63
65
|
executor = EmbeddedExecutor.new(
|
|
64
|
-
model_validator: model_validator, safe_context: safe_context,
|
|
66
|
+
model_validator: model_validator, safe_context: safe_context,
|
|
67
|
+
connection: connection, read_tools_enabled: read_tools_enabled
|
|
65
68
|
)
|
|
66
69
|
redact_ctx = if redacted_columns.any?
|
|
67
70
|
SafeContext.new(connection: nil,
|
|
@@ -568,15 +571,35 @@ module CodebaseIndex
|
|
|
568
571
|
def define_console_tool(server, conn_mgr, name, description, properties:, required: nil,
|
|
569
572
|
safe_ctx: nil, renderer: nil, &tool_block)
|
|
570
573
|
bridge_method = method(:send_to_bridge)
|
|
574
|
+
coerce_method = method(:coerce_integer_args!)
|
|
575
|
+
integer_keys = integer_property_keys(properties)
|
|
571
576
|
schema = { properties: properties }
|
|
572
577
|
schema[:required] = required if required&.any?
|
|
573
578
|
server.define_tool(name: name, description: description, input_schema: schema) do |server_context:, **args|
|
|
579
|
+
coerce_method.call(args, integer_keys)
|
|
574
580
|
request = tool_block.call(args)
|
|
575
581
|
bridge_method.call(conn_mgr, request.transform_keys(&:to_s), safe_ctx, renderer: renderer)
|
|
576
582
|
end
|
|
577
583
|
end
|
|
578
584
|
# rubocop:enable Metrics/ParameterLists
|
|
579
585
|
|
|
586
|
+
# Pre-compute property keys declared as integer in a schema.
|
|
587
|
+
#
|
|
588
|
+
# @param properties [Hash] Tool schema properties
|
|
589
|
+
# @return [Array<Symbol>]
|
|
590
|
+
def integer_property_keys(properties)
|
|
591
|
+
properties.select { |_k, v| v[:type] == 'integer' }.keys.map(&:to_sym)
|
|
592
|
+
end
|
|
593
|
+
|
|
594
|
+
# Coerce string values to integers for known integer keys.
|
|
595
|
+
#
|
|
596
|
+
# @param args [Hash] Tool arguments (mutated in place)
|
|
597
|
+
# @param keys [Array<Symbol>] Keys that should be integers
|
|
598
|
+
# @return [void]
|
|
599
|
+
def coerce_integer_args!(args, keys)
|
|
600
|
+
keys.each { |k| args[k] = args[k].to_i if args[k].is_a?(String) }
|
|
601
|
+
end
|
|
602
|
+
|
|
580
603
|
# Schema property helpers for concise tool definitions.
|
|
581
604
|
def str_prop(desc) = { type: 'string', description: desc }
|
|
582
605
|
def int_prop(desc) = { type: 'integer', description: desc }
|
|
@@ -207,6 +207,7 @@ module CodebaseIndex
|
|
|
207
207
|
@output_dir = Pathname.new(output_dir || Rails.root.join('tmp/codebase_index'))
|
|
208
208
|
@dependency_graph = DependencyGraph.new
|
|
209
209
|
@results = {}
|
|
210
|
+
@extractors = {}
|
|
210
211
|
end
|
|
211
212
|
|
|
212
213
|
# ══════════════════════════════════════════════════════════════════════
|
|
@@ -268,6 +269,7 @@ module CodebaseIndex
|
|
|
268
269
|
write_graph_analysis
|
|
269
270
|
write_manifest
|
|
270
271
|
write_structural_summary
|
|
272
|
+
capture_snapshot
|
|
271
273
|
|
|
272
274
|
log_summary
|
|
273
275
|
|
|
@@ -317,6 +319,7 @@ module CodebaseIndex
|
|
|
317
319
|
write_dependency_graph
|
|
318
320
|
write_manifest
|
|
319
321
|
write_structural_summary
|
|
322
|
+
capture_snapshot
|
|
320
323
|
|
|
321
324
|
affected_ids
|
|
322
325
|
end
|
|
@@ -377,6 +380,7 @@ module CodebaseIndex
|
|
|
377
380
|
start_time = Time.current
|
|
378
381
|
|
|
379
382
|
extractor = extractor_class.new
|
|
383
|
+
@extractors[type] = extractor
|
|
380
384
|
units = extractor.extract_all
|
|
381
385
|
|
|
382
386
|
@results[type] = units
|
|
@@ -410,12 +414,16 @@ module CodebaseIndex
|
|
|
410
414
|
start_time = Time.current
|
|
411
415
|
|
|
412
416
|
extractor = extractor_class.new
|
|
417
|
+
results_mutex.synchronize { @extractors[type] = extractor }
|
|
418
|
+
|
|
413
419
|
units = extractor.extract_all
|
|
414
420
|
|
|
415
421
|
elapsed = Time.current - start_time
|
|
416
422
|
Rails.logger.info "[CodebaseIndex] [Thread] Extracted #{units.size} #{type} in #{elapsed.round(2)}s"
|
|
417
423
|
|
|
418
|
-
results_mutex.synchronize
|
|
424
|
+
results_mutex.synchronize do
|
|
425
|
+
@results[type] = units
|
|
426
|
+
end
|
|
419
427
|
rescue StandardError => e
|
|
420
428
|
Rails.logger.error "[CodebaseIndex] [Thread] #{type} failed: #{e.message}"
|
|
421
429
|
results_mutex.synchronize { @results[type] = [] }
|
|
@@ -743,9 +751,10 @@ module CodebaseIndex
|
|
|
743
751
|
total_units: @results.values.sum(&:size),
|
|
744
752
|
total_chunks: @results.sum { |_, units| units.sum { |u| u.chunks.size } },
|
|
745
753
|
|
|
746
|
-
# Git info
|
|
747
|
-
|
|
748
|
-
|
|
754
|
+
# Git info — fall back to env vars for Docker/worktree environments
|
|
755
|
+
# where the git repo may not be directly accessible
|
|
756
|
+
git_sha: run_git('rev-parse', 'HEAD').presence || ENV['GIT_SHA'].presence,
|
|
757
|
+
git_branch: run_git('rev-parse', '--abbrev-ref', 'HEAD').presence || ENV['GIT_BRANCH'].presence,
|
|
749
758
|
|
|
750
759
|
# For change detection
|
|
751
760
|
gemfile_lock_sha: gemfile_lock_sha,
|
|
@@ -758,6 +767,63 @@ module CodebaseIndex
|
|
|
758
767
|
)
|
|
759
768
|
end
|
|
760
769
|
|
|
770
|
+
# Capture a temporal snapshot after extraction completes.
|
|
771
|
+
#
|
|
772
|
+
# Reads the manifest and computes per-unit content hashes, then delegates
|
|
773
|
+
# to the SnapshotStore for storage and diff computation. Requires
|
|
774
|
+
# enable_snapshots and a valid git_sha in the manifest.
|
|
775
|
+
#
|
|
776
|
+
# @return [void]
|
|
777
|
+
def capture_snapshot
|
|
778
|
+
return unless CodebaseIndex.configuration.enable_snapshots
|
|
779
|
+
|
|
780
|
+
manifest_path = @output_dir.join('manifest.json')
|
|
781
|
+
return unless manifest_path.exist?
|
|
782
|
+
|
|
783
|
+
manifest = JSON.parse(File.read(manifest_path))
|
|
784
|
+
return unless manifest['git_sha']
|
|
785
|
+
|
|
786
|
+
store = build_snapshot_store
|
|
787
|
+
return unless store
|
|
788
|
+
|
|
789
|
+
unit_hashes = @results.flat_map do |type, units|
|
|
790
|
+
units.map do |unit|
|
|
791
|
+
{
|
|
792
|
+
'identifier' => unit.identifier,
|
|
793
|
+
'type' => type.to_s,
|
|
794
|
+
'source_hash' => Digest::SHA256.hexdigest(unit.source_code.to_s),
|
|
795
|
+
'metadata_hash' => Digest::SHA256.hexdigest(unit.metadata.to_json),
|
|
796
|
+
'dependencies_hash' => Digest::SHA256.hexdigest(unit.dependencies.to_json)
|
|
797
|
+
}
|
|
798
|
+
end
|
|
799
|
+
end
|
|
800
|
+
|
|
801
|
+
store.capture(manifest, unit_hashes)
|
|
802
|
+
Rails.logger.info "[CodebaseIndex] Snapshot captured for #{manifest['git_sha'][0..7]}"
|
|
803
|
+
rescue StandardError => e
|
|
804
|
+
Rails.logger.error "[CodebaseIndex] Snapshot capture failed (#{e.class}): #{e.message}"
|
|
805
|
+
end
|
|
806
|
+
|
|
807
|
+
# Build a snapshot store, preferring SQLite with JSON file fallback.
|
|
808
|
+
#
|
|
809
|
+
# @return [CodebaseIndex::Temporal::SnapshotStore, CodebaseIndex::Temporal::JsonSnapshotStore, nil]
|
|
810
|
+
def build_snapshot_store
|
|
811
|
+
require 'sqlite3'
|
|
812
|
+
require_relative 'db/migrator'
|
|
813
|
+
require_relative 'temporal/snapshot_store'
|
|
814
|
+
|
|
815
|
+
db_path = @output_dir.join('codebase_index.sqlite3')
|
|
816
|
+
db = SQLite3::Database.new(db_path.to_s)
|
|
817
|
+
db.results_as_hash = true
|
|
818
|
+
|
|
819
|
+
Db::Migrator.new(connection: db).migrate!
|
|
820
|
+
Temporal::SnapshotStore.new(connection: db)
|
|
821
|
+
rescue LoadError
|
|
822
|
+
Rails.logger.info '[CodebaseIndex] sqlite3 gem not available, using JSON snapshot store'
|
|
823
|
+
require_relative 'temporal/json_snapshot_store'
|
|
824
|
+
Temporal::JsonSnapshotStore.new(dir: @output_dir.to_s)
|
|
825
|
+
end
|
|
826
|
+
|
|
761
827
|
# Write a compact TOC-style summary of extracted units.
|
|
762
828
|
#
|
|
763
829
|
# Produces a SUMMARY.md under 8K tokens (~24KB) by listing one line per
|
|
@@ -886,6 +952,16 @@ module CodebaseIndex
|
|
|
886
952
|
Rails.logger.info "[CodebaseIndex] Total: #{total} units, #{chunks} chunks"
|
|
887
953
|
Rails.logger.info "[CodebaseIndex] Output: #{@output_dir}"
|
|
888
954
|
Rails.logger.info '[CodebaseIndex] ═══════════════════════════════════════════'
|
|
955
|
+
|
|
956
|
+
all_warnings = @extractors.flat_map do |_type, ext|
|
|
957
|
+
ext.respond_to?(:warnings) ? ext.warnings : []
|
|
958
|
+
end
|
|
959
|
+
|
|
960
|
+
return if all_warnings.empty?
|
|
961
|
+
|
|
962
|
+
Rails.logger.warn '[CodebaseIndex] ───────────────────────────────────────────'
|
|
963
|
+
Rails.logger.warn "[CodebaseIndex] Warnings (#{all_warnings.size}):"
|
|
964
|
+
all_warnings.each { |w| Rails.logger.warn "[CodebaseIndex] #{w}" }
|
|
889
965
|
end
|
|
890
966
|
|
|
891
967
|
# ──────────────────────────────────────────────────────────────────────
|
|
@@ -8,7 +8,8 @@ module CodebaseIndex
|
|
|
8
8
|
# ConcernExtractor handles ActiveSupport::Concern module extraction.
|
|
9
9
|
#
|
|
10
10
|
# Concerns are mixins that extend model and controller behavior.
|
|
11
|
-
# They live in `app/models/concerns/` and `app/controllers/concerns
|
|
11
|
+
# They live in `app/models/concerns/` and `app/controllers/concerns/`,
|
|
12
|
+
# as well as nested directories like `app/models/gateway/stripe/concerns/`.
|
|
12
13
|
#
|
|
13
14
|
# We extract:
|
|
14
15
|
# - Module name and namespace
|
|
@@ -25,13 +26,20 @@ module CodebaseIndex
|
|
|
25
26
|
include SharedUtilityMethods
|
|
26
27
|
include SharedDependencyScanner
|
|
27
28
|
|
|
28
|
-
#
|
|
29
|
+
# Canonical concern directories (used as fallback if glob finds nothing).
|
|
29
30
|
CONCERN_DIRECTORIES = %w[
|
|
30
31
|
app/models/concerns
|
|
31
32
|
app/controllers/concerns
|
|
32
33
|
].freeze
|
|
33
34
|
|
|
34
35
|
def initialize
|
|
36
|
+
# Discover all concerns/ directories under app/, including deeply nested ones
|
|
37
|
+
# like app/models/gateway/stripe/webhook/concerns/.
|
|
38
|
+
@directories = Dir[Rails.root.join('app/**/concerns')].map { |d| Pathname.new(d) }
|
|
39
|
+
.select(&:directory?)
|
|
40
|
+
# Fall back to canonical directories if glob finds nothing.
|
|
41
|
+
return unless @directories.empty?
|
|
42
|
+
|
|
35
43
|
@directories = CONCERN_DIRECTORIES.map { |d| Rails.root.join(d) }
|
|
36
44
|
.select(&:directory?)
|
|
37
45
|
end
|
|
@@ -91,10 +99,11 @@ module CodebaseIndex
|
|
|
91
99
|
modules = source.scan(/^\s*module\s+([\w:]+)/).flatten
|
|
92
100
|
return modules.last if modules.any?
|
|
93
101
|
|
|
94
|
-
# Infer from file path
|
|
102
|
+
# Infer from file path — strip everything up to and including the first concerns/ dir.
|
|
103
|
+
# Handles canonical (app/models/concerns/) and nested (app/models/foo/concerns/) paths.
|
|
95
104
|
relative = file_path.sub("#{Rails.root}/", '')
|
|
96
105
|
relative
|
|
97
|
-
.sub(%r{^app
|
|
106
|
+
.sub(%r{^app/.*?/concerns/}, '')
|
|
98
107
|
.sub('.rb', '')
|
|
99
108
|
.split('/')
|
|
100
109
|
.map { |segment| segment.split('_').map(&:capitalize).join }
|
|
@@ -179,9 +188,9 @@ module CodebaseIndex
|
|
|
179
188
|
# @param file_path [String] Path to the concern file
|
|
180
189
|
# @return [String] One of "model", "controller", "unknown"
|
|
181
190
|
def detect_concern_scope(file_path)
|
|
182
|
-
if file_path.include?('app/models/
|
|
191
|
+
if file_path.include?('app/models/')
|
|
183
192
|
'model'
|
|
184
|
-
elsif file_path.include?('app/controllers/
|
|
193
|
+
elsif file_path.include?('app/controllers/')
|
|
185
194
|
'controller'
|
|
186
195
|
else
|
|
187
196
|
'unknown'
|
|
@@ -196,7 +205,10 @@ module CodebaseIndex
|
|
|
196
205
|
source.scan(/^\s*def\s+(\w+[?!=]?)/).flatten.reject { |m| m.start_with?('self.') }
|
|
197
206
|
end
|
|
198
207
|
|
|
199
|
-
# Detect other modules included by this concern.
|
|
208
|
+
# Detect other modules included or extended by this concern (for metadata display).
|
|
209
|
+
#
|
|
210
|
+
# Returns all module names found in include/extend calls, excluding
|
|
211
|
+
# ActiveSupport::Concern itself.
|
|
200
212
|
#
|
|
201
213
|
# @param source [String] Ruby source code
|
|
202
214
|
# @return [Array<String>] Module names
|
|
@@ -205,6 +217,28 @@ module CodebaseIndex
|
|
|
205
217
|
.reject { |m| m == 'ActiveSupport::Concern' }
|
|
206
218
|
end
|
|
207
219
|
|
|
220
|
+
# Detect modules explicitly included by this concern.
|
|
221
|
+
#
|
|
222
|
+
# Scans for bare +include ModuleName+ calls, excluding ActiveSupport::Concern.
|
|
223
|
+
#
|
|
224
|
+
# @param source [String] Ruby source code
|
|
225
|
+
# @return [Array<String>] Included module names
|
|
226
|
+
def detect_includes(source)
|
|
227
|
+
source.scan(/\binclude\s+([\w:]+)/).flatten
|
|
228
|
+
.reject { |m| m == 'ActiveSupport::Concern' }
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
# Detect modules explicitly extended by this concern.
|
|
232
|
+
#
|
|
233
|
+
# Scans for bare +extend ModuleName+ calls, excluding ActiveSupport::Concern.
|
|
234
|
+
#
|
|
235
|
+
# @param source [String] Ruby source code
|
|
236
|
+
# @return [Array<String>] Extended module names
|
|
237
|
+
def detect_extends(source)
|
|
238
|
+
source.scan(/\bextend\s+([\w:]+)/).flatten
|
|
239
|
+
.reject { |m| m == 'ActiveSupport::Concern' }
|
|
240
|
+
end
|
|
241
|
+
|
|
208
242
|
# Detect callback declarations.
|
|
209
243
|
#
|
|
210
244
|
# @param source [String] Ruby source code
|
|
@@ -236,11 +270,16 @@ module CodebaseIndex
|
|
|
236
270
|
# @param source [String] Ruby source code
|
|
237
271
|
# @return [Array<Hash>] Dependency hashes
|
|
238
272
|
def extract_dependencies(source)
|
|
239
|
-
#
|
|
240
|
-
deps =
|
|
273
|
+
# Concerns included by this concern (add instance-level behavior)
|
|
274
|
+
deps = detect_includes(source).map do |mod|
|
|
241
275
|
{ type: :concern, target: mod, via: :include }
|
|
242
276
|
end
|
|
243
277
|
|
|
278
|
+
# Concerns extended by this concern (add class-level behavior)
|
|
279
|
+
detect_extends(source).each do |mod|
|
|
280
|
+
deps << { type: :concern, target: mod, via: :extend }
|
|
281
|
+
end
|
|
282
|
+
|
|
244
283
|
# Standard dependency scanning
|
|
245
284
|
deps.concat(scan_model_dependencies(source))
|
|
246
285
|
deps.concat(scan_service_dependencies(source))
|
|
@@ -36,8 +36,12 @@ module CodebaseIndex
|
|
|
36
36
|
(?:after|before)_(?:add|remove)_for_ # collection callbacks
|
|
37
37
|
)/x
|
|
38
38
|
|
|
39
|
+
# Warnings collected during extraction (skipped associations, failed models)
|
|
40
|
+
attr_reader :warnings
|
|
41
|
+
|
|
39
42
|
def initialize
|
|
40
43
|
@concern_cache = {}
|
|
44
|
+
@warnings = []
|
|
41
45
|
end
|
|
42
46
|
|
|
43
47
|
# Extract all ActiveRecord models in the application
|
|
@@ -79,6 +83,7 @@ module CodebaseIndex
|
|
|
79
83
|
|
|
80
84
|
unit
|
|
81
85
|
rescue StandardError => e
|
|
86
|
+
@warnings << "Failed to extract model #{model.name}: #{e.message}"
|
|
82
87
|
Rails.logger.error("Failed to extract model #{model.name}: #{e.message}")
|
|
83
88
|
nil
|
|
84
89
|
end
|
|
@@ -232,6 +237,37 @@ module CodebaseIndex
|
|
|
232
237
|
end
|
|
233
238
|
end
|
|
234
239
|
|
|
240
|
+
# Get modules extended specifically in this model (not inherited).
|
|
241
|
+
#
|
|
242
|
+
# Extended modules live on the singleton class and add class-level methods.
|
|
243
|
+
# Ruby builtins (Kernel, PP, etc.) are filtered out by comparing against
|
|
244
|
+
# Object.singleton_class.included_modules.
|
|
245
|
+
#
|
|
246
|
+
# @param model [Class] The ActiveRecord model class
|
|
247
|
+
# @return [Array<Module>] App-defined modules extended by this model
|
|
248
|
+
def extract_extended_modules(model)
|
|
249
|
+
app_root = Rails.root.to_s
|
|
250
|
+
builtin_modules = Object.singleton_class.included_modules.map(&:name).compact.to_set
|
|
251
|
+
|
|
252
|
+
model.singleton_class.included_modules.select do |mod|
|
|
253
|
+
next false unless mod.name
|
|
254
|
+
next false if builtin_modules.include?(mod.name)
|
|
255
|
+
|
|
256
|
+
# Skip obvious non-app modules (from gems/stdlib)
|
|
257
|
+
if Object.respond_to?(:const_source_location)
|
|
258
|
+
loc = Object.const_source_location(mod.name)
|
|
259
|
+
next false if loc && !app_source?(loc.first, app_root)
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# Include if it's in app/models/concerns or app/controllers/concerns
|
|
263
|
+
mod.name.include?('Concerns') ||
|
|
264
|
+
# Or if it's namespaced under the model's parent
|
|
265
|
+
mod.name.start_with?("#{model.module_parent}::") ||
|
|
266
|
+
# Or if it's defined within the application
|
|
267
|
+
defined_in_app?(mod)
|
|
268
|
+
end
|
|
269
|
+
end
|
|
270
|
+
|
|
235
271
|
# Check if a module is defined within the Rails application
|
|
236
272
|
#
|
|
237
273
|
# @param mod [Module] The module to check
|
|
@@ -312,6 +348,13 @@ module CodebaseIndex
|
|
|
312
348
|
table_exists: model.table_exists?,
|
|
313
349
|
column_count: model.table_exists? ? model.columns.size : 0,
|
|
314
350
|
column_names: model.table_exists? ? model.column_names : [],
|
|
351
|
+
columns: if model.table_exists?
|
|
352
|
+
model.columns.map do |col|
|
|
353
|
+
{ 'name' => col.name, 'type' => col.sql_type, 'null' => col.null, 'default' => col.default }
|
|
354
|
+
end
|
|
355
|
+
else
|
|
356
|
+
[]
|
|
357
|
+
end,
|
|
315
358
|
|
|
316
359
|
# ActiveStorage / ActionText
|
|
317
360
|
active_storage_attachments: extract_active_storage_attachments(source),
|
|
@@ -415,9 +458,11 @@ module CodebaseIndex
|
|
|
415
458
|
result
|
|
416
459
|
end
|
|
417
460
|
|
|
418
|
-
# Extract all associations with full details
|
|
461
|
+
# Extract all associations with full details.
|
|
462
|
+
# Broken associations (e.g. missing class_name) are skipped with a warning
|
|
463
|
+
# instead of aborting the entire model extraction.
|
|
419
464
|
def extract_associations(model)
|
|
420
|
-
model.reflect_on_all_associations.
|
|
465
|
+
model.reflect_on_all_associations.filter_map do |assoc|
|
|
421
466
|
{
|
|
422
467
|
name: assoc.name,
|
|
423
468
|
type: assoc.macro, # :belongs_to, :has_many, :has_one, :has_and_belongs_to_many
|
|
@@ -428,6 +473,9 @@ module CodebaseIndex
|
|
|
428
473
|
foreign_key: assoc.foreign_key,
|
|
429
474
|
inverse_of: assoc.inverse_of&.name
|
|
430
475
|
}
|
|
476
|
+
rescue NameError => e
|
|
477
|
+
@warnings << "[#{model.name}] Skipping broken association #{assoc.name}: #{e.message}"
|
|
478
|
+
nil
|
|
431
479
|
end
|
|
432
480
|
end
|
|
433
481
|
|
|
@@ -562,8 +610,21 @@ module CodebaseIndex
|
|
|
562
610
|
# Extract what this model depends on
|
|
563
611
|
def extract_dependencies(model, source = nil)
|
|
564
612
|
# Associations point to other models
|
|
565
|
-
deps = model.reflect_on_all_associations.
|
|
613
|
+
deps = model.reflect_on_all_associations.filter_map do |assoc|
|
|
566
614
|
{ type: :model, target: assoc.class_name, via: :association }
|
|
615
|
+
rescue NameError => e
|
|
616
|
+
@warnings << "[#{model.name}] Skipping broken association dep #{assoc.name}: #{e.message}"
|
|
617
|
+
nil
|
|
618
|
+
end
|
|
619
|
+
|
|
620
|
+
# Included concerns add instance-level behavior
|
|
621
|
+
extract_included_modules(model).each do |mod|
|
|
622
|
+
deps << { type: :concern, target: mod.name, via: :include }
|
|
623
|
+
end
|
|
624
|
+
|
|
625
|
+
# Extended modules add class-level behavior (not inlined into source)
|
|
626
|
+
extract_extended_modules(model).each do |mod|
|
|
627
|
+
deps << { type: :concern, target: mod.name, via: :extend }
|
|
567
628
|
end
|
|
568
629
|
|
|
569
630
|
# Parse source for service/mailer/job references
|
|
@@ -30,6 +30,44 @@ module CodebaseIndex
|
|
|
30
30
|
dir
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
+
# Build a snapshot store for temporal tracking.
|
|
34
|
+
#
|
|
35
|
+
# Auto-enables when a SQLite database already exists in the index directory,
|
|
36
|
+
# or when CODEBASE_INDEX_SNAPSHOTS=true is set. The database is created and
|
|
37
|
+
# migrated automatically. Falls back to JSON file store when SQLite is
|
|
38
|
+
# unavailable or encounters errors.
|
|
39
|
+
#
|
|
40
|
+
# @param index_dir [String] Path to extraction output directory
|
|
41
|
+
# @return [CodebaseIndex::Temporal::SnapshotStore, CodebaseIndex::Temporal::JsonSnapshotStore, nil]
|
|
42
|
+
def self.build_snapshot_store(index_dir)
  db_path = File.join(index_dir, 'codebase_index.sqlite3')

  # Snapshots are on when any one of these holds: explicit env opt-in, the
  # configuration flag, or a database file already present at db_path.
  enabled = ENV['CODEBASE_INDEX_SNAPSHOTS'] == 'true' ||
            CodebaseIndex.configuration.enable_snapshots ||
            File.exist?(db_path)

  return nil unless enabled

  # Declared outside the begin block so the rescue clause can release the
  # handle when a later step (migration, store construction) fails.
  db = nil
  begin
    require 'sqlite3'
    require_relative '../db/migrator'
    require_relative '../temporal/snapshot_store'

    db = SQLite3::Database.new(db_path)
    db.results_as_hash = true

    CodebaseIndex::Db::Migrator.new(connection: db).migrate!
    CodebaseIndex::Temporal::SnapshotStore.new(connection: db)
  rescue LoadError
    # The requires run before the database is opened, so there is no handle to release here.
    warn 'Note: sqlite3 gem not available, using JSON file-based snapshot store.'
    require_relative '../temporal/json_snapshot_store'
    CodebaseIndex::Temporal::JsonSnapshotStore.new(dir: index_dir)
  rescue StandardError => e
    # Best-effort cleanup: do not keep the SQLite handle open once we have
    # decided to use the JSON store instead. A failure to close must not mask
    # the fallback, so it is deliberately ignored.
    begin
      db&.close
    rescue StandardError
      nil
    end

    warn "Note: SQLite snapshot store failed (#{e.class}: #{e.message}), using JSON fallback."
    require_relative '../temporal/json_snapshot_store'
    CodebaseIndex::Temporal::JsonSnapshotStore.new(dir: index_dir)
  end
end
|
|
70
|
+
|
|
33
71
|
# Attempt to build a retriever for semantic search.
|
|
34
72
|
#
|
|
35
73
|
# Auto-configures from environment variables when no explicit configuration
|
|
@@ -106,6 +106,16 @@ module CodebaseIndex
|
|
|
106
106
|
value.is_a?(String) ? [value] : value
|
|
107
107
|
end
|
|
108
108
|
|
|
109
|
+
# Coerce a value to an Integer. Converts String representations
|
|
110
|
+
# to Integer; leaves existing Integers and nil unchanged.
|
|
111
|
+
# MCP clients may send "2" (string) instead of 2 (integer).
|
|
112
|
+
#
|
|
113
|
+
# @param value [String, Integer, nil] The input value
|
|
114
|
+
# @return [Integer, nil]
|
|
115
|
+
def coerce_integer(value)
  # Integers and nil pass through untouched so callers' `value || default`
  # fallbacks keep working.
  return value unless value.is_a?(String)

  # Strict base-10 parse. String#to_i would turn "" or "abc" into 0, which
  # callers then treat as an explicit value (e.g. a limit of 0) instead of
  # falling back to their default. Returning nil for unparsable input lets the
  # default apply. Base 10 is forced so "010" is 10, not octal 8.
  Integer(value, 10, exception: false)
end
|
|
118
|
+
|
|
109
119
|
# Apply offset+limit pagination to a single section key within a container hash.
|
|
110
120
|
# Adds `_total`, `_truncated`, and `_offset` metadata keys when truncating.
|
|
111
121
|
#
|
|
@@ -166,6 +176,7 @@ module CodebaseIndex
|
|
|
166
176
|
|
|
167
177
|
def define_search_tool(server, reader, respond, renderer)
|
|
168
178
|
coerce = method(:coerce_array)
|
|
179
|
+
coerce_int = method(:coerce_integer)
|
|
169
180
|
server.define_tool(
|
|
170
181
|
name: 'search',
|
|
171
182
|
description: 'Search code units by pattern. Matches against identifiers by default; can also search source_code and metadata fields.',
|
|
@@ -187,6 +198,7 @@ module CodebaseIndex
|
|
|
187
198
|
) do |query:, server_context:, types: nil, fields: nil, limit: nil|
|
|
188
199
|
types = coerce.call(types)
|
|
189
200
|
fields = coerce.call(fields)
|
|
201
|
+
limit = coerce_int.call(limit)
|
|
190
202
|
results = reader.search(
|
|
191
203
|
query,
|
|
192
204
|
types: types,
|
|
@@ -203,6 +215,7 @@ module CodebaseIndex
|
|
|
203
215
|
|
|
204
216
|
def define_traversal_tool(server, reader, respond, renderer, name:, description:, reader_method:, render_key:)
|
|
205
217
|
coerce = method(:coerce_array)
|
|
218
|
+
coerce_int = method(:coerce_integer)
|
|
206
219
|
server.define_tool(
|
|
207
220
|
name: name,
|
|
208
221
|
description: description,
|
|
@@ -219,6 +232,7 @@ module CodebaseIndex
|
|
|
219
232
|
}
|
|
220
233
|
) do |identifier:, server_context:, depth: nil, types: nil|
|
|
221
234
|
types = coerce.call(types)
|
|
235
|
+
depth = coerce_int.call(depth)
|
|
222
236
|
result = reader.send(reader_method, identifier, depth: depth || 2, types: types)
|
|
223
237
|
if result[:found] == false
|
|
224
238
|
result[:message] =
|
|
@@ -249,6 +263,7 @@ module CodebaseIndex
|
|
|
249
263
|
|
|
250
264
|
def define_graph_analysis_tool(server, reader, respond, renderer)
|
|
251
265
|
paginate = method(:paginate_section)
|
|
266
|
+
coerce_int = method(:coerce_integer)
|
|
252
267
|
server.define_tool(
|
|
253
268
|
name: 'graph_analysis',
|
|
254
269
|
description: 'Get structural analysis of the dependency graph: orphans, dead ends, hubs, cycles, and bridges.',
|
|
@@ -264,6 +279,8 @@ module CodebaseIndex
|
|
|
264
279
|
}
|
|
265
280
|
}
|
|
266
281
|
) do |server_context:, analysis: nil, limit: nil, offset: nil|
|
|
282
|
+
limit = coerce_int.call(limit)
|
|
283
|
+
offset = coerce_int.call(offset)
|
|
267
284
|
data = reader.graph_analysis
|
|
268
285
|
section = analysis || 'all'
|
|
269
286
|
effective_offset = offset || 0
|
|
@@ -290,6 +307,7 @@ module CodebaseIndex
|
|
|
290
307
|
|
|
291
308
|
def define_pagerank_tool(server, reader, respond, renderer)
|
|
292
309
|
coerce = method(:coerce_array)
|
|
310
|
+
coerce_int = method(:coerce_integer)
|
|
293
311
|
server.define_tool(
|
|
294
312
|
name: 'pagerank',
|
|
295
313
|
description: 'Get PageRank importance scores for code units. Higher scores indicate more structurally important nodes.',
|
|
@@ -304,6 +322,7 @@ module CodebaseIndex
|
|
|
304
322
|
}
|
|
305
323
|
) do |server_context:, limit: nil, types: nil|
|
|
306
324
|
types = coerce.call(types)
|
|
325
|
+
limit = coerce_int.call(limit)
|
|
307
326
|
scores = reader.dependency_graph.pagerank
|
|
308
327
|
graph_data = reader.raw_graph_data
|
|
309
328
|
nodes = graph_data['nodes'] || {}
|
|
@@ -329,6 +348,7 @@ module CodebaseIndex
|
|
|
329
348
|
end
|
|
330
349
|
|
|
331
350
|
def define_framework_tool(server, reader, respond, renderer)
|
|
351
|
+
coerce_int = method(:coerce_integer)
|
|
332
352
|
server.define_tool(
|
|
333
353
|
name: 'framework',
|
|
334
354
|
description: 'Search Rails framework source units by concept keyword. Matches against identifier, ' \
|
|
@@ -342,6 +362,7 @@ module CodebaseIndex
|
|
|
342
362
|
required: ['keyword']
|
|
343
363
|
}
|
|
344
364
|
) do |keyword:, server_context:, limit: nil|
|
|
365
|
+
limit = coerce_int.call(limit)
|
|
345
366
|
results = reader.framework_sources(keyword, limit: limit || 20)
|
|
346
367
|
respond.call(renderer.render(:framework, {
|
|
347
368
|
keyword: keyword,
|
|
@@ -353,6 +374,7 @@ module CodebaseIndex
|
|
|
353
374
|
|
|
354
375
|
def define_recent_changes_tool(server, reader, respond, renderer)
|
|
355
376
|
coerce = method(:coerce_array)
|
|
377
|
+
coerce_int = method(:coerce_integer)
|
|
356
378
|
server.define_tool(
|
|
357
379
|
name: 'recent_changes',
|
|
358
380
|
description: 'List recently modified code units sorted by git last_modified timestamp. ' \
|
|
@@ -368,6 +390,7 @@ module CodebaseIndex
|
|
|
368
390
|
}
|
|
369
391
|
) do |server_context:, limit: nil, types: nil|
|
|
370
392
|
types = coerce.call(types)
|
|
393
|
+
limit = coerce_int.call(limit)
|
|
371
394
|
results = reader.recent_changes(limit: limit || 10, types: types)
|
|
372
395
|
respond.call(renderer.render(:recent_changes, {
|
|
373
396
|
result_count: results.size,
|
|
@@ -395,6 +418,7 @@ module CodebaseIndex
|
|
|
395
418
|
end
|
|
396
419
|
|
|
397
420
|
def define_retrieve_tool(server, retriever, respond)
|
|
421
|
+
coerce_int = method(:coerce_integer)
|
|
398
422
|
server.define_tool(
|
|
399
423
|
name: 'codebase_retrieve',
|
|
400
424
|
description: 'Retrieve relevant codebase context for a natural language query using semantic search. ' \
|
|
@@ -408,6 +432,7 @@ module CodebaseIndex
|
|
|
408
432
|
required: ['query']
|
|
409
433
|
}
|
|
410
434
|
) do |query:, server_context:, budget: nil|
|
|
435
|
+
budget = coerce_int.call(budget)
|
|
411
436
|
if retriever
|
|
412
437
|
result = retriever.retrieve(query, budget: budget || 8000)
|
|
413
438
|
respond.call(result.context)
|
|
@@ -423,6 +448,7 @@ module CodebaseIndex
|
|
|
423
448
|
def define_trace_flow_tool(server, reader, index_dir, respond, renderer)
|
|
424
449
|
require_relative '../flow_assembler'
|
|
425
450
|
require_relative '../dependency_graph'
|
|
451
|
+
coerce_int = method(:coerce_integer)
|
|
426
452
|
|
|
427
453
|
server.define_tool(
|
|
428
454
|
name: 'trace_flow',
|
|
@@ -441,7 +467,7 @@ module CodebaseIndex
|
|
|
441
467
|
required: ['entry_point']
|
|
442
468
|
}
|
|
443
469
|
) do |entry_point:, server_context:, depth: nil|
|
|
444
|
-
max_depth = depth || 3
|
|
470
|
+
max_depth = coerce_int.call(depth) || 3
|
|
445
471
|
graph = reader.dependency_graph
|
|
446
472
|
|
|
447
473
|
assembler = CodebaseIndex::FlowAssembler.new(
|
|
@@ -457,6 +483,7 @@ module CodebaseIndex
|
|
|
457
483
|
end
|
|
458
484
|
|
|
459
485
|
def define_session_trace_tool(server, reader, respond)
|
|
486
|
+
coerce_int = method(:coerce_integer)
|
|
460
487
|
server.define_tool(
|
|
461
488
|
name: 'session_trace',
|
|
462
489
|
description: 'Assemble context from a browser session trace (requires session tracer middleware)',
|
|
@@ -469,6 +496,8 @@ module CodebaseIndex
|
|
|
469
496
|
required: ['session_id']
|
|
470
497
|
}
|
|
471
498
|
) do |session_id:, server_context:, budget: nil, depth: nil|
|
|
499
|
+
budget = coerce_int.call(budget)
|
|
500
|
+
depth = coerce_int.call(depth)
|
|
472
501
|
store = CodebaseIndex.configuration.session_store
|
|
473
502
|
next respond.call(JSON.pretty_generate({ error: 'Session tracer not configured' })) unless store
|
|
474
503
|
|
|
@@ -651,6 +680,7 @@ module CodebaseIndex
|
|
|
651
680
|
end
|
|
652
681
|
|
|
653
682
|
def define_retrieval_rate_tool(server, feedback_store, respond)
|
|
683
|
+
coerce_int = method(:coerce_integer)
|
|
654
684
|
server.define_tool(
|
|
655
685
|
name: 'retrieval_rate',
|
|
656
686
|
description: 'Record a quality rating for a retrieval result (1-5 scale).',
|
|
@@ -665,6 +695,7 @@ module CodebaseIndex
|
|
|
665
695
|
) do |query:, score:, server_context:, comment: nil|
|
|
666
696
|
next respond.call('Feedback store is not configured.') unless feedback_store
|
|
667
697
|
|
|
698
|
+
score = coerce_int.call(score)
|
|
668
699
|
feedback_store.record_rating(query: query, score: score, comment: comment)
|
|
669
700
|
respond.call(JSON.pretty_generate({ recorded: true, type: 'rating', query: query, score: score }))
|
|
670
701
|
end
|
|
@@ -740,6 +771,7 @@ module CodebaseIndex
|
|
|
740
771
|
end
|
|
741
772
|
|
|
742
773
|
def define_list_snapshots_tool(server, snapshot_store, respond)
|
|
774
|
+
coerce_int = method(:coerce_integer)
|
|
743
775
|
server.define_tool(
|
|
744
776
|
name: 'list_snapshots',
|
|
745
777
|
description: 'List temporal snapshots of past extraction runs, optionally filtered by branch.',
|
|
@@ -752,6 +784,7 @@ module CodebaseIndex
|
|
|
752
784
|
) do |server_context:, limit: nil, branch: nil|
|
|
753
785
|
next respond.call('Snapshot store is not configured. Set enable_snapshots: true.') unless snapshot_store
|
|
754
786
|
|
|
787
|
+
limit = coerce_int.call(limit)
|
|
755
788
|
results = snapshot_store.list(limit: limit || 20, branch: branch)
|
|
756
789
|
respond.call(JSON.pretty_generate({ snapshot_count: results.size, snapshots: results }))
|
|
757
790
|
end
|
|
@@ -783,6 +816,7 @@ module CodebaseIndex
|
|
|
783
816
|
end
|
|
784
817
|
|
|
785
818
|
def define_unit_history_tool(server, snapshot_store, respond)
|
|
819
|
+
coerce_int = method(:coerce_integer)
|
|
786
820
|
server.define_tool(
|
|
787
821
|
name: 'unit_history',
|
|
788
822
|
description: 'Show the history of a single unit across extraction snapshots. Tracks when source changed.',
|
|
@@ -796,6 +830,7 @@ module CodebaseIndex
|
|
|
796
830
|
) do |identifier:, server_context:, limit: nil|
|
|
797
831
|
next respond.call('Snapshot store is not configured. Set enable_snapshots: true.') unless snapshot_store
|
|
798
832
|
|
|
833
|
+
limit = coerce_int.call(limit)
|
|
799
834
|
entries = snapshot_store.unit_history(identifier, limit: limit || 20)
|
|
800
835
|
respond.call(JSON.pretty_generate({
|
|
801
836
|
identifier: identifier,
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
require 'time'
require 'digest'
require 'fileutils'
|
|
6
|
+
|
|
7
|
+
module CodebaseIndex
  module Temporal
    # JSON-file-based snapshot store for temporal tracking without SQLite.
    #
    # Each snapshot is written as `<git_sha>.json` inside a `snapshots/`
    # subdirectory of the index output directory. A file holds the manifest
    # metadata of one extraction run plus the per-unit content hashes.
    #
    # Implements the same public interface as SnapshotStore so the MCP server
    # tools work identically.
    #
    # @example
    #   store = JsonSnapshotStore.new(dir: '/app/tmp/codebase_index')
    #   store.capture(manifest, unit_hashes)
    #   store.list                      # => [{ git_sha: "abc123", ... }]
    #   store.diff("abc123", "def456")  # => { added: [...], modified: [...], deleted: [...] }
    #
    class JsonSnapshotStore # rubocop:disable Metrics/ClassLength
      # Only hexadecimal names are accepted, which also keeps path separators
      # and `..` out of the file name built from a caller-supplied SHA.
      SHA_PATTERN = /\A[0-9a-f]+\z/i.freeze

      # Per-unit hashes whose difference marks a unit as modified between snapshots.
      UNIT_HASH_KEYS = %i[source_hash metadata_hash dependencies_hash].freeze

      # @param dir [String] Index output directory; snapshots live in `<dir>/snapshots`
      def initialize(dir:)
        @dir = File.join(dir, 'snapshots')
        FileUtils.mkdir_p(@dir)
      end

      # Persist a snapshot for the manifest's git SHA.
      #
      # Change counters are computed against the most recent snapshot already
      # stored (by `extracted_at`); they stay 0 when there is none.
      #
      # @param manifest [Hash] Manifest with String or Symbol keys
      # @param unit_hashes [Array<Hash>] Per-unit hashes (identifier, type, *_hash)
      # @return [Hash, nil] Snapshot summary without units, or nil when the manifest has no git_sha
      # @raise [ArgumentError] when git_sha is not hexadecimal
      def capture(manifest, unit_hashes)
        git_sha = mget(manifest, 'git_sha')
        return nil unless git_sha

        # Read the predecessor before writing so the new file is never diffed against itself.
        previous = find_latest
        snapshot = build_snapshot(manifest, git_sha, unit_hashes)

        if previous
          changes = compute_diff(previous[:units], snapshot[:units])
          snapshot[:units_added] = changes[:added].size
          snapshot[:units_modified] = changes[:modified].size
          snapshot[:units_deleted] = changes[:deleted].size
        end

        write_snapshot(git_sha, snapshot)
        snapshot.except(:units)
      end

      # List snapshot summaries, newest first.
      #
      # @param limit [Integer] Maximum number of summaries
      # @param branch [String, nil] When given, only snapshots of this branch
      # @return [Array<Hash>]
      def list(limit: 20, branch: nil)
        summaries = load_all_summaries
        summaries = summaries.select { |s| s[:git_branch] == branch } if branch
        newest_first(summaries).first(limit)
      end

      # Look up one snapshot summary.
      #
      # @param git_sha [String]
      # @return [Hash, nil] Summary without units, or nil when no such snapshot exists
      # @raise [ArgumentError] when git_sha is not hexadecimal
      def find(git_sha)
        load_snapshot_with_units(git_sha)&.except(:units)
      end

      # Compare the units of two snapshots.
      #
      # @param sha_a [String] Baseline snapshot
      # @param sha_b [String] Snapshot compared against the baseline
      # @return [Hash] `{ added:, modified:, deleted: }`; all empty when either snapshot is missing
      def diff(sha_a, sha_b)
        snap_a = load_snapshot_with_units(sha_a)
        snap_b = load_snapshot_with_units(sha_b)

        return { added: [], modified: [], deleted: [] } unless snap_a && snap_b

        compute_diff(snap_a[:units], snap_b[:units])
      end

      # History of a single unit across snapshots, newest first.
      #
      # `limit` caps the number of returned entries, i.e. it is applied after
      # snapshots that do not contain the unit have been dropped, so a unit
      # missing from recent snapshots still gets up to `limit` entries.
      #
      # @param identifier [String] Unit identifier
      # @param limit [Integer] Maximum number of history entries
      # @return [Array<Hash>] Entries carrying a :changed flag
      def unit_history(identifier, limit: 20)
        entries = newest_first(load_all_with_units).filter_map do |snap|
          unit = snap[:units][identifier]
          next unless unit

          {
            git_sha: snap[:git_sha],
            extracted_at: snap[:extracted_at],
            git_branch: snap[:git_branch],
            unit_type: unit[:unit_type],
            source_hash: unit[:source_hash],
            metadata_hash: unit[:metadata_hash],
            dependencies_hash: unit[:dependencies_hash]
          }
        end

        mark_changed_entries(entries.first(limit))
      end

      private

      # Fetch a value from a hash that may use String or Symbol keys.
      def mget(hash, key)
        hash[key] || hash[key.to_sym]
      end

      # Order snapshots by extraction time, most recent first. Snapshots
      # without a timestamp sort as the oldest.
      def newest_first(snapshots)
        snapshots.sort_by { |s| s[:extracted_at] || '' }.reverse
      end

      # Assemble the full snapshot document; change counters start at 0.
      def build_snapshot(manifest, git_sha, unit_hashes)
        {
          git_sha: git_sha,
          git_branch: mget(manifest, 'git_branch'),
          extracted_at: mget(manifest, 'extracted_at') || Time.now.iso8601,
          rails_version: mget(manifest, 'rails_version'),
          ruby_version: mget(manifest, 'ruby_version'),
          total_units: mget(manifest, 'total_units') || unit_hashes.size,
          unit_counts: mget(manifest, 'counts') || {},
          gemfile_lock_sha: mget(manifest, 'gemfile_lock_sha'),
          schema_sha: mget(manifest, 'schema_sha'),
          units_added: 0,
          units_modified: 0,
          units_deleted: 0,
          units: index_units(unit_hashes)
        }
      end

      # Key the unit hashes by identifier; entries without an identifier are dropped.
      def index_units(unit_hashes)
        unit_hashes.filter_map do |uh|
          id = mget(uh, 'identifier')
          next if id.nil?

          [id, {
            unit_type: mget(uh, 'type').to_s,
            source_hash: mget(uh, 'source_hash'),
            metadata_hash: mget(uh, 'metadata_hash'),
            dependencies_hash: mget(uh, 'dependencies_hash')
          }]
        end.to_h
      end

      # Classify units of `units_b` relative to `units_a`.
      def compute_diff(units_a, units_b)
        added = []
        modified = []

        units_b.each do |identifier, data_b|
          entry = { identifier: identifier, unit_type: data_b[:unit_type] }

          if units_a.key?(identifier)
            modified << entry if unit_changed?(units_a[identifier], data_b)
          else
            added << entry
          end
        end

        deleted = units_a.filter_map do |identifier, data_a|
          { identifier: identifier, unit_type: data_a[:unit_type] } unless units_b.key?(identifier)
        end

        { added: added, modified: modified, deleted: deleted }
      end

      # True when any tracked hash differs between two versions of a unit.
      def unit_changed?(data_a, data_b)
        UNIT_HASH_KEYS.any? { |key| data_a[key] != data_b[key] }
      end

      # Flag each entry (newest first) whose source hash differs from the next
      # older entry. The oldest entry has nothing to compare with and is
      # always flagged.
      def mark_changed_entries(entries)
        entries.each_with_index do |entry, i|
          older = entries[i + 1]
          entry[:changed] = older.nil? || entry[:source_hash] != older[:source_hash]
        end
        entries
      end

      # Build the file path for a SHA. `to_s` makes nil and other non-String
      # input fail with the documented ArgumentError instead of NoMethodError.
      def snapshot_path(git_sha)
        sha = git_sha.to_s
        raise ArgumentError, "Invalid git SHA: #{git_sha}" unless sha.match?(SHA_PATTERN)

        File.join(@dir, "#{sha}.json")
      end

      def write_snapshot(git_sha, data)
        File.write(snapshot_path(git_sha), JSON.pretty_generate(data))
      end

      # Load one snapshot including units. Parse errors propagate here because
      # the caller asked for this specific snapshot.
      def load_snapshot_with_units(git_sha)
        path = snapshot_path(git_sha)
        return nil unless File.exist?(path)

        symbolize_snapshot(JSON.parse(File.read(path)))
      end

      def snapshot_files
        Dir.glob(File.join(@dir, '*.json'))
      end

      # Read one file while scanning the directory. Files that are corrupt or
      # are not snapshot documents are skipped with a warning so a single bad
      # file cannot break listing or capturing.
      def read_snapshot_leniently(path)
        data = JSON.parse(File.read(path))
        return symbolize_snapshot(data) if data.is_a?(Hash) && data['git_sha'].is_a?(String)

        warn "[CodebaseIndex] Skipping invalid snapshot #{File.basename(path)}: not a snapshot document"
        nil
      rescue JSON::ParserError => e
        warn "[CodebaseIndex] Skipping corrupt snapshot #{File.basename(path)}: #{e.message}"
        nil
      end

      # Summaries drop the units right after each file is read, so the unit
      # data of only one snapshot is held at a time.
      def load_all_summaries
        snapshot_files.filter_map { |path| read_snapshot_leniently(path)&.except(:units) }
      end

      def load_all_with_units
        snapshot_files.filter_map { |path| read_snapshot_leniently(path) }
      end

      # Most recent stored snapshot (by extracted_at) including units, or nil when the store is empty.
      def find_latest
        latest = load_all_summaries.max_by { |s| s[:extracted_at] || '' }
        return nil unless latest

        load_snapshot_with_units(latest[:git_sha])
      end

      # Convert a parsed JSON document (String keys) to the Symbol-keyed shape used internally.
      def symbolize_snapshot(data)
        {
          git_sha: data['git_sha'],
          git_branch: data['git_branch'],
          extracted_at: data['extracted_at'],
          rails_version: data['rails_version'],
          ruby_version: data['ruby_version'],
          total_units: data['total_units'],
          unit_counts: data['unit_counts'] || {},
          gemfile_lock_sha: data['gemfile_lock_sha'],
          schema_sha: data['schema_sha'],
          units_added: data['units_added'],
          units_modified: data['units_modified'],
          units_deleted: data['units_deleted'],
          units: symbolize_units(data['units'])
        }
      end

      # Unit identifiers stay as-is (String keys); only the per-unit fields are symbolized.
      def symbolize_units(units)
        return {} unless units.is_a?(Hash)

        units.transform_values do |v|
          {
            unit_type: v['unit_type'],
            source_hash: v['source_hash'],
            metadata_hash: v['metadata_hash'],
            dependencies_hash: v['dependencies_hash']
          }
        end
      end
    end
  end
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: codebase_index
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Leah Armstrong
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-05 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: mcp
|
|
@@ -227,6 +227,7 @@ files:
|
|
|
227
227
|
- lib/codebase_index/storage/pgvector.rb
|
|
228
228
|
- lib/codebase_index/storage/qdrant.rb
|
|
229
229
|
- lib/codebase_index/storage/vector_store.rb
|
|
230
|
+
- lib/codebase_index/temporal/json_snapshot_store.rb
|
|
230
231
|
- lib/codebase_index/temporal/snapshot_store.rb
|
|
231
232
|
- lib/codebase_index/token_utils.rb
|
|
232
233
|
- lib/codebase_index/version.rb
|