codebase_index 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 982e7949df0e0db9249705ab9f009121c3c8156582c63712f0613fccc998337d
4
- data.tar.gz: 4fb41c658901cd26606e44164da7059a7d62aa39c795a682020cfbb6252311be
3
+ metadata.gz: a55ea6a46950c68c6cc1b39f9008bbff466dea9e20d8b646564d4cc489060552
4
+ data.tar.gz: ca3f117e515fb14f2ec6069c4e6ef541b661e90f0e3d33daa9f9fab43afb746a
5
5
  SHA512:
6
- metadata.gz: 6b62fe0a1d8b0db683744214461ec5d0029e41cf4538b7313ce2701a3a985bf9b1d06ce955acb225db09e93aaa72bcbc5e3b40cd534f7d682d98d190a670d722
7
- data.tar.gz: f0a948295982aa85951fa8cca96cc8c30b317176a53424fe18e50cc1d3e28b17df5fc9dfb6a191e6450a1318b3b0f81a0f2cdf20fe7326c0c4e492a7f8b47f70
6
+ metadata.gz: e9ff9f423902e1651540b9f933e9a1afab31b3b4ebb0347a368769a83aaf8c1a493e7a33ac68d14a7733753da085dc830e9509911d0bc49205b21e2e07d82de3
7
+ data.tar.gz: ab8630e12cd123cfeeeee6796d0529cdbaa0f8e6e8c0fb9d7d95dadf753b8a8fbb0d5e2136ff2353cc3aac8b0acb5f525f75f903bc777e86326468c6a759ae8a
@@ -21,8 +21,9 @@ require_relative '../lib/codebase_index/embedding/indexer'
21
21
 
22
22
  index_dir = CodebaseIndex::MCP::Bootstrapper.resolve_index_dir(ARGV)
23
23
  retriever = CodebaseIndex::MCP::Bootstrapper.build_retriever
24
+ snapshot_store = CodebaseIndex::MCP::Bootstrapper.build_snapshot_store(index_dir)
24
25
 
25
- server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever)
26
+ server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store)
26
27
 
27
28
  # Pin protocol version for broad client compatibility (Claude Code, Cursor, etc.)
28
29
  if ENV['MCP_PROTOCOL_VERSION']
@@ -22,11 +22,12 @@ require_relative '../lib/codebase_index/embedding/indexer'
22
22
 
23
23
  index_dir = CodebaseIndex::MCP::Bootstrapper.resolve_index_dir(ARGV)
24
24
  retriever = CodebaseIndex::MCP::Bootstrapper.build_retriever
25
+ snapshot_store = CodebaseIndex::MCP::Bootstrapper.build_snapshot_store(index_dir)
25
26
 
26
27
  port = (ENV['PORT'] || 9292).to_i
27
28
  host = ENV['HOST'] || 'localhost'
28
29
 
29
- server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever)
30
+ server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store)
30
31
  transport = MCP::Server::Transports::StreamableHTTPTransport.new(server)
31
32
  server.transport = transport
32
33
 
@@ -23,13 +23,23 @@ module CodebaseIndex
23
23
 
24
24
  TIER1_TOOLS = Bridge::TIER1_TOOLS
25
25
 
26
+ # Tools gated behind the read_tools_enabled flag.
27
+ # sql/query have existing safety gates (SqlValidator, SafeContext rollback)
28
+ # but require explicit opt-in for embedded mode.
29
+ EMBEDDED_READ_TOOLS = %w[sql query].freeze
30
+
31
+ MAX_SQL_LIMIT = 10_000
32
+ MAX_QUERY_LIMIT = 10_000
33
+
26
34
  # @param model_validator [ModelValidator] Validates model/column names
27
35
  # @param safe_context [SafeContext] Wraps execution in rolled-back transaction
28
36
  # @param connection [Object, nil] Database connection for adapter detection
29
- def initialize(model_validator:, safe_context:, connection: nil)
37
+ # @param read_tools_enabled [Boolean] Enable sql/query tools in embedded mode (default: false)
38
+ def initialize(model_validator:, safe_context:, connection: nil, read_tools_enabled: false)
30
39
  @model_validator = model_validator
31
40
  @safe_context = safe_context
32
41
  @connection = connection
42
+ @read_tools_enabled = read_tools_enabled
33
43
  end
34
44
 
35
45
  # Execute a tool request and return a response hash.
@@ -46,7 +56,7 @@ module CodebaseIndex
46
56
  tool = request['tool']
47
57
  params = request['params'] || {}
48
58
 
49
- unless TIER1_TOOLS.include?(tool)
59
+ unless TIER1_TOOLS.include?(tool) || (@read_tools_enabled && EMBEDDED_READ_TOOLS.include?(tool))
50
60
  return { 'ok' => false,
51
61
  'error' => 'Not yet implemented in embedded mode',
52
62
  'error_type' => 'unsupported' }
@@ -72,8 +82,10 @@ module CodebaseIndex
72
82
  # @return [Hash] Tool result
73
83
  def dispatch(tool, params)
74
84
  case tool
75
- when 'status' then handle_status
76
- when 'schema' then handle_schema(params)
85
+ when 'status' then handle_status
86
+ when 'schema' then handle_schema(params)
87
+ when 'sql' then handle_sql(params)
88
+ when 'query' then handle_query(params)
77
89
  else
78
90
  validate_model!(params)
79
91
  send(:"handle_#{tool}", params)
@@ -211,17 +223,86 @@ module CodebaseIndex
211
223
  { 'status' => 'ok', 'models' => @model_validator.model_names, 'adapter' => adapter }
212
224
  end
213
225
 
226
+ # ── Read tools (sql/query, gated by read_tools_enabled) ────────────
227
+
228
+ # Execute validated read-only SQL via ActiveRecord's select_all.
229
+ #
230
+ # @param params [Hash] Must contain 'sql'; optional 'limit'
231
+ # @return [Hash] Columns and rows
232
+ def handle_sql(params)
233
+ sql = params['sql']
234
+ raise ValidationError, 'Missing required parameter: sql' unless sql
235
+
236
+ require_relative 'sql_validator'
237
+ SqlValidator.new.validate!(sql)
238
+
239
+ limit = params['limit'] ? [params['limit'].to_i, MAX_SQL_LIMIT].min : nil
240
+ query_sql = limit ? "SELECT * FROM (#{sql}) AS _limited LIMIT #{limit}" : sql
241
+ result = active_connection.select_all(query_sql)
242
+
243
+ { 'columns' => result.columns, 'rows' => result.rows, 'count' => result.rows.size }
244
+ rescue SqlValidationError => e
245
+ raise ValidationError, e.message
246
+ end
247
+
248
+ # Build and execute a structured ActiveRecord query.
249
+ #
250
+ # @param params [Hash] Must contain 'model' and 'select'
251
+ # @return [Hash] Columns and rows
252
+ def handle_query(params)
253
+ validate_model!(params)
254
+ model = resolve_model(params['model'])
255
+ relation = build_query_relation(model, params)
256
+ result = active_connection.select_all(relation.to_sql)
257
+ { 'columns' => result.columns, 'rows' => result.rows, 'count' => result.rows.size }
258
+ end
259
+
260
+ # Build an ActiveRecord relation from structured query parameters.
261
+ #
262
+ # @param model [Class] ActiveRecord model class
263
+ # @param params [Hash] Query parameters (select, joins, scope, group_by, having, order, limit)
264
+ # @return [ActiveRecord::Relation]
265
+ def build_query_relation(model, params)
266
+ relation = apply_query_clauses(model.all, params)
267
+ limit = params['limit'] ? [params['limit'].to_i, MAX_QUERY_LIMIT].min : MAX_QUERY_LIMIT
268
+ relation.limit(limit)
269
+ end
270
+
271
+ # Apply select/joins/scope/group/having/order clauses to a relation.
272
+ #
273
+ # @param relation [ActiveRecord::Relation]
274
+ # @param params [Hash]
275
+ # @return [ActiveRecord::Relation]
276
+ def apply_query_clauses(relation, params) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
277
+ relation = relation.select(params['select']) if params['select']
278
+ relation = relation.joins(params['joins'].map(&:to_sym)) if params['joins']&.any?
279
+ relation = apply_scope(relation, params['scope'])
280
+ relation = relation.group(params['group_by']) if params['group_by']&.any?
281
+ relation = relation.having(params['having']) if params['having']
282
+ relation = relation.order(params['order']) if params['order']
283
+ relation
284
+ end
285
+
214
286
  # ── Helpers ──────────────────────────────────────────────────────────
215
287
 
216
288
  # Apply scope conditions (WHERE clauses) to a relation.
217
289
  #
290
+ # Accepts Hash form for simple equality conditions, or Array form
291
+ # for parameterized SQL (e.g., JSON column queries like
292
+ # ["preferences->>'theme' = ?", "dark"]).
293
+ #
218
294
  # @param relation [ActiveRecord::Relation, Class] Model or relation
219
- # @param scope [Hash, nil] Filter conditions
295
+ # @param scope [Hash, Array, nil] Filter conditions
220
296
  # @return [ActiveRecord::Relation]
221
297
  def apply_scope(relation, scope)
222
- return relation unless scope.is_a?(Hash) && scope.any?
223
-
224
- relation.where(scope)
298
+ case scope
299
+ when Hash
300
+ scope.any? ? relation.where(scope) : relation
301
+ when Array
302
+ scope.any? ? relation.where(*scope) : relation
303
+ else
304
+ relation
305
+ end
225
306
  end
226
307
 
227
308
  # Apply column selection to a relation.
@@ -16,9 +16,11 @@ module CodebaseIndex
16
16
  class RackMiddleware
17
17
  # @param app [#call] The next Rack app in the middleware stack
18
18
  # @param path [String] URL path to mount the MCP endpoint (default: '/mcp/console')
19
- def initialize(app, path: '/mcp/console')
19
+ # @param embedded_read_tools [Boolean] Enable sql/query tools in embedded mode (default: false)
20
+ def initialize(app, path: '/mcp/console', embedded_read_tools: false)
20
21
  @app = app
21
22
  @path = path
23
+ @embedded_read_tools = embedded_read_tools
22
24
  @mutex = Mutex.new
23
25
  @transport = nil
24
26
  end
@@ -71,7 +73,8 @@ module CodebaseIndex
71
73
  server = Server.build_embedded(
72
74
  model_validator: validator,
73
75
  safe_context: safe_context,
74
- redacted_columns: redacted
76
+ redacted_columns: redacted,
77
+ read_tools_enabled: @embedded_read_tools
75
78
  )
76
79
 
77
80
  @transport = MCP::Server::Transports::StreamableHTTPTransport.new(server)
@@ -56,12 +56,15 @@ module CodebaseIndex
56
56
  # @param safe_context [SafeContext] Wraps queries in rolled-back transactions
57
57
  # @param redacted_columns [Array<String>] Column names to redact from output
58
58
  # @param connection [Object, nil] Database connection for adapter detection
59
+ # @param read_tools_enabled [Boolean] Enable sql/query tools in embedded mode (default: false)
59
60
  # @return [MCP::Server] Configured server ready for transport
60
- def build_embedded(model_validator:, safe_context:, redacted_columns: [], connection: nil)
61
+ def build_embedded(model_validator:, safe_context:, redacted_columns: [], connection: nil,
62
+ read_tools_enabled: false)
61
63
  require_relative 'embedded_executor'
62
64
 
63
65
  executor = EmbeddedExecutor.new(
64
- model_validator: model_validator, safe_context: safe_context, connection: connection
66
+ model_validator: model_validator, safe_context: safe_context,
67
+ connection: connection, read_tools_enabled: read_tools_enabled
65
68
  )
66
69
  redact_ctx = if redacted_columns.any?
67
70
  SafeContext.new(connection: nil,
@@ -568,15 +571,35 @@ module CodebaseIndex
568
571
  def define_console_tool(server, conn_mgr, name, description, properties:, required: nil,
569
572
  safe_ctx: nil, renderer: nil, &tool_block)
570
573
  bridge_method = method(:send_to_bridge)
574
+ coerce_method = method(:coerce_integer_args!)
575
+ integer_keys = integer_property_keys(properties)
571
576
  schema = { properties: properties }
572
577
  schema[:required] = required if required&.any?
573
578
  server.define_tool(name: name, description: description, input_schema: schema) do |server_context:, **args|
579
+ coerce_method.call(args, integer_keys)
574
580
  request = tool_block.call(args)
575
581
  bridge_method.call(conn_mgr, request.transform_keys(&:to_s), safe_ctx, renderer: renderer)
576
582
  end
577
583
  end
578
584
  # rubocop:enable Metrics/ParameterLists
579
585
 
586
+ # Pre-compute property keys declared as integer in a schema.
587
+ #
588
+ # @param properties [Hash] Tool schema properties
589
+ # @return [Array<Symbol>]
590
+ def integer_property_keys(properties)
591
+ properties.select { |_k, v| v[:type] == 'integer' }.keys.map(&:to_sym)
592
+ end
593
+
594
+ # Coerce string values to integers for known integer keys.
595
+ #
596
+ # @param args [Hash] Tool arguments (mutated in place)
597
+ # @param keys [Array<Symbol>] Keys that should be integers
598
+ # @return [void]
599
+ def coerce_integer_args!(args, keys)
600
+ keys.each { |k| args[k] = args[k].to_i if args[k].is_a?(String) }
601
+ end
602
+
580
603
  # Schema property helpers for concise tool definitions.
581
604
  def str_prop(desc) = { type: 'string', description: desc }
582
605
  def int_prop(desc) = { type: 'integer', description: desc }
@@ -207,6 +207,7 @@ module CodebaseIndex
207
207
  @output_dir = Pathname.new(output_dir || Rails.root.join('tmp/codebase_index'))
208
208
  @dependency_graph = DependencyGraph.new
209
209
  @results = {}
210
+ @extractors = {}
210
211
  end
211
212
 
212
213
  # ══════════════════════════════════════════════════════════════════════
@@ -268,6 +269,7 @@ module CodebaseIndex
268
269
  write_graph_analysis
269
270
  write_manifest
270
271
  write_structural_summary
272
+ capture_snapshot
271
273
 
272
274
  log_summary
273
275
 
@@ -317,6 +319,7 @@ module CodebaseIndex
317
319
  write_dependency_graph
318
320
  write_manifest
319
321
  write_structural_summary
322
+ capture_snapshot
320
323
 
321
324
  affected_ids
322
325
  end
@@ -377,6 +380,7 @@ module CodebaseIndex
377
380
  start_time = Time.current
378
381
 
379
382
  extractor = extractor_class.new
383
+ @extractors[type] = extractor
380
384
  units = extractor.extract_all
381
385
 
382
386
  @results[type] = units
@@ -410,12 +414,16 @@ module CodebaseIndex
410
414
  start_time = Time.current
411
415
 
412
416
  extractor = extractor_class.new
417
+ results_mutex.synchronize { @extractors[type] = extractor }
418
+
413
419
  units = extractor.extract_all
414
420
 
415
421
  elapsed = Time.current - start_time
416
422
  Rails.logger.info "[CodebaseIndex] [Thread] Extracted #{units.size} #{type} in #{elapsed.round(2)}s"
417
423
 
418
- results_mutex.synchronize { @results[type] = units }
424
+ results_mutex.synchronize do
425
+ @results[type] = units
426
+ end
419
427
  rescue StandardError => e
420
428
  Rails.logger.error "[CodebaseIndex] [Thread] #{type} failed: #{e.message}"
421
429
  results_mutex.synchronize { @results[type] = [] }
@@ -743,9 +751,10 @@ module CodebaseIndex
743
751
  total_units: @results.values.sum(&:size),
744
752
  total_chunks: @results.sum { |_, units| units.sum { |u| u.chunks.size } },
745
753
 
746
- # Git info
747
- git_sha: run_git('rev-parse', 'HEAD').presence,
748
- git_branch: run_git('rev-parse', '--abbrev-ref', 'HEAD').presence,
754
+ # Git info — fall back to env vars for Docker/worktree environments
755
+ # where the git repo may not be directly accessible
756
+ git_sha: run_git('rev-parse', 'HEAD').presence || ENV['GIT_SHA'].presence,
757
+ git_branch: run_git('rev-parse', '--abbrev-ref', 'HEAD').presence || ENV['GIT_BRANCH'].presence,
749
758
 
750
759
  # For change detection
751
760
  gemfile_lock_sha: gemfile_lock_sha,
@@ -758,6 +767,63 @@ module CodebaseIndex
758
767
  )
759
768
  end
760
769
 
770
+ # Capture a temporal snapshot after extraction completes.
771
+ #
772
+ # Reads the manifest and computes per-unit content hashes, then delegates
773
+ # to the SnapshotStore for storage and diff computation. Requires
774
+ # enable_snapshots and a valid git_sha in the manifest.
775
+ #
776
+ # @return [void]
777
+ def capture_snapshot
778
+ return unless CodebaseIndex.configuration.enable_snapshots
779
+
780
+ manifest_path = @output_dir.join('manifest.json')
781
+ return unless manifest_path.exist?
782
+
783
+ manifest = JSON.parse(File.read(manifest_path))
784
+ return unless manifest['git_sha']
785
+
786
+ store = build_snapshot_store
787
+ return unless store
788
+
789
+ unit_hashes = @results.flat_map do |type, units|
790
+ units.map do |unit|
791
+ {
792
+ 'identifier' => unit.identifier,
793
+ 'type' => type.to_s,
794
+ 'source_hash' => Digest::SHA256.hexdigest(unit.source_code.to_s),
795
+ 'metadata_hash' => Digest::SHA256.hexdigest(unit.metadata.to_json),
796
+ 'dependencies_hash' => Digest::SHA256.hexdigest(unit.dependencies.to_json)
797
+ }
798
+ end
799
+ end
800
+
801
+ store.capture(manifest, unit_hashes)
802
+ Rails.logger.info "[CodebaseIndex] Snapshot captured for #{manifest['git_sha'][0..7]}"
803
+ rescue StandardError => e
804
+ Rails.logger.error "[CodebaseIndex] Snapshot capture failed (#{e.class}): #{e.message}"
805
+ end
806
+
807
+ # Build a snapshot store, preferring SQLite with JSON file fallback.
808
+ #
809
+ # @return [CodebaseIndex::Temporal::SnapshotStore, CodebaseIndex::Temporal::JsonSnapshotStore, nil]
810
+ def build_snapshot_store
811
+ require 'sqlite3'
812
+ require_relative 'db/migrator'
813
+ require_relative 'temporal/snapshot_store'
814
+
815
+ db_path = @output_dir.join('codebase_index.sqlite3')
816
+ db = SQLite3::Database.new(db_path.to_s)
817
+ db.results_as_hash = true
818
+
819
+ Db::Migrator.new(connection: db).migrate!
820
+ Temporal::SnapshotStore.new(connection: db)
821
+ rescue LoadError
822
+ Rails.logger.info '[CodebaseIndex] sqlite3 gem not available, using JSON snapshot store'
823
+ require_relative 'temporal/json_snapshot_store'
824
+ Temporal::JsonSnapshotStore.new(dir: @output_dir.to_s)
825
+ end
826
+
761
827
  # Write a compact TOC-style summary of extracted units.
762
828
  #
763
829
  # Produces a SUMMARY.md under 8K tokens (~24KB) by listing one line per
@@ -886,6 +952,16 @@ module CodebaseIndex
886
952
  Rails.logger.info "[CodebaseIndex] Total: #{total} units, #{chunks} chunks"
887
953
  Rails.logger.info "[CodebaseIndex] Output: #{@output_dir}"
888
954
  Rails.logger.info '[CodebaseIndex] ═══════════════════════════════════════════'
955
+
956
+ all_warnings = @extractors.flat_map do |_type, ext|
957
+ ext.respond_to?(:warnings) ? ext.warnings : []
958
+ end
959
+
960
+ return if all_warnings.empty?
961
+
962
+ Rails.logger.warn '[CodebaseIndex] ───────────────────────────────────────────'
963
+ Rails.logger.warn "[CodebaseIndex] Warnings (#{all_warnings.size}):"
964
+ all_warnings.each { |w| Rails.logger.warn "[CodebaseIndex] #{w}" }
889
965
  end
890
966
 
891
967
  # ──────────────────────────────────────────────────────────────────────
@@ -8,7 +8,8 @@ module CodebaseIndex
8
8
  # ConcernExtractor handles ActiveSupport::Concern module extraction.
9
9
  #
10
10
  # Concerns are mixins that extend model and controller behavior.
11
- # They live in `app/models/concerns/` and `app/controllers/concerns/`.
11
+ # They live in `app/models/concerns/` and `app/controllers/concerns/`,
12
+ # as well as nested directories like `app/models/gateway/stripe/concerns/`.
12
13
  #
13
14
  # We extract:
14
15
  # - Module name and namespace
@@ -25,13 +26,20 @@ module CodebaseIndex
25
26
  include SharedUtilityMethods
26
27
  include SharedDependencyScanner
27
28
 
28
- # Directories to scan for concern modules
29
+ # Canonical concern directories (used as fallback if glob finds nothing).
29
30
  CONCERN_DIRECTORIES = %w[
30
31
  app/models/concerns
31
32
  app/controllers/concerns
32
33
  ].freeze
33
34
 
34
35
  def initialize
36
+ # Discover all concerns/ directories under app/, including deeply nested ones
37
+ # like app/models/gateway/stripe/webhook/concerns/.
38
+ @directories = Dir[Rails.root.join('app/**/concerns')].map { |d| Pathname.new(d) }
39
+ .select(&:directory?)
40
+ # Fall back to canonical directories if glob finds nothing.
41
+ return unless @directories.empty?
42
+
35
43
  @directories = CONCERN_DIRECTORIES.map { |d| Rails.root.join(d) }
36
44
  .select(&:directory?)
37
45
  end
@@ -91,10 +99,11 @@ module CodebaseIndex
91
99
  modules = source.scan(/^\s*module\s+([\w:]+)/).flatten
92
100
  return modules.last if modules.any?
93
101
 
94
- # Infer from file path
102
+ # Infer from file path — strip everything up to and including the first concerns/ dir.
103
+ # Handles canonical (app/models/concerns/) and nested (app/models/foo/concerns/) paths.
95
104
  relative = file_path.sub("#{Rails.root}/", '')
96
105
  relative
97
- .sub(%r{^app/(models|controllers)/concerns/}, '')
106
+ .sub(%r{^app/.*?/concerns/}, '')
98
107
  .sub('.rb', '')
99
108
  .split('/')
100
109
  .map { |segment| segment.split('_').map(&:capitalize).join }
@@ -179,9 +188,9 @@ module CodebaseIndex
179
188
  # @param file_path [String] Path to the concern file
180
189
  # @return [String] One of "model", "controller", "unknown"
181
190
  def detect_concern_scope(file_path)
182
- if file_path.include?('app/models/concerns')
191
+ if file_path.include?('app/models/')
183
192
  'model'
184
- elsif file_path.include?('app/controllers/concerns')
193
+ elsif file_path.include?('app/controllers/')
185
194
  'controller'
186
195
  else
187
196
  'unknown'
@@ -196,7 +205,10 @@ module CodebaseIndex
196
205
  source.scan(/^\s*def\s+(\w+[?!=]?)/).flatten.reject { |m| m.start_with?('self.') }
197
206
  end
198
207
 
199
- # Detect other modules included by this concern.
208
+ # Detect other modules included or extended by this concern (for metadata display).
209
+ #
210
+ # Returns all module names found in include/extend calls, excluding
211
+ # ActiveSupport::Concern itself.
200
212
  #
201
213
  # @param source [String] Ruby source code
202
214
  # @return [Array<String>] Module names
@@ -205,6 +217,28 @@ module CodebaseIndex
205
217
  .reject { |m| m == 'ActiveSupport::Concern' }
206
218
  end
207
219
 
220
+ # Detect modules explicitly included by this concern.
221
+ #
222
+ # Scans for bare +include ModuleName+ calls, excluding ActiveSupport::Concern.
223
+ #
224
+ # @param source [String] Ruby source code
225
+ # @return [Array<String>] Included module names
226
+ def detect_includes(source)
227
+ source.scan(/\binclude\s+([\w:]+)/).flatten
228
+ .reject { |m| m == 'ActiveSupport::Concern' }
229
+ end
230
+
231
+ # Detect modules explicitly extended by this concern.
232
+ #
233
+ # Scans for bare +extend ModuleName+ calls, excluding ActiveSupport::Concern.
234
+ #
235
+ # @param source [String] Ruby source code
236
+ # @return [Array<String>] Extended module names
237
+ def detect_extends(source)
238
+ source.scan(/\bextend\s+([\w:]+)/).flatten
239
+ .reject { |m| m == 'ActiveSupport::Concern' }
240
+ end
241
+
208
242
  # Detect callback declarations.
209
243
  #
210
244
  # @param source [String] Ruby source code
@@ -236,11 +270,16 @@ module CodebaseIndex
236
270
  # @param source [String] Ruby source code
237
271
  # @return [Array<Hash>] Dependency hashes
238
272
  def extract_dependencies(source)
239
- # Other concerns included by this concern
240
- deps = detect_included_modules(source).map do |mod|
273
+ # Concerns included by this concern (add instance-level behavior)
274
+ deps = detect_includes(source).map do |mod|
241
275
  { type: :concern, target: mod, via: :include }
242
276
  end
243
277
 
278
+ # Concerns extended by this concern (add class-level behavior)
279
+ detect_extends(source).each do |mod|
280
+ deps << { type: :concern, target: mod, via: :extend }
281
+ end
282
+
244
283
  # Standard dependency scanning
245
284
  deps.concat(scan_model_dependencies(source))
246
285
  deps.concat(scan_service_dependencies(source))
@@ -36,8 +36,12 @@ module CodebaseIndex
36
36
  (?:after|before)_(?:add|remove)_for_ # collection callbacks
37
37
  )/x
38
38
 
39
+ # Warnings collected during extraction (skipped associations, failed models)
40
+ attr_reader :warnings
41
+
39
42
  def initialize
40
43
  @concern_cache = {}
44
+ @warnings = []
41
45
  end
42
46
 
43
47
  # Extract all ActiveRecord models in the application
@@ -79,6 +83,7 @@ module CodebaseIndex
79
83
 
80
84
  unit
81
85
  rescue StandardError => e
86
+ @warnings << "Failed to extract model #{model.name}: #{e.message}"
82
87
  Rails.logger.error("Failed to extract model #{model.name}: #{e.message}")
83
88
  nil
84
89
  end
@@ -232,6 +237,37 @@ module CodebaseIndex
232
237
  end
233
238
  end
234
239
 
240
+ # Get modules extended specifically in this model (not inherited).
241
+ #
242
+ # Extended modules live on the singleton class and add class-level methods.
243
+ # Ruby builtins (Kernel, PP, etc.) are filtered out by comparing against
244
+ # Object.singleton_class.included_modules.
245
+ #
246
+ # @param model [Class] The ActiveRecord model class
247
+ # @return [Array<Module>] App-defined modules extended by this model
248
+ def extract_extended_modules(model)
249
+ app_root = Rails.root.to_s
250
+ builtin_modules = Object.singleton_class.included_modules.map(&:name).compact.to_set
251
+
252
+ model.singleton_class.included_modules.select do |mod|
253
+ next false unless mod.name
254
+ next false if builtin_modules.include?(mod.name)
255
+
256
+ # Skip obvious non-app modules (from gems/stdlib)
257
+ if Object.respond_to?(:const_source_location)
258
+ loc = Object.const_source_location(mod.name)
259
+ next false if loc && !app_source?(loc.first, app_root)
260
+ end
261
+
262
+ # Include if it's in app/models/concerns or app/controllers/concerns
263
+ mod.name.include?('Concerns') ||
264
+ # Or if it's namespaced under the model's parent
265
+ mod.name.start_with?("#{model.module_parent}::") ||
266
+ # Or if it's defined within the application
267
+ defined_in_app?(mod)
268
+ end
269
+ end
270
+
235
271
  # Check if a module is defined within the Rails application
236
272
  #
237
273
  # @param mod [Module] The module to check
@@ -312,6 +348,13 @@ module CodebaseIndex
312
348
  table_exists: model.table_exists?,
313
349
  column_count: model.table_exists? ? model.columns.size : 0,
314
350
  column_names: model.table_exists? ? model.column_names : [],
351
+ columns: if model.table_exists?
352
+ model.columns.map do |col|
353
+ { 'name' => col.name, 'type' => col.sql_type, 'null' => col.null, 'default' => col.default }
354
+ end
355
+ else
356
+ []
357
+ end,
315
358
 
316
359
  # ActiveStorage / ActionText
317
360
  active_storage_attachments: extract_active_storage_attachments(source),
@@ -415,9 +458,11 @@ module CodebaseIndex
415
458
  result
416
459
  end
417
460
 
418
- # Extract all associations with full details
461
+ # Extract all associations with full details.
462
+ # Broken associations (e.g. missing class_name) are skipped with a warning
463
+ # instead of aborting the entire model extraction.
419
464
  def extract_associations(model)
420
- model.reflect_on_all_associations.map do |assoc|
465
+ model.reflect_on_all_associations.filter_map do |assoc|
421
466
  {
422
467
  name: assoc.name,
423
468
  type: assoc.macro, # :belongs_to, :has_many, :has_one, :has_and_belongs_to_many
@@ -428,6 +473,9 @@ module CodebaseIndex
428
473
  foreign_key: assoc.foreign_key,
429
474
  inverse_of: assoc.inverse_of&.name
430
475
  }
476
+ rescue NameError => e
477
+ @warnings << "[#{model.name}] Skipping broken association #{assoc.name}: #{e.message}"
478
+ nil
431
479
  end
432
480
  end
433
481
 
@@ -562,8 +610,21 @@ module CodebaseIndex
562
610
  # Extract what this model depends on
563
611
  def extract_dependencies(model, source = nil)
564
612
  # Associations point to other models
565
- deps = model.reflect_on_all_associations.map do |assoc|
613
+ deps = model.reflect_on_all_associations.filter_map do |assoc|
566
614
  { type: :model, target: assoc.class_name, via: :association }
615
+ rescue NameError => e
616
+ @warnings << "[#{model.name}] Skipping broken association dep #{assoc.name}: #{e.message}"
617
+ nil
618
+ end
619
+
620
+ # Included concerns add instance-level behavior
621
+ extract_included_modules(model).each do |mod|
622
+ deps << { type: :concern, target: mod.name, via: :include }
623
+ end
624
+
625
+ # Extended modules add class-level behavior (not inlined into source)
626
+ extract_extended_modules(model).each do |mod|
627
+ deps << { type: :concern, target: mod.name, via: :extend }
567
628
  end
568
629
 
569
630
  # Parse source for service/mailer/job references
@@ -30,6 +30,44 @@ module CodebaseIndex
30
30
  dir
31
31
  end
32
32
 
33
+ # Build a snapshot store for temporal tracking.
34
+ #
35
+ # Auto-enables when a SQLite database already exists in the index directory,
36
+ # or when CODEBASE_INDEX_SNAPSHOTS=true is set. The database is created and
37
+ # migrated automatically. Falls back to JSON file store when SQLite is
38
+ # unavailable or encounters errors.
39
+ #
40
+ # @param index_dir [String] Path to extraction output directory
41
+ # @return [CodebaseIndex::Temporal::SnapshotStore, CodebaseIndex::Temporal::JsonSnapshotStore, nil]
42
+ def self.build_snapshot_store(index_dir)
43
+ db_path = File.join(index_dir, 'codebase_index.sqlite3')
44
+ enabled = ENV['CODEBASE_INDEX_SNAPSHOTS'] == 'true' ||
45
+ CodebaseIndex.configuration.enable_snapshots ||
46
+ File.exist?(db_path)
47
+
48
+ return nil unless enabled
49
+
50
+ begin
51
+ require 'sqlite3'
52
+ require_relative '../db/migrator'
53
+ require_relative '../temporal/snapshot_store'
54
+
55
+ db = SQLite3::Database.new(db_path)
56
+ db.results_as_hash = true
57
+
58
+ CodebaseIndex::Db::Migrator.new(connection: db).migrate!
59
+ CodebaseIndex::Temporal::SnapshotStore.new(connection: db)
60
+ rescue LoadError
61
+ warn 'Note: sqlite3 gem not available, using JSON file-based snapshot store.'
62
+ require_relative '../temporal/json_snapshot_store'
63
+ CodebaseIndex::Temporal::JsonSnapshotStore.new(dir: index_dir)
64
+ rescue StandardError => e
65
+ warn "Note: SQLite snapshot store failed (#{e.class}: #{e.message}), using JSON fallback."
66
+ require_relative '../temporal/json_snapshot_store'
67
+ CodebaseIndex::Temporal::JsonSnapshotStore.new(dir: index_dir)
68
+ end
69
+ end
70
+
33
71
  # Attempt to build a retriever for semantic search.
34
72
  #
35
73
  # Auto-configures from environment variables when no explicit configuration
@@ -106,6 +106,16 @@ module CodebaseIndex
106
106
  value.is_a?(String) ? [value] : value
107
107
  end
108
108
 
109
+ # Coerce a value to an Integer. Converts String representations
110
+ # to Integer; leaves existing Integers and nil unchanged.
111
+ # MCP clients may send "2" (string) instead of 2 (integer).
112
+ #
113
+ # @param value [String, Integer, nil] The input value
114
+ # @return [Integer, nil]
115
+ def coerce_integer(value)
116
+ value.is_a?(String) ? value.to_i : value
117
+ end
118
+
109
119
  # Apply offset+limit pagination to a single section key within a container hash.
110
120
  # Adds `_total`, `_truncated`, and `_offset` metadata keys when truncating.
111
121
  #
@@ -166,6 +176,7 @@ module CodebaseIndex
166
176
 
167
177
  def define_search_tool(server, reader, respond, renderer)
168
178
  coerce = method(:coerce_array)
179
+ coerce_int = method(:coerce_integer)
169
180
  server.define_tool(
170
181
  name: 'search',
171
182
  description: 'Search code units by pattern. Matches against identifiers by default; can also search source_code and metadata fields.',
@@ -187,6 +198,7 @@ module CodebaseIndex
187
198
  ) do |query:, server_context:, types: nil, fields: nil, limit: nil|
188
199
  types = coerce.call(types)
189
200
  fields = coerce.call(fields)
201
+ limit = coerce_int.call(limit)
190
202
  results = reader.search(
191
203
  query,
192
204
  types: types,
@@ -203,6 +215,7 @@ module CodebaseIndex
203
215
 
204
216
  def define_traversal_tool(server, reader, respond, renderer, name:, description:, reader_method:, render_key:)
205
217
  coerce = method(:coerce_array)
218
+ coerce_int = method(:coerce_integer)
206
219
  server.define_tool(
207
220
  name: name,
208
221
  description: description,
@@ -219,6 +232,7 @@ module CodebaseIndex
219
232
  }
220
233
  ) do |identifier:, server_context:, depth: nil, types: nil|
221
234
  types = coerce.call(types)
235
+ depth = coerce_int.call(depth)
222
236
  result = reader.send(reader_method, identifier, depth: depth || 2, types: types)
223
237
  if result[:found] == false
224
238
  result[:message] =
@@ -249,6 +263,7 @@ module CodebaseIndex
249
263
 
250
264
  def define_graph_analysis_tool(server, reader, respond, renderer)
251
265
  paginate = method(:paginate_section)
266
+ coerce_int = method(:coerce_integer)
252
267
  server.define_tool(
253
268
  name: 'graph_analysis',
254
269
  description: 'Get structural analysis of the dependency graph: orphans, dead ends, hubs, cycles, and bridges.',
@@ -264,6 +279,8 @@ module CodebaseIndex
264
279
  }
265
280
  }
266
281
  ) do |server_context:, analysis: nil, limit: nil, offset: nil|
282
+ limit = coerce_int.call(limit)
283
+ offset = coerce_int.call(offset)
267
284
  data = reader.graph_analysis
268
285
  section = analysis || 'all'
269
286
  effective_offset = offset || 0
@@ -290,6 +307,7 @@ module CodebaseIndex
290
307
 
291
308
  def define_pagerank_tool(server, reader, respond, renderer)
292
309
  coerce = method(:coerce_array)
310
+ coerce_int = method(:coerce_integer)
293
311
  server.define_tool(
294
312
  name: 'pagerank',
295
313
  description: 'Get PageRank importance scores for code units. Higher scores indicate more structurally important nodes.',
@@ -304,6 +322,7 @@ module CodebaseIndex
304
322
  }
305
323
  ) do |server_context:, limit: nil, types: nil|
306
324
  types = coerce.call(types)
325
+ limit = coerce_int.call(limit)
307
326
  scores = reader.dependency_graph.pagerank
308
327
  graph_data = reader.raw_graph_data
309
328
  nodes = graph_data['nodes'] || {}
@@ -329,6 +348,7 @@ module CodebaseIndex
329
348
  end
330
349
 
331
350
  def define_framework_tool(server, reader, respond, renderer)
351
+ coerce_int = method(:coerce_integer)
332
352
  server.define_tool(
333
353
  name: 'framework',
334
354
  description: 'Search Rails framework source units by concept keyword. Matches against identifier, ' \
@@ -342,6 +362,7 @@ module CodebaseIndex
342
362
  required: ['keyword']
343
363
  }
344
364
  ) do |keyword:, server_context:, limit: nil|
365
+ limit = coerce_int.call(limit)
345
366
  results = reader.framework_sources(keyword, limit: limit || 20)
346
367
  respond.call(renderer.render(:framework, {
347
368
  keyword: keyword,
@@ -353,6 +374,7 @@ module CodebaseIndex
353
374
 
354
375
  def define_recent_changes_tool(server, reader, respond, renderer)
355
376
  coerce = method(:coerce_array)
377
+ coerce_int = method(:coerce_integer)
356
378
  server.define_tool(
357
379
  name: 'recent_changes',
358
380
  description: 'List recently modified code units sorted by git last_modified timestamp. ' \
@@ -368,6 +390,7 @@ module CodebaseIndex
368
390
  }
369
391
  ) do |server_context:, limit: nil, types: nil|
370
392
  types = coerce.call(types)
393
+ limit = coerce_int.call(limit)
371
394
  results = reader.recent_changes(limit: limit || 10, types: types)
372
395
  respond.call(renderer.render(:recent_changes, {
373
396
  result_count: results.size,
@@ -395,6 +418,7 @@ module CodebaseIndex
395
418
  end
396
419
 
397
420
  def define_retrieve_tool(server, retriever, respond)
421
+ coerce_int = method(:coerce_integer)
398
422
  server.define_tool(
399
423
  name: 'codebase_retrieve',
400
424
  description: 'Retrieve relevant codebase context for a natural language query using semantic search. ' \
@@ -408,6 +432,7 @@ module CodebaseIndex
408
432
  required: ['query']
409
433
  }
410
434
  ) do |query:, server_context:, budget: nil|
435
+ budget = coerce_int.call(budget)
411
436
  if retriever
412
437
  result = retriever.retrieve(query, budget: budget || 8000)
413
438
  respond.call(result.context)
@@ -423,6 +448,7 @@ module CodebaseIndex
423
448
  def define_trace_flow_tool(server, reader, index_dir, respond, renderer)
424
449
  require_relative '../flow_assembler'
425
450
  require_relative '../dependency_graph'
451
+ coerce_int = method(:coerce_integer)
426
452
 
427
453
  server.define_tool(
428
454
  name: 'trace_flow',
@@ -441,7 +467,7 @@ module CodebaseIndex
441
467
  required: ['entry_point']
442
468
  }
443
469
  ) do |entry_point:, server_context:, depth: nil|
444
- max_depth = depth || 3
470
+ max_depth = coerce_int.call(depth) || 3
445
471
  graph = reader.dependency_graph
446
472
 
447
473
  assembler = CodebaseIndex::FlowAssembler.new(
@@ -457,6 +483,7 @@ module CodebaseIndex
457
483
  end
458
484
 
459
485
  def define_session_trace_tool(server, reader, respond)
486
+ coerce_int = method(:coerce_integer)
460
487
  server.define_tool(
461
488
  name: 'session_trace',
462
489
  description: 'Assemble context from a browser session trace (requires session tracer middleware)',
@@ -469,6 +496,8 @@ module CodebaseIndex
469
496
  required: ['session_id']
470
497
  }
471
498
  ) do |session_id:, server_context:, budget: nil, depth: nil|
499
+ budget = coerce_int.call(budget)
500
+ depth = coerce_int.call(depth)
472
501
  store = CodebaseIndex.configuration.session_store
473
502
  next respond.call(JSON.pretty_generate({ error: 'Session tracer not configured' })) unless store
474
503
 
@@ -651,6 +680,7 @@ module CodebaseIndex
651
680
  end
652
681
 
653
682
  def define_retrieval_rate_tool(server, feedback_store, respond)
683
+ coerce_int = method(:coerce_integer)
654
684
  server.define_tool(
655
685
  name: 'retrieval_rate',
656
686
  description: 'Record a quality rating for a retrieval result (1-5 scale).',
@@ -665,6 +695,7 @@ module CodebaseIndex
665
695
  ) do |query:, score:, server_context:, comment: nil|
666
696
  next respond.call('Feedback store is not configured.') unless feedback_store
667
697
 
698
+ score = coerce_int.call(score)
668
699
  feedback_store.record_rating(query: query, score: score, comment: comment)
669
700
  respond.call(JSON.pretty_generate({ recorded: true, type: 'rating', query: query, score: score }))
670
701
  end
@@ -740,6 +771,7 @@ module CodebaseIndex
740
771
  end
741
772
 
742
773
  def define_list_snapshots_tool(server, snapshot_store, respond)
774
+ coerce_int = method(:coerce_integer)
743
775
  server.define_tool(
744
776
  name: 'list_snapshots',
745
777
  description: 'List temporal snapshots of past extraction runs, optionally filtered by branch.',
@@ -752,6 +784,7 @@ module CodebaseIndex
752
784
  ) do |server_context:, limit: nil, branch: nil|
753
785
  next respond.call('Snapshot store is not configured. Set enable_snapshots: true.') unless snapshot_store
754
786
 
787
+ limit = coerce_int.call(limit)
755
788
  results = snapshot_store.list(limit: limit || 20, branch: branch)
756
789
  respond.call(JSON.pretty_generate({ snapshot_count: results.size, snapshots: results }))
757
790
  end
@@ -783,6 +816,7 @@ module CodebaseIndex
783
816
  end
784
817
 
785
818
  def define_unit_history_tool(server, snapshot_store, respond)
819
+ coerce_int = method(:coerce_integer)
786
820
  server.define_tool(
787
821
  name: 'unit_history',
788
822
  description: 'Show the history of a single unit across extraction snapshots. Tracks when source changed.',
@@ -796,6 +830,7 @@ module CodebaseIndex
796
830
  ) do |identifier:, server_context:, limit: nil|
797
831
  next respond.call('Snapshot store is not configured. Set enable_snapshots: true.') unless snapshot_store
798
832
 
833
+ limit = coerce_int.call(limit)
799
834
  entries = snapshot_store.unit_history(identifier, limit: limit || 20)
800
835
  respond.call(JSON.pretty_generate({
801
836
  identifier: identifier,
@@ -0,0 +1,245 @@
1
+ # frozen_string_literal: true
2
+
3
require 'digest'
require 'fileutils'
require 'json'
require 'time'
6
+
7
module CodebaseIndex
  module Temporal
    # JSON-file-based snapshot store for temporal tracking without SQLite.
    #
    # Stores snapshots as individual JSON files in a `snapshots/` subdirectory
    # of the index output directory. Each file is named by git SHA and contains
    # manifest metadata plus per-unit content hashes.
    #
    # Implements the same public interface as SnapshotStore so the MCP server
    # tools work identically.
    #
    # @example
    #   store = JsonSnapshotStore.new(dir: '/app/tmp/codebase_index')
    #   store.capture(manifest, unit_hashes)
    #   store.list                      # => [{ git_sha: "abc123", ... }]
    #   store.diff("abc123", "def456")  # => { added: [...], modified: [...], deleted: [...] }
    #
    class JsonSnapshotStore # rubocop:disable Metrics/ClassLength
      # Per-unit hash fields; a difference in any of them marks a unit as modified.
      HASH_FIELDS = %i[source_hash metadata_hash dependencies_hash].freeze

      # Creates the `snapshots/` subdirectory under +dir+ if it does not exist.
      #
      # @param dir [String] index output directory
      def initialize(dir:)
        @dir = File.join(dir, 'snapshots')
        FileUtils.mkdir_p(@dir)
      end

      # Write a snapshot file for the manifest's git SHA, overwriting any
      # existing file for that SHA. Change counts are computed against the
      # snapshot with the greatest `extracted_at` already on disk.
      #
      # @param manifest [Hash] extraction manifest; keys may be strings or symbols
      # @param unit_hashes [Array<Hash>] per-unit hashes with `identifier`, `type`,
      #   `source_hash`, `metadata_hash` and `dependencies_hash` (string or symbol keys)
      # @return [Hash, nil] the snapshot summary without `:units`, or nil when the
      #   manifest carries no git SHA
      # @raise [ArgumentError] if the git SHA is not a hexadecimal string
      def capture(manifest, unit_hashes)
        git_sha = mget(manifest, 'git_sha')
        return nil unless git_sha

        previous = find_latest
        snapshot = build_snapshot(manifest, git_sha, unit_hashes)

        if previous
          # build_snapshot already indexed the units; reuse them for the diff.
          changes = compute_diff(previous[:units], snapshot[:units])
          snapshot[:units_added] = changes[:added].size
          snapshot[:units_modified] = changes[:modified].size
          snapshot[:units_deleted] = changes[:deleted].size
        end

        write_snapshot(git_sha, snapshot)
        snapshot.except(:units)
      end

      # List snapshot summaries, newest first (ordered by `extracted_at`).
      # Corrupt snapshot files are skipped with a warning.
      #
      # @param limit [Integer] maximum number of summaries to return
      # @param branch [String, nil] when given, only snapshots of this branch
      # @return [Array<Hash>] summaries without `:units`
      def list(limit: 20, branch: nil)
        summaries = load_all_summaries
        summaries.select! { |summary| summary[:git_branch] == branch } if branch
        summaries.sort_by { |summary| summary[:extracted_at] || '' }.reverse.first(limit)
      end

      # Look up one snapshot summary by git SHA.
      #
      # @param git_sha [String]
      # @return [Hash, nil] the summary without `:units`, or nil if no file exists
      # @raise [ArgumentError] if +git_sha+ is not a hexadecimal string
      # @raise [JSON::ParserError] if the snapshot file is corrupt
      def find(git_sha)
        load_snapshot_with_units(git_sha)&.except(:units)
      end

      # Compare the units of two snapshots.
      #
      # @param sha_a [String] the "before" snapshot
      # @param sha_b [String] the "after" snapshot
      # @return [Hash] `{ added:, modified:, deleted: }` arrays of
      #   `{ identifier:, unit_type: }`; all empty when either snapshot is missing
      # @raise [ArgumentError] if either SHA is not a hexadecimal string
      def diff(sha_a, sha_b)
        snap_a = load_snapshot_with_units(sha_a)
        snap_b = load_snapshot_with_units(sha_b)

        return { added: [], modified: [], deleted: [] } unless snap_a && snap_b

        compute_diff(snap_a[:units], snap_b[:units])
      end

      # History of one unit across snapshots, newest first.
      #
      # +limit+ caps the number of returned entries. It is applied after
      # snapshots lacking the unit have been dropped, so a unit that is absent
      # from the most recent snapshots still reports its older history.
      #
      # @param identifier [String] unit identifier
      # @param limit [Integer] maximum number of history entries
      # @return [Array<Hash>] entries with snapshot info, the unit's hashes and a
      #   `:changed` flag (see #mark_changed_entries)
      def unit_history(identifier, limit: 20)
        entries = load_all_with_units
                  .sort_by { |snap| snap[:extracted_at] || '' }
                  .reverse
                  .filter_map { |snap| history_entry(snap, identifier) }
                  .first(limit)

        mark_changed_entries(entries)
      end

      private

      # Fetch +key+ from a hash that may use string or symbol keys.
      def mget(hash, key)
        hash[key] || hash[key.to_sym]
      end

      # Assemble the full snapshot record (metadata plus indexed units) with
      # change counters initialised to zero.
      def build_snapshot(manifest, git_sha, unit_hashes)
        {
          git_sha: git_sha,
          git_branch: mget(manifest, 'git_branch'),
          extracted_at: mget(manifest, 'extracted_at') || Time.now.iso8601,
          rails_version: mget(manifest, 'rails_version'),
          ruby_version: mget(manifest, 'ruby_version'),
          total_units: mget(manifest, 'total_units') || unit_hashes.size,
          unit_counts: mget(manifest, 'counts') || {},
          gemfile_lock_sha: mget(manifest, 'gemfile_lock_sha'),
          schema_sha: mget(manifest, 'schema_sha'),
          units_added: 0,
          units_modified: 0,
          units_deleted: 0,
          units: index_units(unit_hashes)
        }
      end

      # Convert the list of unit hashes into `{ identifier => hashes }`,
      # dropping entries that have no identifier.
      def index_units(unit_hashes)
        unit_hashes.filter_map do |uh|
          id = mget(uh, 'identifier')
          next if id.nil?

          [id, {
            unit_type: mget(uh, 'type').to_s,
            source_hash: mget(uh, 'source_hash'),
            metadata_hash: mget(uh, 'metadata_hash'),
            dependencies_hash: mget(uh, 'dependencies_hash')
          }]
        end.to_h
      end

      # Classify units as added, modified or deleted going from +units_a+ to +units_b+.
      def compute_diff(units_a, units_b)
        added = []
        modified = []

        units_b.each do |identifier, data_b|
          entry = { identifier: identifier, unit_type: data_b[:unit_type] }
          if !units_a.key?(identifier)
            added << entry
          elsif HASH_FIELDS.any? { |field| units_a[identifier][field] != data_b[field] }
            modified << entry
          end
        end

        deleted = units_a.filter_map do |identifier, data_a|
          { identifier: identifier, unit_type: data_a[:unit_type] } unless units_b.key?(identifier)
        end

        { added: added, modified: modified, deleted: deleted }
      end

      # Build the history row for +identifier+ in one snapshot, or nil when the
      # snapshot does not contain that unit.
      def history_entry(snap, identifier)
        unit = snap[:units]&.[](identifier)
        return nil unless unit

        {
          git_sha: snap[:git_sha],
          extracted_at: snap[:extracted_at],
          git_branch: snap[:git_branch],
          unit_type: unit[:unit_type],
          source_hash: unit[:source_hash],
          metadata_hash: unit[:metadata_hash],
          dependencies_hash: unit[:dependencies_hash]
        }
      end

      # Flag each entry (newest first) whose source hash differs from the next
      # older entry. The oldest entry has nothing to compare against and is
      # always flagged as changed.
      def mark_changed_entries(entries)
        entries.each_with_index do |entry, i|
          older = entries[i + 1]
          entry[:changed] = older.nil? || entry[:source_hash] != older[:source_hash]
        end
        entries
      end

      # Path of the snapshot file for +git_sha+.
      #
      # Only hexadecimal SHAs are accepted, which keeps path separators out of
      # the file name. `to_s` makes nil and other non-String values fail the
      # same check instead of raising NoMethodError.
      #
      # @raise [ArgumentError] if +git_sha+ is not a hexadecimal string
      def snapshot_path(git_sha)
        raise ArgumentError, "Invalid git SHA: #{git_sha}" unless git_sha.to_s.match?(/\A[0-9a-f]+\z/i)

        File.join(@dir, "#{git_sha}.json")
      end

      def write_snapshot(git_sha, data)
        File.write(snapshot_path(git_sha), JSON.pretty_generate(data))
      end

      # Load one snapshot including its units; nil when no file exists for the SHA.
      def load_snapshot_with_units(git_sha)
        path = snapshot_path(git_sha)
        return nil unless File.exist?(path)

        parse_snapshot(path)
      end

      # Parse a snapshot file into a symbol-keyed snapshot hash.
      #
      # @raise [JSON::ParserError] if the file is not valid JSON or its top-level
      #   value is not a JSON object
      def parse_snapshot(path)
        data = JSON.parse(File.read(path))
        raise JSON::ParserError, 'expected a JSON object' unless data.is_a?(Hash)

        symbolize_snapshot(data)
      end

      # All readable snapshots without their `:units` payload.
      def load_all_summaries
        each_readable_snapshot { |snapshot| snapshot.except(:units) }
      end

      # All readable snapshots including their `:units` payload.
      def load_all_with_units
        each_readable_snapshot { |snapshot| snapshot }
      end

      # Parse every `*.json` file in the snapshot directory and collect the
      # block's result for each. Corrupt files are skipped with a warning so a
      # single bad file cannot break listing or history queries.
      def each_readable_snapshot
        Dir.glob(File.join(@dir, '*.json')).filter_map do |path|
          yield parse_snapshot(path)
        rescue JSON::ParserError => e
          warn "[CodebaseIndex] Skipping corrupt snapshot #{File.basename(path)}: #{e.message}"
          nil
        end
      end

      # The snapshot with the greatest `extracted_at`, including units, or nil
      # when the store is empty. Summaries are scanned first so only one
      # snapshot's units are kept in memory.
      def find_latest
        snapshots = load_all_summaries
        return nil if snapshots.empty?

        latest = snapshots.max_by { |s| s[:extracted_at] || '' }
        load_snapshot_with_units(latest[:git_sha])
      end

      # Convert a parsed (string-keyed) snapshot into the symbol-keyed form
      # used throughout this class.
      def symbolize_snapshot(data)
        {
          git_sha: data['git_sha'],
          git_branch: data['git_branch'],
          extracted_at: data['extracted_at'],
          rails_version: data['rails_version'],
          ruby_version: data['ruby_version'],
          total_units: data['total_units'],
          unit_counts: data['unit_counts'] || {},
          gemfile_lock_sha: data['gemfile_lock_sha'],
          schema_sha: data['schema_sha'],
          units_added: data['units_added'],
          units_modified: data['units_modified'],
          units_deleted: data['units_deleted'],
          units: symbolize_units(data['units'])
        }
      end

      # Symbolize the per-unit hashes; a missing or malformed `units` value
      # yields an empty hash.
      def symbolize_units(units)
        return {} unless units.is_a?(Hash)

        units.transform_values do |v|
          {
            unit_type: v['unit_type'],
            source_hash: v['source_hash'],
            metadata_hash: v['metadata_hash'],
            dependencies_hash: v['dependencies_hash']
          }
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CodebaseIndex
4
- VERSION = '0.3.1'
4
+ VERSION = '0.3.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: codebase_index
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leah Armstrong
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-04 00:00:00.000000000 Z
11
+ date: 2026-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mcp
@@ -227,6 +227,7 @@ files:
227
227
  - lib/codebase_index/storage/pgvector.rb
228
228
  - lib/codebase_index/storage/qdrant.rb
229
229
  - lib/codebase_index/storage/vector_store.rb
230
+ - lib/codebase_index/temporal/json_snapshot_store.rb
230
231
  - lib/codebase_index/temporal/snapshot_store.rb
231
232
  - lib/codebase_index/token_utils.rb
232
233
  - lib/codebase_index/version.rb