codebase_index 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/codebase_index.rb +3 -243
- metadata +28 -223
- data/CHANGELOG.md +0 -89
- data/CODE_OF_CONDUCT.md +0 -83
- data/CONTRIBUTING.md +0 -65
- data/LICENSE.txt +0 -21
- data/README.md +0 -325
- data/exe/codebase-console +0 -59
- data/exe/codebase-console-mcp +0 -22
- data/exe/codebase-index-mcp +0 -34
- data/exe/codebase-index-mcp-http +0 -37
- data/exe/codebase-index-mcp-start +0 -58
- data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
- data/lib/codebase_index/ast/method_extractor.rb +0 -71
- data/lib/codebase_index/ast/node.rb +0 -116
- data/lib/codebase_index/ast/parser.rb +0 -614
- data/lib/codebase_index/ast.rb +0 -6
- data/lib/codebase_index/builder.rb +0 -200
- data/lib/codebase_index/cache/cache_middleware.rb +0 -199
- data/lib/codebase_index/cache/cache_store.rb +0 -264
- data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
- data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
- data/lib/codebase_index/chunking/chunk.rb +0 -84
- data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
- data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
- data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
- data/lib/codebase_index/console/audit_logger.rb +0 -75
- data/lib/codebase_index/console/bridge.rb +0 -177
- data/lib/codebase_index/console/confirmation.rb +0 -90
- data/lib/codebase_index/console/connection_manager.rb +0 -173
- data/lib/codebase_index/console/console_response_renderer.rb +0 -74
- data/lib/codebase_index/console/embedded_executor.rb +0 -373
- data/lib/codebase_index/console/model_validator.rb +0 -81
- data/lib/codebase_index/console/rack_middleware.rb +0 -87
- data/lib/codebase_index/console/safe_context.rb +0 -82
- data/lib/codebase_index/console/server.rb +0 -612
- data/lib/codebase_index/console/sql_validator.rb +0 -172
- data/lib/codebase_index/console/tools/tier1.rb +0 -118
- data/lib/codebase_index/console/tools/tier2.rb +0 -117
- data/lib/codebase_index/console/tools/tier3.rb +0 -110
- data/lib/codebase_index/console/tools/tier4.rb +0 -79
- data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
- data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
- data/lib/codebase_index/cost_model/estimator.rb +0 -128
- data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
- data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
- data/lib/codebase_index/cost_model.rb +0 -22
- data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
- data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
- data/lib/codebase_index/db/migrator.rb +0 -71
- data/lib/codebase_index/db/schema_version.rb +0 -73
- data/lib/codebase_index/dependency_graph.rb +0 -236
- data/lib/codebase_index/embedding/indexer.rb +0 -140
- data/lib/codebase_index/embedding/openai.rb +0 -126
- data/lib/codebase_index/embedding/provider.rb +0 -162
- data/lib/codebase_index/embedding/text_preparer.rb +0 -112
- data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
- data/lib/codebase_index/evaluation/evaluator.rb +0 -139
- data/lib/codebase_index/evaluation/metrics.rb +0 -79
- data/lib/codebase_index/evaluation/query_set.rb +0 -148
- data/lib/codebase_index/evaluation/report_generator.rb +0 -90
- data/lib/codebase_index/extracted_unit.rb +0 -145
- data/lib/codebase_index/extractor.rb +0 -1028
- data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
- data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
- data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
- data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
- data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
- data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
- data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
- data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
- data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
- data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
- data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
- data/lib/codebase_index/extractors/event_extractor.rb +0 -211
- data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
- data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
- data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
- data/lib/codebase_index/extractors/job_extractor.rb +0 -374
- data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
- data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
- data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
- data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
- data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
- data/lib/codebase_index/extractors/model_extractor.rb +0 -988
- data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
- data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
- data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
- data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
- data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
- data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
- data/lib/codebase_index/extractors/route_extractor.rb +0 -181
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
- data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
- data/lib/codebase_index/extractors/service_extractor.rb +0 -217
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
- data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
- data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
- data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
- data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
- data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
- data/lib/codebase_index/feedback/gap_detector.rb +0 -89
- data/lib/codebase_index/feedback/store.rb +0 -119
- data/lib/codebase_index/filename_utils.rb +0 -32
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
- data/lib/codebase_index/flow_assembler.rb +0 -290
- data/lib/codebase_index/flow_document.rb +0 -191
- data/lib/codebase_index/flow_precomputer.rb +0 -102
- data/lib/codebase_index/formatting/base.rb +0 -30
- data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
- data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
- data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
- data/lib/codebase_index/formatting/human_adapter.rb +0 -78
- data/lib/codebase_index/graph_analyzer.rb +0 -374
- data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
- data/lib/codebase_index/mcp/index_reader.rb +0 -394
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
- data/lib/codebase_index/mcp/server.rb +0 -961
- data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
- data/lib/codebase_index/model_name_cache.rb +0 -51
- data/lib/codebase_index/notion/client.rb +0 -217
- data/lib/codebase_index/notion/exporter.rb +0 -219
- data/lib/codebase_index/notion/mapper.rb +0 -40
- data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
- data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
- data/lib/codebase_index/notion/mappers/shared.rb +0 -22
- data/lib/codebase_index/notion/rate_limiter.rb +0 -68
- data/lib/codebase_index/observability/health_check.rb +0 -79
- data/lib/codebase_index/observability/instrumentation.rb +0 -34
- data/lib/codebase_index/observability/structured_logger.rb +0 -57
- data/lib/codebase_index/operator/error_escalator.rb +0 -81
- data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
- data/lib/codebase_index/operator/status_reporter.rb +0 -80
- data/lib/codebase_index/railtie.rb +0 -38
- data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
- data/lib/codebase_index/resilience/index_validator.rb +0 -167
- data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
- data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
- data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
- data/lib/codebase_index/retrieval/ranker.rb +0 -277
- data/lib/codebase_index/retrieval/search_executor.rb +0 -316
- data/lib/codebase_index/retriever.rb +0 -152
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
- data/lib/codebase_index/ruby_analyzer.rb +0 -87
- data/lib/codebase_index/session_tracer/file_store.rb +0 -104
- data/lib/codebase_index/session_tracer/middleware.rb +0 -143
- data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
- data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
- data/lib/codebase_index/session_tracer/store.rb +0 -81
- data/lib/codebase_index/storage/graph_store.rb +0 -120
- data/lib/codebase_index/storage/metadata_store.rb +0 -196
- data/lib/codebase_index/storage/pgvector.rb +0 -195
- data/lib/codebase_index/storage/qdrant.rb +0 -205
- data/lib/codebase_index/storage/vector_store.rb +0 -167
- data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
- data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
- data/lib/codebase_index/token_utils.rb +0 -19
- data/lib/codebase_index/version.rb +0 -5
- data/lib/generators/codebase_index/install_generator.rb +0 -32
- data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
- data/lib/tasks/codebase_index.rake +0 -597
- data/lib/tasks/codebase_index_evaluation.rake +0 -115
|
@@ -1,345 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'json'
|
|
4
|
-
require 'time'
|
|
5
|
-
|
|
6
|
-
module CodebaseIndex
  module Temporal
    # SnapshotStore captures and queries temporal snapshots of extraction runs.
    #
    # Each snapshot is anchored to a git commit SHA and stores per-unit content
    # hashes for efficient diff computation. Full source is not duplicated —
    # only hashes of source, metadata, and dependencies are stored per snapshot.
    #
    # Data lives in two tables: +codebase_snapshots+ (one row per git SHA) and
    # +codebase_snapshot_units+ (one row per unit per snapshot). Result rows are
    # read by column name, so the connection must return hash-like rows with
    # string keys.
    #
    # @example Capturing a snapshot
    #   store = SnapshotStore.new(connection: db)
    #   store.capture(manifest, unit_hashes)
    #
    # @example Comparing snapshots
    #   diff = store.diff("abc123", "def456")
    #   diff[:added]    # => [{ identifier: "NewModel", ... }]
    #   diff[:modified] # => [{ identifier: "User", ... }]
    #   diff[:deleted]  # => [{ identifier: "OldService", ... }]
    #
    class SnapshotStore # rubocop:disable Metrics/ClassLength
      # Per-unit hash columns that decide whether a unit counts as modified.
      HASH_FIELDS = %i[source_hash metadata_hash dependencies_hash].freeze

      # @param connection [Object] Database connection responding to #execute,
      #   #get_first_row, #get_first_value and #transaction
      def initialize(connection:)
        @db = connection
      end

      # Capture a snapshot after extraction completes.
      #
      # Stores the manifest metadata and per-unit content hashes, then records
      # diff stats against the most recent snapshot of a *different* git SHA.
      # All writes happen inside a single transaction so a failure part-way
      # through does not leave a snapshot row without its units.
      #
      # @param manifest [Hash] The manifest data (string or symbol keys)
      # @param unit_hashes [Array<Hash>] Per-unit content hashes
      # @return [Hash, nil] Snapshot record with diff stats, or nil when the
      #   manifest carries no git_sha
      def capture(manifest, unit_hashes)
        git_sha = mget(manifest, 'git_sha')
        return nil unless git_sha

        # The baseline must not be the snapshot being (re)captured; otherwise a
        # re-run on the latest commit would be diffed against itself and its
        # stats overwritten with zeros.
        previous = find_latest(excluding: git_sha)

        @db.transaction do
          # INSERT OR REPLACE removes a conflicting row before inserting, so the
          # snapshot may come back under a new id. Clear units attached to the
          # pre-existing row first so they are not left orphaned.
          stale_id = fetch_snapshot_id(git_sha)
          delete_snapshot_units(stale_id) if stale_id

          upsert_snapshot(manifest, git_sha, unit_hashes.size)

          snapshot_id = fetch_snapshot_id(git_sha)
          delete_snapshot_units(snapshot_id)
          insert_unit_hashes(snapshot_id, unit_hashes)

          update_diff_stats(snapshot_id, previous)
        end

        find(git_sha)
      end

      # List snapshots, optionally filtered by branch.
      #
      # @param limit [Integer] Max results (default 20)
      # @param branch [String, nil] Filter by branch name
      # @return [Array<Hash>] Snapshot summaries sorted by extracted_at descending
      def list(limit: 20, branch: nil)
        sql, params =
          if branch
            ['SELECT * FROM codebase_snapshots WHERE git_branch = ? ORDER BY extracted_at DESC LIMIT ?',
             [branch, limit]]
          else
            ['SELECT * FROM codebase_snapshots ORDER BY extracted_at DESC LIMIT ?', [limit]]
          end

        @db.execute(sql, params).map { |row| row_to_hash(row) }
      end

      # Find a specific snapshot by git SHA.
      #
      # @param git_sha [String]
      # @return [Hash, nil] Snapshot metadata or nil if not found
      def find(git_sha)
        row = @db.get_first_row('SELECT * FROM codebase_snapshots WHERE git_sha = ?', [git_sha])
        return nil unless row

        row_to_hash(row)
      end

      # Compute diff between two snapshots.
      #
      # When either SHA has no snapshot, an empty diff is returned rather than
      # an error.
      #
      # @param sha_a [String] Before snapshot git SHA
      # @param sha_b [String] After snapshot git SHA
      # @return [Hash] {added: [...], modified: [...], deleted: [...]}
      def diff(sha_a, sha_b)
        id_a = fetch_snapshot_id(sha_a)
        id_b = fetch_snapshot_id(sha_b)

        return { added: [], modified: [], deleted: [] } unless id_a && id_b

        compute_diff(load_snapshot_units(id_a), load_snapshot_units(id_b))
      end

      # History of a single unit across snapshots, newest first.
      #
      # The +changed+ flag is derived from source_hash only; a change limited to
      # metadata or dependencies does not set it.
      #
      # @param identifier [String] Unit identifier
      # @param limit [Integer] Max snapshots to return (default 20)
      # @return [Array<Hash>] Entries with git_sha, extracted_at, source_hash, changed flag
      def unit_history(identifier, limit: 20)
        rows = @db.execute(<<~SQL, [identifier, limit])
          SELECT su.source_hash, su.metadata_hash, su.dependencies_hash, su.unit_type,
                 s.git_sha, s.extracted_at, s.git_branch
          FROM codebase_snapshot_units su
          JOIN codebase_snapshots s ON s.id = su.snapshot_id
          WHERE su.identifier = ?
          ORDER BY s.extracted_at DESC
          LIMIT ?
        SQL

        mark_changed_entries(rows.map { |row| history_entry_from_row(row) })
      end

      private

      # Build a history entry hash from a database row.
      #
      # @param row [Hash]
      # @return [Hash]
      def history_entry_from_row(row)
        {
          git_sha: row['git_sha'],
          extracted_at: row['extracted_at'],
          git_branch: row['git_branch'],
          unit_type: row['unit_type'],
          source_hash: row['source_hash'],
          metadata_hash: row['metadata_hash'],
          dependencies_hash: row['dependencies_hash']
        }
      end

      # Set the :changed flag on each entry by comparing its source hash with
      # the next (older) entry. Entries are expected newest-first; the last one
      # has nothing older to compare with and is always flagged as changed.
      #
      # @param entries [Array<Hash>]
      # @return [Array<Hash>] The same array, mutated in place
      def mark_changed_entries(entries)
        entries.each_with_index do |entry, index|
          older = entries[index + 1]
          entry[:changed] = older.nil? || entry[:source_hash] != older[:source_hash]
        end
        entries
      end

      # Get a value from a hash that may have string or symbol keys.
      #
      # The string key wins unless its value is nil or false, in which case the
      # symbol key is consulted.
      #
      # @param hash [Hash]
      # @param key [String]
      # @return [Object, nil]
      def mget(hash, key)
        hash[key] || hash[key.to_sym]
      end

      # Insert or replace the snapshot row from manifest data.
      #
      # @param manifest [Hash]
      # @param git_sha [String]
      # @param default_total [Integer] Used when the manifest has no total_units
      # @return [void]
      def upsert_snapshot(manifest, git_sha, default_total)
        params = [
          git_sha,
          mget(manifest, 'git_branch'),
          mget(manifest, 'extracted_at') || Time.now.iso8601,
          mget(manifest, 'rails_version'),
          mget(manifest, 'ruby_version'),
          mget(manifest, 'total_units') || default_total,
          JSON.generate(mget(manifest, 'counts') || {}),
          mget(manifest, 'gemfile_lock_sha'),
          mget(manifest, 'schema_sha')
        ]
        @db.execute(<<~SQL, params)
          INSERT OR REPLACE INTO codebase_snapshots
            (git_sha, git_branch, extracted_at, rails_version, ruby_version,
             total_units, unit_counts, gemfile_lock_sha, schema_sha)
          VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        SQL
      end

      # Update a snapshot's diff stats vs. a previous snapshot.
      #
      # @param snapshot_id [Integer]
      # @param previous [Hash, nil]
      # @return [void]
      def update_diff_stats(snapshot_id, previous)
        stats = compute_diff_stats(snapshot_id, previous)
        @db.execute(
          'UPDATE codebase_snapshots SET units_added = ?, units_modified = ?, units_deleted = ? WHERE id = ?',
          [stats[:added], stats[:modified], stats[:deleted], snapshot_id]
        )
      end

      # Find the most recent snapshot by extracted_at.
      #
      # @param excluding [String, nil] Git SHA to leave out of the search
      # @return [Hash, nil]
      def find_latest(excluding: nil)
        row =
          if excluding
            @db.get_first_row(
              'SELECT * FROM codebase_snapshots WHERE git_sha != ? ORDER BY extracted_at DESC LIMIT 1',
              [excluding]
            )
          else
            @db.get_first_row('SELECT * FROM codebase_snapshots ORDER BY extracted_at DESC LIMIT 1')
          end
        return nil unless row

        row_to_hash(row)
      end

      # Fetch a snapshot's ID by git SHA.
      #
      # @param git_sha [String]
      # @return [Integer, nil]
      def fetch_snapshot_id(git_sha)
        @db.get_first_value('SELECT id FROM codebase_snapshots WHERE git_sha = ?', [git_sha])
      end

      # Remove every per-unit record belonging to a snapshot.
      #
      # @param snapshot_id [Integer]
      # @return [void]
      def delete_snapshot_units(snapshot_id)
        @db.execute('DELETE FROM codebase_snapshot_units WHERE snapshot_id = ?', [snapshot_id])
      end

      # Insert per-unit hash records for a snapshot.
      #
      # Does not open its own transaction: #capture wraps the whole write path
      # in one, which also batches these inserts into a single commit.
      #
      # @param snapshot_id [Integer]
      # @param unit_hashes [Array<Hash>] String- or symbol-keyed hashes with
      #   identifier, type, source_hash, metadata_hash and dependencies_hash
      # @return [void]
      def insert_unit_hashes(snapshot_id, unit_hashes)
        sql = <<~SQL
          INSERT INTO codebase_snapshot_units
            (snapshot_id, identifier, unit_type, source_hash, metadata_hash, dependencies_hash)
          VALUES (?, ?, ?, ?, ?, ?)
        SQL

        unit_hashes.each do |uh|
          @db.execute(sql, [
                        snapshot_id,
                        mget(uh, 'identifier'),
                        mget(uh, 'type').to_s,
                        mget(uh, 'source_hash'),
                        mget(uh, 'metadata_hash'),
                        mget(uh, 'dependencies_hash')
                      ])
        end
      end

      # Load all unit records for a snapshot as a hash keyed by identifier.
      #
      # @param snapshot_id [Integer]
      # @return [Hash{String => Hash}]
      def load_snapshot_units(snapshot_id)
        sql = <<~SQL
          SELECT identifier, unit_type, source_hash, metadata_hash, dependencies_hash
          FROM codebase_snapshot_units WHERE snapshot_id = ?
        SQL

        @db.execute(sql, [snapshot_id]).to_h do |row|
          [row['identifier'], {
            unit_type: row['unit_type'],
            source_hash: row['source_hash'],
            metadata_hash: row['metadata_hash'],
            dependencies_hash: row['dependencies_hash']
          }]
        end
      end

      # Compute diff between two sets of unit hashes.
      #
      # A unit present on both sides is "modified" when any of its source,
      # metadata or dependencies hashes differ.
      #
      # @param units_a [Hash{String => Hash}] Before
      # @param units_b [Hash{String => Hash}] After
      # @return [Hash] {added: [...], modified: [...], deleted: [...]}
      def compute_diff(units_a, units_b)
        added = []
        modified = []

        units_b.each do |identifier, data_b|
          entry = { identifier: identifier, unit_type: data_b[:unit_type] }
          if !units_a.key?(identifier)
            added << entry
          elsif hashes_differ?(units_a[identifier], data_b)
            modified << entry
          end
        end

        deleted = units_a.reject { |identifier, _| units_b.key?(identifier) }
                         .map { |identifier, data_a| { identifier: identifier, unit_type: data_a[:unit_type] } }

        { added: added, modified: modified, deleted: deleted }
      end

      # Whether two unit records differ in any tracked hash column.
      #
      # @param data_a [Hash]
      # @param data_b [Hash]
      # @return [Boolean]
      def hashes_differ?(data_a, data_b)
        HASH_FIELDS.any? { |field| data_a[field] != data_b[field] }
      end

      # Compute aggregate diff stats.
      #
      # @param current_snapshot_id [Integer]
      # @param previous_snapshot [Hash, nil]
      # @return [Hash] {added:, modified:, deleted:} as counts; all zero when
      #   there is no previous snapshot to compare with
      def compute_diff_stats(current_snapshot_id, previous_snapshot)
        return { added: 0, modified: 0, deleted: 0 } unless previous_snapshot

        prev_id = fetch_snapshot_id(previous_snapshot[:git_sha])
        return { added: 0, modified: 0, deleted: 0 } unless prev_id

        result = compute_diff(load_snapshot_units(prev_id), load_snapshot_units(current_snapshot_id))
        { added: result[:added].size, modified: result[:modified].size, deleted: result[:deleted].size }
      end

      # Convert a database row to a normalized, symbol-keyed hash.
      # The unit_counts column is stored as JSON text and parsed here.
      #
      # @param row [Hash] Result row keyed by column name
      # @return [Hash]
      def row_to_hash(row)
        {
          id: row['id'],
          git_sha: row['git_sha'],
          git_branch: row['git_branch'],
          extracted_at: row['extracted_at'],
          rails_version: row['rails_version'],
          ruby_version: row['ruby_version'],
          total_units: row['total_units'],
          unit_counts: row['unit_counts'] ? JSON.parse(row['unit_counts']) : {},
          gemfile_lock_sha: row['gemfile_lock_sha'],
          schema_sha: row['schema_sha'],
          units_added: row['units_added'],
          units_modified: row['units_modified'],
          units_deleted: row['units_deleted']
        }
      end
    end
  end
end
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module CodebaseIndex
  # Shared token estimation utility.
  #
  # Uses project convention: (string.length / 4.0).ceil
  # See docs/TOKEN_BENCHMARK.md — conservative floor (~10.6% overestimate).
  module TokenUtils
    module_function

    # Estimate token count for a string.
    #
    # The estimate is one token per four characters, rounded up. The input is
    # coerced with #to_s, so nil yields 0 instead of raising NoMethodError.
    #
    # @param text [String, nil] Text to estimate
    # @return [Integer] Estimated token count
    def estimate_tokens(text)
      (text.to_s.length / 4.0).ceil
    end
  end
end
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rails/generators'
|
|
4
|
-
require 'rails/generators/active_record'
|
|
5
|
-
|
|
6
|
-
module CodebaseIndex
  module Generators
    # Rails generator that writes the CodebaseIndex table migration.
    #
    # Usage:
    #   rails generate codebase_index:install
    #
    # Renders the bundled create_codebase_index_tables.rb.erb template into
    # db/migrate. That template declares the codebase_units, codebase_edges and
    # codebase_embeddings tables.
    #
    class InstallGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      # Templates are looked up in the sibling "templates" directory.
      source_root File.expand_path('templates', __dir__)

      desc 'Creates a migration for CodebaseIndex tables (units, edges, embeddings)'

      # Generator step: render the table-creation migration.
      #
      # @return [void]
      def create_migration_file
        template = 'create_codebase_index_tables.rb.erb'
        destination = 'db/migrate/create_codebase_index_tables.rb'
        migration_template(template, destination)
      end
    end
  end
end
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rails/generators'
|
|
4
|
-
require 'rails/generators/active_record'
|
|
5
|
-
|
|
6
|
-
module CodebaseIndex
  module Generators
    # Rails generator that writes the pgvector migration for CodebaseIndex.
    #
    # The rendered migration enables the PostgreSQL "vector" extension and adds
    # a native vector column plus an HNSW index to codebase_embeddings.
    #
    # Usage:
    #   rails generate codebase_index:pgvector
    #   rails generate codebase_index:pgvector --dimensions 3072
    #
    # NOTE(review): pgvector documents a 2,000-dimension ceiling for HNSW
    # indexes on `vector` columns — confirm the generated migration copes with
    # the 3072 case advertised above.
    #
    class PgvectorGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      # Templates are looked up in the sibling "templates" directory.
      source_root File.expand_path('templates', __dir__)

      desc 'Adds pgvector native vector column and HNSW index to codebase_embeddings'

      class_option :dimensions,
                   type: :numeric,
                   default: 1536,
                   desc: 'Vector dimensions (1536 for text-embedding-3-small, 3072 for large)'

      # Generator step: expose the requested dimension count to the template
      # as @dimensions, then render the migration.
      #
      # @return [void]
      def create_migration_file
        @dimensions = options[:dimensions]

        template = 'add_pgvector_to_codebase_index.rb.erb'
        destination = 'db/migrate/add_pgvector_to_codebase_index.rb'
        migration_template(template, destination)
      end
    end
  end
end
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
class AddPgvectorToCodebaseIndex < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
  # Adds a native pgvector column (and, where supported, an HNSW index) to
  # codebase_embeddings.
  def change
    enable_extension 'vector' unless extension_enabled?('vector')

    add_column :codebase_embeddings, :embedding_vector, :vector,
               limit: <%= @dimensions || 1536 %>, null: true

<% if (@dimensions || 1536) <= 2000 -%>
    # HNSW index for fast approximate nearest neighbor search
    # Using cosine distance operator (vector_cosine_ops)
    add_index :codebase_embeddings, :embedding_vector,
              using: :hnsw,
              opclass: :vector_cosine_ops,
              name: 'idx_codebase_embeddings_vector_hnsw'
<% else -%>
    # No HNSW index is created: pgvector cannot build an HNSW index on a
    # `vector` column with more than 2,000 dimensions. Queries against
    # embedding_vector will use exact (sequential) search unless another
    # indexing strategy is added.
<% end -%>
  end
end
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
class CreateCodebaseIndexTables < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
  # Creates the three CodebaseIndex tables: units, edges between units, and
  # per-unit embeddings.
  def change
    # One row per extracted unit; identifier is unique across all units.
    create_table :codebase_units do |t|
      t.string :unit_type, null: false
      t.string :identifier, null: false
      t.string :namespace
      t.string :file_path, null: false
      t.text :source_code
      t.string :source_hash
      t.json :metadata

      t.timestamps
    end

    add_index :codebase_units, :unit_type
    add_index :codebase_units, :identifier, unique: true
    add_index :codebase_units, :file_path

    # Directed relationship between two units.
    create_table :codebase_edges do |t|
      t.references :source, null: false, foreign_key: { to_table: :codebase_units }
      t.references :target, null: false, foreign_key: { to_table: :codebase_units }
      t.string :relationship, null: false
      t.string :via

      t.datetime :created_at, null: false
    end

    # At most one edge per (source, target, relationship) triple.
    add_index :codebase_edges, [:source_id, :target_id, :relationship], unique: true,
              name: 'idx_codebase_edges_unique'

    # Embedding stored as text, alongside its content hash and dimension count.
    create_table :codebase_embeddings do |t|
      t.references :unit, null: false, foreign_key: { to_table: :codebase_units }
      t.string :chunk_type
      t.text :embedding, null: false
      t.string :content_hash, null: false
      t.integer :dimensions, null: false

      t.datetime :created_at, null: false
    end

    add_index :codebase_embeddings, :content_hash
  end
end
|