codebase_index 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. checksums.yaml +4 -4
  2. data/lib/codebase_index.rb +3 -243
  3. metadata +28 -223
  4. data/CHANGELOG.md +0 -89
  5. data/CODE_OF_CONDUCT.md +0 -83
  6. data/CONTRIBUTING.md +0 -65
  7. data/LICENSE.txt +0 -21
  8. data/README.md +0 -325
  9. data/exe/codebase-console +0 -59
  10. data/exe/codebase-console-mcp +0 -22
  11. data/exe/codebase-index-mcp +0 -34
  12. data/exe/codebase-index-mcp-http +0 -37
  13. data/exe/codebase-index-mcp-start +0 -58
  14. data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
  15. data/lib/codebase_index/ast/method_extractor.rb +0 -71
  16. data/lib/codebase_index/ast/node.rb +0 -116
  17. data/lib/codebase_index/ast/parser.rb +0 -614
  18. data/lib/codebase_index/ast.rb +0 -6
  19. data/lib/codebase_index/builder.rb +0 -200
  20. data/lib/codebase_index/cache/cache_middleware.rb +0 -199
  21. data/lib/codebase_index/cache/cache_store.rb +0 -264
  22. data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
  23. data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
  24. data/lib/codebase_index/chunking/chunk.rb +0 -84
  25. data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
  26. data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
  27. data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
  28. data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
  29. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
  30. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
  31. data/lib/codebase_index/console/audit_logger.rb +0 -75
  32. data/lib/codebase_index/console/bridge.rb +0 -177
  33. data/lib/codebase_index/console/confirmation.rb +0 -90
  34. data/lib/codebase_index/console/connection_manager.rb +0 -173
  35. data/lib/codebase_index/console/console_response_renderer.rb +0 -74
  36. data/lib/codebase_index/console/embedded_executor.rb +0 -373
  37. data/lib/codebase_index/console/model_validator.rb +0 -81
  38. data/lib/codebase_index/console/rack_middleware.rb +0 -87
  39. data/lib/codebase_index/console/safe_context.rb +0 -82
  40. data/lib/codebase_index/console/server.rb +0 -612
  41. data/lib/codebase_index/console/sql_validator.rb +0 -172
  42. data/lib/codebase_index/console/tools/tier1.rb +0 -118
  43. data/lib/codebase_index/console/tools/tier2.rb +0 -117
  44. data/lib/codebase_index/console/tools/tier3.rb +0 -110
  45. data/lib/codebase_index/console/tools/tier4.rb +0 -79
  46. data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
  47. data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
  48. data/lib/codebase_index/cost_model/estimator.rb +0 -128
  49. data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
  50. data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
  51. data/lib/codebase_index/cost_model.rb +0 -22
  52. data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
  53. data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
  54. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
  55. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
  56. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
  57. data/lib/codebase_index/db/migrator.rb +0 -71
  58. data/lib/codebase_index/db/schema_version.rb +0 -73
  59. data/lib/codebase_index/dependency_graph.rb +0 -236
  60. data/lib/codebase_index/embedding/indexer.rb +0 -140
  61. data/lib/codebase_index/embedding/openai.rb +0 -126
  62. data/lib/codebase_index/embedding/provider.rb +0 -162
  63. data/lib/codebase_index/embedding/text_preparer.rb +0 -112
  64. data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
  65. data/lib/codebase_index/evaluation/evaluator.rb +0 -139
  66. data/lib/codebase_index/evaluation/metrics.rb +0 -79
  67. data/lib/codebase_index/evaluation/query_set.rb +0 -148
  68. data/lib/codebase_index/evaluation/report_generator.rb +0 -90
  69. data/lib/codebase_index/extracted_unit.rb +0 -145
  70. data/lib/codebase_index/extractor.rb +0 -1028
  71. data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
  72. data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
  73. data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
  74. data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
  75. data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
  76. data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
  77. data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
  78. data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
  79. data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
  80. data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
  81. data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
  82. data/lib/codebase_index/extractors/event_extractor.rb +0 -211
  83. data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
  84. data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
  85. data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
  86. data/lib/codebase_index/extractors/job_extractor.rb +0 -374
  87. data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
  88. data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
  89. data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
  90. data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
  91. data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
  92. data/lib/codebase_index/extractors/model_extractor.rb +0 -988
  93. data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
  94. data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
  95. data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
  96. data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
  97. data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
  98. data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
  99. data/lib/codebase_index/extractors/route_extractor.rb +0 -181
  100. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
  101. data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
  102. data/lib/codebase_index/extractors/service_extractor.rb +0 -217
  103. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
  104. data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
  105. data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
  106. data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
  107. data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
  108. data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
  109. data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
  110. data/lib/codebase_index/feedback/gap_detector.rb +0 -89
  111. data/lib/codebase_index/feedback/store.rb +0 -119
  112. data/lib/codebase_index/filename_utils.rb +0 -32
  113. data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
  114. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
  115. data/lib/codebase_index/flow_assembler.rb +0 -290
  116. data/lib/codebase_index/flow_document.rb +0 -191
  117. data/lib/codebase_index/flow_precomputer.rb +0 -102
  118. data/lib/codebase_index/formatting/base.rb +0 -30
  119. data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
  120. data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
  121. data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
  122. data/lib/codebase_index/formatting/human_adapter.rb +0 -78
  123. data/lib/codebase_index/graph_analyzer.rb +0 -374
  124. data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
  125. data/lib/codebase_index/mcp/index_reader.rb +0 -394
  126. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
  127. data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
  128. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
  129. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
  130. data/lib/codebase_index/mcp/server.rb +0 -961
  131. data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
  132. data/lib/codebase_index/model_name_cache.rb +0 -51
  133. data/lib/codebase_index/notion/client.rb +0 -217
  134. data/lib/codebase_index/notion/exporter.rb +0 -219
  135. data/lib/codebase_index/notion/mapper.rb +0 -40
  136. data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
  137. data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
  138. data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
  139. data/lib/codebase_index/notion/mappers/shared.rb +0 -22
  140. data/lib/codebase_index/notion/rate_limiter.rb +0 -68
  141. data/lib/codebase_index/observability/health_check.rb +0 -79
  142. data/lib/codebase_index/observability/instrumentation.rb +0 -34
  143. data/lib/codebase_index/observability/structured_logger.rb +0 -57
  144. data/lib/codebase_index/operator/error_escalator.rb +0 -81
  145. data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
  146. data/lib/codebase_index/operator/status_reporter.rb +0 -80
  147. data/lib/codebase_index/railtie.rb +0 -38
  148. data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
  149. data/lib/codebase_index/resilience/index_validator.rb +0 -167
  150. data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
  151. data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
  152. data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
  153. data/lib/codebase_index/retrieval/ranker.rb +0 -277
  154. data/lib/codebase_index/retrieval/search_executor.rb +0 -316
  155. data/lib/codebase_index/retriever.rb +0 -152
  156. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
  157. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
  158. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
  159. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
  160. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
  161. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
  162. data/lib/codebase_index/ruby_analyzer.rb +0 -87
  163. data/lib/codebase_index/session_tracer/file_store.rb +0 -104
  164. data/lib/codebase_index/session_tracer/middleware.rb +0 -143
  165. data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
  166. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
  167. data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
  168. data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
  169. data/lib/codebase_index/session_tracer/store.rb +0 -81
  170. data/lib/codebase_index/storage/graph_store.rb +0 -120
  171. data/lib/codebase_index/storage/metadata_store.rb +0 -196
  172. data/lib/codebase_index/storage/pgvector.rb +0 -195
  173. data/lib/codebase_index/storage/qdrant.rb +0 -205
  174. data/lib/codebase_index/storage/vector_store.rb +0 -167
  175. data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
  176. data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
  177. data/lib/codebase_index/token_utils.rb +0 -19
  178. data/lib/codebase_index/version.rb +0 -5
  179. data/lib/generators/codebase_index/install_generator.rb +0 -32
  180. data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
  181. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
  182. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
  183. data/lib/tasks/codebase_index.rake +0 -597
  184. data/lib/tasks/codebase_index_evaluation.rake +0 -115
@@ -1,345 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'json'
4
- require 'time'
5
-
6
- module CodebaseIndex
7
- module Temporal
8
- # SnapshotStore captures and queries temporal snapshots of extraction runs.
9
- #
10
- # Each snapshot is anchored to a git commit SHA and stores per-unit content
11
- # hashes for efficient diff computation. Full source is not duplicated —
12
- # only hashes of source, metadata, and dependencies are stored per snapshot.
13
- #
14
- # @example Capturing a snapshot
15
- # store = SnapshotStore.new(connection: db)
16
- # store.capture(manifest, unit_hashes)
17
- #
18
- # @example Comparing snapshots
19
- # diff = store.diff("abc123", "def456")
20
- # diff[:added] # => [{ identifier: "NewModel", ... }]
21
- # diff[:modified] # => [{ identifier: "User", ... }]
22
- # diff[:deleted] # => [{ identifier: "OldService", ... }]
23
- #
24
- class SnapshotStore # rubocop:disable Metrics/ClassLength
25
- # @param connection [Object] Database connection supporting #execute and #get_first_row
26
- def initialize(connection:)
27
- @db = connection
28
- end
29
-
30
- # Capture a snapshot after extraction completes.
31
- #
32
- # Stores the manifest metadata and per-unit content hashes.
33
- # Computes diff stats vs. the most recent previous snapshot.
34
- #
35
- # @param manifest [Hash] The manifest data (string or symbol keys)
36
- # @param unit_hashes [Array<Hash>] Per-unit content hashes
37
- # @return [Hash] Snapshot record with diff stats
38
- def capture(manifest, unit_hashes)
39
- git_sha = mget(manifest, 'git_sha')
40
- return nil unless git_sha
41
-
42
- previous = find_latest
43
- upsert_snapshot(manifest, git_sha, unit_hashes.size)
44
-
45
- snapshot_id = fetch_snapshot_id(git_sha)
46
- @db.execute('DELETE FROM codebase_snapshot_units WHERE snapshot_id = ?', [snapshot_id])
47
- insert_unit_hashes(snapshot_id, unit_hashes)
48
-
49
- update_diff_stats(snapshot_id, previous)
50
- find(git_sha)
51
- end
52
-
53
- # List snapshots, optionally filtered by branch.
54
- #
55
- # @param limit [Integer] Max results (default 20)
56
- # @param branch [String, nil] Filter by branch name
57
- # @return [Array<Hash>] Snapshot summaries sorted by extracted_at descending
58
- def list(limit: 20, branch: nil)
59
- rows = if branch
60
- @db.execute(
61
- 'SELECT * FROM codebase_snapshots WHERE git_branch = ? ORDER BY extracted_at DESC LIMIT ?',
62
- [branch, limit]
63
- )
64
- else
65
- @db.execute(
66
- 'SELECT * FROM codebase_snapshots ORDER BY extracted_at DESC LIMIT ?',
67
- [limit]
68
- )
69
- end
70
-
71
- rows.map { |row| row_to_hash(row) }
72
- end
73
-
74
- # Find a specific snapshot by git SHA.
75
- #
76
- # @param git_sha [String]
77
- # @return [Hash, nil] Snapshot metadata or nil if not found
78
- def find(git_sha)
79
- row = @db.get_first_row('SELECT * FROM codebase_snapshots WHERE git_sha = ?', [git_sha])
80
- return nil unless row
81
-
82
- row_to_hash(row)
83
- end
84
-
85
- # Compute diff between two snapshots.
86
- #
87
- # @param sha_a [String] Before snapshot git SHA
88
- # @param sha_b [String] After snapshot git SHA
89
- # @return [Hash] {added: [...], modified: [...], deleted: [...]}
90
- def diff(sha_a, sha_b)
91
- id_a = fetch_snapshot_id(sha_a)
92
- id_b = fetch_snapshot_id(sha_b)
93
-
94
- return { added: [], modified: [], deleted: [] } unless id_a && id_b
95
-
96
- units_a = load_snapshot_units(id_a)
97
- units_b = load_snapshot_units(id_b)
98
-
99
- compute_diff(units_a, units_b)
100
- end
101
-
102
- # History of a single unit across snapshots.
103
- #
104
- # @param identifier [String] Unit identifier
105
- # @param limit [Integer] Max snapshots to return (default 20)
106
- # @return [Array<Hash>] Entries with git_sha, extracted_at, source_hash, changed flag
107
- def unit_history(identifier, limit: 20)
108
- rows = @db.execute(<<~SQL, [identifier, limit])
109
- SELECT su.source_hash, su.metadata_hash, su.dependencies_hash, su.unit_type,
110
- s.git_sha, s.extracted_at, s.git_branch
111
- FROM codebase_snapshot_units su
112
- JOIN codebase_snapshots s ON s.id = su.snapshot_id
113
- WHERE su.identifier = ?
114
- ORDER BY s.extracted_at DESC
115
- LIMIT ?
116
- SQL
117
-
118
- entries = rows.map { |row| history_entry_from_row(row) }
119
- mark_changed_entries(entries)
120
- end
121
-
122
- private
123
-
124
- # Build a history entry hash from a database row.
125
- #
126
- # @param row [Hash]
127
- # @return [Hash]
128
- def history_entry_from_row(row)
129
- {
130
- git_sha: row['git_sha'],
131
- extracted_at: row['extracted_at'],
132
- git_branch: row['git_branch'],
133
- unit_type: row['unit_type'],
134
- source_hash: row['source_hash'],
135
- metadata_hash: row['metadata_hash'],
136
- dependencies_hash: row['dependencies_hash']
137
- }
138
- end
139
-
140
- # Mark changed flag on history entries by comparing source hashes.
141
- #
142
- # @param entries [Array<Hash>]
143
- # @return [Array<Hash>]
144
- def mark_changed_entries(entries)
145
- entries.each_with_index do |entry, i|
146
- entry[:changed] = if i == entries.size - 1
147
- true # Oldest version is always "changed" (first appearance)
148
- else
149
- entry[:source_hash] != entries[i + 1][:source_hash]
150
- end
151
- end
152
- entries
153
- end
154
-
155
- # Get a value from a hash that may have string or symbol keys.
156
- #
157
- # @param hash [Hash]
158
- # @param key [String]
159
- # @return [Object, nil]
160
- def mget(hash, key)
161
- hash[key] || hash[key.to_sym]
162
- end
163
-
164
- # Insert or replace the snapshot row from manifest data.
165
- #
166
- # @param manifest [Hash]
167
- # @param git_sha [String]
168
- # @param default_total [Integer]
169
- # @return [void]
170
- def upsert_snapshot(manifest, git_sha, default_total)
171
- params = [
172
- git_sha,
173
- mget(manifest, 'git_branch'),
174
- mget(manifest, 'extracted_at') || Time.now.iso8601,
175
- mget(manifest, 'rails_version'),
176
- mget(manifest, 'ruby_version'),
177
- mget(manifest, 'total_units') || default_total,
178
- JSON.generate(mget(manifest, 'counts') || {}),
179
- mget(manifest, 'gemfile_lock_sha'),
180
- mget(manifest, 'schema_sha')
181
- ]
182
- @db.execute(<<~SQL, params)
183
- INSERT OR REPLACE INTO codebase_snapshots
184
- (git_sha, git_branch, extracted_at, rails_version, ruby_version,
185
- total_units, unit_counts, gemfile_lock_sha, schema_sha)
186
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
187
- SQL
188
- end
189
-
190
- # Update a snapshot's diff stats vs. a previous snapshot.
191
- #
192
- # @param snapshot_id [Integer]
193
- # @param previous [Hash, nil]
194
- # @return [void]
195
- def update_diff_stats(snapshot_id, previous)
196
- diff_stats = compute_diff_stats(snapshot_id, previous)
197
- @db.execute(
198
- 'UPDATE codebase_snapshots SET units_added = ?, units_modified = ?, units_deleted = ? WHERE id = ?',
199
- [diff_stats[:added], diff_stats[:modified], diff_stats[:deleted], snapshot_id]
200
- )
201
- end
202
-
203
- # Find the most recent snapshot.
204
- #
205
- # @return [Hash, nil]
206
- def find_latest
207
- row = @db.get_first_row('SELECT * FROM codebase_snapshots ORDER BY extracted_at DESC LIMIT 1')
208
- return nil unless row
209
-
210
- row_to_hash(row)
211
- end
212
-
213
- # Fetch a snapshot's ID by git SHA.
214
- #
215
- # @param git_sha [String]
216
- # @return [Integer, nil]
217
- def fetch_snapshot_id(git_sha)
218
- @db.get_first_value('SELECT id FROM codebase_snapshots WHERE git_sha = ?', [git_sha])
219
- end
220
-
221
- # Insert per-unit hash records for a snapshot.
222
- #
223
- # @param snapshot_id [Integer]
224
- # @param unit_hashes [Array<Hash>]
225
- # @return [void]
226
- def insert_unit_hashes(snapshot_id, unit_hashes)
227
- sql = <<~SQL
228
- INSERT INTO codebase_snapshot_units
229
- (snapshot_id, identifier, unit_type, source_hash, metadata_hash, dependencies_hash)
230
- VALUES (?, ?, ?, ?, ?, ?)
231
- SQL
232
-
233
- # Wrap in a transaction to batch all inserts into a single commit,
234
- # reducing per-row fsync overhead from O(n) to O(1).
235
- @db.transaction do
236
- unit_hashes.each do |uh|
237
- params = [
238
- snapshot_id,
239
- mget(uh, 'identifier'),
240
- mget(uh, 'type').to_s,
241
- mget(uh, 'source_hash'),
242
- mget(uh, 'metadata_hash'),
243
- mget(uh, 'dependencies_hash')
244
- ]
245
- @db.execute(sql, params)
246
- end
247
- end
248
- end
249
-
250
- # Load all unit records for a snapshot as a hash keyed by identifier.
251
- #
252
- # @param snapshot_id [Integer]
253
- # @return [Hash{String => Hash}]
254
- def load_snapshot_units(snapshot_id)
255
- sql = <<~SQL
256
- SELECT identifier, unit_type, source_hash, metadata_hash, dependencies_hash
257
- FROM codebase_snapshot_units WHERE snapshot_id = ?
258
- SQL
259
- rows = @db.execute(sql, [snapshot_id])
260
-
261
- rows.to_h do |row|
262
- [row['identifier'], {
263
- unit_type: row['unit_type'],
264
- source_hash: row['source_hash'],
265
- metadata_hash: row['metadata_hash'],
266
- dependencies_hash: row['dependencies_hash']
267
- }]
268
- end
269
- end
270
-
271
- # Compute diff between two sets of unit hashes.
272
- #
273
- # @param units_a [Hash{String => Hash}] Before
274
- # @param units_b [Hash{String => Hash}] After
275
- # @return [Hash] {added: [...], modified: [...], deleted: [...]}
276
- def compute_diff(units_a, units_b) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
277
- added = []
278
- modified = []
279
- deleted = []
280
-
281
- # Units in B but not A → added
282
- # Units in both → check for modifications
283
- units_b.each do |identifier, data_b|
284
- if units_a.key?(identifier)
285
- data_a = units_a[identifier]
286
- if data_a[:source_hash] != data_b[:source_hash] ||
287
- data_a[:metadata_hash] != data_b[:metadata_hash] ||
288
- data_a[:dependencies_hash] != data_b[:dependencies_hash]
289
- modified << { identifier: identifier, unit_type: data_b[:unit_type] }
290
- end
291
- else
292
- added << { identifier: identifier, unit_type: data_b[:unit_type] }
293
- end
294
- end
295
-
296
- # Units in A but not B → deleted
297
- units_a.each do |identifier, data_a|
298
- deleted << { identifier: identifier, unit_type: data_a[:unit_type] } unless units_b.key?(identifier)
299
- end
300
-
301
- { added: added, modified: modified, deleted: deleted }
302
- end
303
-
304
- # Compute aggregate diff stats.
305
- #
306
- # @param current_snapshot_id [Integer]
307
- # @param previous_snapshot [Hash, nil]
308
- # @return [Hash] {added:, modified:, deleted:}
309
- def compute_diff_stats(current_snapshot_id, previous_snapshot)
310
- return { added: 0, modified: 0, deleted: 0 } unless previous_snapshot
311
-
312
- prev_id = fetch_snapshot_id(previous_snapshot[:git_sha])
313
- return { added: 0, modified: 0, deleted: 0 } unless prev_id
314
-
315
- units_prev = load_snapshot_units(prev_id)
316
- units_curr = load_snapshot_units(current_snapshot_id)
317
-
318
- result = compute_diff(units_prev, units_curr)
319
- { added: result[:added].size, modified: result[:modified].size, deleted: result[:deleted].size }
320
- end
321
-
322
- # Convert a database row to a normalized hash.
323
- #
324
- # @param row [Hash] SQLite3 result row
325
- # @return [Hash]
326
- def row_to_hash(row)
327
- {
328
- id: row['id'],
329
- git_sha: row['git_sha'],
330
- git_branch: row['git_branch'],
331
- extracted_at: row['extracted_at'],
332
- rails_version: row['rails_version'],
333
- ruby_version: row['ruby_version'],
334
- total_units: row['total_units'],
335
- unit_counts: row['unit_counts'] ? JSON.parse(row['unit_counts']) : {},
336
- gemfile_lock_sha: row['gemfile_lock_sha'],
337
- schema_sha: row['schema_sha'],
338
- units_added: row['units_added'],
339
- units_modified: row['units_modified'],
340
- units_deleted: row['units_deleted']
341
- }
342
- end
343
- end
344
- end
345
- end
@@ -1,19 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CodebaseIndex
4
- # Shared token estimation utility.
5
- #
6
- # Uses project convention: (string.length / 4.0).ceil
7
- # See docs/TOKEN_BENCHMARK.md — conservative floor (~10.6% overestimate).
8
- module TokenUtils
9
- module_function
10
-
11
- # Estimate token count for a string.
12
- #
13
- # @param text [String] Text to estimate
14
- # @return [Integer] Estimated token count
15
- def estimate_tokens(text)
16
- (text.length / 4.0).ceil
17
- end
18
- end
19
- end
@@ -1,5 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CodebaseIndex
4
- VERSION = '0.3.2'
5
- end
@@ -1,32 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rails/generators'
4
- require 'rails/generators/active_record'
5
-
6
- module CodebaseIndex
7
- module Generators
8
- # Rails generator that creates a migration for CodebaseIndex tables.
9
- #
10
- # Usage:
11
- # rails generate codebase_index:install
12
- #
13
- # Creates a migration with codebase_units, codebase_edges, and
14
- # codebase_embeddings tables. Works with PostgreSQL, MySQL, and SQLite.
15
- #
16
- class InstallGenerator < Rails::Generators::Base
17
- include ActiveRecord::Generators::Migration
18
-
19
- source_root File.expand_path('templates', __dir__)
20
-
21
- desc 'Creates a migration for CodebaseIndex tables (units, edges, embeddings)'
22
-
23
- # @return [void]
24
- def create_migration_file
25
- migration_template(
26
- 'create_codebase_index_tables.rb.erb',
27
- 'db/migrate/create_codebase_index_tables.rb'
28
- )
29
- end
30
- end
31
- end
32
- end
@@ -1,37 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rails/generators'
4
- require 'rails/generators/active_record'
5
-
6
- module CodebaseIndex
7
- module Generators
8
- # Rails generator that adds pgvector support to CodebaseIndex.
9
- #
10
- # Requires the pgvector PostgreSQL extension. Adds a native vector column
11
- # and HNSW index to the codebase_embeddings table.
12
- #
13
- # Usage:
14
- # rails generate codebase_index:pgvector
15
- # rails generate codebase_index:pgvector --dimensions 3072
16
- #
17
- class PgvectorGenerator < Rails::Generators::Base
18
- include ActiveRecord::Generators::Migration
19
-
20
- source_root File.expand_path('templates', __dir__)
21
-
22
- desc 'Adds pgvector native vector column and HNSW index to codebase_embeddings'
23
-
24
- class_option :dimensions, type: :numeric, default: 1536,
25
- desc: 'Vector dimensions (1536 for text-embedding-3-small, 3072 for large)'
26
-
27
- # @return [void]
28
- def create_migration_file
29
- @dimensions = options[:dimensions]
30
- migration_template(
31
- 'add_pgvector_to_codebase_index.rb.erb',
32
- 'db/migrate/add_pgvector_to_codebase_index.rb'
33
- )
34
- end
35
- end
36
- end
37
- end
@@ -1,15 +0,0 @@
1
- class AddPgvectorToCodebaseIndex < ActiveRecord::Migration[7.0]
2
- def change
3
- enable_extension 'vector' unless extension_enabled?('vector')
4
-
5
- add_column :codebase_embeddings, :embedding_vector, :vector,
6
- limit: <%= @dimensions || 1536 %>, null: true
7
-
8
- # HNSW index for fast approximate nearest neighbor search
9
- # Using cosine distance operator (vector_cosine_ops)
10
- add_index :codebase_embeddings, :embedding_vector,
11
- using: :hnsw,
12
- opclass: :vector_cosine_ops,
13
- name: 'idx_codebase_embeddings_vector_hnsw'
14
- end
15
- end
@@ -1,43 +0,0 @@
1
- class CreateCodebaseIndexTables < ActiveRecord::Migration[7.0]
2
- def change
3
- create_table :codebase_units do |t|
4
- t.string :unit_type, null: false
5
- t.string :identifier, null: false
6
- t.string :namespace
7
- t.string :file_path, null: false
8
- t.text :source_code
9
- t.string :source_hash
10
- t.json :metadata
11
-
12
- t.timestamps
13
- end
14
-
15
- add_index :codebase_units, :unit_type
16
- add_index :codebase_units, :identifier, unique: true
17
- add_index :codebase_units, :file_path
18
-
19
- create_table :codebase_edges do |t|
20
- t.references :source, null: false, foreign_key: { to_table: :codebase_units }
21
- t.references :target, null: false, foreign_key: { to_table: :codebase_units }
22
- t.string :relationship, null: false
23
- t.string :via
24
-
25
- t.datetime :created_at, null: false
26
- end
27
-
28
- add_index :codebase_edges, [:source_id, :target_id, :relationship], unique: true,
29
- name: 'idx_codebase_edges_unique'
30
-
31
- create_table :codebase_embeddings do |t|
32
- t.references :unit, null: false, foreign_key: { to_table: :codebase_units }
33
- t.string :chunk_type
34
- t.text :embedding, null: false
35
- t.string :content_hash, null: false
36
- t.integer :dimensions, null: false
37
-
38
- t.datetime :created_at, null: false
39
- end
40
-
41
- add_index :codebase_embeddings, :content_hash
42
- end
43
- end