@comfanion/usethis_search 3.0.0-dev.25 → 3.0.0-dev.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@comfanion/usethis_search",
3
- "version": "3.0.0-dev.25",
3
+ "version": "3.0.0-dev.27",
4
4
  "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
@@ -48,8 +48,6 @@
48
48
  "@opencode-ai/plugin": ">=1.1.0",
49
49
  "@xenova/transformers": "^2.17.0",
50
50
  "glob": "^10.3.10",
51
- "level": "^8.0.1",
52
- "levelgraph": "^4.0.0",
53
51
  "vectordb": "^0.4.0"
54
52
  },
55
53
  "peerDependencies": {
@@ -1,5 +1,4 @@
1
- import levelgraph from "levelgraph"
2
- import { Level } from "level"
1
+ import { Database } from "bun:sqlite"
3
2
  import { filePathFromNodeId, isStructuralPredicate } from "./graph-builder"
4
3
 
5
4
  export interface Triple {
@@ -13,98 +12,137 @@ export interface Triple {
13
12
  }
14
13
 
15
14
  export class GraphDB {
16
- private db: any
15
+ private db: Database | null = null
17
16
  private initialized: boolean = false
18
17
 
18
+ // Prepared statements (cached for performance)
19
+ private _stmtInsert: any = null
20
+ private _stmtBySubject: any = null
21
+ private _stmtByObject: any = null
22
+ private _stmtByFile: any = null
23
+ private _stmtDeleteByFile: any = null
24
+ private _stmtBySubjectPredicate: any = null
25
+ private _stmtByPredicate: any = null
26
+ private _stmtAll: any = null
27
+
19
28
  constructor(private dbPath: string) {}
20
29
 
21
30
  async init(): Promise<this> {
22
- const levelDb = new Level(this.dbPath)
23
- this.db = levelgraph(levelDb)
31
+ // bun:sqlite uses a file path; append .db if not already
32
+ const fullPath = this.dbPath.endsWith(".db") ? this.dbPath : this.dbPath + ".db"
33
+ this.db = new Database(fullPath)
34
+
35
+ // WAL mode for concurrent readers
36
+ this.db.exec("PRAGMA journal_mode = WAL")
37
+ this.db.exec("PRAGMA synchronous = NORMAL") // faster writes, safe with WAL
38
+ this.db.exec("PRAGMA cache_size = -2000") // 2MB cache
39
+
40
+ // Create triples table
41
+ this.db.exec(`
42
+ CREATE TABLE IF NOT EXISTS triples (
43
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
44
+ subject TEXT NOT NULL,
45
+ predicate TEXT NOT NULL,
46
+ object TEXT NOT NULL,
47
+ weight REAL NOT NULL DEFAULT 0,
48
+ source TEXT NOT NULL DEFAULT '',
49
+ file TEXT NOT NULL DEFAULT '',
50
+ line INTEGER
51
+ )
52
+ `)
53
+
54
+ // Indexes for fast lookups
55
+ this.db.exec("CREATE INDEX IF NOT EXISTS idx_subject ON triples(subject)")
56
+ this.db.exec("CREATE INDEX IF NOT EXISTS idx_object ON triples(object)")
57
+ this.db.exec("CREATE INDEX IF NOT EXISTS idx_file ON triples(file)")
58
+ this.db.exec("CREATE INDEX IF NOT EXISTS idx_predicate ON triples(predicate)")
59
+ this.db.exec("CREATE INDEX IF NOT EXISTS idx_subject_predicate ON triples(subject, predicate)")
60
+
61
+ // Prepare statements
62
+ this._stmtInsert = this.db.prepare(
63
+ "INSERT INTO triples (subject, predicate, object, weight, source, file, line) VALUES (?, ?, ?, ?, ?, ?, ?)"
64
+ )
65
+ this._stmtBySubject = this.db.prepare("SELECT * FROM triples WHERE subject = ?")
66
+ this._stmtByObject = this.db.prepare("SELECT * FROM triples WHERE object = ?")
67
+ this._stmtByFile = this.db.prepare("SELECT * FROM triples WHERE file = ?")
68
+ this._stmtDeleteByFile = this.db.prepare("DELETE FROM triples WHERE file = ?")
69
+ this._stmtBySubjectPredicate = this.db.prepare("SELECT * FROM triples WHERE subject = ? AND predicate = ?")
70
+ this._stmtByPredicate = this.db.prepare("SELECT * FROM triples WHERE predicate = ?")
71
+ this._stmtAll = this.db.prepare("SELECT * FROM triples")
72
+
24
73
  this.initialized = true
25
74
  return this
26
75
  }
27
76
 
77
+ private toTriple(row: any): Triple {
78
+ return {
79
+ subject: row.subject,
80
+ predicate: row.predicate,
81
+ object: row.object,
82
+ weight: row.weight,
83
+ source: row.source,
84
+ file: row.file,
85
+ line: row.line ?? undefined,
86
+ }
87
+ }
88
+
28
89
  async putEdges(triples: Triple[]): Promise<void> {
29
- if (!this.initialized) {
90
+ if (!this.initialized || !this.db) {
30
91
  throw new Error("GraphDB not initialized. Call init() first.")
31
92
  }
32
- await new Promise<void>((resolve, reject) => {
33
- this.db.put(triples, (err: Error | undefined) => {
34
- if (err) reject(err)
35
- else resolve()
36
- })
93
+
94
+ // Batch insert in a single transaction — much faster than individual inserts
95
+ const insertMany = this.db.transaction((items: Triple[]) => {
96
+ for (const t of items) {
97
+ this._stmtInsert.run(t.subject, t.predicate, t.object, t.weight, t.source, t.file, t.line ?? null)
98
+ }
37
99
  })
100
+ insertMany(triples)
38
101
  }
39
102
 
40
103
  async getOutgoing(chunkId: string): Promise<Triple[]> {
41
- if (!this.initialized) {
104
+ if (!this.initialized || !this.db) {
42
105
  throw new Error("GraphDB not initialized. Call init() first.")
43
106
  }
44
- return new Promise<Triple[]>((resolve, reject) => {
45
- this.db.get({ subject: chunkId }, (err: Error | undefined, triples: Triple[]) => {
46
- if (err) reject(err)
47
- else resolve(triples || [])
48
- })
49
- })
107
+ const rows = this._stmtBySubject.all(chunkId)
108
+ return rows.map((r: any) => this.toTriple(r))
50
109
  }
51
110
 
52
111
  async getIncoming(chunkId: string): Promise<Triple[]> {
53
- if (!this.initialized) {
112
+ if (!this.initialized || !this.db) {
54
113
  throw new Error("GraphDB not initialized. Call init() first.")
55
114
  }
56
- return new Promise<Triple[]>((resolve, reject) => {
57
- this.db.get({ object: chunkId }, (err: Error | undefined, triples: Triple[]) => {
58
- if (err) reject(err)
59
- else resolve(triples || [])
60
- })
61
- })
115
+ const rows = this._stmtByObject.all(chunkId)
116
+ return rows.map((r: any) => this.toTriple(r))
62
117
  }
63
118
 
64
119
  async deleteByFile(filePath: string): Promise<void> {
65
- if (!this.initialized) {
120
+ if (!this.initialized || !this.db) {
66
121
  throw new Error("GraphDB not initialized. Call init() first.")
67
122
  }
68
- const allTriples = await new Promise<Triple[]>((resolve, reject) => {
69
- this.db.get({}, (err: Error | undefined, triples: Triple[]) => {
70
- if (err) reject(err)
71
- else resolve(triples || [])
72
- })
73
- })
74
-
75
- const toDelete = allTriples.filter(t => t.file === filePath)
76
-
77
- for (const t of toDelete) {
78
- await new Promise<void>((resolve, reject) => {
79
- this.db.del(t, (err: Error | undefined) => {
80
- if (err) reject(err)
81
- else resolve()
82
- })
83
- })
84
- }
123
+ this._stmtDeleteByFile.run(filePath)
85
124
  }
86
125
 
87
126
  async close(): Promise<void> {
88
127
  if (this.initialized && this.db) {
89
- await new Promise<void>((resolve, reject) => {
90
- this.db.close((err: Error | undefined) => {
91
- if (err) reject(err)
92
- else resolve()
93
- })
94
- })
128
+ this.db.close()
129
+ this.db = null
130
+ this._stmtInsert = null
131
+ this._stmtBySubject = null
132
+ this._stmtByObject = null
133
+ this._stmtByFile = null
134
+ this._stmtDeleteByFile = null
135
+ this._stmtBySubjectPredicate = null
136
+ this._stmtByPredicate = null
137
+ this._stmtAll = null
95
138
  this.initialized = false
96
139
  }
97
140
  }
98
141
 
99
142
  // ---- FR-054: File metadata triples for incremental updates -----------------
100
143
 
101
- /**
102
- * Store graph build metadata for a file as a special triple.
103
- * Subject: `meta:<filePath>`, Predicate: `graph_built`, Object: `<hash>`.
104
- * Weight encodes the timestamp (seconds since epoch).
105
- */
106
144
  async setFileMeta(filePath: string, hash: string, timestamp: number): Promise<void> {
107
- if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
145
+ if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
108
146
 
109
147
  // Remove old meta triple for this file first
110
148
  await this.deleteFileMeta(filePath)
@@ -113,111 +151,61 @@ export class GraphDB {
113
151
  subject: `meta:${filePath}`,
114
152
  predicate: "graph_built",
115
153
  object: hash,
116
- weight: Math.floor(timestamp / 1000), // seconds since epoch fits in weight
154
+ weight: Math.floor(timestamp / 1000),
117
155
  source: "meta",
118
156
  file: filePath,
119
157
  }
120
158
  await this.putEdges([triple])
121
159
  }
122
160
 
123
- /**
124
- * Get the stored graph build metadata for a file.
125
- * Returns { hash, timestamp } or null if not found.
126
- */
127
161
  async getFileMeta(filePath: string): Promise<{ hash: string; timestamp: number } | null> {
128
- if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
129
-
130
- const triples = await new Promise<Triple[]>((resolve, reject) => {
131
- this.db.get(
132
- { subject: `meta:${filePath}`, predicate: "graph_built" },
133
- (err: Error | undefined, result: Triple[]) => {
134
- if (err) reject(err)
135
- else resolve(result || [])
136
- },
137
- )
138
- })
162
+ if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
139
163
 
140
- if (triples.length === 0) return null
164
+ const rows = this._stmtBySubjectPredicate.all(`meta:${filePath}`, "graph_built")
165
+ if (rows.length === 0) return null
141
166
  return {
142
- hash: triples[0].object,
143
- timestamp: triples[0].weight * 1000, // back to ms
167
+ hash: rows[0].object,
168
+ timestamp: rows[0].weight * 1000,
144
169
  }
145
170
  }
146
171
 
147
- /**
148
- * Delete file meta triple.
149
- */
150
172
  async deleteFileMeta(filePath: string): Promise<void> {
151
- if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
173
+ if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
152
174
 
153
175
  try {
154
- const triples = await new Promise<Triple[]>((resolve, reject) => {
155
- this.db.get(
156
- { subject: `meta:${filePath}`, predicate: "graph_built" },
157
- (err: Error | undefined, result: Triple[]) => {
158
- if (err) reject(err)
159
- else resolve(result || [])
160
- },
161
- )
162
- })
163
-
164
- for (const t of triples) {
165
- await new Promise<void>((resolve, reject) => {
166
- this.db.del(t, (err: Error | undefined) => {
167
- if (err) reject(err)
168
- else resolve()
169
- })
170
- })
171
- }
172
- } catch (err) {
173
- // Silently ignore errors (e.g., no meta triple exists)
176
+ this.db!.prepare("DELETE FROM triples WHERE subject = ? AND predicate = ?")
177
+ .run(`meta:${filePath}`, "graph_built")
178
+ } catch {
179
+ // Silently ignore errors
174
180
  }
175
181
  }
176
182
 
177
- /**
178
- * Get all file metadata triples (for validation / stats).
179
- */
180
183
  async getAllFileMeta(): Promise<Array<{ filePath: string; hash: string; timestamp: number }>> {
181
- if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
182
-
183
- const triples = await new Promise<Triple[]>((resolve, reject) => {
184
- this.db.get({ predicate: "graph_built" }, (err: Error | undefined, result: Triple[]) => {
185
- if (err) reject(err)
186
- else resolve(result || [])
187
- })
188
- })
184
+ if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
189
185
 
190
- return triples.map((t) => ({
191
- filePath: t.subject.replace(/^meta:/, ""),
192
- hash: t.object,
193
- timestamp: t.weight * 1000,
186
+ const rows = this._stmtByPredicate.all("graph_built")
187
+ return rows.map((r: any) => ({
188
+ filePath: r.subject.replace(/^meta:/, ""),
189
+ hash: r.object,
190
+ timestamp: r.weight * 1000,
194
191
  }))
195
192
  }
196
193
 
197
- /**
198
- * Get all triples in the graph (for validation/stats).
199
- * Excludes meta, anchor, and structural triples by default.
200
- * Pass includeStructural=true to also get structural edges.
201
- */
202
194
  async getAllTriples(includeStructural: boolean = false): Promise<Triple[]> {
203
- if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
204
-
205
- const allTriples = await new Promise<Triple[]>((resolve, reject) => {
206
- this.db.get({}, (err: Error | undefined, triples: Triple[]) => {
207
- if (err) reject(err)
208
- else resolve(triples || [])
195
+ if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
196
+
197
+ const allRows = this._stmtAll.all()
198
+ return allRows
199
+ .map((r: any) => this.toTriple(r))
200
+ .filter((t: Triple) => {
201
+ if (t.predicate === "graph_built" || t.predicate === "belongs_to") return false
202
+ if (!includeStructural && isStructuralPredicate(t.predicate)) return false
203
+ return true
209
204
  })
210
- })
211
-
212
- return allTriples.filter(t => {
213
- if (t.predicate === "graph_built" || t.predicate === "belongs_to") return false
214
- if (!includeStructural && isStructuralPredicate(t.predicate)) return false
215
- return true
216
- })
217
205
  }
218
206
 
219
207
  async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
220
- if (!this.initialized) {
208
+ if (!this.initialized || !this.db) {
221
209
  throw new Error("GraphDB not initialized. Call init() first.")
222
210
  }
223
211
 
@@ -225,7 +213,6 @@ export class GraphDB {
225
213
  const visited = new Set<string>()
226
214
  const self = this
227
215
 
228
- // Resolve the caller's file directly from the node ID
229
216
  const callerFile = filePathFromNodeId(chunkId)
230
217
 
231
218
  async function traverse(currentId: string, currentDepth: number, currentRelation: string) {
@@ -236,19 +223,12 @@ export class GraphDB {
236
223
  visited.add(currentId)
237
224
 
238
225
  try {
239
- const outgoing = await new Promise<Triple[]>((resolve, reject) => {
240
- self.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
241
- if (err) reject(err)
242
- else resolve(triples || [])
243
- })
244
- })
226
+ const outgoing = self._stmtBySubject.all(currentId).map((r: any) => self.toTriple(r))
245
227
 
246
228
  for (const triple of outgoing) {
247
- // Skip meta, anchor, and structural-only edges
248
229
  if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
249
230
  if (isStructuralPredicate(triple.predicate)) continue
250
231
 
251
- // Resolve file for the target node directly from its ID
252
232
  const targetFile = filePathFromNodeId(triple.object)
253
233
  if (!targetFile) continue
254
234
 
@@ -267,12 +247,7 @@ export class GraphDB {
267
247
  }
268
248
  }
269
249
 
270
- const incoming = await new Promise<Triple[]>((resolve, reject) => {
271
- self.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
272
- if (err) reject(err)
273
- else resolve(triples || [])
274
- })
275
- })
250
+ const incoming = self._stmtByObject.all(currentId).map((r: any) => self.toTriple(r))
276
251
 
277
252
  for (const triple of incoming) {
278
253
  if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
@@ -298,7 +273,6 @@ export class GraphDB {
298
273
 
299
274
  await traverse(chunkId, 0, "")
300
275
 
301
- // Remove the caller's own file from results
302
276
  if (callerFile) relatedFiles.delete(callerFile)
303
277
 
304
278
  return Array.from(relatedFiles.entries())
@@ -612,7 +612,148 @@ class CodebaseIndexer {
612
612
  return this.hashes[relPath] !== currentHash;
613
613
  }
614
614
 
615
- // ── Index a single file (v2: cleaning + semantic chunking + metadata) ─────
615
+ // ── Phase 1: Prepare file (chunk + graph, NO embedding) ─────────────────
616
+ // Returns prepared chunk data ready for embedding, or null if skipped.
617
+
618
+ async prepareFile(filePath) {
619
+ const relPath = path.relative(this.root, filePath);
620
+
621
+ let content;
622
+ try {
623
+ content = await fs.readFile(filePath, "utf8");
624
+ } catch {
625
+ return null;
626
+ }
627
+
628
+ const hash = this.fileHash(content);
629
+ if (this.hashes[relPath] === hash) {
630
+ return null; // unchanged
631
+ }
632
+
633
+ // Extract metadata
634
+ const fileMeta = await extractFileMetadata(filePath, content);
635
+ const archived = this.isArchived(relPath, content);
636
+
637
+ // Clean content before chunking
638
+ const cleaned = cleanContent(content, fileMeta.file_type, CLEANING_CONFIG);
639
+
640
+ // Semantic chunking
641
+ const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
642
+
643
+ // Assign chunk IDs
644
+ const chunksWithIds = this.graphBuilder
645
+ ? this.graphBuilder.assignChunkIds(relPath, chunks)
646
+ : chunks.map((c, i) => ({ ...c, chunk_id: `chunk:${relPath}::_chunk_${i}` }));
647
+
648
+ // Build graph edges (Phase 1 — no embedding needed)
649
+ if (this.graphBuilder && this.graphDB) {
650
+ await this.graphDB.deleteByFile(relPath);
651
+ const edgesBuilt = await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
652
+
653
+ if (edgesBuilt > 0 || DEBUG) {
654
+ const timestamp = new Date().toISOString().slice(11, 19);
655
+ const logMsg = `${timestamp} Graph built: ${relPath} (${chunksWithIds.length} chunks)`;
656
+ if (DEBUG) console.log(`[vectorizer] ${logMsg}`);
657
+ try {
658
+ const logPath = path.join(this.root, ".opencode", "indexer.log");
659
+ const fsSync = await import("fs");
660
+ fsSync.appendFileSync(logPath, `${logMsg}\n`);
661
+ } catch { /* non-fatal */ }
662
+ }
663
+
664
+ try {
665
+ await this.graphDB.setFileMeta(relPath, hash, Date.now());
666
+ } catch { /* non-fatal */ }
667
+ }
668
+
669
+ // Return prepared rows (without vector — Phase 2 fills it)
670
+ const rows = chunksWithIds.map((chunk, i) => ({
671
+ chunk_id: chunk.chunk_id,
672
+ file: relPath,
673
+ chunk_index: i,
674
+ content: chunk.content,
675
+ archived,
676
+ file_type: fileMeta.file_type,
677
+ language: fileMeta.language,
678
+ last_modified: fileMeta.last_modified,
679
+ file_size: fileMeta.file_size,
680
+ heading_context: chunk.heading_context || "",
681
+ function_name: chunk.function_name || "",
682
+ class_name: chunk.class_name || "",
683
+ tags: (fileMeta.tags || []).join(","),
684
+ start_line: chunk.start_line ?? -1,
685
+ end_line: chunk.end_line ?? -1,
686
+ }));
687
+
688
+ return { relPath, hash, rows };
689
+ }
690
+
691
+ // ── Phase 2: Batch embed + store ──────────────────────────────────────────
692
+ // Takes prepared rows from prepareFile(), embeds in batches, stores in LanceDB.
693
+
694
+ async embedAndStore(preparedFiles, batchSize = 32, onProgress = null) {
695
+ if (preparedFiles.length === 0) return 0;
696
+
697
+ // Collect all rows with their content for batch embedding
698
+ const allRows = [];
699
+ for (const pf of preparedFiles) {
700
+ for (const row of pf.rows) {
701
+ allRows.push(row);
702
+ }
703
+ }
704
+
705
+ if (allRows.length === 0) return 0;
706
+
707
+ // Load model once
708
+ const model = await this.loadModel();
709
+
710
+ // Batch embed
711
+ const allData = [];
712
+ for (let i = 0; i < allRows.length; i += batchSize) {
713
+ const batch = allRows.slice(i, i + batchSize);
714
+ const texts = batch.map(r => r.content);
715
+
716
+ // Embed batch — @xenova/transformers processes array inputs efficiently
717
+ const embeddings = [];
718
+ for (const text of texts) {
719
+ const result = await model(text, { pooling: "mean", normalize: true });
720
+ embeddings.push(Array.from(result.data));
721
+ }
722
+
723
+ for (let j = 0; j < batch.length; j++) {
724
+ allData.push({ ...batch[j], vector: embeddings[j] });
725
+ }
726
+
727
+ if (onProgress) {
728
+ onProgress(Math.min(i + batchSize, allRows.length), allRows.length, "embedding");
729
+ }
730
+ }
731
+
732
+ // Bulk store in LanceDB
733
+ const tableName = "chunks";
734
+ const tables = await this.db.tableNames();
735
+ if (tables.includes(tableName)) {
736
+ const table = await this.db.openTable(tableName);
737
+ await table.add(allData);
738
+ } else {
739
+ await this.db.createTable(tableName, allData);
740
+ }
741
+
742
+ // Update hashes for all prepared files
743
+ for (const pf of preparedFiles) {
744
+ this.hashes[pf.relPath] = pf.hash;
745
+ }
746
+ await this.saveHashes();
747
+
748
+ // Invalidate caches
749
+ if (this.bm25) { this.bm25.clear(); this.bm25 = null; }
750
+ this._bm25Rows = null;
751
+ this._chunkCache = null;
752
+
753
+ return allData.length;
754
+ }
755
+
756
+ // ── Index a single file (legacy — used by freshen/on-change) ───────────
616
757
 
617
758
  async indexFile(filePath) {
618
759
  const relPath = path.relative(this.root, filePath);
@@ -1170,31 +1311,64 @@ class CodebaseIndexer {
1170
1311
  }
1171
1312
  }
1172
1313
 
1173
- let indexed = 0;
1174
- let skipped = 0;
1175
1314
  const total = files.length;
1315
+ const CONCURRENCY = 5;
1176
1316
 
1177
- for (let i = 0; i < files.length; i++) {
1178
- const relPath = files[i];
1179
- const filePath = path.join(this.root, relPath);
1180
- try {
1181
- const wasIndexed = await this.indexFile(filePath);
1182
- if (wasIndexed) {
1183
- indexed++;
1184
- // FR-053: progress indicator includes graph building phase
1185
- if (onProgress) onProgress(indexed, total, relPath, i + 1);
1317
+ // ══════════════════════════════════════════════════════════════════════════
1318
+ // Phase 1: Prepare files in parallel (chunk + graph, no embedding)
1319
+ // ══════════════════════════════════════════════════════════════════════════
1320
+ const preparedFiles = [];
1321
+ let prepared = 0;
1322
+ let skipped = 0;
1323
+
1324
+ // Process in batches of CONCURRENCY
1325
+ for (let i = 0; i < files.length; i += CONCURRENCY) {
1326
+ const batch = files.slice(i, i + CONCURRENCY);
1327
+ const promises = batch.map(async (relPath) => {
1328
+ const filePath = path.join(this.root, relPath);
1329
+ try {
1330
+ const result = await this.prepareFile(filePath);
1331
+ return result;
1332
+ } catch {
1333
+ return null;
1334
+ }
1335
+ });
1336
+
1337
+ const results = await Promise.all(promises);
1338
+ for (let j = 0; j < results.length; j++) {
1339
+ if (results[j]) {
1340
+ preparedFiles.push(results[j]);
1341
+ prepared++;
1342
+ if (onProgress) onProgress(prepared, total, results[j].relPath, i + j + 1, "prepare");
1186
1343
  } else {
1187
1344
  skipped++;
1188
1345
  }
1189
- } catch {
1190
- skipped++;
1191
1346
  }
1192
1347
  }
1193
1348
 
1349
+ if (DEBUG) console.log(`[vectorizer] Phase 1 done: ${prepared} files prepared, ${skipped} skipped`);
1350
+
1351
+ // ══════════════════════════════════════════════════════════════════════════
1352
+ // Phase 2: Batch embed + store (sequential, batch forward pass)
1353
+ // ══════════════════════════════════════════════════════════════════════════
1354
+ let chunksEmbedded = 0;
1355
+ if (preparedFiles.length > 0) {
1356
+ const totalChunks = preparedFiles.reduce((sum, pf) => sum + pf.rows.length, 0);
1357
+ if (DEBUG) console.log(`[vectorizer] Phase 2: embedding ${totalChunks} chunks from ${preparedFiles.length} files`);
1358
+
1359
+ chunksEmbedded = await this.embedAndStore(preparedFiles, 32, (done, embedTotal, phase) => {
1360
+ if (onProgress) onProgress(done, embedTotal, `embedding`, done, "embed");
1361
+ });
1362
+
1363
+ if (DEBUG) console.log(`[vectorizer] Phase 2 done: ${chunksEmbedded} chunks embedded and stored`);
1364
+ }
1365
+
1366
+ const indexed = prepared; // file count for backward compat
1367
+
1194
1368
  // FR-005: Build semantic similarity edges as post-pass
1195
1369
  // Disabled by default (O(n²) — slow on large repos). Enable via graph.semantic_edges: true
1196
1370
  let semanticEdges = 0;
1197
- if (indexed > 0 && this.graphBuilder && this.graphDB && GRAPH_CONFIG.semantic_edges) {
1371
+ if (chunksEmbedded > 0 && this.graphBuilder && this.graphDB && GRAPH_CONFIG.semantic_edges) {
1198
1372
  try {
1199
1373
  const tableName = "chunks";
1200
1374
  const tables = await this.db.tableNames();