@jafreck/lore 0.2.5 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/README.md +42 -23
  2. package/dist/cli.js +37 -101
  3. package/dist/cli.js.map +1 -1
  4. package/dist/index.d.ts +24 -5
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +23 -4
  7. package/dist/index.js.map +1 -1
  8. package/dist/indexer/call-graph.d.ts +8 -4
  9. package/dist/indexer/call-graph.d.ts.map +1 -1
  10. package/dist/indexer/call-graph.js +150 -81
  11. package/dist/indexer/call-graph.js.map +1 -1
  12. package/dist/indexer/db.d.ts.map +1 -1
  13. package/dist/indexer/db.js +29 -3
  14. package/dist/indexer/db.js.map +1 -1
  15. package/dist/indexer/extractors/php.js +5 -1
  16. package/dist/indexer/extractors/php.js.map +1 -1
  17. package/dist/indexer/index.d.ts +27 -75
  18. package/dist/indexer/index.d.ts.map +1 -1
  19. package/dist/indexer/index.js +146 -904
  20. package/dist/indexer/index.js.map +1 -1
  21. package/dist/indexer/lsp/enrichment.d.ts +2 -0
  22. package/dist/indexer/lsp/enrichment.d.ts.map +1 -1
  23. package/dist/indexer/lsp/enrichment.js +67 -33
  24. package/dist/indexer/lsp/enrichment.js.map +1 -1
  25. package/dist/indexer/pipeline.d.ts +137 -0
  26. package/dist/indexer/pipeline.d.ts.map +1 -0
  27. package/dist/indexer/pipeline.js +84 -0
  28. package/dist/indexer/pipeline.js.map +1 -0
  29. package/dist/indexer/resolution-method.d.ts +40 -0
  30. package/dist/indexer/resolution-method.d.ts.map +1 -0
  31. package/dist/indexer/resolution-method.js +53 -0
  32. package/dist/indexer/resolution-method.js.map +1 -0
  33. package/dist/indexer/stages/dependency-api.d.ts +18 -0
  34. package/dist/indexer/stages/dependency-api.d.ts.map +1 -0
  35. package/dist/indexer/stages/dependency-api.js +176 -0
  36. package/dist/indexer/stages/dependency-api.js.map +1 -0
  37. package/dist/indexer/stages/docs-index.d.ts +20 -0
  38. package/dist/indexer/stages/docs-index.d.ts.map +1 -0
  39. package/dist/indexer/stages/docs-index.js +144 -0
  40. package/dist/indexer/stages/docs-index.js.map +1 -0
  41. package/dist/indexer/stages/embedding.d.ts +17 -0
  42. package/dist/indexer/stages/embedding.d.ts.map +1 -0
  43. package/dist/indexer/stages/embedding.js +222 -0
  44. package/dist/indexer/stages/embedding.js.map +1 -0
  45. package/dist/indexer/stages/import-resolution.d.ts +17 -0
  46. package/dist/indexer/stages/import-resolution.d.ts.map +1 -0
  47. package/dist/indexer/stages/import-resolution.js +49 -0
  48. package/dist/indexer/stages/import-resolution.js.map +1 -0
  49. package/dist/indexer/stages/index.d.ts +12 -0
  50. package/dist/indexer/stages/index.d.ts.map +1 -0
  51. package/dist/indexer/stages/index.js +12 -0
  52. package/dist/indexer/stages/index.js.map +1 -0
  53. package/dist/indexer/stages/lsp-enrichment.d.ts +48 -0
  54. package/dist/indexer/stages/lsp-enrichment.d.ts.map +1 -0
  55. package/dist/indexer/stages/lsp-enrichment.js +174 -0
  56. package/dist/indexer/stages/lsp-enrichment.js.map +1 -0
  57. package/dist/indexer/stages/source-index.d.ts +31 -0
  58. package/dist/indexer/stages/source-index.d.ts.map +1 -0
  59. package/dist/indexer/stages/source-index.js +323 -0
  60. package/dist/indexer/stages/source-index.js.map +1 -0
  61. package/dist/lore-server/db.d.ts +51 -4
  62. package/dist/lore-server/db.d.ts.map +1 -1
  63. package/dist/lore-server/db.js +74 -69
  64. package/dist/lore-server/db.js.map +1 -1
  65. package/dist/lore-server/server.d.ts +5 -0
  66. package/dist/lore-server/server.d.ts.map +1 -1
  67. package/dist/lore-server/server.js +51 -233
  68. package/dist/lore-server/server.js.map +1 -1
  69. package/dist/lore-server/tool-registry.d.ts +101 -0
  70. package/dist/lore-server/tool-registry.d.ts.map +1 -0
  71. package/dist/lore-server/tool-registry.js +227 -0
  72. package/dist/lore-server/tool-registry.js.map +1 -0
  73. package/dist/lore-server/tools/graph.d.ts +7 -0
  74. package/dist/lore-server/tools/graph.d.ts.map +1 -1
  75. package/dist/lore-server/tools/graph.js +64 -7
  76. package/dist/lore-server/tools/graph.js.map +1 -1
  77. package/dist/lore-server/tools/lookup.d.ts +9 -3
  78. package/dist/lore-server/tools/lookup.d.ts.map +1 -1
  79. package/dist/lore-server/tools/lookup.js +9 -4
  80. package/dist/lore-server/tools/lookup.js.map +1 -1
  81. package/dist/lore-server/tools/search.d.ts +4 -0
  82. package/dist/lore-server/tools/search.d.ts.map +1 -1
  83. package/dist/lore-server/tools/search.js +4 -0
  84. package/dist/lore-server/tools/search.js.map +1 -1
  85. package/dist/runtime.d.ts +88 -0
  86. package/dist/runtime.d.ts.map +1 -0
  87. package/dist/runtime.js +153 -0
  88. package/dist/runtime.js.map +1 -0
  89. package/package.json +12 -12
@@ -1,87 +1,40 @@
1
1
  /**
2
2
  * @module indexer/index
3
3
  *
4
- * The `IndexBuilder` class orchestrates the full indexing pipeline:
5
- * walk → parse → extract → resolve → persist
4
+ * The `IndexBuilder` class is a **façade** over the composable
5
+ * `IndexPipeline` and its stage objects.
6
6
  *
7
- * It also supports incremental updates (`update()`) and a stub for
8
- * LLM-summary ingestion (`ingestSummary()`).
7
+ * For full builds, `build()` delegates entirely to the pipeline which
8
+ * enforces the data-dependency chain:
9
+ * ```
10
+ * SourceIndexStage → DocsIndexStage → ImportResolutionStage
11
+ * → DependencyApiStage → LspEnrichmentStage → ResolutionStage
12
+ * → TestMapStage → HistoryStage → EmbeddingStage
13
+ * ```
14
+ *
15
+ * For incremental updates, `update()` uses stage-extracted helpers
16
+ * while managing the changed-file diff itself.
17
+ *
18
+ * The enrichment → resolution ordering is **load-bearing** and enforced
19
+ * structurally by the pipeline rather than by call-site discipline.
9
20
  */
10
21
  import * as fs from 'node:fs';
11
- import * as crypto from 'node:crypto';
12
- import * as path from 'node:path';
13
22
  import { execFileSync } from 'node:child_process';
14
- import { openDb, setLoreMeta, getLoreMeta, createVec0Tables, LORE_META_INDEX_CHECKPOINT, LORE_META_LAST_HEAD_SHA, LORE_META_COVERAGE_LAST_SOURCE_PATH, LORE_META_COVERAGE_LAST_SOURCE_MTIME, } from './db.js';
15
- import { walkFiles } from './walker.js';
16
- import { detectLanguageForPath } from './walker.js';
17
- import { walkDocumentationFiles } from './walker.js';
18
- import { inferSeededDocNoteKey, buildDocNoteScope } from './docs.js';
19
- import { ingestGitHistory } from './git-history.js';
20
- import { ParserPool } from './parser.js';
21
- import { ImportResolver } from './resolver.js';
22
- import { resolveSymbolEdges, normalizeTypeName } from './call-graph.js';
23
- import { isPublicDeclarationSurfaceSymbol, } from './extractors/types.js';
24
- import { CExtractor } from './extractors/c.js';
25
- import { RustExtractor } from './extractors/rust.js';
26
- import { PythonExtractor } from './extractors/python.js';
27
- import { CppExtractor } from './extractors/cpp.js';
28
- import { TypeScriptExtractor } from './extractors/typescript.js';
29
- import { JavaScriptExtractor } from './extractors/javascript.js';
30
- import { GoExtractor } from './extractors/go.js';
31
- import { JavaExtractor } from './extractors/java.js';
32
- import { CSharpExtractor } from './extractors/csharp.js';
33
- import { RubyExtractor } from './extractors/ruby.js';
34
- import { PhpExtractor } from './extractors/php.js';
35
- import { SwiftExtractor } from './extractors/swift.js';
36
- import { KotlinExtractor } from './extractors/kotlin.js';
37
- import { ScalaExtractor } from './extractors/scala.js';
38
- import { LuaExtractor } from './extractors/lua.js';
39
- import { BashExtractor } from './extractors/bash.js';
40
- import { ElixirExtractor } from './extractors/elixir.js';
41
- import { ZigExtractor } from './extractors/zig.js';
42
- import { DartExtractor } from './extractors/dart.js';
43
- import { OcamlExtractor } from './extractors/ocaml.js';
44
- import { HaskellExtractor } from './extractors/haskell.js';
45
- import { JuliaExtractor } from './extractors/julia.js';
46
- import { ElmExtractor } from './extractors/elm.js';
47
- import { ObjcExtractor } from './extractors/objc.js';
48
- import { DEFAULT_EMBEDDING_MODEL, buildStructuralEmbeddingText } from './embedder.js';
23
+ import { openDb, setLoreMeta, LORE_META_LAST_HEAD_SHA, LORE_META_COVERAGE_LAST_SOURCE_PATH, LORE_META_COVERAGE_LAST_SOURCE_MTIME, } from './db.js';
24
+ import { DEFAULT_EMBEDDING_MODEL } from './embedder.js';
49
25
  import { ingestCoverageReport } from './coverage.js';
26
+ import { resolveSymbolEdges } from './call-graph.js';
50
27
  import { refreshTestMappings } from './test-mapper.js';
51
- import { LspEnrichmentCoordinator } from './lsp/enrichment.js';
28
+ import { ingestGitHistory } from './git-history.js';
52
29
  import { getLogger } from '../logger.js';
53
- // ─── Extractor registry ───────────────────────────────────────────────────────
54
- const EXTRACTORS = {
55
- c: new CExtractor(),
56
- rust: new RustExtractor(),
57
- python: new PythonExtractor(),
58
- cpp: new CppExtractor(),
59
- typescript: new TypeScriptExtractor(),
60
- javascript: new JavaScriptExtractor(),
61
- go: new GoExtractor(),
62
- java: new JavaExtractor(),
63
- csharp: new CSharpExtractor(),
64
- ruby: new RubyExtractor(),
65
- php: new PhpExtractor(),
66
- swift: new SwiftExtractor(),
67
- kotlin: new KotlinExtractor(),
68
- scala: new ScalaExtractor(),
69
- lua: new LuaExtractor(),
70
- bash: new BashExtractor(),
71
- elixir: new ElixirExtractor(),
72
- zig: new ZigExtractor(),
73
- dart: new DartExtractor(),
74
- ocaml: new OcamlExtractor(),
75
- haskell: new HaskellExtractor(),
76
- julia: new JuliaExtractor(),
77
- elm: new ElmExtractor(),
78
- objc: new ObjcExtractor(),
79
- };
80
- /** Number of symbols to embed per batch. */
81
- const EMBED_BATCH_SIZE = 64;
82
- // ─── IndexBuilder ─────────────────────────────────────────────────────────────
30
+ import { IndexPipeline } from './pipeline.js';
31
+ import { SourceIndexStage, DocsIndexStage, ImportResolutionStage, DependencyApiStage, LspEnrichmentStage, EmbeddingStage, } from './stages/index.js';
32
+ // ─── IndexBuilder (façade) ────────────────────────────────────────────────────
83
33
  /**
84
- * Orchestrates the full M1 indexing pipeline.
34
+ * Façade over the composable `IndexPipeline`.
35
+ *
36
+ * Preserves backward-compatible public API while internally delegating to
37
+ * pipeline stages for the actual work.
85
38
  *
86
39
  * @example
87
40
  * ```ts
@@ -92,8 +45,6 @@ const EMBED_BATCH_SIZE = 64;
92
45
  export class IndexBuilder {
93
46
  dbPath;
94
47
  walkerConfig;
95
- pool;
96
- resolver;
97
48
  embedder;
98
49
  history;
99
50
  indexDependencies;
@@ -103,8 +54,6 @@ export class IndexBuilder {
103
54
  constructor(dbPath, walkerConfig, embedder, embeddingModelOrOptions) {
104
55
  this.dbPath = dbPath;
105
56
  this.walkerConfig = walkerConfig;
106
- this.pool = new ParserPool();
107
- this.resolver = new ImportResolver();
108
57
  const opts = typeof embeddingModelOrOptions === 'string'
109
58
  ? { embeddingModel: embeddingModelOrOptions }
110
59
  : (embeddingModelOrOptions ?? {});
@@ -123,249 +72,115 @@ export class IndexBuilder {
123
72
  }
124
73
  // ─── Public API ──────────────────────────────────────────────────────────
125
74
  /**
126
- * Performs a full build: walks all files, parses them, extracts
127
- * symbols/imports/callRefs, resolves imports, and persists everything to
128
- * the database.
75
+ * Performs a full build by running the composable pipeline.
76
+ *
77
+ * The pipeline enforces the enrichment → resolution data-dependency
78
+ * chain structurally (by stage ordering), not by convention.
129
79
  */
130
80
  async build() {
131
81
  const log = getLogger();
132
82
  const buildStart = performance.now();
133
83
  const db = openDb(this.dbPath);
134
84
  const branch = this.resolveBranch();
135
- const lspCoordinator = this.createLspEnrichmentCoordinator();
136
85
  log.indexing('build started', { dbPath: this.dbPath, branch, rootDir: this.walkerConfig.rootDir });
86
+ // Build the pipeline with all stages in dependency order.
87
+ const pipeline = new IndexPipeline([
88
+ new SourceIndexStage(),
89
+ new DocsIndexStage(),
90
+ new ImportResolutionStage(),
91
+ new DependencyApiStage(),
92
+ new LspEnrichmentStage(),
93
+ resolutionStage(),
94
+ testMapStage(),
95
+ historyStage(),
96
+ new EmbeddingStage(),
97
+ ]);
98
+ const context = {
99
+ db,
100
+ dbPath: this.dbPath,
101
+ walkerConfig: this.walkerConfig,
102
+ branch,
103
+ lsp: this.lspSettings,
104
+ embedder: this.embedder,
105
+ log,
106
+ files: [],
107
+ indexDependencies: this.indexDependencies,
108
+ history: this.history,
109
+ docsAutoNotes: this.docsAutoNotes,
110
+ staleSymbolIds: [],
111
+ changedSourcePaths: [],
112
+ changedDocPaths: [],
113
+ };
137
114
  try {
138
- this.saveDocsAutoNotesSetting(db);
139
- const files = await walkFiles(this.walkerConfig);
140
- const docs = await walkDocumentationFiles(this.walkerConfig);
141
- log.indexing('walk complete', { fileCount: files.length, docCount: docs.length });
142
- if (lspCoordinator) {
143
- const languages = new Set(files.map((file) => file.language));
144
- if (this.indexDependencies)
145
- languages.add('typescript');
146
- await lspCoordinator.start(languages);
147
- }
148
- const resumeAt = this.loadBuildCheckpoint(db, branch, files.length);
149
- if (resumeAt > 0) {
150
- log.indexing('resuming from checkpoint', { resumeAt, totalFiles: files.length });
151
- }
152
- db.transaction(() => {
153
- for (let i = resumeAt; i < files.length; i++) {
154
- const file = files[i];
155
- if (!file)
156
- continue;
157
- this.processFile(db, file.path, file.language, branch);
158
- this.saveBuildCheckpoint(db, branch, i + 1, files.length);
159
- }
160
- const seenDocPaths = new Set();
161
- for (const doc of docs) {
162
- seenDocPaths.add(doc.path);
163
- this.processDocumentationFile(db, doc, branch);
164
- this.upsertSeededDocumentationNote(db, doc, branch);
165
- }
166
- this.removeStaleDocumentation(db, branch, seenDocPaths);
167
- })();
168
- this.saveBuildCheckpoint(db, branch, files.length, files.length);
169
- log.indexing('files processed, resolving imports');
170
- this.resolveImports(db, branch);
171
- await this.indexDependencyDeclarations(db, lspCoordinator);
172
- await this.enrichProjectRefs(db, branch, files, lspCoordinator);
173
- refreshTestMappings(db, branch);
174
- resolveSymbolEdges(db);
115
+ await pipeline.run(context, 'build');
175
116
  this.saveLastKnownHead(db);
176
- if (this.embedder) {
177
- log.indexing('embedding started', { model: this.embeddingModel });
178
- await this.embedder.init();
179
- await this.embedStructural(db);
180
- await this.embedDocumentation(db);
181
- log.indexing('embedding complete');
182
- }
183
- if (this.history) {
184
- log.indexing('git history ingestion started');
185
- const historyOptions = typeof this.history === 'object' ? this.history : undefined;
186
- await ingestGitHistory(db, this.walkerConfig.rootDir, historyOptions);
187
- if (this.embedder) {
188
- await this.embedCommitMessages(db);
189
- }
190
- log.indexing('git history ingestion complete');
191
- }
192
117
  // Gather final DB stats for the build summary
193
- let totalSymbols = 0;
194
- try {
195
- totalSymbols = db.prepare('SELECT COUNT(*) AS cnt FROM symbols').get().cnt;
196
- }
197
- catch { /* table may not exist */ }
198
- let totalEdges = 0;
199
- try {
200
- totalEdges = db.prepare('SELECT COUNT(*) AS cnt FROM symbol_refs').get().cnt;
201
- }
202
- catch { /* table may not exist */ }
203
- let totalDocs = 0;
204
- try {
205
- totalDocs = db.prepare('SELECT COUNT(*) AS cnt FROM docs').get().cnt;
206
- }
207
- catch { /* table may not exist */ }
208
- let commitCount;
209
- try {
210
- commitCount = db.prepare('SELECT COUNT(*) AS cnt FROM commits').get().cnt;
211
- }
212
- catch { /* commits table may not exist */ }
213
- const dbSizeBytes = fs.existsSync(this.dbPath) ? fs.statSync(this.dbPath).size : undefined;
118
+ const stats = this.gatherDbStats(db);
214
119
  const indexDurationMs = Math.round(performance.now() - buildStart);
215
120
  log.startup('indexing complete', {
216
121
  dbPath: this.dbPath,
217
- dbSizeBytes,
122
+ dbSizeBytes: fs.existsSync(this.dbPath) ? fs.statSync(this.dbPath).size : undefined,
218
123
  embeddingModel: this.embeddingModel,
219
124
  embeddingReady: !!this.embedder,
220
- totalFiles: files.length,
221
- totalSymbols,
222
- totalDocs,
223
- totalEdges,
224
- commitCount,
125
+ totalFiles: context.files.length,
126
+ ...stats,
225
127
  indexDurationMs,
226
128
  });
227
129
  }
228
130
  finally {
229
- if (lspCoordinator) {
230
- await lspCoordinator.dispose();
231
- }
232
131
  db.close();
233
132
  }
234
133
  }
235
134
  /**
236
135
  * Incrementally re-processes only the listed files and updates the DB.
237
- * Symbols and imports for changed files are deleted then re-inserted.
136
+ *
137
+ * Delegates to the same pipeline as `build()` — each stage handles
138
+ * `'update'` mode by operating only on the changed-file set.
238
139
  *
239
140
  * @param changedFiles Absolute paths of files that have changed.
240
141
  */
241
142
  async update(changedFiles) {
242
143
  const db = openDb(this.dbPath);
243
144
  const branch = this.resolveBranch();
244
- const lspCoordinator = this.createLspEnrichmentCoordinator();
245
- const enrichedFiles = [];
246
- /** Symbol IDs whose embeddings should be removed (from deleted/re-processed files). */
247
- const staleSymbolIds = [];
248
- /** Paths of changed source files — used to look up new file IDs for scoped embedding. */
249
- const changedSourcePaths = [];
250
- /** Paths of changed doc files — used to look up new doc IDs for scoped embedding. */
251
- const changedDocPaths = [];
145
+ const log = getLogger();
146
+ const pipeline = new IndexPipeline([
147
+ new SourceIndexStage(),
148
+ new DocsIndexStage(),
149
+ new ImportResolutionStage(),
150
+ new DependencyApiStage(),
151
+ new LspEnrichmentStage(),
152
+ resolutionStage(),
153
+ testMapStage(),
154
+ historyStage(),
155
+ new EmbeddingStage(),
156
+ ]);
157
+ const context = {
158
+ db,
159
+ dbPath: this.dbPath,
160
+ walkerConfig: this.walkerConfig,
161
+ branch,
162
+ lsp: this.lspSettings,
163
+ embedder: this.embedder,
164
+ log,
165
+ files: [],
166
+ indexDependencies: this.indexDependencies,
167
+ history: this.history,
168
+ docsAutoNotes: this.docsAutoNotes,
169
+ changedFiles,
170
+ staleSymbolIds: [],
171
+ changedSourcePaths: [],
172
+ changedDocPaths: [],
173
+ };
252
174
  try {
253
- this.saveDocsAutoNotesSetting(db);
254
- const docs = await walkDocumentationFiles(this.walkerConfig);
255
- const docsByPath = new Map(docs.map(doc => [doc.path, doc]));
256
- if (lspCoordinator) {
257
- const languages = new Set();
258
- for (const filePath of changedFiles) {
259
- if (!fs.existsSync(filePath))
260
- continue;
261
- const language = detectLanguageForPath(filePath, this.walkerConfig);
262
- if (language)
263
- languages.add(language);
264
- }
265
- if (this.indexDependencies)
266
- languages.add('typescript');
267
- await lspCoordinator.start(languages);
268
- }
269
- db.transaction(() => {
270
- for (const filePath of changedFiles) {
271
- // If the file no longer exists, remove it from the DB
272
- if (!fs.existsSync(filePath)) {
273
- const row = db.prepare('SELECT id FROM files WHERE path = ? AND branch = ?').get(filePath, branch);
274
- if (row) {
275
- // Collect symbol IDs for embedding cleanup before cascade-delete removes them.
276
- const symRows = db.prepare('SELECT id FROM symbols WHERE file_id = ?').all(row.id);
277
- for (const s of symRows)
278
- staleSymbolIds.push(s.id);
279
- // Null out any resolved_id references pointing to this file
280
- db.prepare('UPDATE file_imports SET resolved_id = NULL WHERE resolved_id = ?').run(row.id);
281
- db.prepare('DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)').run(row.id);
282
- db.prepare('DELETE FROM files WHERE id = ?').run(row.id);
283
- }
284
- this.deleteDocumentationByPath(db, filePath, branch);
285
- continue;
286
- }
287
- const language = detectLanguageForPath(filePath, this.walkerConfig);
288
- if (language) {
289
- enrichedFiles.push({ path: filePath, language });
290
- changedSourcePaths.push(filePath);
291
- // Null out resolved_id references pointing to this file before deletion
292
- const existingRow = db.prepare('SELECT id FROM files WHERE path = ? AND branch = ?').get(filePath, branch);
293
- if (existingRow) {
294
- // Collect symbol IDs for embedding cleanup before cascade-delete removes them.
295
- const symRows = db.prepare('SELECT id FROM symbols WHERE file_id = ?').all(existingRow.id);
296
- for (const s of symRows)
297
- staleSymbolIds.push(s.id);
298
- db.prepare('UPDATE file_imports SET resolved_id = NULL WHERE resolved_id = ?').run(existingRow.id);
299
- db.prepare('UPDATE symbol_refs SET callee_id = NULL WHERE callee_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(existingRow.id);
300
- db.prepare('UPDATE type_refs SET type_id = NULL WHERE type_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(existingRow.id);
301
- db.prepare('UPDATE symbol_relationships SET target_symbol_id = NULL WHERE target_symbol_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(existingRow.id);
302
- db.prepare('DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)').run(existingRow.id);
303
- }
304
- // Delete existing rows for this file (cascade handles symbols/imports)
305
- db.prepare('DELETE FROM files WHERE path = ? AND branch = ?').run(filePath, branch);
306
- this.processFile(db, filePath, language, branch);
307
- }
308
- const changedDoc = docsByPath.get(filePath);
309
- if (changedDoc) {
310
- this.processDocumentationFile(db, changedDoc, branch);
311
- this.upsertSeededDocumentationNote(db, changedDoc, branch);
312
- changedDocPaths.push(filePath);
313
- }
314
- else {
315
- this.deleteDocumentationByPath(db, filePath, branch);
316
- }
317
- }
318
- })();
319
- this.resolveImports(db, branch);
320
- await this.indexDependencyDeclarations(db, lspCoordinator);
321
- await this.enrichProjectRefs(db, branch, enrichedFiles, lspCoordinator);
322
- refreshTestMappings(db, branch);
323
- if (this.history) {
324
- const historyOptions = typeof this.history === 'object' ? this.history : undefined;
325
- await ingestGitHistory(db, this.walkerConfig.rootDir, historyOptions);
326
- }
327
- if (this.embedder) {
328
- await this.embedder.init();
329
- // Clean up orphaned symbol embeddings for symbols that were deleted/replaced.
330
- this.deleteSymbolEmbeddings(db, staleSymbolIds);
331
- // Resolve the new file IDs for the changed source files.
332
- const changedFileIds = [];
333
- for (const p of changedSourcePaths) {
334
- const row = db.prepare('SELECT id FROM files WHERE path = ? AND branch = ?').get(p, branch);
335
- if (row)
336
- changedFileIds.push(row.id);
337
- }
338
- // Resolve the new doc IDs for the changed documentation files.
339
- const changedDocIds = [];
340
- for (const p of changedDocPaths) {
341
- const row = db.prepare('SELECT id FROM docs WHERE path = ? AND branch = ?').get(p, branch);
342
- if (row)
343
- changedDocIds.push(row.id);
344
- }
345
- await this.embedStructural(db, changedFileIds);
346
- await this.embedDocumentation(db, changedDocIds);
347
- if (this.history) {
348
- await this.embedCommitMessages(db);
349
- }
350
- }
351
- resolveSymbolEdges(db);
175
+ await pipeline.run(context, 'update');
352
176
  this.saveLastKnownHead(db);
353
177
  }
354
178
  finally {
355
- if (lspCoordinator) {
356
- await lspCoordinator.dispose();
357
- }
358
179
  db.close();
359
180
  }
360
181
  }
361
182
  /**
362
183
  * Writes an LLM-generated summary for a symbol to `symbol_summaries`.
363
- * If an `EmbeddingProvider` was configured, also embeds the summary text
364
- * and persists it to `symbol_semantic_embeddings`.
365
- *
366
- * @param symbolId Row ID of the symbol in the `symbols` table.
367
- * @param summary Natural-language summary text.
368
- * @param model Name of the model that produced the summary.
369
184
  */
370
185
  async ingestSummary(symbolId, summary, model = 'unknown') {
371
186
  const db = openDb(this.dbPath);
@@ -401,404 +216,7 @@ export class IndexBuilder {
401
216
  db.close();
402
217
  }
403
218
  }
404
- // ─── Private helpers ──────────────────────────────────────────────────────
405
- /** Parse one file, extract symbols/imports/callRefs, and insert into the DB. */
406
- processFile(db, filePath, language, branch) {
407
- let source;
408
- try {
409
- source = fs.readFileSync(filePath, 'utf8');
410
- }
411
- catch {
412
- return; // Skip unreadable files
413
- }
414
- const hash = crypto.createHash('sha256').update(source).digest('hex');
415
- // Check if the file is already up-to-date
416
- const existing = db.prepare('SELECT id, last_hash FROM files WHERE path = ? AND branch = ?').get(filePath, branch);
417
- if (existing?.last_hash === hash)
418
- return;
419
- const sizeBytes = Buffer.byteLength(source, 'utf8');
420
- // Upsert the file row
421
- let fileId;
422
- if (existing) {
423
- db.prepare(`UPDATE files SET language = ?, size_bytes = ?, last_hash = ?, source = ?, indexed_at = unixepoch()
424
- WHERE id = ?`).run(language, sizeBytes, hash, source, existing.id);
425
- fileId = existing.id;
426
- // Remove stale symbols / imports / external deps (also clean up FTS5 index)
427
- db.prepare(`DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)`).run(fileId);
428
- db.prepare('DELETE FROM symbol_relationships WHERE file_id = ?').run(fileId);
429
- db.prepare('DELETE FROM type_refs WHERE file_id = ?').run(fileId);
430
- // NULL out cross-file FK references that point to symbols in this file
431
- db.prepare('UPDATE symbol_refs SET callee_id = NULL WHERE callee_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(fileId);
432
- db.prepare('UPDATE type_refs SET type_id = NULL WHERE type_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(fileId);
433
- db.prepare('UPDATE symbol_relationships SET target_symbol_id = NULL WHERE target_symbol_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(fileId);
434
- db.prepare('DELETE FROM symbols WHERE file_id = ?').run(fileId);
435
- db.prepare('DELETE FROM file_imports WHERE file_id = ?').run(fileId);
436
- db.prepare('DELETE FROM external_deps WHERE file_id = ?').run(fileId);
437
- db.prepare('DELETE FROM api_routes WHERE file_id = ?').run(fileId);
438
- // Delete stale annotations so cascade-independent re-index doesn't accumulate duplicates.
439
- db.prepare('DELETE FROM annotations WHERE file_id = ?').run(fileId);
440
- }
441
- else {
442
- const info = db
443
- .prepare(`INSERT INTO files (path, branch, language, size_bytes, last_hash, source)
444
- VALUES (?, ?, ?, ?, ?, ?)`)
445
- .run(filePath, branch, language, sizeBytes, hash, source);
446
- fileId = Number(info.lastInsertRowid);
447
- }
448
- // Parse the source
449
- const tree = this.pool.parse(language, source);
450
- if (!tree)
451
- return;
452
- const extractor = EXTRACTORS[language];
453
- if (!extractor)
454
- return;
455
- const result = extractor.extract(tree, source, filePath);
456
- // Insert symbols and keep FTS5 index in sync
457
- const insertSymbol = db.prepare(`INSERT INTO symbols (file_id, name, kind, start_line, end_line, signature, doc_comment)
458
- VALUES (?, ?, ?, ?, ?, ?, ?)`);
459
- const insertFts = db.prepare(`INSERT INTO symbols_fts(rowid, name, signature, kind) VALUES (?, ?, ?, ?)`);
460
- // Map from callerSymbol name → symbol row ID (for call refs)
461
- const symbolIdMap = new Map();
462
- for (const sym of result.symbols) {
463
- const info = insertSymbol.run(fileId, sym.name, sym.kind, sym.startLine, sym.endLine, sym.signature ?? null, sym.docComment ?? null);
464
- const symId = Number(info.lastInsertRowid);
465
- symbolIdMap.set(sym.name, symId);
466
- insertFts.run(symId, sym.name, buildStructuralEmbeddingText({
467
- name: sym.name,
468
- signature: sym.signature ?? null,
469
- }), sym.kind);
470
- }
471
- const insertRoute = db.prepare(`INSERT INTO api_routes (file_id, method, path, handler_id, handler_name, framework, line, middleware)
472
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
473
- for (const route of result.routes) {
474
- insertRoute.run(fileId, route.method, route.path, symbolIdMap.get(route.handler) ?? null, route.handler, route.framework, route.line, route.middleware ? JSON.stringify(route.middleware) : null);
475
- }
476
- // Insert raw imports (resolved_id will be filled in resolveImports())
477
- const insertImport = db.prepare(`INSERT INTO file_imports (file_id, raw_import) VALUES (?, ?)`);
478
- for (const imp of result.imports) {
479
- insertImport.run(fileId, imp.source);
480
- }
481
- // Insert call refs (callee_id resolved in call-graph phase)
482
- const insertCallRef = db.prepare(`INSERT INTO symbol_refs (caller_id, callee_name, call_line, call_character, call_kind)
483
- VALUES (?, ?, ?, ?, ?)`);
484
- for (const ref of result.callRefs) {
485
- const callerId = symbolIdMap.get(ref.callerSymbol);
486
- if (callerId !== undefined) {
487
- insertCallRef.run(callerId, ref.calleeRaw, ref.line, ref.character ?? null, ref.callKind ?? 'direct');
488
- }
489
- }
490
- // Insert relationships (target_symbol_id resolved in resolveSymbolEdges phase)
491
- const insertRelationship = db.prepare(`INSERT INTO symbol_relationships (file_id, source_symbol_id, target_symbol_name, relationship_type, line, character)
492
- VALUES (?, ?, ?, ?, ?, ?)`);
493
- for (const rel of result.relationships) {
494
- const sourceId = symbolIdMap.get(rel.fromSymbol) ?? null;
495
- insertRelationship.run(fileId, sourceId, rel.toSymbol, rel.kind, rel.line, rel.character ?? null);
496
- }
497
- // Insert type refs (type_id resolved in resolveSymbolEdges phase)
498
- const insertTypeRef = db.prepare(`INSERT INTO type_refs (file_id, symbol_id, type_name, type_name_bare, ref_kind, ref_line, ref_character)
499
- VALUES (?, ?, ?, ?, ?, ?, ?)`);
500
- for (const ref of result.typeRefs) {
501
- const symId = symbolIdMap.get(ref.enclosingSymbol) ?? null;
502
- insertTypeRef.run(fileId, symId, ref.typeRaw, normalizeTypeName(ref.typeRaw), ref.refKind, ref.line, ref.character ?? null);
503
- }
504
- }
505
- processDocumentationFile(db, doc, branch) {
506
- const existing = db.prepare('SELECT id, content_hash FROM docs WHERE path = ? AND branch = ?').get(doc.path, branch);
507
- if (existing?.content_hash === doc.hash) {
508
- return;
509
- }
510
- let docId;
511
- if (existing) {
512
- db.prepare(`UPDATE docs
513
- SET kind = ?, title = ?, content = ?, content_hash = ?, indexed_at = unixepoch()
514
- WHERE id = ?`).run(doc.kind, doc.title, doc.content, doc.hash, existing.id);
515
- docId = existing.id;
516
- }
517
- else {
518
- const info = db.prepare(`INSERT INTO docs (path, branch, kind, title, content, content_hash)
519
- VALUES (?, ?, ?, ?, ?, ?)`).run(doc.path, branch, doc.kind, doc.title, doc.content, doc.hash);
520
- docId = Number(info.lastInsertRowid);
521
- }
522
- const existingSections = db.prepare('SELECT id, section_index FROM doc_sections WHERE doc_id = ?').all(docId);
523
- const insertSection = db.prepare(`INSERT INTO doc_sections (
524
- doc_id, section_index, title, depth, heading_path, line_start, line_end, content, content_hash
525
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
526
- ON CONFLICT(doc_id, section_index) DO UPDATE SET
527
- title = excluded.title,
528
- depth = excluded.depth,
529
- heading_path = excluded.heading_path,
530
- line_start = excluded.line_start,
531
- line_end = excluded.line_end,
532
- content = excluded.content,
533
- content_hash = excluded.content_hash`);
534
- const activeSectionIndexes = new Set();
535
- for (const chunk of doc.chunks) {
536
- activeSectionIndexes.add(chunk.sectionIndex);
537
- insertSection.run(docId, chunk.sectionIndex, chunk.title, chunk.depth, JSON.stringify(chunk.headingPath), chunk.lineStart, chunk.lineEnd, chunk.content, chunk.hash);
538
- }
539
- const staleSectionIds = existingSections
540
- .filter(section => !activeSectionIndexes.has(section.section_index))
541
- .map(section => section.id);
542
- this.deleteDocSectionEmbeddings(db, staleSectionIds);
543
- if (staleSectionIds.length > 0) {
544
- db.prepare(`DELETE FROM doc_sections
545
- WHERE id IN (${staleSectionIds.map(() => '?').join(', ')})`).run(...staleSectionIds);
546
- }
547
- }
548
- upsertSeededDocumentationNote(db, doc, branch) {
549
- if (!this.docsAutoNotes)
550
- return;
551
- const key = inferSeededDocNoteKey(doc);
552
- if (!key)
553
- return;
554
- const scope = buildDocNoteScope(doc.path, branch);
555
- const existing = db.prepare('SELECT content, source_hash FROM notes WHERE key = ? AND scope = ?').get(key, scope);
556
- if (existing?.content === doc.content && existing.source_hash === doc.hash) {
557
- return;
558
- }
559
- db.prepare(`INSERT INTO notes (key, scope, content, model, source_hash, created_at, updated_at)
560
- VALUES (?, ?, ?, ?, ?, unixepoch(), unixepoch())
561
- ON CONFLICT(key, scope) DO UPDATE SET
562
- content = excluded.content,
563
- model = excluded.model,
564
- source_hash = excluded.source_hash,
565
- updated_at = unixepoch()`).run(key, scope, doc.content, 'system:auto-doc-seed', doc.hash);
566
- }
567
- removeStaleDocumentation(db, branch, retainedPaths) {
568
- const docs = db.prepare('SELECT id, path FROM docs WHERE branch = ?').all(branch);
569
- for (const doc of docs) {
570
- if (!retainedPaths.has(doc.path)) {
571
- this.deleteDocumentationById(db, doc.id);
572
- }
573
- }
574
- }
575
- deleteDocumentationByPath(db, docPath, branch) {
576
- const row = db.prepare('SELECT id FROM docs WHERE path = ? AND branch = ?').get(docPath, branch);
577
- if (!row)
578
- return;
579
- this.deleteDocumentationById(db, row.id);
580
- }
581
- deleteDocumentationById(db, docId) {
582
- const sectionIds = db.prepare('SELECT id FROM doc_sections WHERE doc_id = ?').all(docId);
583
- this.deleteDocSectionEmbeddings(db, sectionIds.map(row => row.id));
584
- db.prepare('DELETE FROM docs WHERE id = ?').run(docId);
585
- }
586
- deleteDocSectionEmbeddings(db, sectionIds) {
587
- if (sectionIds.length === 0)
588
- return;
589
- const hasEmbeddingsTable = db.prepare("SELECT 1 AS present FROM sqlite_master WHERE type IN ('table', 'virtual table') AND name = 'doc_section_embeddings'").get();
590
- if (!hasEmbeddingsTable)
591
- return;
592
- db.prepare(`DELETE FROM doc_section_embeddings WHERE rowid IN (${sectionIds.map(() => '?').join(', ')})`).run(...sectionIds);
593
- }
594
- /**
595
- * Remove orphaned rows from the `symbol_embeddings` vec0 table for symbols
596
- * that have been deleted (e.g. file re-processed or removed).
597
- */
598
- deleteSymbolEmbeddings(db, symbolIds) {
599
- if (symbolIds.length === 0)
600
- return;
601
- const hasEmbeddingsTable = db.prepare("SELECT 1 AS present FROM sqlite_master WHERE type IN ('table', 'virtual table') AND name = 'symbol_embeddings'").get();
602
- if (!hasEmbeddingsTable)
603
- return;
604
- db.prepare(`DELETE FROM symbol_embeddings WHERE rowid IN (${symbolIds.map(() => '?').join(', ')})`).run(...symbolIds);
605
- }
606
- /**
607
- * Second pass: resolve raw_import strings to file IDs in the
608
- * `file_imports.resolved_id` column. Also populates `external_deps` for
609
- * any import that resolves to an external package.
610
- */
611
- resolveImports(db, branch) {
612
- const rootDir = this.walkerConfig.rootDir;
613
- // Fetch all unresolved imports with their file's path, language, and file_id
614
- const rows = db
615
- .prepare(`SELECT fi.id, fi.file_id, fi.raw_import, f.path, f.language
616
- FROM file_imports fi
617
- JOIN files f ON f.id = fi.file_id
618
- WHERE fi.resolved_id IS NULL AND f.branch = ?`)
619
- .all(branch);
620
- const updateResolved = db.prepare('UPDATE file_imports SET resolved_id = ? WHERE id = ?');
621
- const insertExternalDep = db.prepare('INSERT OR IGNORE INTO external_deps (file_id, package) VALUES (?, ?)');
622
- for (const row of rows) {
623
- const resolved = this.resolver.resolve({ source: row.raw_import, importedNames: [] }, row.path, rootDir, row.language);
624
- if (resolved.resolvedPath) {
625
- const targetFile = db
626
- .prepare('SELECT id FROM files WHERE path = ? AND branch = ?')
627
- .get(resolved.resolvedPath, branch);
628
- if (targetFile) {
629
- updateResolved.run(targetFile.id, row.id);
630
- }
631
- }
632
- else if (resolved.isExternal && resolved.externalName) {
633
- insertExternalDep.run(row.file_id, resolved.externalName);
634
- }
635
- }
636
- }
637
- async indexDependencyDeclarations(db, lspCoordinator) {
638
- db.prepare('DELETE FROM external_symbols').run();
639
- if (!this.indexDependencies)
640
- return;
641
- const directDependencies = this.loadDirectDependencies();
642
- if (directDependencies.size === 0)
643
- return;
644
- const extractor = EXTRACTORS.typescript;
645
- if (!extractor)
646
- return;
647
- const insertExternalSymbol = db.prepare(`INSERT OR IGNORE INTO external_symbols
648
- (
649
- package_name,
650
- package_version,
651
- source_ref,
652
- symbol_name,
653
- symbol_kind,
654
- signature,
655
- doc_comment,
656
- resolved_type_signature,
657
- resolved_return_type,
658
- definition_uri,
659
- definition_path
660
- )
661
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
662
- for (const [packageName, declaredVersion] of directDependencies) {
663
- const packageDir = path.join(this.walkerConfig.rootDir, 'node_modules', packageName);
664
- if (!fs.existsSync(packageDir) || !fs.statSync(packageDir).isDirectory())
665
- continue;
666
- const packageVersion = this.readInstalledPackageVersion(packageDir) ?? declaredVersion ?? null;
667
- const declarationFiles = this.collectDeclarationFiles(packageDir);
668
- for (const declarationFile of declarationFiles) {
669
- const source = fs.readFileSync(declarationFile, 'utf8');
670
- const tree = this.pool.parse('typescript', source);
671
- if (!tree)
672
- continue;
673
- const result = extractor.extract(tree, source, declarationFile);
674
- const declarationSymbols = result.symbols.filter((symbol) => this.shouldIndexDependencySymbol(symbol));
675
- const enrichmentRows = lspCoordinator
676
- ? await lspCoordinator.enrich({
677
- filePath: declarationFile,
678
- language: 'typescript',
679
- source,
680
- targets: declarationSymbols.map((symbol) => ({
681
- line: symbol.startLine,
682
- character: symbol.startCharacter ?? 0,
683
- })),
684
- })
685
- : declarationSymbols.map(() => null);
686
- for (let i = 0; i < declarationSymbols.length; i++) {
687
- const symbol = declarationSymbols[i];
688
- if (!symbol)
689
- continue;
690
- const metadata = enrichmentRows[i];
691
- insertExternalSymbol.run(packageName, packageVersion, declarationFile, symbol.name, symbol.kind, symbol.signature, symbol.docComment ?? null, metadata?.resolvedTypeSignature ?? null, metadata?.resolvedReturnType ?? null, metadata?.definitionUri ?? null, metadata?.definitionPath ?? null);
692
- }
693
- }
694
- }
695
- }
696
- createLspEnrichmentCoordinator() {
697
- if (!this.lspSettings?.enabled) {
698
- return null;
699
- }
700
- return new LspEnrichmentCoordinator(this.lspSettings, this.walkerConfig.rootDir);
701
- }
702
- async enrichProjectRefs(db, branch, files, lspCoordinator) {
703
- if (!lspCoordinator || files.length === 0)
704
- return;
705
- const selectSymbols = db.prepare(`SELECT s.id, s.name, s.signature, s.start_line
706
- FROM symbols s
707
- JOIN files f ON f.id = s.file_id
708
- WHERE f.path = ? AND f.branch = ?
709
- ORDER BY s.id`);
710
- const selectCallRefs = db.prepare(`SELECT sr.id, sr.call_line, sr.call_character
711
- FROM symbol_refs sr
712
- JOIN symbols s ON s.id = sr.caller_id
713
- JOIN files f ON f.id = s.file_id
714
- WHERE f.path = ? AND f.branch = ?
715
- ORDER BY sr.id`);
716
- const selectTypeRefs = db.prepare(`SELECT tr.id, tr.ref_line, tr.ref_character
717
- FROM type_refs tr
718
- JOIN files f ON f.id = tr.file_id
719
- WHERE f.path = ? AND f.branch = ?
720
- ORDER BY tr.id`);
721
- const selectRelationships = db.prepare(`SELECT sr.id, sr.line, sr.character
722
- FROM symbol_relationships sr
723
- JOIN files f ON f.id = sr.file_id
724
- WHERE f.path = ? AND f.branch = ? AND sr.line IS NOT NULL
725
- ORDER BY sr.id`);
726
- const updateSymbol = db.prepare(`UPDATE symbols
727
- SET resolved_type_signature = ?, resolved_return_type = ?, definition_uri = ?, definition_path = ?
728
- WHERE id = ?`);
729
- const updateSymbolFts = db.prepare('UPDATE symbols_fts SET signature = ? WHERE rowid = ?');
730
- const updateCallRef = db.prepare(`UPDATE symbol_refs
731
- SET resolved_type_signature = ?, resolved_return_type = ?, definition_uri = ?, definition_path = ?
732
- WHERE id = ?`);
733
- const updateTypeRef = db.prepare(`UPDATE type_refs
734
- SET resolved_type_signature = ?, definition_uri = ?, definition_path = ?
735
- WHERE id = ?`);
736
- const updateRelationship = db.prepare(`UPDATE symbol_relationships
737
- SET definition_uri = ?, definition_path = ?
738
- WHERE id = ?`);
739
- for (const file of files) {
740
- if (!file || !fs.existsSync(file.path))
741
- continue;
742
- let source;
743
- try {
744
- source = fs.readFileSync(file.path, 'utf8');
745
- }
746
- catch {
747
- continue;
748
- }
749
- const tagged = [];
750
- const symbols = selectSymbols.all(file.path, branch);
751
- for (const s of symbols) {
752
- tagged.push({ table: 'symbol', rowId: s.id, line: s.start_line, character: 0, name: s.name, signature: s.signature });
753
- }
754
- const callRefs = selectCallRefs.all(file.path, branch);
755
- for (const cr of callRefs) {
756
- tagged.push({ table: 'callRef', rowId: cr.id, line: cr.call_line, character: cr.call_character ?? 0 });
757
- }
758
- const typeRefs = selectTypeRefs.all(file.path, branch);
759
- for (const tr of typeRefs) {
760
- tagged.push({ table: 'typeRef', rowId: tr.id, line: tr.ref_line, character: tr.ref_character ?? 0 });
761
- }
762
- const relationships = selectRelationships.all(file.path, branch);
763
- for (const r of relationships) {
764
- tagged.push({ table: 'relationship', rowId: r.id, line: r.line, character: r.character ?? 0 });
765
- }
766
- if (tagged.length === 0)
767
- continue;
768
- const metadata = await lspCoordinator.enrich({
769
- filePath: file.path,
770
- language: file.language,
771
- source,
772
- targets: tagged.map(t => ({ line: t.line, character: t.character })),
773
- });
774
- for (let i = 0; i < tagged.length; i++) {
775
- const tag = tagged[i];
776
- const m = metadata[i];
777
- if (!m)
778
- continue;
779
- switch (tag.table) {
780
- case 'symbol':
781
- updateSymbol.run(m.resolvedTypeSignature, m.resolvedReturnType, m.definitionUri, m.definitionPath, tag.rowId);
782
- updateSymbolFts.run(buildStructuralEmbeddingText({
783
- name: tag.name,
784
- signature: tag.signature ?? null,
785
- resolvedTypeSignature: m.resolvedTypeSignature,
786
- resolvedReturnType: m.resolvedReturnType,
787
- }), tag.rowId);
788
- break;
789
- case 'callRef':
790
- updateCallRef.run(m.resolvedTypeSignature, m.resolvedReturnType, m.definitionUri, m.definitionPath, tag.rowId);
791
- break;
792
- case 'typeRef':
793
- updateTypeRef.run(m.resolvedTypeSignature, m.definitionUri, m.definitionPath, tag.rowId);
794
- break;
795
- case 'relationship':
796
- updateRelationship.run(m.definitionUri, m.definitionPath, tag.rowId);
797
- break;
798
- }
799
- }
800
- }
801
- }
219
+ // ─── Private helpers (minimal — most logic lives in stages) ─────────────
802
220
  resolveBranch() {
803
221
  if (this.walkerConfig.branch)
804
222
  return this.walkerConfig.branch;
@@ -806,245 +224,69 @@ export class IndexBuilder {
806
224
  }
807
225
  saveLastKnownHead(db) {
808
226
  const headSha = this.readGitValue(['rev-parse', 'HEAD']);
809
- if (headSha) {
227
+ if (headSha)
810
228
  setLoreMeta(db, LORE_META_LAST_HEAD_SHA, headSha);
811
- }
812
- }
813
- saveDocsAutoNotesSetting(db) {
814
- setLoreMeta(db, 'docs_auto_notes', this.docsAutoNotes ? '1' : '0');
815
229
  }
816
230
  readGitValue(args) {
817
231
  try {
818
- const value = execFileSync('git', ['-C', this.walkerConfig.rootDir, ...args], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] }).trim();
819
- return value || undefined;
232
+ return execFileSync('git', ['-C', this.walkerConfig.rootDir, ...args], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] }).trim() || undefined;
820
233
  }
821
234
  catch {
822
235
  return undefined;
823
236
  }
824
237
  }
825
- loadDirectDependencies() {
826
- const packageJsonPath = path.join(this.walkerConfig.rootDir, 'package.json');
827
- if (!fs.existsSync(packageJsonPath))
828
- return new Map();
829
- const raw = fs.readFileSync(packageJsonPath, 'utf8');
830
- const pkg = JSON.parse(raw);
831
- const deps = new Map();
832
- for (const section of [pkg.dependencies, pkg.devDependencies, pkg.peerDependencies]) {
833
- if (!section)
834
- continue;
835
- for (const [name, version] of Object.entries(section)) {
836
- if (!deps.has(name))
837
- deps.set(name, version);
838
- }
839
- }
840
- return deps;
841
- }
842
- readInstalledPackageVersion(packageDir) {
843
- const packageJsonPath = path.join(packageDir, 'package.json');
844
- if (!fs.existsSync(packageJsonPath))
845
- return undefined;
846
- const raw = fs.readFileSync(packageJsonPath, 'utf8');
847
- const pkg = JSON.parse(raw);
848
- return pkg.version;
849
- }
850
- collectDeclarationFiles(packageDir) {
851
- const declarations = [];
852
- const stack = [packageDir];
853
- while (stack.length > 0) {
854
- const currentDir = stack.pop();
855
- if (!currentDir)
856
- continue;
857
- const entries = fs.readdirSync(currentDir, { withFileTypes: true });
858
- for (const entry of entries) {
859
- if (entry.name === 'node_modules')
860
- continue;
861
- const fullPath = path.join(currentDir, entry.name);
862
- if (entry.isDirectory()) {
863
- stack.push(fullPath);
864
- continue;
865
- }
866
- if (entry.isFile() && fullPath.endsWith('.d.ts')) {
867
- declarations.push(fullPath);
868
- }
869
- }
870
- }
871
- return declarations;
872
- }
873
- shouldIndexDependencySymbol(symbol) {
874
- if (!isPublicDeclarationSurfaceSymbol(symbol))
875
- return false;
876
- if (symbol.declarationSurface)
877
- return true;
878
- return !this.hasImplementationBody(symbol);
879
- }
880
- hasImplementationBody(symbol) {
881
- const node = symbol.astNode;
882
- if (!node)
883
- return false;
884
- if (node.type === 'arrow_function' ||
885
- node.type === 'function_expression' ||
886
- node.type === 'generator_function') {
887
- return true;
888
- }
889
- if (node.type === 'class_declaration' ||
890
- node.type === 'interface_declaration' ||
891
- node.type === 'type_alias_declaration') {
892
- return false;
893
- }
894
- const bodyNode = node.childForFieldName('body');
895
- if (!bodyNode)
896
- return false;
897
- return bodyNode.namedChildCount > 0 || bodyNode.text.trim() !== '';
898
- }
899
- loadBuildCheckpoint(db, branch, totalFiles) {
900
- const raw = getLoreMeta(db, LORE_META_INDEX_CHECKPOINT);
901
- if (!raw)
902
- return 0;
238
+ gatherDbStats(db) {
239
+ let totalSymbols = 0;
903
240
  try {
904
- const parsed = JSON.parse(raw);
905
- if (parsed.branch !== branch || parsed.rootDir !== this.walkerConfig.rootDir)
906
- return 0;
907
- const nextFileIndex = parsed.nextFileIndex ?? 0;
908
- return Math.max(0, Math.min(totalFiles, nextFileIndex));
909
- }
910
- catch {
911
- return 0;
912
- }
913
- }
914
- saveBuildCheckpoint(db, branch, nextFileIndex, totalFiles) {
915
- const checkpoint = {
916
- branch,
917
- rootDir: this.walkerConfig.rootDir,
918
- totalFiles,
919
- nextFileIndex,
920
- updatedAt: Math.floor(Date.now() / 1000),
921
- };
922
- setLoreMeta(db, LORE_META_INDEX_CHECKPOINT, JSON.stringify(checkpoint));
923
- }
924
- /**
925
- * Embed structural symbol signatures in batches and persist results to
926
- * the `symbol_embeddings` vec0 virtual table.
927
- *
928
- * Also stores the embedding model name and dims in `lore_meta` and
929
- * creates the vec0 tables if they don't exist yet.
930
- *
931
- * @param fileIds When provided, only embed symbols belonging to these file
932
- * IDs (incremental mode). When omitted, embeds all symbols
933
- * (full-build mode).
934
- */
935
- async embedStructural(db, fileIds) {
936
- const embedder = this.embedder;
937
- setLoreMeta(db, 'embedding_model', embedder.modelName);
938
- setLoreMeta(db, 'embedding_dims', String(embedder.dims));
939
- createVec0Tables(db, embedder.dims);
940
- // Build the query — scoped to specific files when doing an incremental update.
941
- const baseQuery = `SELECT id, name, signature, resolved_type_signature, resolved_return_type
942
- FROM symbols
943
- WHERE (signature IS NOT NULL
944
- OR resolved_type_signature IS NOT NULL
945
- OR resolved_return_type IS NOT NULL)`;
946
- let symbols;
947
- if (fileIds && fileIds.length > 0) {
948
- symbols = db
949
- .prepare(`${baseQuery} AND file_id IN (${fileIds.map(() => '?').join(', ')})`)
950
- .all(...fileIds);
951
- }
952
- else {
953
- symbols = db.prepare(baseQuery).all();
954
- }
955
- const insertEmbed = db.prepare('INSERT OR REPLACE INTO symbol_embeddings(rowid, embedding) VALUES (CAST(? AS INTEGER), json(?))');
956
- for (let i = 0; i < symbols.length; i += EMBED_BATCH_SIZE) {
957
- const batch = symbols.slice(i, i + EMBED_BATCH_SIZE);
958
- const texts = batch.map((symbol) => buildStructuralEmbeddingText({
959
- name: symbol.name,
960
- signature: symbol.signature,
961
- resolvedTypeSignature: symbol.resolved_type_signature,
962
- resolvedReturnType: symbol.resolved_return_type,
963
- }));
964
- const embeddings = await embedder.embed(texts);
965
- db.transaction(() => {
966
- for (let j = 0; j < batch.length; j++) {
967
- const sym = batch[j];
968
- if (sym)
969
- insertEmbed.run(sym.id, JSON.stringify(embeddings[j]));
970
- }
971
- })();
241
+ totalSymbols = db.prepare('SELECT COUNT(*) AS cnt FROM symbols').get().cnt;
972
242
  }
973
- }
974
- /**
975
- * Embed documentation sections in batches and persist results to
976
- * the `doc_section_embeddings` vec0 virtual table.
977
- *
978
- * @param docIds When provided, only embed sections belonging to these
979
- * doc IDs (incremental mode). When omitted, embeds all
980
- * sections (full-build mode).
981
- */
982
- async embedDocumentation(db, docIds) {
983
- const embedder = this.embedder;
984
- db.exec(`
985
- CREATE VIRTUAL TABLE IF NOT EXISTS doc_section_embeddings USING vec0(
986
- embedding FLOAT[${embedder.dims}]
987
- );
988
- `);
989
- let sections;
990
- if (docIds && docIds.length > 0) {
991
- sections = db.prepare(`SELECT id, title, content
992
- FROM doc_sections
993
- WHERE doc_id IN (${docIds.map(() => '?').join(', ')})
994
- ORDER BY id`).all(...docIds);
995
- }
996
- else {
997
- sections = db.prepare(`SELECT id, title, content
998
- FROM doc_sections
999
- ORDER BY id`).all();
243
+ catch { /* */ }
244
+ let totalEdges = 0;
245
+ try {
246
+ totalEdges = db.prepare('SELECT COUNT(*) AS cnt FROM symbol_refs').get().cnt;
1000
247
  }
1001
- if (sections.length === 0)
1002
- return;
1003
- const insertEmbed = db.prepare('INSERT OR REPLACE INTO doc_section_embeddings(rowid, embedding) VALUES (CAST(? AS INTEGER), json(?))');
1004
- for (let i = 0; i < sections.length; i += EMBED_BATCH_SIZE) {
1005
- const batch = sections.slice(i, i + EMBED_BATCH_SIZE);
1006
- const texts = batch.map(section => section.content || section.title);
1007
- const embeddings = await embedder.embed(texts);
1008
- db.transaction(() => {
1009
- for (let j = 0; j < batch.length; j++) {
1010
- const section = batch[j];
1011
- if (section) {
1012
- insertEmbed.run(section.id, JSON.stringify(embeddings[j]));
1013
- }
1014
- }
1015
- })();
248
+ catch { /* */ }
249
+ let totalDocs = 0;
250
+ try {
251
+ totalDocs = db.prepare('SELECT COUNT(*) AS cnt FROM docs').get().cnt;
1016
252
  }
1017
- }
1018
- /**
1019
- * Embed commit messages that haven't been embedded yet.
1020
- *
1021
- * Uses a `LEFT JOIN` against `commit_embeddings` to skip commits whose
1022
- * embeddings already exist, so only newly-ingested commits are processed.
1023
- */
1024
- async embedCommitMessages(db) {
1025
- const embedder = this.embedder;
1026
- // Only embed commits that don't already have an embedding row.
1027
- const commits = db.prepare(`SELECT c.rowid, c.message
1028
- FROM commits c
1029
- LEFT JOIN commit_embeddings ce ON ce.rowid = c.rowid
1030
- WHERE length(trim(c.message)) > 0
1031
- AND ce.rowid IS NULL
1032
- ORDER BY c.rowid`).all();
1033
- if (commits.length === 0)
1034
- return;
1035
- const insertEmbed = db.prepare('INSERT OR REPLACE INTO commit_embeddings(rowid, embedding) VALUES (CAST(? AS INTEGER), json(?))');
1036
- for (let i = 0; i < commits.length; i += EMBED_BATCH_SIZE) {
1037
- const batch = commits.slice(i, i + EMBED_BATCH_SIZE);
1038
- const embeddings = await embedder.embed(batch.map((commit) => commit.message));
1039
- db.transaction(() => {
1040
- for (let j = 0; j < batch.length; j++) {
1041
- const commit = batch[j];
1042
- if (commit) {
1043
- insertEmbed.run(commit.rowid, JSON.stringify(embeddings[j]));
1044
- }
1045
- }
1046
- })();
253
+ catch { /* */ }
254
+ let commitCount;
255
+ try {
256
+ commitCount = db.prepare('SELECT COUNT(*) AS cnt FROM commits').get().cnt;
1047
257
  }
258
+ catch { /* */ }
259
+ return { totalSymbols, totalEdges, totalDocs, commitCount };
1048
260
  }
1049
261
  }
262
+ // ─── Trivial inline stages ────────────────────────────────────────────────────
263
+ // These are single-function-call stages that don't warrant their own files.
264
+ /** Resolve symbol edges (must run after LspEnrichmentStage). */
265
+ function resolutionStage() {
266
+ return {
267
+ name: 'symbol-resolution',
268
+ execute: async (ctx) => { resolveSymbolEdges(ctx.db); },
269
+ };
270
+ }
271
+ /** Refresh test-to-source file mappings. */
272
+ function testMapStage() {
273
+ return {
274
+ name: 'test-map',
275
+ execute: async (ctx) => { refreshTestMappings(ctx.db, ctx.branch); },
276
+ };
277
+ }
278
+ /** Ingest git history. */
279
+ function historyStage() {
280
+ return {
281
+ name: 'git-history',
282
+ execute: async (ctx) => {
283
+ if (!ctx.history)
284
+ return;
285
+ ctx.log.indexing('git history ingestion started');
286
+ const opts = typeof ctx.history === 'object' ? ctx.history : undefined;
287
+ await ingestGitHistory(ctx.db, ctx.walkerConfig.rootDir, opts);
288
+ ctx.log.indexing('git history ingestion complete');
289
+ },
290
+ };
291
+ }
1050
292
  //# sourceMappingURL=index.js.map