@jafreck/lore 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. package/README.md +63 -35
  2. package/dist/cli.js +41 -38
  3. package/dist/cli.js.map +1 -1
  4. package/dist/index.d.ts +24 -5
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +23 -4
  7. package/dist/index.js.map +1 -1
  8. package/dist/indexer/call-graph.d.ts +29 -8
  9. package/dist/indexer/call-graph.d.ts.map +1 -1
  10. package/dist/indexer/call-graph.js +232 -31
  11. package/dist/indexer/call-graph.js.map +1 -1
  12. package/dist/indexer/config-parser.js +6 -1
  13. package/dist/indexer/config-parser.js.map +1 -1
  14. package/dist/indexer/coverage.js +3 -1
  15. package/dist/indexer/coverage.js.map +1 -1
  16. package/dist/indexer/db.d.ts.map +1 -1
  17. package/dist/indexer/db.js +70 -1
  18. package/dist/indexer/db.js.map +1 -1
  19. package/dist/indexer/docs.d.ts.map +1 -1
  20. package/dist/indexer/docs.js +4 -0
  21. package/dist/indexer/docs.js.map +1 -1
  22. package/dist/indexer/embedder.d.ts.map +1 -1
  23. package/dist/indexer/embedder.js +5 -0
  24. package/dist/indexer/embedder.js.map +1 -1
  25. package/dist/indexer/extractors/bash.d.ts.map +1 -1
  26. package/dist/indexer/extractors/bash.js +29 -1
  27. package/dist/indexer/extractors/bash.js.map +1 -1
  28. package/dist/indexer/extractors/c.d.ts +5 -2
  29. package/dist/indexer/extractors/c.d.ts.map +1 -1
  30. package/dist/indexer/extractors/c.js +277 -8
  31. package/dist/indexer/extractors/c.js.map +1 -1
  32. package/dist/indexer/extractors/cpp.d.ts +5 -2
  33. package/dist/indexer/extractors/cpp.d.ts.map +1 -1
  34. package/dist/indexer/extractors/cpp.js +349 -3
  35. package/dist/indexer/extractors/cpp.js.map +1 -1
  36. package/dist/indexer/extractors/csharp.d.ts.map +1 -1
  37. package/dist/indexer/extractors/csharp.js +166 -1
  38. package/dist/indexer/extractors/csharp.js.map +1 -1
  39. package/dist/indexer/extractors/dart.d.ts.map +1 -1
  40. package/dist/indexer/extractors/dart.js +134 -1
  41. package/dist/indexer/extractors/dart.js.map +1 -1
  42. package/dist/indexer/extractors/elixir.d.ts.map +1 -1
  43. package/dist/indexer/extractors/elixir.js +45 -0
  44. package/dist/indexer/extractors/elixir.js.map +1 -1
  45. package/dist/indexer/extractors/elm.d.ts.map +1 -1
  46. package/dist/indexer/extractors/elm.js +33 -0
  47. package/dist/indexer/extractors/elm.js.map +1 -1
  48. package/dist/indexer/extractors/go.d.ts.map +1 -1
  49. package/dist/indexer/extractors/go.js +206 -1
  50. package/dist/indexer/extractors/go.js.map +1 -1
  51. package/dist/indexer/extractors/haskell.d.ts.map +1 -1
  52. package/dist/indexer/extractors/haskell.js +29 -0
  53. package/dist/indexer/extractors/haskell.js.map +1 -1
  54. package/dist/indexer/extractors/java.d.ts.map +1 -1
  55. package/dist/indexer/extractors/java.js +170 -1
  56. package/dist/indexer/extractors/java.js.map +1 -1
  57. package/dist/indexer/extractors/javascript.d.ts.map +1 -1
  58. package/dist/indexer/extractors/javascript.js +24 -2
  59. package/dist/indexer/extractors/javascript.js.map +1 -1
  60. package/dist/indexer/extractors/julia.d.ts.map +1 -1
  61. package/dist/indexer/extractors/julia.js +23 -1
  62. package/dist/indexer/extractors/julia.js.map +1 -1
  63. package/dist/indexer/extractors/kotlin.d.ts.map +1 -1
  64. package/dist/indexer/extractors/kotlin.js +122 -1
  65. package/dist/indexer/extractors/kotlin.js.map +1 -1
  66. package/dist/indexer/extractors/lua.d.ts.map +1 -1
  67. package/dist/indexer/extractors/lua.js +19 -1
  68. package/dist/indexer/extractors/lua.js.map +1 -1
  69. package/dist/indexer/extractors/objc.d.ts.map +1 -1
  70. package/dist/indexer/extractors/objc.js +171 -1
  71. package/dist/indexer/extractors/objc.js.map +1 -1
  72. package/dist/indexer/extractors/ocaml.d.ts.map +1 -1
  73. package/dist/indexer/extractors/ocaml.js +29 -0
  74. package/dist/indexer/extractors/ocaml.js.map +1 -1
  75. package/dist/indexer/extractors/php.d.ts.map +1 -1
  76. package/dist/indexer/extractors/php.js +137 -1
  77. package/dist/indexer/extractors/php.js.map +1 -1
  78. package/dist/indexer/extractors/python.d.ts.map +1 -1
  79. package/dist/indexer/extractors/python.js +24 -3
  80. package/dist/indexer/extractors/python.js.map +1 -1
  81. package/dist/indexer/extractors/ruby.d.ts.map +1 -1
  82. package/dist/indexer/extractors/ruby.js +23 -1
  83. package/dist/indexer/extractors/ruby.js.map +1 -1
  84. package/dist/indexer/extractors/rust.d.ts.map +1 -1
  85. package/dist/indexer/extractors/rust.js +139 -2
  86. package/dist/indexer/extractors/rust.js.map +1 -1
  87. package/dist/indexer/extractors/scala.d.ts.map +1 -1
  88. package/dist/indexer/extractors/scala.js +24 -1
  89. package/dist/indexer/extractors/scala.js.map +1 -1
  90. package/dist/indexer/extractors/swift.d.ts.map +1 -1
  91. package/dist/indexer/extractors/swift.js +129 -1
  92. package/dist/indexer/extractors/swift.js.map +1 -1
  93. package/dist/indexer/extractors/types.d.ts +78 -2
  94. package/dist/indexer/extractors/types.d.ts.map +1 -1
  95. package/dist/indexer/extractors/types.js +167 -8
  96. package/dist/indexer/extractors/types.js.map +1 -1
  97. package/dist/indexer/extractors/typescript.d.ts.map +1 -1
  98. package/dist/indexer/extractors/typescript.js +217 -1
  99. package/dist/indexer/extractors/typescript.js.map +1 -1
  100. package/dist/indexer/extractors/zig.d.ts.map +1 -1
  101. package/dist/indexer/extractors/zig.js +30 -0
  102. package/dist/indexer/extractors/zig.js.map +1 -1
  103. package/dist/indexer/git-history.d.ts.map +1 -1
  104. package/dist/indexer/git-history.js +4 -8
  105. package/dist/indexer/git-history.js.map +1 -1
  106. package/dist/indexer/git-hooks.js +1 -1
  107. package/dist/indexer/git-hooks.js.map +1 -1
  108. package/dist/indexer/index.d.ts +27 -52
  109. package/dist/indexer/index.d.ts.map +1 -1
  110. package/dist/indexer/index.js +146 -773
  111. package/dist/indexer/index.js.map +1 -1
  112. package/dist/indexer/lsp/client.d.ts.map +1 -1
  113. package/dist/indexer/lsp/client.js +40 -15
  114. package/dist/indexer/lsp/client.js.map +1 -1
  115. package/dist/indexer/lsp/config.d.ts.map +1 -1
  116. package/dist/indexer/lsp/config.js +18 -4
  117. package/dist/indexer/lsp/config.js.map +1 -1
  118. package/dist/indexer/lsp/enrichment.d.ts +2 -0
  119. package/dist/indexer/lsp/enrichment.d.ts.map +1 -1
  120. package/dist/indexer/lsp/enrichment.js +67 -33
  121. package/dist/indexer/lsp/enrichment.js.map +1 -1
  122. package/dist/indexer/pipeline.d.ts +137 -0
  123. package/dist/indexer/pipeline.d.ts.map +1 -0
  124. package/dist/indexer/pipeline.js +84 -0
  125. package/dist/indexer/pipeline.js.map +1 -0
  126. package/dist/indexer/poller.d.ts +8 -0
  127. package/dist/indexer/poller.d.ts.map +1 -1
  128. package/dist/indexer/poller.js +3 -1
  129. package/dist/indexer/poller.js.map +1 -1
  130. package/dist/indexer/resolution-method.d.ts +40 -0
  131. package/dist/indexer/resolution-method.d.ts.map +1 -0
  132. package/dist/indexer/resolution-method.js +53 -0
  133. package/dist/indexer/resolution-method.js.map +1 -0
  134. package/dist/indexer/resolver.d.ts.map +1 -1
  135. package/dist/indexer/resolver.js +8 -4
  136. package/dist/indexer/resolver.js.map +1 -1
  137. package/dist/indexer/stages/dependency-api.d.ts +18 -0
  138. package/dist/indexer/stages/dependency-api.d.ts.map +1 -0
  139. package/dist/indexer/stages/dependency-api.js +174 -0
  140. package/dist/indexer/stages/dependency-api.js.map +1 -0
  141. package/dist/indexer/stages/docs-index.d.ts +20 -0
  142. package/dist/indexer/stages/docs-index.d.ts.map +1 -0
  143. package/dist/indexer/stages/docs-index.js +144 -0
  144. package/dist/indexer/stages/docs-index.js.map +1 -0
  145. package/dist/indexer/stages/embedding.d.ts +17 -0
  146. package/dist/indexer/stages/embedding.d.ts.map +1 -0
  147. package/dist/indexer/stages/embedding.js +217 -0
  148. package/dist/indexer/stages/embedding.js.map +1 -0
  149. package/dist/indexer/stages/import-resolution.d.ts +17 -0
  150. package/dist/indexer/stages/import-resolution.d.ts.map +1 -0
  151. package/dist/indexer/stages/import-resolution.js +47 -0
  152. package/dist/indexer/stages/import-resolution.js.map +1 -0
  153. package/dist/indexer/stages/index.d.ts +12 -0
  154. package/dist/indexer/stages/index.d.ts.map +1 -0
  155. package/dist/indexer/stages/index.js +12 -0
  156. package/dist/indexer/stages/index.js.map +1 -0
  157. package/dist/indexer/stages/lsp-enrichment.d.ts +48 -0
  158. package/dist/indexer/stages/lsp-enrichment.d.ts.map +1 -0
  159. package/dist/indexer/stages/lsp-enrichment.js +158 -0
  160. package/dist/indexer/stages/lsp-enrichment.js.map +1 -0
  161. package/dist/indexer/stages/source-index.d.ts +31 -0
  162. package/dist/indexer/stages/source-index.d.ts.map +1 -0
  163. package/dist/indexer/stages/source-index.js +314 -0
  164. package/dist/indexer/stages/source-index.js.map +1 -0
  165. package/dist/indexer/watcher.d.ts +8 -0
  166. package/dist/indexer/watcher.d.ts.map +1 -1
  167. package/dist/indexer/watcher.js +18 -1
  168. package/dist/indexer/watcher.js.map +1 -1
  169. package/dist/lore-server/db.d.ts +51 -4
  170. package/dist/lore-server/db.d.ts.map +1 -1
  171. package/dist/lore-server/db.js +102 -93
  172. package/dist/lore-server/db.js.map +1 -1
  173. package/dist/lore-server/server.d.ts +5 -0
  174. package/dist/lore-server/server.d.ts.map +1 -1
  175. package/dist/lore-server/server.js +52 -273
  176. package/dist/lore-server/server.js.map +1 -1
  177. package/dist/lore-server/tool-registry.d.ts +101 -0
  178. package/dist/lore-server/tool-registry.d.ts.map +1 -0
  179. package/dist/lore-server/tool-registry.js +227 -0
  180. package/dist/lore-server/tool-registry.js.map +1 -0
  181. package/dist/lore-server/tools/graph.d.ts +11 -3
  182. package/dist/lore-server/tools/graph.d.ts.map +1 -1
  183. package/dist/lore-server/tools/graph.js +108 -13
  184. package/dist/lore-server/tools/graph.js.map +1 -1
  185. package/dist/lore-server/tools/lookup.d.ts +9 -3
  186. package/dist/lore-server/tools/lookup.d.ts.map +1 -1
  187. package/dist/lore-server/tools/lookup.js +9 -4
  188. package/dist/lore-server/tools/lookup.js.map +1 -1
  189. package/dist/lore-server/tools/notes.d.ts.map +1 -1
  190. package/dist/lore-server/tools/notes.js +6 -2
  191. package/dist/lore-server/tools/notes.js.map +1 -1
  192. package/dist/lore-server/tools/search.d.ts +4 -0
  193. package/dist/lore-server/tools/search.d.ts.map +1 -1
  194. package/dist/lore-server/tools/search.js +50 -14
  195. package/dist/lore-server/tools/search.js.map +1 -1
  196. package/dist/runtime.d.ts +88 -0
  197. package/dist/runtime.d.ts.map +1 -0
  198. package/dist/runtime.js +153 -0
  199. package/dist/runtime.js.map +1 -0
  200. package/package.json +12 -12
@@ -1,87 +1,40 @@
1
1
  /**
2
2
  * @module indexer/index
3
3
  *
4
- * The `IndexBuilder` class orchestrates the full indexing pipeline:
5
- * walk → parse → extract → resolve → persist
4
+ * The `IndexBuilder` class is a **façade** over the composable
5
+ * `IndexPipeline` and its stage objects.
6
6
  *
7
- * It also supports incremental updates (`update()`) and a stub for
8
- * LLM-summary ingestion (`ingestSummary()`).
7
+ * For full builds, `build()` delegates entirely to the pipeline which
8
+ * enforces the data-dependency chain:
9
+ * ```
10
+ * SourceIndexStage → DocsIndexStage → ImportResolutionStage
11
+ * → DependencyApiStage → LspEnrichmentStage → ResolutionStage
12
+ * → TestMapStage → HistoryStage → EmbeddingStage
13
+ * ```
14
+ *
15
+ * For incremental updates, `update()` runs the same pipeline in
16
+ * `'update'` mode, passing the changed-file list through the context.
17
+ *
18
+ * The enrichment → resolution ordering is **load-bearing** and enforced
19
+ * structurally by the pipeline rather than by call-site discipline.
9
20
  */
10
21
  import * as fs from 'node:fs';
11
- import * as crypto from 'node:crypto';
12
- import * as path from 'node:path';
13
22
  import { execFileSync } from 'node:child_process';
14
- import { openDb, setLoreMeta, getLoreMeta, createVec0Tables, LORE_META_INDEX_CHECKPOINT, LORE_META_LAST_HEAD_SHA, LORE_META_COVERAGE_LAST_SOURCE_PATH, LORE_META_COVERAGE_LAST_SOURCE_MTIME, } from './db.js';
15
- import { walkFiles } from './walker.js';
16
- import { detectLanguageForPath } from './walker.js';
17
- import { walkDocumentationFiles } from './walker.js';
18
- import { inferSeededDocNoteKey, buildDocNoteScope } from './docs.js';
19
- import { ingestGitHistory } from './git-history.js';
20
- import { ParserPool } from './parser.js';
21
- import { ImportResolver } from './resolver.js';
22
- import { buildCallGraph } from './call-graph.js';
23
- import { isPublicDeclarationSurfaceSymbol, } from './extractors/types.js';
24
- import { CExtractor } from './extractors/c.js';
25
- import { RustExtractor } from './extractors/rust.js';
26
- import { PythonExtractor } from './extractors/python.js';
27
- import { CppExtractor } from './extractors/cpp.js';
28
- import { TypeScriptExtractor } from './extractors/typescript.js';
29
- import { JavaScriptExtractor } from './extractors/javascript.js';
30
- import { GoExtractor } from './extractors/go.js';
31
- import { JavaExtractor } from './extractors/java.js';
32
- import { CSharpExtractor } from './extractors/csharp.js';
33
- import { RubyExtractor } from './extractors/ruby.js';
34
- import { PhpExtractor } from './extractors/php.js';
35
- import { SwiftExtractor } from './extractors/swift.js';
36
- import { KotlinExtractor } from './extractors/kotlin.js';
37
- import { ScalaExtractor } from './extractors/scala.js';
38
- import { LuaExtractor } from './extractors/lua.js';
39
- import { BashExtractor } from './extractors/bash.js';
40
- import { ElixirExtractor } from './extractors/elixir.js';
41
- import { ZigExtractor } from './extractors/zig.js';
42
- import { DartExtractor } from './extractors/dart.js';
43
- import { OcamlExtractor } from './extractors/ocaml.js';
44
- import { HaskellExtractor } from './extractors/haskell.js';
45
- import { JuliaExtractor } from './extractors/julia.js';
46
- import { ElmExtractor } from './extractors/elm.js';
47
- import { ObjcExtractor } from './extractors/objc.js';
48
- import { DEFAULT_EMBEDDING_MODEL, buildStructuralEmbeddingText } from './embedder.js';
23
+ import { openDb, setLoreMeta, LORE_META_LAST_HEAD_SHA, LORE_META_COVERAGE_LAST_SOURCE_PATH, LORE_META_COVERAGE_LAST_SOURCE_MTIME, } from './db.js';
24
+ import { DEFAULT_EMBEDDING_MODEL } from './embedder.js';
49
25
  import { ingestCoverageReport } from './coverage.js';
26
+ import { resolveSymbolEdges } from './call-graph.js';
50
27
  import { refreshTestMappings } from './test-mapper.js';
51
- import { LspEnrichmentCoordinator } from './lsp/enrichment.js';
28
+ import { ingestGitHistory } from './git-history.js';
52
29
  import { getLogger } from '../logger.js';
53
- // ─── Extractor registry ───────────────────────────────────────────────────────
54
- const EXTRACTORS = {
55
- c: new CExtractor(),
56
- rust: new RustExtractor(),
57
- python: new PythonExtractor(),
58
- cpp: new CppExtractor(),
59
- typescript: new TypeScriptExtractor(),
60
- javascript: new JavaScriptExtractor(),
61
- go: new GoExtractor(),
62
- java: new JavaExtractor(),
63
- csharp: new CSharpExtractor(),
64
- ruby: new RubyExtractor(),
65
- php: new PhpExtractor(),
66
- swift: new SwiftExtractor(),
67
- kotlin: new KotlinExtractor(),
68
- scala: new ScalaExtractor(),
69
- lua: new LuaExtractor(),
70
- bash: new BashExtractor(),
71
- elixir: new ElixirExtractor(),
72
- zig: new ZigExtractor(),
73
- dart: new DartExtractor(),
74
- ocaml: new OcamlExtractor(),
75
- haskell: new HaskellExtractor(),
76
- julia: new JuliaExtractor(),
77
- elm: new ElmExtractor(),
78
- objc: new ObjcExtractor(),
79
- };
80
- /** Number of symbols to embed per batch. */
81
- const EMBED_BATCH_SIZE = 64;
82
- // ─── IndexBuilder ─────────────────────────────────────────────────────────────
30
+ import { IndexPipeline } from './pipeline.js';
31
+ import { SourceIndexStage, DocsIndexStage, ImportResolutionStage, DependencyApiStage, LspEnrichmentStage, EmbeddingStage, } from './stages/index.js';
32
+ // ─── IndexBuilder (façade) ────────────────────────────────────────────────────
83
33
  /**
84
- * Orchestrates the full M1 indexing pipeline.
34
+ * Façade over the composable `IndexPipeline`.
35
+ *
36
+ * Preserves backward-compatible public API while internally delegating to
37
+ * pipeline stages for the actual work.
85
38
  *
86
39
  * @example
87
40
  * ```ts
@@ -92,8 +45,6 @@ const EMBED_BATCH_SIZE = 64;
92
45
  export class IndexBuilder {
93
46
  dbPath;
94
47
  walkerConfig;
95
- pool;
96
- resolver;
97
48
  embedder;
98
49
  history;
99
50
  indexDependencies;
@@ -103,8 +54,6 @@ export class IndexBuilder {
103
54
  constructor(dbPath, walkerConfig, embedder, embeddingModelOrOptions) {
104
55
  this.dbPath = dbPath;
105
56
  this.walkerConfig = walkerConfig;
106
- this.pool = new ParserPool();
107
- this.resolver = new ImportResolver();
108
57
  const opts = typeof embeddingModelOrOptions === 'string'
109
58
  ? { embeddingModel: embeddingModelOrOptions }
110
59
  : (embeddingModelOrOptions ?? {});
@@ -123,214 +72,115 @@ export class IndexBuilder {
123
72
  }
124
73
  // ─── Public API ──────────────────────────────────────────────────────────
125
74
  /**
126
- * Performs a full build: walks all files, parses them, extracts
127
- * symbols/imports/callRefs, resolves imports, and persists everything to
128
- * the database.
75
+ * Performs a full build by running the composable pipeline.
76
+ *
77
+ * The pipeline enforces the enrichment → resolution data-dependency
78
+ * chain structurally (by stage ordering), not by convention.
129
79
  */
130
80
  async build() {
131
81
  const log = getLogger();
132
82
  const buildStart = performance.now();
133
83
  const db = openDb(this.dbPath);
134
84
  const branch = this.resolveBranch();
135
- const lspCoordinator = this.createLspEnrichmentCoordinator();
136
85
  log.indexing('build started', { dbPath: this.dbPath, branch, rootDir: this.walkerConfig.rootDir });
86
+ // Build the pipeline with all stages in dependency order.
87
+ const pipeline = new IndexPipeline([
88
+ new SourceIndexStage(),
89
+ new DocsIndexStage(),
90
+ new ImportResolutionStage(),
91
+ new DependencyApiStage(),
92
+ new LspEnrichmentStage(),
93
+ resolutionStage(),
94
+ testMapStage(),
95
+ historyStage(),
96
+ new EmbeddingStage(),
97
+ ]);
98
+ const context = {
99
+ db,
100
+ dbPath: this.dbPath,
101
+ walkerConfig: this.walkerConfig,
102
+ branch,
103
+ lsp: this.lspSettings,
104
+ embedder: this.embedder,
105
+ log,
106
+ files: [],
107
+ indexDependencies: this.indexDependencies,
108
+ history: this.history,
109
+ docsAutoNotes: this.docsAutoNotes,
110
+ staleSymbolIds: [],
111
+ changedSourcePaths: [],
112
+ changedDocPaths: [],
113
+ };
137
114
  try {
138
- this.saveDocsAutoNotesSetting(db);
139
- const files = await walkFiles(this.walkerConfig);
140
- const docs = await walkDocumentationFiles(this.walkerConfig);
141
- log.indexing('walk complete', { fileCount: files.length, docCount: docs.length });
142
- if (lspCoordinator) {
143
- const languages = new Set(files.map((file) => file.language));
144
- if (this.indexDependencies)
145
- languages.add('typescript');
146
- await lspCoordinator.start(languages);
147
- }
148
- const resumeAt = this.loadBuildCheckpoint(db, branch, files.length);
149
- if (resumeAt > 0) {
150
- log.indexing('resuming from checkpoint', { resumeAt, totalFiles: files.length });
151
- }
152
- db.transaction(() => {
153
- for (let i = resumeAt; i < files.length; i++) {
154
- const file = files[i];
155
- if (!file)
156
- continue;
157
- this.processFile(db, file.path, file.language, branch);
158
- this.saveBuildCheckpoint(db, branch, i + 1, files.length);
159
- }
160
- const seenDocPaths = new Set();
161
- for (const doc of docs) {
162
- seenDocPaths.add(doc.path);
163
- this.processDocumentationFile(db, doc, branch);
164
- this.upsertSeededDocumentationNote(db, doc, branch);
165
- }
166
- this.removeStaleDocumentation(db, branch, seenDocPaths);
167
- })();
168
- this.saveBuildCheckpoint(db, branch, files.length, files.length);
169
- log.indexing('files processed, resolving imports');
170
- this.resolveImports(db, branch);
171
- await this.indexDependencyDeclarations(db, lspCoordinator);
172
- await this.enrichProjectSymbolsAndCallRefs(db, branch, files, lspCoordinator);
173
- refreshTestMappings(db, branch);
174
- buildCallGraph(db);
115
+ await pipeline.run(context, 'build');
175
116
  this.saveLastKnownHead(db);
176
- if (this.embedder) {
177
- log.indexing('embedding started', { model: this.embeddingModel });
178
- await this.embedder.init();
179
- await this.embedStructural(db);
180
- await this.embedDocumentation(db);
181
- log.indexing('embedding complete');
182
- }
183
- if (this.history) {
184
- log.indexing('git history ingestion started');
185
- const historyOptions = typeof this.history === 'object' ? this.history : undefined;
186
- await ingestGitHistory(db, this.walkerConfig.rootDir, historyOptions);
187
- if (this.embedder) {
188
- await this.embedCommitMessages(db);
189
- }
190
- log.indexing('git history ingestion complete');
191
- }
192
117
  // Gather final DB stats for the build summary
193
- let totalSymbols = 0;
194
- try {
195
- totalSymbols = db.prepare('SELECT COUNT(*) AS cnt FROM symbols').get().cnt;
196
- }
197
- catch { /* table may not exist */ }
198
- let totalEdges = 0;
199
- try {
200
- totalEdges = db.prepare('SELECT COUNT(*) AS cnt FROM call_graph').get().cnt;
201
- }
202
- catch { /* table may not exist */ }
203
- let totalDocs = 0;
204
- try {
205
- totalDocs = db.prepare('SELECT COUNT(*) AS cnt FROM documentation').get().cnt;
206
- }
207
- catch { /* table may not exist */ }
208
- let commitCount;
209
- try {
210
- commitCount = db.prepare('SELECT COUNT(*) AS cnt FROM commits').get().cnt;
211
- }
212
- catch { /* commits table may not exist */ }
213
- const dbSizeBytes = fs.existsSync(this.dbPath) ? fs.statSync(this.dbPath).size : undefined;
118
+ const stats = this.gatherDbStats(db);
214
119
  const indexDurationMs = Math.round(performance.now() - buildStart);
215
120
  log.startup('indexing complete', {
216
121
  dbPath: this.dbPath,
217
- dbSizeBytes,
122
+ dbSizeBytes: fs.existsSync(this.dbPath) ? fs.statSync(this.dbPath).size : undefined,
218
123
  embeddingModel: this.embeddingModel,
219
124
  embeddingReady: !!this.embedder,
220
- totalFiles: files.length,
221
- totalSymbols,
222
- totalDocs,
223
- totalEdges,
224
- commitCount,
125
+ totalFiles: context.files.length,
126
+ ...stats,
225
127
  indexDurationMs,
226
128
  });
227
129
  }
228
130
  finally {
229
- if (lspCoordinator) {
230
- await lspCoordinator.dispose();
231
- }
232
131
  db.close();
233
132
  }
234
133
  }
235
134
  /**
236
135
  * Incrementally re-processes only the listed files and updates the DB.
237
- * Symbols and imports for changed files are deleted then re-inserted.
136
+ *
137
+ * Delegates to the same pipeline as `build()` — each stage handles
138
+ * `'update'` mode by operating only on the changed-file set.
238
139
  *
239
140
  * @param changedFiles Absolute paths of files that have changed.
240
141
  */
241
142
  async update(changedFiles) {
242
143
  const db = openDb(this.dbPath);
243
144
  const branch = this.resolveBranch();
244
- const lspCoordinator = this.createLspEnrichmentCoordinator();
245
- const enrichedFiles = [];
145
+ const log = getLogger();
146
+ const pipeline = new IndexPipeline([
147
+ new SourceIndexStage(),
148
+ new DocsIndexStage(),
149
+ new ImportResolutionStage(),
150
+ new DependencyApiStage(),
151
+ new LspEnrichmentStage(),
152
+ resolutionStage(),
153
+ testMapStage(),
154
+ historyStage(),
155
+ new EmbeddingStage(),
156
+ ]);
157
+ const context = {
158
+ db,
159
+ dbPath: this.dbPath,
160
+ walkerConfig: this.walkerConfig,
161
+ branch,
162
+ lsp: this.lspSettings,
163
+ embedder: this.embedder,
164
+ log,
165
+ files: [],
166
+ indexDependencies: this.indexDependencies,
167
+ history: this.history,
168
+ docsAutoNotes: this.docsAutoNotes,
169
+ changedFiles,
170
+ staleSymbolIds: [],
171
+ changedSourcePaths: [],
172
+ changedDocPaths: [],
173
+ };
246
174
  try {
247
- this.saveDocsAutoNotesSetting(db);
248
- const docs = await walkDocumentationFiles(this.walkerConfig);
249
- const docsByPath = new Map(docs.map(doc => [doc.path, doc]));
250
- if (lspCoordinator) {
251
- const languages = new Set();
252
- for (const filePath of changedFiles) {
253
- if (!fs.existsSync(filePath))
254
- continue;
255
- const language = detectLanguageForPath(filePath, this.walkerConfig);
256
- if (language)
257
- languages.add(language);
258
- }
259
- if (this.indexDependencies)
260
- languages.add('typescript');
261
- await lspCoordinator.start(languages);
262
- }
263
- db.transaction(() => {
264
- for (const filePath of changedFiles) {
265
- // If the file no longer exists, remove it from the DB
266
- if (!fs.existsSync(filePath)) {
267
- const row = db.prepare('SELECT id FROM files WHERE path = ? AND branch = ?').get(filePath, branch);
268
- if (row) {
269
- // Null out any resolved_id references pointing to this file
270
- db.prepare('UPDATE file_imports SET resolved_id = NULL WHERE resolved_id = ?').run(row.id);
271
- db.prepare('DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)').run(row.id);
272
- db.prepare('DELETE FROM files WHERE id = ?').run(row.id);
273
- }
274
- this.deleteDocumentationByPath(db, filePath, branch);
275
- continue;
276
- }
277
- const language = detectLanguageForPath(filePath, this.walkerConfig);
278
- if (language) {
279
- enrichedFiles.push({ path: filePath, language });
280
- // Null out resolved_id references pointing to this file before deletion
281
- const existingRow = db.prepare('SELECT id FROM files WHERE path = ? AND branch = ?').get(filePath, branch);
282
- if (existingRow) {
283
- db.prepare('UPDATE file_imports SET resolved_id = NULL WHERE resolved_id = ?').run(existingRow.id);
284
- db.prepare('DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)').run(existingRow.id);
285
- }
286
- // Delete existing rows for this file (cascade handles symbols/imports)
287
- db.prepare('DELETE FROM files WHERE path = ? AND branch = ?').run(filePath, branch);
288
- this.processFile(db, filePath, language, branch);
289
- }
290
- const changedDoc = docsByPath.get(filePath);
291
- if (changedDoc) {
292
- this.processDocumentationFile(db, changedDoc, branch);
293
- this.upsertSeededDocumentationNote(db, changedDoc, branch);
294
- }
295
- else {
296
- this.deleteDocumentationByPath(db, filePath, branch);
297
- }
298
- }
299
- })();
300
- this.resolveImports(db, branch);
301
- await this.indexDependencyDeclarations(db, lspCoordinator);
302
- await this.enrichProjectSymbolsAndCallRefs(db, branch, enrichedFiles, lspCoordinator);
303
- refreshTestMappings(db, branch);
304
- if (this.history) {
305
- const historyOptions = typeof this.history === 'object' ? this.history : undefined;
306
- await ingestGitHistory(db, this.walkerConfig.rootDir, historyOptions);
307
- }
308
- if (this.embedder) {
309
- await this.embedder.init();
310
- await this.embedStructural(db);
311
- await this.embedDocumentation(db);
312
- if (this.history) {
313
- await this.embedCommitMessages(db);
314
- }
315
- }
316
- buildCallGraph(db);
175
+ await pipeline.run(context, 'update');
317
176
  this.saveLastKnownHead(db);
318
177
  }
319
178
  finally {
320
- if (lspCoordinator) {
321
- await lspCoordinator.dispose();
322
- }
323
179
  db.close();
324
180
  }
325
181
  }
326
182
  /**
327
183
  * Writes an LLM-generated summary for a symbol to `symbol_summaries`.
328
- * If an `EmbeddingProvider` was configured, also embeds the summary text
329
- * and persists it to `symbol_semantic_embeddings`.
330
- *
331
- * @param symbolId Row ID of the symbol in the `symbols` table.
332
- * @param summary Natural-language summary text.
333
- * @param model Name of the model that produced the summary.
334
184
  */
335
185
  async ingestSummary(symbolId, summary, model = 'unknown') {
336
186
  const db = openDb(this.dbPath);
@@ -366,345 +216,7 @@ export class IndexBuilder {
366
216
  db.close();
367
217
  }
368
218
  }
369
- // ─── Private helpers ──────────────────────────────────────────────────────
370
- /** Parse one file, extract symbols/imports/callRefs, and insert into the DB. */
371
- processFile(db, filePath, language, branch) {
372
- let source;
373
- try {
374
- source = fs.readFileSync(filePath, 'utf8');
375
- }
376
- catch {
377
- return; // Skip unreadable files
378
- }
379
- const hash = crypto.createHash('sha256').update(source).digest('hex');
380
- // Check if the file is already up-to-date
381
- const existing = db.prepare('SELECT id, last_hash FROM files WHERE path = ? AND branch = ?').get(filePath, branch);
382
- if (existing?.last_hash === hash)
383
- return;
384
- const sizeBytes = Buffer.byteLength(source, 'utf8');
385
- // Upsert the file row
386
- let fileId;
387
- if (existing) {
388
- db.prepare(`UPDATE files SET language = ?, size_bytes = ?, last_hash = ?, source = ?, indexed_at = unixepoch()
389
- WHERE id = ?`).run(language, sizeBytes, hash, source, existing.id);
390
- fileId = existing.id;
391
- // Remove stale symbols / imports / external deps (also clean up FTS5 index)
392
- db.prepare(`DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)`).run(fileId);
393
- db.prepare('DELETE FROM symbols WHERE file_id = ?').run(fileId);
394
- db.prepare('DELETE FROM file_imports WHERE file_id = ?').run(fileId);
395
- db.prepare('DELETE FROM external_deps WHERE file_id = ?').run(fileId);
396
- db.prepare('DELETE FROM api_routes WHERE file_id = ?').run(fileId);
397
- }
398
- else {
399
- const info = db
400
- .prepare(`INSERT INTO files (path, branch, language, size_bytes, last_hash, source)
401
- VALUES (?, ?, ?, ?, ?, ?)`)
402
- .run(filePath, branch, language, sizeBytes, hash, source);
403
- fileId = Number(info.lastInsertRowid);
404
- }
405
- // Parse the source
406
- const tree = this.pool.parse(language, source);
407
- if (!tree)
408
- return;
409
- const extractor = EXTRACTORS[language];
410
- if (!extractor)
411
- return;
412
- const result = extractor.extract(tree, source, filePath);
413
- // Insert symbols and keep FTS5 index in sync
414
- const insertSymbol = db.prepare(`INSERT INTO symbols (file_id, name, kind, start_line, end_line, signature, doc_comment)
415
- VALUES (?, ?, ?, ?, ?, ?, ?)`);
416
- const insertFts = db.prepare(`INSERT INTO symbols_fts(rowid, name, signature, kind) VALUES (?, ?, ?, ?)`);
417
- // Map from callerSymbol name → symbol row ID (for call refs)
418
- const symbolIdMap = new Map();
419
- for (const sym of result.symbols) {
420
- const info = insertSymbol.run(fileId, sym.name, sym.kind, sym.startLine, sym.endLine, sym.signature ?? null, sym.docComment ?? null);
421
- const symId = Number(info.lastInsertRowid);
422
- symbolIdMap.set(sym.name, symId);
423
- insertFts.run(symId, sym.name, buildStructuralEmbeddingText({
424
- name: sym.name,
425
- signature: sym.signature ?? null,
426
- }), sym.kind);
427
- }
428
- const insertRoute = db.prepare(`INSERT INTO api_routes (file_id, method, path, handler_id, handler_name, framework, line, middleware)
429
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
430
- for (const route of result.routes) {
431
- insertRoute.run(fileId, route.method, route.path, symbolIdMap.get(route.handler) ?? null, route.handler, route.framework, route.line, route.middleware ? JSON.stringify(route.middleware) : null);
432
- }
433
- // Insert raw imports (resolved_id will be filled in resolveImports())
434
- const insertImport = db.prepare(`INSERT INTO file_imports (file_id, raw_import) VALUES (?, ?)`);
435
- for (const imp of result.imports) {
436
- insertImport.run(fileId, imp.source);
437
- }
438
- // Insert call refs (callee_id resolved in call-graph phase)
439
- const insertCallRef = db.prepare(`INSERT INTO symbol_refs (caller_id, callee_name, call_line)
440
- VALUES (?, ?, ?)`);
441
- for (const ref of result.callRefs) {
442
- const callerId = symbolIdMap.get(ref.callerSymbol);
443
- if (callerId !== undefined) {
444
- insertCallRef.run(callerId, ref.calleeRaw, ref.line);
445
- }
446
- }
447
- }
448
- processDocumentationFile(db, doc, branch) {
449
- const existing = db.prepare('SELECT id, content_hash FROM docs WHERE path = ? AND branch = ?').get(doc.path, branch);
450
- if (existing?.content_hash === doc.hash) {
451
- return;
452
- }
453
- let docId;
454
- if (existing) {
455
- db.prepare(`UPDATE docs
456
- SET kind = ?, title = ?, content = ?, content_hash = ?, indexed_at = unixepoch()
457
- WHERE id = ?`).run(doc.kind, doc.title, doc.content, doc.hash, existing.id);
458
- docId = existing.id;
459
- }
460
- else {
461
- const info = db.prepare(`INSERT INTO docs (path, branch, kind, title, content, content_hash)
462
- VALUES (?, ?, ?, ?, ?, ?)`).run(doc.path, branch, doc.kind, doc.title, doc.content, doc.hash);
463
- docId = Number(info.lastInsertRowid);
464
- }
465
- const existingSections = db.prepare('SELECT id, section_index FROM doc_sections WHERE doc_id = ?').all(docId);
466
- const insertSection = db.prepare(`INSERT INTO doc_sections (
467
- doc_id, section_index, title, depth, heading_path, line_start, line_end, content, content_hash
468
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
469
- ON CONFLICT(doc_id, section_index) DO UPDATE SET
470
- title = excluded.title,
471
- depth = excluded.depth,
472
- heading_path = excluded.heading_path,
473
- line_start = excluded.line_start,
474
- line_end = excluded.line_end,
475
- content = excluded.content,
476
- content_hash = excluded.content_hash`);
477
- const activeSectionIndexes = new Set();
478
- for (const chunk of doc.chunks) {
479
- activeSectionIndexes.add(chunk.sectionIndex);
480
- insertSection.run(docId, chunk.sectionIndex, chunk.title, chunk.depth, JSON.stringify(chunk.headingPath), chunk.lineStart, chunk.lineEnd, chunk.content, chunk.hash);
481
- }
482
- const staleSectionIds = existingSections
483
- .filter(section => !activeSectionIndexes.has(section.section_index))
484
- .map(section => section.id);
485
- this.deleteDocSectionEmbeddings(db, staleSectionIds);
486
- if (staleSectionIds.length > 0) {
487
- db.prepare(`DELETE FROM doc_sections
488
- WHERE id IN (${staleSectionIds.map(() => '?').join(', ')})`).run(...staleSectionIds);
489
- }
490
- }
491
- upsertSeededDocumentationNote(db, doc, branch) {
492
- if (!this.docsAutoNotes)
493
- return;
494
- const key = inferSeededDocNoteKey(doc);
495
- if (!key)
496
- return;
497
- const scope = buildDocNoteScope(doc.path, branch);
498
- const existing = db.prepare('SELECT content, source_hash FROM notes WHERE key = ? AND scope = ?').get(key, scope);
499
- if (existing?.content === doc.content && existing.source_hash === doc.hash) {
500
- return;
501
- }
502
- db.prepare(`INSERT INTO notes (key, scope, content, model, source_hash, created_at, updated_at)
503
- VALUES (?, ?, ?, ?, ?, unixepoch(), unixepoch())
504
- ON CONFLICT(key, scope) DO UPDATE SET
505
- content = excluded.content,
506
- model = excluded.model,
507
- source_hash = excluded.source_hash,
508
- updated_at = unixepoch()`).run(key, scope, doc.content, 'system:auto-doc-seed', doc.hash);
509
- }
510
- removeStaleDocumentation(db, branch, retainedPaths) {
511
- const docs = db.prepare('SELECT id, path FROM docs WHERE branch = ?').all(branch);
512
- for (const doc of docs) {
513
- if (!retainedPaths.has(doc.path)) {
514
- this.deleteDocumentationById(db, doc.id);
515
- }
516
- }
517
- }
518
- deleteDocumentationByPath(db, docPath, branch) {
519
- const row = db.prepare('SELECT id FROM docs WHERE path = ? AND branch = ?').get(docPath, branch);
520
- if (!row)
521
- return;
522
- this.deleteDocumentationById(db, row.id);
523
- }
524
- deleteDocumentationById(db, docId) {
525
- const sectionIds = db.prepare('SELECT id FROM doc_sections WHERE doc_id = ?').all(docId);
526
- this.deleteDocSectionEmbeddings(db, sectionIds.map(row => row.id));
527
- db.prepare('DELETE FROM docs WHERE id = ?').run(docId);
528
- }
529
- deleteDocSectionEmbeddings(db, sectionIds) {
530
- if (sectionIds.length === 0)
531
- return;
532
- const hasEmbeddingsTable = db.prepare("SELECT 1 AS present FROM sqlite_master WHERE type IN ('table', 'virtual table') AND name = 'doc_section_embeddings'").get();
533
- if (!hasEmbeddingsTable)
534
- return;
535
- db.prepare(`DELETE FROM doc_section_embeddings WHERE rowid IN (${sectionIds.map(() => '?').join(', ')})`).run(...sectionIds);
536
- }
537
- /**
538
- * Second pass: resolve raw_import strings to file IDs in the
539
- * `file_imports.resolved_id` column. Also populates `external_deps` for
540
- * any import that resolves to an external package.
541
- */
542
- resolveImports(db, branch) {
543
- const rootDir = this.walkerConfig.rootDir;
544
- // Fetch all unresolved imports with their file's path, language, and file_id
545
- const rows = db
546
- .prepare(`SELECT fi.id, fi.file_id, fi.raw_import, f.path, f.language
547
- FROM file_imports fi
548
- JOIN files f ON f.id = fi.file_id
549
- WHERE fi.resolved_id IS NULL AND f.branch = ?`)
550
- .all(branch);
551
- const updateResolved = db.prepare('UPDATE file_imports SET resolved_id = ? WHERE id = ?');
552
- const insertExternalDep = db.prepare('INSERT OR IGNORE INTO external_deps (file_id, package) VALUES (?, ?)');
553
- for (const row of rows) {
554
- const resolved = this.resolver.resolve({ source: row.raw_import, importedNames: [] }, row.path, rootDir, row.language);
555
- if (resolved.resolvedPath) {
556
- const targetFile = db
557
- .prepare('SELECT id FROM files WHERE path = ? AND branch = ?')
558
- .get(resolved.resolvedPath, branch);
559
- if (targetFile) {
560
- updateResolved.run(targetFile.id, row.id);
561
- }
562
- }
563
- else if (resolved.isExternal && resolved.externalName) {
564
- insertExternalDep.run(row.file_id, resolved.externalName);
565
- }
566
- }
567
- }
568
- async indexDependencyDeclarations(db, lspCoordinator) {
569
- db.prepare('DELETE FROM external_symbols').run();
570
- if (!this.indexDependencies)
571
- return;
572
- const directDependencies = this.loadDirectDependencies();
573
- if (directDependencies.size === 0)
574
- return;
575
- const extractor = EXTRACTORS.typescript;
576
- if (!extractor)
577
- return;
578
- const insertExternalSymbol = db.prepare(`INSERT OR IGNORE INTO external_symbols
579
- (
580
- package_name,
581
- package_version,
582
- source_ref,
583
- symbol_name,
584
- symbol_kind,
585
- signature,
586
- doc_comment,
587
- resolved_type_signature,
588
- resolved_return_type,
589
- definition_uri,
590
- definition_path
591
- )
592
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
593
- for (const [packageName, declaredVersion] of directDependencies) {
594
- const packageDir = path.join(this.walkerConfig.rootDir, 'node_modules', packageName);
595
- if (!fs.existsSync(packageDir) || !fs.statSync(packageDir).isDirectory())
596
- continue;
597
- const packageVersion = this.readInstalledPackageVersion(packageDir) ?? declaredVersion ?? null;
598
- const declarationFiles = this.collectDeclarationFiles(packageDir);
599
- for (const declarationFile of declarationFiles) {
600
- const source = fs.readFileSync(declarationFile, 'utf8');
601
- const tree = this.pool.parse('typescript', source);
602
- if (!tree)
603
- continue;
604
- const result = extractor.extract(tree, source, declarationFile);
605
- const declarationSymbols = result.symbols.filter((symbol) => this.shouldIndexDependencySymbol(symbol));
606
- const enrichmentRows = lspCoordinator
607
- ? await lspCoordinator.enrich({
608
- filePath: declarationFile,
609
- language: 'typescript',
610
- source,
611
- targets: declarationSymbols.map((symbol) => ({
612
- line: symbol.startLine,
613
- character: symbol.startCharacter ?? 0,
614
- })),
615
- })
616
- : declarationSymbols.map(() => null);
617
- for (let i = 0; i < declarationSymbols.length; i++) {
618
- const symbol = declarationSymbols[i];
619
- if (!symbol)
620
- continue;
621
- const metadata = enrichmentRows[i];
622
- insertExternalSymbol.run(packageName, packageVersion, declarationFile, symbol.name, symbol.kind, symbol.signature, symbol.docComment ?? null, metadata?.resolvedTypeSignature ?? null, metadata?.resolvedReturnType ?? null, metadata?.definitionUri ?? null, metadata?.definitionPath ?? null);
623
- }
624
- }
625
- }
626
- }
627
- createLspEnrichmentCoordinator() {
628
- if (!this.lspSettings?.enabled) {
629
- return null;
630
- }
631
- return new LspEnrichmentCoordinator(this.lspSettings, this.walkerConfig.rootDir);
632
- }
633
- async enrichProjectSymbolsAndCallRefs(db, branch, files, lspCoordinator) {
634
- if (!lspCoordinator || files.length === 0)
635
- return;
636
- const selectSymbols = db.prepare(`SELECT s.id, s.name, s.signature, s.start_line
637
- FROM symbols s
638
- JOIN files f ON f.id = s.file_id
639
- WHERE f.path = ? AND f.branch = ?
640
- ORDER BY s.id`);
641
- const selectCallRefs = db.prepare(`SELECT sr.id, sr.call_line
642
- FROM symbol_refs sr
643
- JOIN symbols s ON s.id = sr.caller_id
644
- JOIN files f ON f.id = s.file_id
645
- WHERE f.path = ? AND f.branch = ?
646
- ORDER BY sr.id`);
647
- const updateSymbol = db.prepare(`UPDATE symbols
648
- SET resolved_type_signature = ?, resolved_return_type = ?, definition_uri = ?, definition_path = ?
649
- WHERE id = ?`);
650
- const updateSymbolFts = db.prepare('UPDATE symbols_fts SET signature = ? WHERE rowid = ?');
651
- const updateCallRef = db.prepare(`UPDATE symbol_refs
652
- SET resolved_type_signature = ?, resolved_return_type = ?, definition_uri = ?, definition_path = ?
653
- WHERE id = ?`);
654
- for (const file of files) {
655
- if (!file || !fs.existsSync(file.path))
656
- continue;
657
- let source;
658
- try {
659
- source = fs.readFileSync(file.path, 'utf8');
660
- }
661
- catch {
662
- continue;
663
- }
664
- const symbols = selectSymbols.all(file.path, branch);
665
- if (symbols.length > 0) {
666
- const symbolMetadata = await lspCoordinator.enrich({
667
- filePath: file.path,
668
- language: file.language,
669
- source,
670
- targets: symbols.map((symbol) => ({ line: symbol.start_line, character: 0 })),
671
- });
672
- for (let i = 0; i < symbols.length; i++) {
673
- const symbol = symbols[i];
674
- if (!symbol)
675
- continue;
676
- const metadata = symbolMetadata[i];
677
- if (!metadata)
678
- continue;
679
- updateSymbol.run(metadata.resolvedTypeSignature, metadata.resolvedReturnType, metadata.definitionUri, metadata.definitionPath, symbol.id);
680
- updateSymbolFts.run(buildStructuralEmbeddingText({
681
- name: symbol.name,
682
- signature: symbol.signature,
683
- resolvedTypeSignature: metadata.resolvedTypeSignature,
684
- resolvedReturnType: metadata.resolvedReturnType,
685
- }), symbol.id);
686
- }
687
- }
688
- const callRefs = selectCallRefs.all(file.path, branch);
689
- if (callRefs.length > 0) {
690
- const callRefMetadata = await lspCoordinator.enrich({
691
- filePath: file.path,
692
- language: file.language,
693
- source,
694
- targets: callRefs.map((callRef) => ({ line: callRef.call_line, character: 0 })),
695
- });
696
- for (let i = 0; i < callRefs.length; i++) {
697
- const callRef = callRefs[i];
698
- if (!callRef)
699
- continue;
700
- const metadata = callRefMetadata[i];
701
- if (!metadata)
702
- continue;
703
- updateCallRef.run(metadata.resolvedTypeSignature, metadata.resolvedReturnType, metadata.definitionUri, metadata.definitionPath, callRef.id);
704
- }
705
- }
706
- }
707
- }
219
+ // ─── Private helpers (minimal — most logic lives in stages) ─────────────
708
220
  resolveBranch() {
709
221
  if (this.walkerConfig.branch)
710
222
  return this.walkerConfig.branch;
@@ -712,208 +224,69 @@ export class IndexBuilder {
712
224
  }
713
225
  saveLastKnownHead(db) {
714
226
  const headSha = this.readGitValue(['rev-parse', 'HEAD']);
715
- if (headSha) {
227
+ if (headSha)
716
228
  setLoreMeta(db, LORE_META_LAST_HEAD_SHA, headSha);
717
- }
718
- }
719
- saveDocsAutoNotesSetting(db) {
720
- setLoreMeta(db, 'docs_auto_notes', this.docsAutoNotes ? '1' : '0');
721
229
  }
722
230
  readGitValue(args) {
723
231
  try {
724
- const value = execFileSync('git', ['-C', this.walkerConfig.rootDir, ...args], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] }).trim();
725
- return value || undefined;
232
+ return execFileSync('git', ['-C', this.walkerConfig.rootDir, ...args], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] }).trim() || undefined;
726
233
  }
727
234
  catch {
728
235
  return undefined;
729
236
  }
730
237
  }
731
- loadDirectDependencies() {
732
- const packageJsonPath = path.join(this.walkerConfig.rootDir, 'package.json');
733
- if (!fs.existsSync(packageJsonPath))
734
- return new Map();
735
- const raw = fs.readFileSync(packageJsonPath, 'utf8');
736
- const pkg = JSON.parse(raw);
737
- const deps = new Map();
738
- for (const section of [pkg.dependencies, pkg.devDependencies, pkg.peerDependencies]) {
739
- if (!section)
740
- continue;
741
- for (const [name, version] of Object.entries(section)) {
742
- if (!deps.has(name))
743
- deps.set(name, version);
744
- }
745
- }
746
- return deps;
747
- }
748
- readInstalledPackageVersion(packageDir) {
749
- const packageJsonPath = path.join(packageDir, 'package.json');
750
- if (!fs.existsSync(packageJsonPath))
751
- return undefined;
752
- const raw = fs.readFileSync(packageJsonPath, 'utf8');
753
- const pkg = JSON.parse(raw);
754
- return pkg.version;
755
- }
756
- collectDeclarationFiles(packageDir) {
757
- const declarations = [];
758
- const stack = [packageDir];
759
- while (stack.length > 0) {
760
- const currentDir = stack.pop();
761
- if (!currentDir)
762
- continue;
763
- const entries = fs.readdirSync(currentDir, { withFileTypes: true });
764
- for (const entry of entries) {
765
- if (entry.name === 'node_modules')
766
- continue;
767
- const fullPath = path.join(currentDir, entry.name);
768
- if (entry.isDirectory()) {
769
- stack.push(fullPath);
770
- continue;
771
- }
772
- if (entry.isFile() && fullPath.endsWith('.d.ts')) {
773
- declarations.push(fullPath);
774
- }
775
- }
776
- }
777
- return declarations;
778
- }
779
- shouldIndexDependencySymbol(symbol) {
780
- if (!isPublicDeclarationSurfaceSymbol(symbol))
781
- return false;
782
- if (symbol.declarationSurface)
783
- return true;
784
- return !this.hasImplementationBody(symbol);
785
- }
786
- hasImplementationBody(symbol) {
787
- const node = symbol.astNode;
788
- if (!node)
789
- return false;
790
- if (node.type === 'arrow_function' ||
791
- node.type === 'function_expression' ||
792
- node.type === 'generator_function') {
793
- return true;
794
- }
795
- if (node.type === 'class_declaration' ||
796
- node.type === 'interface_declaration' ||
797
- node.type === 'type_alias_declaration') {
798
- return false;
799
- }
800
- const bodyNode = node.childForFieldName('body');
801
- if (!bodyNode)
802
- return false;
803
- return bodyNode.namedChildCount > 0 || bodyNode.text.trim() !== '';
804
- }
805
- loadBuildCheckpoint(db, branch, totalFiles) {
806
- const raw = getLoreMeta(db, LORE_META_INDEX_CHECKPOINT);
807
- if (!raw)
808
- return 0;
238
+ gatherDbStats(db) {
239
+ let totalSymbols = 0;
809
240
  try {
810
- const parsed = JSON.parse(raw);
811
- if (parsed.branch !== branch || parsed.rootDir !== this.walkerConfig.rootDir)
812
- return 0;
813
- const nextFileIndex = parsed.nextFileIndex ?? 0;
814
- return Math.max(0, Math.min(totalFiles, nextFileIndex));
815
- }
816
- catch {
817
- return 0;
241
+ totalSymbols = db.prepare('SELECT COUNT(*) AS cnt FROM symbols').get().cnt;
818
242
  }
819
- }
820
- saveBuildCheckpoint(db, branch, nextFileIndex, totalFiles) {
821
- const checkpoint = {
822
- branch,
823
- rootDir: this.walkerConfig.rootDir,
824
- totalFiles,
825
- nextFileIndex,
826
- updatedAt: Math.floor(Date.now() / 1000),
827
- };
828
- setLoreMeta(db, LORE_META_INDEX_CHECKPOINT, JSON.stringify(checkpoint));
829
- }
830
- /**
831
- * Embed structural symbol signatures in batches and persist results to
832
- * the `symbol_embeddings` vec0 virtual table.
833
- *
834
- * Also stores the embedding model name and dims in `lore_meta` and
835
- * creates the vec0 tables if they don't exist yet.
836
- */
837
- async embedStructural(db) {
838
- const embedder = this.embedder;
839
- setLoreMeta(db, 'embedding_model', embedder.modelName);
840
- setLoreMeta(db, 'embedding_dims', String(embedder.dims));
841
- createVec0Tables(db, embedder.dims);
842
- // Fetch all symbols that have structural text to embed.
843
- const symbols = db
844
- .prepare(`SELECT id, name, signature, resolved_type_signature, resolved_return_type
845
- FROM symbols
846
- WHERE signature IS NOT NULL
847
- OR resolved_type_signature IS NOT NULL
848
- OR resolved_return_type IS NOT NULL`)
849
- .all();
850
- const insertEmbed = db.prepare('INSERT OR REPLACE INTO symbol_embeddings(rowid, embedding) VALUES (CAST(? AS INTEGER), json(?))');
851
- for (let i = 0; i < symbols.length; i += EMBED_BATCH_SIZE) {
852
- const batch = symbols.slice(i, i + EMBED_BATCH_SIZE);
853
- const texts = batch.map((symbol) => buildStructuralEmbeddingText({
854
- name: symbol.name,
855
- signature: symbol.signature,
856
- resolvedTypeSignature: symbol.resolved_type_signature,
857
- resolvedReturnType: symbol.resolved_return_type,
858
- }));
859
- const embeddings = await embedder.embed(texts);
860
- db.transaction(() => {
861
- for (let j = 0; j < batch.length; j++) {
862
- const sym = batch[j];
863
- if (sym)
864
- insertEmbed.run(sym.id, JSON.stringify(embeddings[j]));
865
- }
866
- })();
243
+ catch { /* */ }
244
+ let totalEdges = 0;
245
+ try {
246
+ totalEdges = db.prepare('SELECT COUNT(*) AS cnt FROM symbol_refs').get().cnt;
867
247
  }
868
- }
869
- async embedDocumentation(db) {
870
- const embedder = this.embedder;
871
- db.exec(`
872
- CREATE VIRTUAL TABLE IF NOT EXISTS doc_section_embeddings USING vec0(
873
- embedding FLOAT[${embedder.dims}]
874
- );
875
- `);
876
- const sections = db.prepare(`SELECT id, title, content
877
- FROM doc_sections
878
- ORDER BY id`).all();
879
- if (sections.length === 0)
880
- return;
881
- const insertEmbed = db.prepare('INSERT OR REPLACE INTO doc_section_embeddings(rowid, embedding) VALUES (CAST(? AS INTEGER), json(?))');
882
- for (let i = 0; i < sections.length; i += EMBED_BATCH_SIZE) {
883
- const batch = sections.slice(i, i + EMBED_BATCH_SIZE);
884
- const texts = batch.map(section => section.content || section.title);
885
- const embeddings = await embedder.embed(texts);
886
- db.transaction(() => {
887
- for (let j = 0; j < batch.length; j++) {
888
- const section = batch[j];
889
- if (section) {
890
- insertEmbed.run(section.id, JSON.stringify(embeddings[j]));
891
- }
892
- }
893
- })();
248
+ catch { /* */ }
249
+ let totalDocs = 0;
250
+ try {
251
+ totalDocs = db.prepare('SELECT COUNT(*) AS cnt FROM docs').get().cnt;
894
252
  }
895
- }
896
- async embedCommitMessages(db) {
897
- const embedder = this.embedder;
898
- const commits = db.prepare(`SELECT rowid, message
899
- FROM commits
900
- WHERE length(trim(message)) > 0
901
- ORDER BY rowid`).all();
902
- if (commits.length === 0)
903
- return;
904
- const insertEmbed = db.prepare('INSERT OR REPLACE INTO commit_embeddings(rowid, embedding) VALUES (CAST(? AS INTEGER), json(?))');
905
- for (let i = 0; i < commits.length; i += EMBED_BATCH_SIZE) {
906
- const batch = commits.slice(i, i + EMBED_BATCH_SIZE);
907
- const embeddings = await embedder.embed(batch.map((commit) => commit.message));
908
- db.transaction(() => {
909
- for (let j = 0; j < batch.length; j++) {
910
- const commit = batch[j];
911
- if (commit) {
912
- insertEmbed.run(commit.rowid, JSON.stringify(embeddings[j]));
913
- }
914
- }
915
- })();
253
+ catch { /* */ }
254
+ let commitCount;
255
+ try {
256
+ commitCount = db.prepare('SELECT COUNT(*) AS cnt FROM commits').get().cnt;
916
257
  }
258
+ catch { /* */ }
259
+ return { totalSymbols, totalEdges, totalDocs, commitCount };
917
260
  }
918
261
  }
262
+ // ─── Trivial inline stages ────────────────────────────────────────────────────
263
+ // These are single-function-call stages that don't warrant their own files.
264
+ /** Resolve symbol edges (must run after LspEnrichmentStage). */
265
+ function resolutionStage() {
266
+ return {
267
+ name: 'symbol-resolution',
268
+ execute: async (ctx) => { resolveSymbolEdges(ctx.db); },
269
+ };
270
+ }
271
+ /** Refresh test-to-source file mappings. */
272
+ function testMapStage() {
273
+ return {
274
+ name: 'test-map',
275
+ execute: async (ctx) => { refreshTestMappings(ctx.db, ctx.branch); },
276
+ };
277
+ }
278
+ /** Ingest git history. */
279
+ function historyStage() {
280
+ return {
281
+ name: 'git-history',
282
+ execute: async (ctx) => {
283
+ if (!ctx.history)
284
+ return;
285
+ ctx.log.indexing('git history ingestion started');
286
+ const opts = typeof ctx.history === 'object' ? ctx.history : undefined;
287
+ await ingestGitHistory(ctx.db, ctx.walkerConfig.rootDir, opts);
288
+ ctx.log.indexing('git history ingestion complete');
289
+ },
290
+ };
291
+ }
919
292
  //# sourceMappingURL=index.js.map