@jafreck/lore 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -23
- package/dist/cli.js +35 -99
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +24 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +23 -4
- package/dist/index.js.map +1 -1
- package/dist/indexer/call-graph.d.ts +8 -4
- package/dist/indexer/call-graph.d.ts.map +1 -1
- package/dist/indexer/call-graph.js +150 -81
- package/dist/indexer/call-graph.js.map +1 -1
- package/dist/indexer/db.d.ts.map +1 -1
- package/dist/indexer/db.js +29 -3
- package/dist/indexer/db.js.map +1 -1
- package/dist/indexer/extractors/php.js +5 -1
- package/dist/indexer/extractors/php.js.map +1 -1
- package/dist/indexer/index.d.ts +27 -75
- package/dist/indexer/index.d.ts.map +1 -1
- package/dist/indexer/index.js +146 -904
- package/dist/indexer/index.js.map +1 -1
- package/dist/indexer/lsp/enrichment.d.ts +2 -0
- package/dist/indexer/lsp/enrichment.d.ts.map +1 -1
- package/dist/indexer/lsp/enrichment.js +67 -33
- package/dist/indexer/lsp/enrichment.js.map +1 -1
- package/dist/indexer/pipeline.d.ts +137 -0
- package/dist/indexer/pipeline.d.ts.map +1 -0
- package/dist/indexer/pipeline.js +84 -0
- package/dist/indexer/pipeline.js.map +1 -0
- package/dist/indexer/resolution-method.d.ts +40 -0
- package/dist/indexer/resolution-method.d.ts.map +1 -0
- package/dist/indexer/resolution-method.js +53 -0
- package/dist/indexer/resolution-method.js.map +1 -0
- package/dist/indexer/stages/dependency-api.d.ts +18 -0
- package/dist/indexer/stages/dependency-api.d.ts.map +1 -0
- package/dist/indexer/stages/dependency-api.js +174 -0
- package/dist/indexer/stages/dependency-api.js.map +1 -0
- package/dist/indexer/stages/docs-index.d.ts +20 -0
- package/dist/indexer/stages/docs-index.d.ts.map +1 -0
- package/dist/indexer/stages/docs-index.js +144 -0
- package/dist/indexer/stages/docs-index.js.map +1 -0
- package/dist/indexer/stages/embedding.d.ts +17 -0
- package/dist/indexer/stages/embedding.d.ts.map +1 -0
- package/dist/indexer/stages/embedding.js +217 -0
- package/dist/indexer/stages/embedding.js.map +1 -0
- package/dist/indexer/stages/import-resolution.d.ts +17 -0
- package/dist/indexer/stages/import-resolution.d.ts.map +1 -0
- package/dist/indexer/stages/import-resolution.js +47 -0
- package/dist/indexer/stages/import-resolution.js.map +1 -0
- package/dist/indexer/stages/index.d.ts +12 -0
- package/dist/indexer/stages/index.d.ts.map +1 -0
- package/dist/indexer/stages/index.js +12 -0
- package/dist/indexer/stages/index.js.map +1 -0
- package/dist/indexer/stages/lsp-enrichment.d.ts +48 -0
- package/dist/indexer/stages/lsp-enrichment.d.ts.map +1 -0
- package/dist/indexer/stages/lsp-enrichment.js +158 -0
- package/dist/indexer/stages/lsp-enrichment.js.map +1 -0
- package/dist/indexer/stages/source-index.d.ts +31 -0
- package/dist/indexer/stages/source-index.d.ts.map +1 -0
- package/dist/indexer/stages/source-index.js +314 -0
- package/dist/indexer/stages/source-index.js.map +1 -0
- package/dist/lore-server/db.d.ts +51 -4
- package/dist/lore-server/db.d.ts.map +1 -1
- package/dist/lore-server/db.js +74 -69
- package/dist/lore-server/db.js.map +1 -1
- package/dist/lore-server/server.d.ts +5 -0
- package/dist/lore-server/server.d.ts.map +1 -1
- package/dist/lore-server/server.js +51 -233
- package/dist/lore-server/server.js.map +1 -1
- package/dist/lore-server/tool-registry.d.ts +101 -0
- package/dist/lore-server/tool-registry.d.ts.map +1 -0
- package/dist/lore-server/tool-registry.js +227 -0
- package/dist/lore-server/tool-registry.js.map +1 -0
- package/dist/lore-server/tools/graph.d.ts +7 -0
- package/dist/lore-server/tools/graph.d.ts.map +1 -1
- package/dist/lore-server/tools/graph.js +64 -7
- package/dist/lore-server/tools/graph.js.map +1 -1
- package/dist/lore-server/tools/lookup.d.ts +9 -3
- package/dist/lore-server/tools/lookup.d.ts.map +1 -1
- package/dist/lore-server/tools/lookup.js +9 -4
- package/dist/lore-server/tools/lookup.js.map +1 -1
- package/dist/lore-server/tools/search.d.ts +4 -0
- package/dist/lore-server/tools/search.d.ts.map +1 -1
- package/dist/lore-server/tools/search.js +4 -0
- package/dist/lore-server/tools/search.js.map +1 -1
- package/dist/runtime.d.ts +88 -0
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +153 -0
- package/dist/runtime.js.map +1 -0
- package/package.json +12 -12
package/dist/indexer/index.js
CHANGED
|
@@ -1,87 +1,40 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @module indexer/index
|
|
3
3
|
*
|
|
4
|
-
* The `IndexBuilder` class
|
|
5
|
-
*
|
|
4
|
+
* The `IndexBuilder` class is a **façade** over the composable
|
|
5
|
+
* `IndexPipeline` and its stage objects.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
7
|
+
* For full builds, `build()` delegates entirely to the pipeline which
|
|
8
|
+
* enforces the data-dependency chain:
|
|
9
|
+
* ```
|
|
10
|
+
* SourceIndexStage → DocsIndexStage → ImportResolutionStage
|
|
11
|
+
* → DependencyApiStage → LspEnrichmentStage → ResolutionStage
|
|
12
|
+
* → TestMapStage → HistoryStage → EmbeddingStage
|
|
13
|
+
* ```
|
|
14
|
+
*
|
|
15
|
+
* For incremental updates, `update()` uses stage-extracted helpers
|
|
16
|
+
* while managing the changed-file diff itself.
|
|
17
|
+
*
|
|
18
|
+
* The enrichment → resolution ordering is **load-bearing** and enforced
|
|
19
|
+
* structurally by the pipeline rather than by call-site discipline.
|
|
9
20
|
*/
|
|
10
21
|
import * as fs from 'node:fs';
|
|
11
|
-
import * as crypto from 'node:crypto';
|
|
12
|
-
import * as path from 'node:path';
|
|
13
22
|
import { execFileSync } from 'node:child_process';
|
|
14
|
-
import { openDb, setLoreMeta,
|
|
15
|
-
import {
|
|
16
|
-
import { detectLanguageForPath } from './walker.js';
|
|
17
|
-
import { walkDocumentationFiles } from './walker.js';
|
|
18
|
-
import { inferSeededDocNoteKey, buildDocNoteScope } from './docs.js';
|
|
19
|
-
import { ingestGitHistory } from './git-history.js';
|
|
20
|
-
import { ParserPool } from './parser.js';
|
|
21
|
-
import { ImportResolver } from './resolver.js';
|
|
22
|
-
import { resolveSymbolEdges, normalizeTypeName } from './call-graph.js';
|
|
23
|
-
import { isPublicDeclarationSurfaceSymbol, } from './extractors/types.js';
|
|
24
|
-
import { CExtractor } from './extractors/c.js';
|
|
25
|
-
import { RustExtractor } from './extractors/rust.js';
|
|
26
|
-
import { PythonExtractor } from './extractors/python.js';
|
|
27
|
-
import { CppExtractor } from './extractors/cpp.js';
|
|
28
|
-
import { TypeScriptExtractor } from './extractors/typescript.js';
|
|
29
|
-
import { JavaScriptExtractor } from './extractors/javascript.js';
|
|
30
|
-
import { GoExtractor } from './extractors/go.js';
|
|
31
|
-
import { JavaExtractor } from './extractors/java.js';
|
|
32
|
-
import { CSharpExtractor } from './extractors/csharp.js';
|
|
33
|
-
import { RubyExtractor } from './extractors/ruby.js';
|
|
34
|
-
import { PhpExtractor } from './extractors/php.js';
|
|
35
|
-
import { SwiftExtractor } from './extractors/swift.js';
|
|
36
|
-
import { KotlinExtractor } from './extractors/kotlin.js';
|
|
37
|
-
import { ScalaExtractor } from './extractors/scala.js';
|
|
38
|
-
import { LuaExtractor } from './extractors/lua.js';
|
|
39
|
-
import { BashExtractor } from './extractors/bash.js';
|
|
40
|
-
import { ElixirExtractor } from './extractors/elixir.js';
|
|
41
|
-
import { ZigExtractor } from './extractors/zig.js';
|
|
42
|
-
import { DartExtractor } from './extractors/dart.js';
|
|
43
|
-
import { OcamlExtractor } from './extractors/ocaml.js';
|
|
44
|
-
import { HaskellExtractor } from './extractors/haskell.js';
|
|
45
|
-
import { JuliaExtractor } from './extractors/julia.js';
|
|
46
|
-
import { ElmExtractor } from './extractors/elm.js';
|
|
47
|
-
import { ObjcExtractor } from './extractors/objc.js';
|
|
48
|
-
import { DEFAULT_EMBEDDING_MODEL, buildStructuralEmbeddingText } from './embedder.js';
|
|
23
|
+
import { openDb, setLoreMeta, LORE_META_LAST_HEAD_SHA, LORE_META_COVERAGE_LAST_SOURCE_PATH, LORE_META_COVERAGE_LAST_SOURCE_MTIME, } from './db.js';
|
|
24
|
+
import { DEFAULT_EMBEDDING_MODEL } from './embedder.js';
|
|
49
25
|
import { ingestCoverageReport } from './coverage.js';
|
|
26
|
+
import { resolveSymbolEdges } from './call-graph.js';
|
|
50
27
|
import { refreshTestMappings } from './test-mapper.js';
|
|
51
|
-
import {
|
|
28
|
+
import { ingestGitHistory } from './git-history.js';
|
|
52
29
|
import { getLogger } from '../logger.js';
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
rust: new RustExtractor(),
|
|
57
|
-
python: new PythonExtractor(),
|
|
58
|
-
cpp: new CppExtractor(),
|
|
59
|
-
typescript: new TypeScriptExtractor(),
|
|
60
|
-
javascript: new JavaScriptExtractor(),
|
|
61
|
-
go: new GoExtractor(),
|
|
62
|
-
java: new JavaExtractor(),
|
|
63
|
-
csharp: new CSharpExtractor(),
|
|
64
|
-
ruby: new RubyExtractor(),
|
|
65
|
-
php: new PhpExtractor(),
|
|
66
|
-
swift: new SwiftExtractor(),
|
|
67
|
-
kotlin: new KotlinExtractor(),
|
|
68
|
-
scala: new ScalaExtractor(),
|
|
69
|
-
lua: new LuaExtractor(),
|
|
70
|
-
bash: new BashExtractor(),
|
|
71
|
-
elixir: new ElixirExtractor(),
|
|
72
|
-
zig: new ZigExtractor(),
|
|
73
|
-
dart: new DartExtractor(),
|
|
74
|
-
ocaml: new OcamlExtractor(),
|
|
75
|
-
haskell: new HaskellExtractor(),
|
|
76
|
-
julia: new JuliaExtractor(),
|
|
77
|
-
elm: new ElmExtractor(),
|
|
78
|
-
objc: new ObjcExtractor(),
|
|
79
|
-
};
|
|
80
|
-
/** Number of symbols to embed per batch. */
|
|
81
|
-
const EMBED_BATCH_SIZE = 64;
|
|
82
|
-
// ─── IndexBuilder ─────────────────────────────────────────────────────────────
|
|
30
|
+
import { IndexPipeline } from './pipeline.js';
|
|
31
|
+
import { SourceIndexStage, DocsIndexStage, ImportResolutionStage, DependencyApiStage, LspEnrichmentStage, EmbeddingStage, } from './stages/index.js';
|
|
32
|
+
// ─── IndexBuilder (façade) ────────────────────────────────────────────────────
|
|
83
33
|
/**
|
|
84
|
-
*
|
|
34
|
+
* Façade over the composable `IndexPipeline`.
|
|
35
|
+
*
|
|
36
|
+
* Preserves backward-compatible public API while internally delegating to
|
|
37
|
+
* pipeline stages for the actual work.
|
|
85
38
|
*
|
|
86
39
|
* @example
|
|
87
40
|
* ```ts
|
|
@@ -92,8 +45,6 @@ const EMBED_BATCH_SIZE = 64;
|
|
|
92
45
|
export class IndexBuilder {
|
|
93
46
|
dbPath;
|
|
94
47
|
walkerConfig;
|
|
95
|
-
pool;
|
|
96
|
-
resolver;
|
|
97
48
|
embedder;
|
|
98
49
|
history;
|
|
99
50
|
indexDependencies;
|
|
@@ -103,8 +54,6 @@ export class IndexBuilder {
|
|
|
103
54
|
constructor(dbPath, walkerConfig, embedder, embeddingModelOrOptions) {
|
|
104
55
|
this.dbPath = dbPath;
|
|
105
56
|
this.walkerConfig = walkerConfig;
|
|
106
|
-
this.pool = new ParserPool();
|
|
107
|
-
this.resolver = new ImportResolver();
|
|
108
57
|
const opts = typeof embeddingModelOrOptions === 'string'
|
|
109
58
|
? { embeddingModel: embeddingModelOrOptions }
|
|
110
59
|
: (embeddingModelOrOptions ?? {});
|
|
@@ -123,249 +72,115 @@ export class IndexBuilder {
|
|
|
123
72
|
}
|
|
124
73
|
// ─── Public API ──────────────────────────────────────────────────────────
|
|
125
74
|
/**
|
|
126
|
-
* Performs a full build
|
|
127
|
-
*
|
|
128
|
-
* the
|
|
75
|
+
* Performs a full build by running the composable pipeline.
|
|
76
|
+
*
|
|
77
|
+
* The pipeline enforces the enrichment → resolution data-dependency
|
|
78
|
+
* chain structurally (by stage ordering), not by convention.
|
|
129
79
|
*/
|
|
130
80
|
async build() {
|
|
131
81
|
const log = getLogger();
|
|
132
82
|
const buildStart = performance.now();
|
|
133
83
|
const db = openDb(this.dbPath);
|
|
134
84
|
const branch = this.resolveBranch();
|
|
135
|
-
const lspCoordinator = this.createLspEnrichmentCoordinator();
|
|
136
85
|
log.indexing('build started', { dbPath: this.dbPath, branch, rootDir: this.walkerConfig.rootDir });
|
|
86
|
+
// Build the pipeline with all stages in dependency order.
|
|
87
|
+
const pipeline = new IndexPipeline([
|
|
88
|
+
new SourceIndexStage(),
|
|
89
|
+
new DocsIndexStage(),
|
|
90
|
+
new ImportResolutionStage(),
|
|
91
|
+
new DependencyApiStage(),
|
|
92
|
+
new LspEnrichmentStage(),
|
|
93
|
+
resolutionStage(),
|
|
94
|
+
testMapStage(),
|
|
95
|
+
historyStage(),
|
|
96
|
+
new EmbeddingStage(),
|
|
97
|
+
]);
|
|
98
|
+
const context = {
|
|
99
|
+
db,
|
|
100
|
+
dbPath: this.dbPath,
|
|
101
|
+
walkerConfig: this.walkerConfig,
|
|
102
|
+
branch,
|
|
103
|
+
lsp: this.lspSettings,
|
|
104
|
+
embedder: this.embedder,
|
|
105
|
+
log,
|
|
106
|
+
files: [],
|
|
107
|
+
indexDependencies: this.indexDependencies,
|
|
108
|
+
history: this.history,
|
|
109
|
+
docsAutoNotes: this.docsAutoNotes,
|
|
110
|
+
staleSymbolIds: [],
|
|
111
|
+
changedSourcePaths: [],
|
|
112
|
+
changedDocPaths: [],
|
|
113
|
+
};
|
|
137
114
|
try {
|
|
138
|
-
|
|
139
|
-
const files = await walkFiles(this.walkerConfig);
|
|
140
|
-
const docs = await walkDocumentationFiles(this.walkerConfig);
|
|
141
|
-
log.indexing('walk complete', { fileCount: files.length, docCount: docs.length });
|
|
142
|
-
if (lspCoordinator) {
|
|
143
|
-
const languages = new Set(files.map((file) => file.language));
|
|
144
|
-
if (this.indexDependencies)
|
|
145
|
-
languages.add('typescript');
|
|
146
|
-
await lspCoordinator.start(languages);
|
|
147
|
-
}
|
|
148
|
-
const resumeAt = this.loadBuildCheckpoint(db, branch, files.length);
|
|
149
|
-
if (resumeAt > 0) {
|
|
150
|
-
log.indexing('resuming from checkpoint', { resumeAt, totalFiles: files.length });
|
|
151
|
-
}
|
|
152
|
-
db.transaction(() => {
|
|
153
|
-
for (let i = resumeAt; i < files.length; i++) {
|
|
154
|
-
const file = files[i];
|
|
155
|
-
if (!file)
|
|
156
|
-
continue;
|
|
157
|
-
this.processFile(db, file.path, file.language, branch);
|
|
158
|
-
this.saveBuildCheckpoint(db, branch, i + 1, files.length);
|
|
159
|
-
}
|
|
160
|
-
const seenDocPaths = new Set();
|
|
161
|
-
for (const doc of docs) {
|
|
162
|
-
seenDocPaths.add(doc.path);
|
|
163
|
-
this.processDocumentationFile(db, doc, branch);
|
|
164
|
-
this.upsertSeededDocumentationNote(db, doc, branch);
|
|
165
|
-
}
|
|
166
|
-
this.removeStaleDocumentation(db, branch, seenDocPaths);
|
|
167
|
-
})();
|
|
168
|
-
this.saveBuildCheckpoint(db, branch, files.length, files.length);
|
|
169
|
-
log.indexing('files processed, resolving imports');
|
|
170
|
-
this.resolveImports(db, branch);
|
|
171
|
-
await this.indexDependencyDeclarations(db, lspCoordinator);
|
|
172
|
-
await this.enrichProjectRefs(db, branch, files, lspCoordinator);
|
|
173
|
-
refreshTestMappings(db, branch);
|
|
174
|
-
resolveSymbolEdges(db);
|
|
115
|
+
await pipeline.run(context, 'build');
|
|
175
116
|
this.saveLastKnownHead(db);
|
|
176
|
-
if (this.embedder) {
|
|
177
|
-
log.indexing('embedding started', { model: this.embeddingModel });
|
|
178
|
-
await this.embedder.init();
|
|
179
|
-
await this.embedStructural(db);
|
|
180
|
-
await this.embedDocumentation(db);
|
|
181
|
-
log.indexing('embedding complete');
|
|
182
|
-
}
|
|
183
|
-
if (this.history) {
|
|
184
|
-
log.indexing('git history ingestion started');
|
|
185
|
-
const historyOptions = typeof this.history === 'object' ? this.history : undefined;
|
|
186
|
-
await ingestGitHistory(db, this.walkerConfig.rootDir, historyOptions);
|
|
187
|
-
if (this.embedder) {
|
|
188
|
-
await this.embedCommitMessages(db);
|
|
189
|
-
}
|
|
190
|
-
log.indexing('git history ingestion complete');
|
|
191
|
-
}
|
|
192
117
|
// Gather final DB stats for the build summary
|
|
193
|
-
|
|
194
|
-
try {
|
|
195
|
-
totalSymbols = db.prepare('SELECT COUNT(*) AS cnt FROM symbols').get().cnt;
|
|
196
|
-
}
|
|
197
|
-
catch { /* table may not exist */ }
|
|
198
|
-
let totalEdges = 0;
|
|
199
|
-
try {
|
|
200
|
-
totalEdges = db.prepare('SELECT COUNT(*) AS cnt FROM symbol_refs').get().cnt;
|
|
201
|
-
}
|
|
202
|
-
catch { /* table may not exist */ }
|
|
203
|
-
let totalDocs = 0;
|
|
204
|
-
try {
|
|
205
|
-
totalDocs = db.prepare('SELECT COUNT(*) AS cnt FROM docs').get().cnt;
|
|
206
|
-
}
|
|
207
|
-
catch { /* table may not exist */ }
|
|
208
|
-
let commitCount;
|
|
209
|
-
try {
|
|
210
|
-
commitCount = db.prepare('SELECT COUNT(*) AS cnt FROM commits').get().cnt;
|
|
211
|
-
}
|
|
212
|
-
catch { /* commits table may not exist */ }
|
|
213
|
-
const dbSizeBytes = fs.existsSync(this.dbPath) ? fs.statSync(this.dbPath).size : undefined;
|
|
118
|
+
const stats = this.gatherDbStats(db);
|
|
214
119
|
const indexDurationMs = Math.round(performance.now() - buildStart);
|
|
215
120
|
log.startup('indexing complete', {
|
|
216
121
|
dbPath: this.dbPath,
|
|
217
|
-
dbSizeBytes,
|
|
122
|
+
dbSizeBytes: fs.existsSync(this.dbPath) ? fs.statSync(this.dbPath).size : undefined,
|
|
218
123
|
embeddingModel: this.embeddingModel,
|
|
219
124
|
embeddingReady: !!this.embedder,
|
|
220
|
-
totalFiles: files.length,
|
|
221
|
-
|
|
222
|
-
totalDocs,
|
|
223
|
-
totalEdges,
|
|
224
|
-
commitCount,
|
|
125
|
+
totalFiles: context.files.length,
|
|
126
|
+
...stats,
|
|
225
127
|
indexDurationMs,
|
|
226
128
|
});
|
|
227
129
|
}
|
|
228
130
|
finally {
|
|
229
|
-
if (lspCoordinator) {
|
|
230
|
-
await lspCoordinator.dispose();
|
|
231
|
-
}
|
|
232
131
|
db.close();
|
|
233
132
|
}
|
|
234
133
|
}
|
|
235
134
|
/**
|
|
236
135
|
* Incrementally re-processes only the listed files and updates the DB.
|
|
237
|
-
*
|
|
136
|
+
*
|
|
137
|
+
* Delegates to the same pipeline as `build()` — each stage handles
|
|
138
|
+
* `'update'` mode by operating only on the changed-file set.
|
|
238
139
|
*
|
|
239
140
|
* @param changedFiles Absolute paths of files that have changed.
|
|
240
141
|
*/
|
|
241
142
|
async update(changedFiles) {
|
|
242
143
|
const db = openDb(this.dbPath);
|
|
243
144
|
const branch = this.resolveBranch();
|
|
244
|
-
const
|
|
245
|
-
const
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
145
|
+
const log = getLogger();
|
|
146
|
+
const pipeline = new IndexPipeline([
|
|
147
|
+
new SourceIndexStage(),
|
|
148
|
+
new DocsIndexStage(),
|
|
149
|
+
new ImportResolutionStage(),
|
|
150
|
+
new DependencyApiStage(),
|
|
151
|
+
new LspEnrichmentStage(),
|
|
152
|
+
resolutionStage(),
|
|
153
|
+
testMapStage(),
|
|
154
|
+
historyStage(),
|
|
155
|
+
new EmbeddingStage(),
|
|
156
|
+
]);
|
|
157
|
+
const context = {
|
|
158
|
+
db,
|
|
159
|
+
dbPath: this.dbPath,
|
|
160
|
+
walkerConfig: this.walkerConfig,
|
|
161
|
+
branch,
|
|
162
|
+
lsp: this.lspSettings,
|
|
163
|
+
embedder: this.embedder,
|
|
164
|
+
log,
|
|
165
|
+
files: [],
|
|
166
|
+
indexDependencies: this.indexDependencies,
|
|
167
|
+
history: this.history,
|
|
168
|
+
docsAutoNotes: this.docsAutoNotes,
|
|
169
|
+
changedFiles,
|
|
170
|
+
staleSymbolIds: [],
|
|
171
|
+
changedSourcePaths: [],
|
|
172
|
+
changedDocPaths: [],
|
|
173
|
+
};
|
|
252
174
|
try {
|
|
253
|
-
|
|
254
|
-
const docs = await walkDocumentationFiles(this.walkerConfig);
|
|
255
|
-
const docsByPath = new Map(docs.map(doc => [doc.path, doc]));
|
|
256
|
-
if (lspCoordinator) {
|
|
257
|
-
const languages = new Set();
|
|
258
|
-
for (const filePath of changedFiles) {
|
|
259
|
-
if (!fs.existsSync(filePath))
|
|
260
|
-
continue;
|
|
261
|
-
const language = detectLanguageForPath(filePath, this.walkerConfig);
|
|
262
|
-
if (language)
|
|
263
|
-
languages.add(language);
|
|
264
|
-
}
|
|
265
|
-
if (this.indexDependencies)
|
|
266
|
-
languages.add('typescript');
|
|
267
|
-
await lspCoordinator.start(languages);
|
|
268
|
-
}
|
|
269
|
-
db.transaction(() => {
|
|
270
|
-
for (const filePath of changedFiles) {
|
|
271
|
-
// If the file no longer exists, remove it from the DB
|
|
272
|
-
if (!fs.existsSync(filePath)) {
|
|
273
|
-
const row = db.prepare('SELECT id FROM files WHERE path = ? AND branch = ?').get(filePath, branch);
|
|
274
|
-
if (row) {
|
|
275
|
-
// Collect symbol IDs for embedding cleanup before cascade-delete removes them.
|
|
276
|
-
const symRows = db.prepare('SELECT id FROM symbols WHERE file_id = ?').all(row.id);
|
|
277
|
-
for (const s of symRows)
|
|
278
|
-
staleSymbolIds.push(s.id);
|
|
279
|
-
// Null out any resolved_id references pointing to this file
|
|
280
|
-
db.prepare('UPDATE file_imports SET resolved_id = NULL WHERE resolved_id = ?').run(row.id);
|
|
281
|
-
db.prepare('DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)').run(row.id);
|
|
282
|
-
db.prepare('DELETE FROM files WHERE id = ?').run(row.id);
|
|
283
|
-
}
|
|
284
|
-
this.deleteDocumentationByPath(db, filePath, branch);
|
|
285
|
-
continue;
|
|
286
|
-
}
|
|
287
|
-
const language = detectLanguageForPath(filePath, this.walkerConfig);
|
|
288
|
-
if (language) {
|
|
289
|
-
enrichedFiles.push({ path: filePath, language });
|
|
290
|
-
changedSourcePaths.push(filePath);
|
|
291
|
-
// Null out resolved_id references pointing to this file before deletion
|
|
292
|
-
const existingRow = db.prepare('SELECT id FROM files WHERE path = ? AND branch = ?').get(filePath, branch);
|
|
293
|
-
if (existingRow) {
|
|
294
|
-
// Collect symbol IDs for embedding cleanup before cascade-delete removes them.
|
|
295
|
-
const symRows = db.prepare('SELECT id FROM symbols WHERE file_id = ?').all(existingRow.id);
|
|
296
|
-
for (const s of symRows)
|
|
297
|
-
staleSymbolIds.push(s.id);
|
|
298
|
-
db.prepare('UPDATE file_imports SET resolved_id = NULL WHERE resolved_id = ?').run(existingRow.id);
|
|
299
|
-
db.prepare('UPDATE symbol_refs SET callee_id = NULL WHERE callee_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(existingRow.id);
|
|
300
|
-
db.prepare('UPDATE type_refs SET type_id = NULL WHERE type_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(existingRow.id);
|
|
301
|
-
db.prepare('UPDATE symbol_relationships SET target_symbol_id = NULL WHERE target_symbol_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(existingRow.id);
|
|
302
|
-
db.prepare('DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)').run(existingRow.id);
|
|
303
|
-
}
|
|
304
|
-
// Delete existing rows for this file (cascade handles symbols/imports)
|
|
305
|
-
db.prepare('DELETE FROM files WHERE path = ? AND branch = ?').run(filePath, branch);
|
|
306
|
-
this.processFile(db, filePath, language, branch);
|
|
307
|
-
}
|
|
308
|
-
const changedDoc = docsByPath.get(filePath);
|
|
309
|
-
if (changedDoc) {
|
|
310
|
-
this.processDocumentationFile(db, changedDoc, branch);
|
|
311
|
-
this.upsertSeededDocumentationNote(db, changedDoc, branch);
|
|
312
|
-
changedDocPaths.push(filePath);
|
|
313
|
-
}
|
|
314
|
-
else {
|
|
315
|
-
this.deleteDocumentationByPath(db, filePath, branch);
|
|
316
|
-
}
|
|
317
|
-
}
|
|
318
|
-
})();
|
|
319
|
-
this.resolveImports(db, branch);
|
|
320
|
-
await this.indexDependencyDeclarations(db, lspCoordinator);
|
|
321
|
-
await this.enrichProjectRefs(db, branch, enrichedFiles, lspCoordinator);
|
|
322
|
-
refreshTestMappings(db, branch);
|
|
323
|
-
if (this.history) {
|
|
324
|
-
const historyOptions = typeof this.history === 'object' ? this.history : undefined;
|
|
325
|
-
await ingestGitHistory(db, this.walkerConfig.rootDir, historyOptions);
|
|
326
|
-
}
|
|
327
|
-
if (this.embedder) {
|
|
328
|
-
await this.embedder.init();
|
|
329
|
-
// Clean up orphaned symbol embeddings for symbols that were deleted/replaced.
|
|
330
|
-
this.deleteSymbolEmbeddings(db, staleSymbolIds);
|
|
331
|
-
// Resolve the new file IDs for the changed source files.
|
|
332
|
-
const changedFileIds = [];
|
|
333
|
-
for (const p of changedSourcePaths) {
|
|
334
|
-
const row = db.prepare('SELECT id FROM files WHERE path = ? AND branch = ?').get(p, branch);
|
|
335
|
-
if (row)
|
|
336
|
-
changedFileIds.push(row.id);
|
|
337
|
-
}
|
|
338
|
-
// Resolve the new doc IDs for the changed documentation files.
|
|
339
|
-
const changedDocIds = [];
|
|
340
|
-
for (const p of changedDocPaths) {
|
|
341
|
-
const row = db.prepare('SELECT id FROM docs WHERE path = ? AND branch = ?').get(p, branch);
|
|
342
|
-
if (row)
|
|
343
|
-
changedDocIds.push(row.id);
|
|
344
|
-
}
|
|
345
|
-
await this.embedStructural(db, changedFileIds);
|
|
346
|
-
await this.embedDocumentation(db, changedDocIds);
|
|
347
|
-
if (this.history) {
|
|
348
|
-
await this.embedCommitMessages(db);
|
|
349
|
-
}
|
|
350
|
-
}
|
|
351
|
-
resolveSymbolEdges(db);
|
|
175
|
+
await pipeline.run(context, 'update');
|
|
352
176
|
this.saveLastKnownHead(db);
|
|
353
177
|
}
|
|
354
178
|
finally {
|
|
355
|
-
if (lspCoordinator) {
|
|
356
|
-
await lspCoordinator.dispose();
|
|
357
|
-
}
|
|
358
179
|
db.close();
|
|
359
180
|
}
|
|
360
181
|
}
|
|
361
182
|
/**
|
|
362
183
|
* Writes an LLM-generated summary for a symbol to `symbol_summaries`.
|
|
363
|
-
* If an `EmbeddingProvider` was configured, also embeds the summary text
|
|
364
|
-
* and persists it to `symbol_semantic_embeddings`.
|
|
365
|
-
*
|
|
366
|
-
* @param symbolId Row ID of the symbol in the `symbols` table.
|
|
367
|
-
* @param summary Natural-language summary text.
|
|
368
|
-
* @param model Name of the model that produced the summary.
|
|
369
184
|
*/
|
|
370
185
|
async ingestSummary(symbolId, summary, model = 'unknown') {
|
|
371
186
|
const db = openDb(this.dbPath);
|
|
@@ -401,404 +216,7 @@ export class IndexBuilder {
|
|
|
401
216
|
db.close();
|
|
402
217
|
}
|
|
403
218
|
}
|
|
404
|
-
// ─── Private helpers
|
|
405
|
-
/** Parse one file, extract symbols/imports/callRefs, and insert into the DB. */
|
|
406
|
-
processFile(db, filePath, language, branch) {
|
|
407
|
-
let source;
|
|
408
|
-
try {
|
|
409
|
-
source = fs.readFileSync(filePath, 'utf8');
|
|
410
|
-
}
|
|
411
|
-
catch {
|
|
412
|
-
return; // Skip unreadable files
|
|
413
|
-
}
|
|
414
|
-
const hash = crypto.createHash('sha256').update(source).digest('hex');
|
|
415
|
-
// Check if the file is already up-to-date
|
|
416
|
-
const existing = db.prepare('SELECT id, last_hash FROM files WHERE path = ? AND branch = ?').get(filePath, branch);
|
|
417
|
-
if (existing?.last_hash === hash)
|
|
418
|
-
return;
|
|
419
|
-
const sizeBytes = Buffer.byteLength(source, 'utf8');
|
|
420
|
-
// Upsert the file row
|
|
421
|
-
let fileId;
|
|
422
|
-
if (existing) {
|
|
423
|
-
db.prepare(`UPDATE files SET language = ?, size_bytes = ?, last_hash = ?, source = ?, indexed_at = unixepoch()
|
|
424
|
-
WHERE id = ?`).run(language, sizeBytes, hash, source, existing.id);
|
|
425
|
-
fileId = existing.id;
|
|
426
|
-
// Remove stale symbols / imports / external deps (also clean up FTS5 index)
|
|
427
|
-
db.prepare(`DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)`).run(fileId);
|
|
428
|
-
db.prepare('DELETE FROM symbol_relationships WHERE file_id = ?').run(fileId);
|
|
429
|
-
db.prepare('DELETE FROM type_refs WHERE file_id = ?').run(fileId);
|
|
430
|
-
// NULL out cross-file FK references that point to symbols in this file
|
|
431
|
-
db.prepare('UPDATE symbol_refs SET callee_id = NULL WHERE callee_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(fileId);
|
|
432
|
-
db.prepare('UPDATE type_refs SET type_id = NULL WHERE type_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(fileId);
|
|
433
|
-
db.prepare('UPDATE symbol_relationships SET target_symbol_id = NULL WHERE target_symbol_id IN (SELECT id FROM symbols WHERE file_id = ?)').run(fileId);
|
|
434
|
-
db.prepare('DELETE FROM symbols WHERE file_id = ?').run(fileId);
|
|
435
|
-
db.prepare('DELETE FROM file_imports WHERE file_id = ?').run(fileId);
|
|
436
|
-
db.prepare('DELETE FROM external_deps WHERE file_id = ?').run(fileId);
|
|
437
|
-
db.prepare('DELETE FROM api_routes WHERE file_id = ?').run(fileId);
|
|
438
|
-
// Delete stale annotations so cascade-independent re-index doesn't accumulate duplicates.
|
|
439
|
-
db.prepare('DELETE FROM annotations WHERE file_id = ?').run(fileId);
|
|
440
|
-
}
|
|
441
|
-
else {
|
|
442
|
-
const info = db
|
|
443
|
-
.prepare(`INSERT INTO files (path, branch, language, size_bytes, last_hash, source)
|
|
444
|
-
VALUES (?, ?, ?, ?, ?, ?)`)
|
|
445
|
-
.run(filePath, branch, language, sizeBytes, hash, source);
|
|
446
|
-
fileId = Number(info.lastInsertRowid);
|
|
447
|
-
}
|
|
448
|
-
// Parse the source
|
|
449
|
-
const tree = this.pool.parse(language, source);
|
|
450
|
-
if (!tree)
|
|
451
|
-
return;
|
|
452
|
-
const extractor = EXTRACTORS[language];
|
|
453
|
-
if (!extractor)
|
|
454
|
-
return;
|
|
455
|
-
const result = extractor.extract(tree, source, filePath);
|
|
456
|
-
// Insert symbols and keep FTS5 index in sync
|
|
457
|
-
const insertSymbol = db.prepare(`INSERT INTO symbols (file_id, name, kind, start_line, end_line, signature, doc_comment)
|
|
458
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)`);
|
|
459
|
-
const insertFts = db.prepare(`INSERT INTO symbols_fts(rowid, name, signature, kind) VALUES (?, ?, ?, ?)`);
|
|
460
|
-
// Map from callerSymbol name → symbol row ID (for call refs)
|
|
461
|
-
const symbolIdMap = new Map();
|
|
462
|
-
for (const sym of result.symbols) {
|
|
463
|
-
const info = insertSymbol.run(fileId, sym.name, sym.kind, sym.startLine, sym.endLine, sym.signature ?? null, sym.docComment ?? null);
|
|
464
|
-
const symId = Number(info.lastInsertRowid);
|
|
465
|
-
symbolIdMap.set(sym.name, symId);
|
|
466
|
-
insertFts.run(symId, sym.name, buildStructuralEmbeddingText({
|
|
467
|
-
name: sym.name,
|
|
468
|
-
signature: sym.signature ?? null,
|
|
469
|
-
}), sym.kind);
|
|
470
|
-
}
|
|
471
|
-
const insertRoute = db.prepare(`INSERT INTO api_routes (file_id, method, path, handler_id, handler_name, framework, line, middleware)
|
|
472
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
|
|
473
|
-
for (const route of result.routes) {
|
|
474
|
-
insertRoute.run(fileId, route.method, route.path, symbolIdMap.get(route.handler) ?? null, route.handler, route.framework, route.line, route.middleware ? JSON.stringify(route.middleware) : null);
|
|
475
|
-
}
|
|
476
|
-
// Insert raw imports (resolved_id will be filled in resolveImports())
|
|
477
|
-
const insertImport = db.prepare(`INSERT INTO file_imports (file_id, raw_import) VALUES (?, ?)`);
|
|
478
|
-
for (const imp of result.imports) {
|
|
479
|
-
insertImport.run(fileId, imp.source);
|
|
480
|
-
}
|
|
481
|
-
// Insert call refs (callee_id resolved in call-graph phase)
|
|
482
|
-
const insertCallRef = db.prepare(`INSERT INTO symbol_refs (caller_id, callee_name, call_line, call_character, call_kind)
|
|
483
|
-
VALUES (?, ?, ?, ?, ?)`);
|
|
484
|
-
for (const ref of result.callRefs) {
|
|
485
|
-
const callerId = symbolIdMap.get(ref.callerSymbol);
|
|
486
|
-
if (callerId !== undefined) {
|
|
487
|
-
insertCallRef.run(callerId, ref.calleeRaw, ref.line, ref.character ?? null, ref.callKind ?? 'direct');
|
|
488
|
-
}
|
|
489
|
-
}
|
|
490
|
-
// Insert relationships (target_symbol_id resolved in resolveSymbolEdges phase)
|
|
491
|
-
const insertRelationship = db.prepare(`INSERT INTO symbol_relationships (file_id, source_symbol_id, target_symbol_name, relationship_type, line, character)
|
|
492
|
-
VALUES (?, ?, ?, ?, ?, ?)`);
|
|
493
|
-
for (const rel of result.relationships) {
|
|
494
|
-
const sourceId = symbolIdMap.get(rel.fromSymbol) ?? null;
|
|
495
|
-
insertRelationship.run(fileId, sourceId, rel.toSymbol, rel.kind, rel.line, rel.character ?? null);
|
|
496
|
-
}
|
|
497
|
-
// Insert type refs (type_id resolved in resolveSymbolEdges phase)
|
|
498
|
-
const insertTypeRef = db.prepare(`INSERT INTO type_refs (file_id, symbol_id, type_name, type_name_bare, ref_kind, ref_line, ref_character)
|
|
499
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)`);
|
|
500
|
-
for (const ref of result.typeRefs) {
|
|
501
|
-
const symId = symbolIdMap.get(ref.enclosingSymbol) ?? null;
|
|
502
|
-
insertTypeRef.run(fileId, symId, ref.typeRaw, normalizeTypeName(ref.typeRaw), ref.refKind, ref.line, ref.character ?? null);
|
|
503
|
-
}
|
|
504
|
-
}
|
|
505
|
-
processDocumentationFile(db, doc, branch) {
|
|
506
|
-
const existing = db.prepare('SELECT id, content_hash FROM docs WHERE path = ? AND branch = ?').get(doc.path, branch);
|
|
507
|
-
if (existing?.content_hash === doc.hash) {
|
|
508
|
-
return;
|
|
509
|
-
}
|
|
510
|
-
let docId;
|
|
511
|
-
if (existing) {
|
|
512
|
-
db.prepare(`UPDATE docs
|
|
513
|
-
SET kind = ?, title = ?, content = ?, content_hash = ?, indexed_at = unixepoch()
|
|
514
|
-
WHERE id = ?`).run(doc.kind, doc.title, doc.content, doc.hash, existing.id);
|
|
515
|
-
docId = existing.id;
|
|
516
|
-
}
|
|
517
|
-
else {
|
|
518
|
-
const info = db.prepare(`INSERT INTO docs (path, branch, kind, title, content, content_hash)
|
|
519
|
-
VALUES (?, ?, ?, ?, ?, ?)`).run(doc.path, branch, doc.kind, doc.title, doc.content, doc.hash);
|
|
520
|
-
docId = Number(info.lastInsertRowid);
|
|
521
|
-
}
|
|
522
|
-
const existingSections = db.prepare('SELECT id, section_index FROM doc_sections WHERE doc_id = ?').all(docId);
|
|
523
|
-
const insertSection = db.prepare(`INSERT INTO doc_sections (
|
|
524
|
-
doc_id, section_index, title, depth, heading_path, line_start, line_end, content, content_hash
|
|
525
|
-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
526
|
-
ON CONFLICT(doc_id, section_index) DO UPDATE SET
|
|
527
|
-
title = excluded.title,
|
|
528
|
-
depth = excluded.depth,
|
|
529
|
-
heading_path = excluded.heading_path,
|
|
530
|
-
line_start = excluded.line_start,
|
|
531
|
-
line_end = excluded.line_end,
|
|
532
|
-
content = excluded.content,
|
|
533
|
-
content_hash = excluded.content_hash`);
|
|
534
|
-
const activeSectionIndexes = new Set();
|
|
535
|
-
for (const chunk of doc.chunks) {
|
|
536
|
-
activeSectionIndexes.add(chunk.sectionIndex);
|
|
537
|
-
insertSection.run(docId, chunk.sectionIndex, chunk.title, chunk.depth, JSON.stringify(chunk.headingPath), chunk.lineStart, chunk.lineEnd, chunk.content, chunk.hash);
|
|
538
|
-
}
|
|
539
|
-
const staleSectionIds = existingSections
|
|
540
|
-
.filter(section => !activeSectionIndexes.has(section.section_index))
|
|
541
|
-
.map(section => section.id);
|
|
542
|
-
this.deleteDocSectionEmbeddings(db, staleSectionIds);
|
|
543
|
-
if (staleSectionIds.length > 0) {
|
|
544
|
-
db.prepare(`DELETE FROM doc_sections
|
|
545
|
-
WHERE id IN (${staleSectionIds.map(() => '?').join(', ')})`).run(...staleSectionIds);
|
|
546
|
-
}
|
|
547
|
-
}
|
|
548
|
-
upsertSeededDocumentationNote(db, doc, branch) {
|
|
549
|
-
if (!this.docsAutoNotes)
|
|
550
|
-
return;
|
|
551
|
-
const key = inferSeededDocNoteKey(doc);
|
|
552
|
-
if (!key)
|
|
553
|
-
return;
|
|
554
|
-
const scope = buildDocNoteScope(doc.path, branch);
|
|
555
|
-
const existing = db.prepare('SELECT content, source_hash FROM notes WHERE key = ? AND scope = ?').get(key, scope);
|
|
556
|
-
if (existing?.content === doc.content && existing.source_hash === doc.hash) {
|
|
557
|
-
return;
|
|
558
|
-
}
|
|
559
|
-
db.prepare(`INSERT INTO notes (key, scope, content, model, source_hash, created_at, updated_at)
|
|
560
|
-
VALUES (?, ?, ?, ?, ?, unixepoch(), unixepoch())
|
|
561
|
-
ON CONFLICT(key, scope) DO UPDATE SET
|
|
562
|
-
content = excluded.content,
|
|
563
|
-
model = excluded.model,
|
|
564
|
-
source_hash = excluded.source_hash,
|
|
565
|
-
updated_at = unixepoch()`).run(key, scope, doc.content, 'system:auto-doc-seed', doc.hash);
|
|
566
|
-
}
|
|
567
|
-
removeStaleDocumentation(db, branch, retainedPaths) {
|
|
568
|
-
const docs = db.prepare('SELECT id, path FROM docs WHERE branch = ?').all(branch);
|
|
569
|
-
for (const doc of docs) {
|
|
570
|
-
if (!retainedPaths.has(doc.path)) {
|
|
571
|
-
this.deleteDocumentationById(db, doc.id);
|
|
572
|
-
}
|
|
573
|
-
}
|
|
574
|
-
}
|
|
575
|
-
deleteDocumentationByPath(db, docPath, branch) {
|
|
576
|
-
const row = db.prepare('SELECT id FROM docs WHERE path = ? AND branch = ?').get(docPath, branch);
|
|
577
|
-
if (!row)
|
|
578
|
-
return;
|
|
579
|
-
this.deleteDocumentationById(db, row.id);
|
|
580
|
-
}
|
|
581
|
-
deleteDocumentationById(db, docId) {
|
|
582
|
-
const sectionIds = db.prepare('SELECT id FROM doc_sections WHERE doc_id = ?').all(docId);
|
|
583
|
-
this.deleteDocSectionEmbeddings(db, sectionIds.map(row => row.id));
|
|
584
|
-
db.prepare('DELETE FROM docs WHERE id = ?').run(docId);
|
|
585
|
-
}
|
|
586
|
-
deleteDocSectionEmbeddings(db, sectionIds) {
|
|
587
|
-
if (sectionIds.length === 0)
|
|
588
|
-
return;
|
|
589
|
-
const hasEmbeddingsTable = db.prepare("SELECT 1 AS present FROM sqlite_master WHERE type IN ('table', 'virtual table') AND name = 'doc_section_embeddings'").get();
|
|
590
|
-
if (!hasEmbeddingsTable)
|
|
591
|
-
return;
|
|
592
|
-
db.prepare(`DELETE FROM doc_section_embeddings WHERE rowid IN (${sectionIds.map(() => '?').join(', ')})`).run(...sectionIds);
|
|
593
|
-
}
|
|
594
|
-
/**
|
|
595
|
-
* Remove orphaned rows from the `symbol_embeddings` vec0 table for symbols
|
|
596
|
-
* that have been deleted (e.g. file re-processed or removed).
|
|
597
|
-
*/
|
|
598
|
-
deleteSymbolEmbeddings(db, symbolIds) {
|
|
599
|
-
if (symbolIds.length === 0)
|
|
600
|
-
return;
|
|
601
|
-
const hasEmbeddingsTable = db.prepare("SELECT 1 AS present FROM sqlite_master WHERE type IN ('table', 'virtual table') AND name = 'symbol_embeddings'").get();
|
|
602
|
-
if (!hasEmbeddingsTable)
|
|
603
|
-
return;
|
|
604
|
-
db.prepare(`DELETE FROM symbol_embeddings WHERE rowid IN (${symbolIds.map(() => '?').join(', ')})`).run(...symbolIds);
|
|
605
|
-
}
|
|
606
|
-
/**
|
|
607
|
-
* Second pass: resolve raw_import strings to file IDs in the
|
|
608
|
-
* `file_imports.resolved_id` column. Also populates `external_deps` for
|
|
609
|
-
* any import that resolves to an external package.
|
|
610
|
-
*/
|
|
611
|
-
resolveImports(db, branch) {
|
|
612
|
-
const rootDir = this.walkerConfig.rootDir;
|
|
613
|
-
// Fetch all unresolved imports with their file's path, language, and file_id
|
|
614
|
-
const rows = db
|
|
615
|
-
.prepare(`SELECT fi.id, fi.file_id, fi.raw_import, f.path, f.language
|
|
616
|
-
FROM file_imports fi
|
|
617
|
-
JOIN files f ON f.id = fi.file_id
|
|
618
|
-
WHERE fi.resolved_id IS NULL AND f.branch = ?`)
|
|
619
|
-
.all(branch);
|
|
620
|
-
const updateResolved = db.prepare('UPDATE file_imports SET resolved_id = ? WHERE id = ?');
|
|
621
|
-
const insertExternalDep = db.prepare('INSERT OR IGNORE INTO external_deps (file_id, package) VALUES (?, ?)');
|
|
622
|
-
for (const row of rows) {
|
|
623
|
-
const resolved = this.resolver.resolve({ source: row.raw_import, importedNames: [] }, row.path, rootDir, row.language);
|
|
624
|
-
if (resolved.resolvedPath) {
|
|
625
|
-
const targetFile = db
|
|
626
|
-
.prepare('SELECT id FROM files WHERE path = ? AND branch = ?')
|
|
627
|
-
.get(resolved.resolvedPath, branch);
|
|
628
|
-
if (targetFile) {
|
|
629
|
-
updateResolved.run(targetFile.id, row.id);
|
|
630
|
-
}
|
|
631
|
-
}
|
|
632
|
-
else if (resolved.isExternal && resolved.externalName) {
|
|
633
|
-
insertExternalDep.run(row.file_id, resolved.externalName);
|
|
634
|
-
}
|
|
635
|
-
}
|
|
636
|
-
}
|
|
637
|
-
async indexDependencyDeclarations(db, lspCoordinator) {
|
|
638
|
-
db.prepare('DELETE FROM external_symbols').run();
|
|
639
|
-
if (!this.indexDependencies)
|
|
640
|
-
return;
|
|
641
|
-
const directDependencies = this.loadDirectDependencies();
|
|
642
|
-
if (directDependencies.size === 0)
|
|
643
|
-
return;
|
|
644
|
-
const extractor = EXTRACTORS.typescript;
|
|
645
|
-
if (!extractor)
|
|
646
|
-
return;
|
|
647
|
-
const insertExternalSymbol = db.prepare(`INSERT OR IGNORE INTO external_symbols
|
|
648
|
-
(
|
|
649
|
-
package_name,
|
|
650
|
-
package_version,
|
|
651
|
-
source_ref,
|
|
652
|
-
symbol_name,
|
|
653
|
-
symbol_kind,
|
|
654
|
-
signature,
|
|
655
|
-
doc_comment,
|
|
656
|
-
resolved_type_signature,
|
|
657
|
-
resolved_return_type,
|
|
658
|
-
definition_uri,
|
|
659
|
-
definition_path
|
|
660
|
-
)
|
|
661
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
|
|
662
|
-
for (const [packageName, declaredVersion] of directDependencies) {
|
|
663
|
-
const packageDir = path.join(this.walkerConfig.rootDir, 'node_modules', packageName);
|
|
664
|
-
if (!fs.existsSync(packageDir) || !fs.statSync(packageDir).isDirectory())
|
|
665
|
-
continue;
|
|
666
|
-
const packageVersion = this.readInstalledPackageVersion(packageDir) ?? declaredVersion ?? null;
|
|
667
|
-
const declarationFiles = this.collectDeclarationFiles(packageDir);
|
|
668
|
-
for (const declarationFile of declarationFiles) {
|
|
669
|
-
const source = fs.readFileSync(declarationFile, 'utf8');
|
|
670
|
-
const tree = this.pool.parse('typescript', source);
|
|
671
|
-
if (!tree)
|
|
672
|
-
continue;
|
|
673
|
-
const result = extractor.extract(tree, source, declarationFile);
|
|
674
|
-
const declarationSymbols = result.symbols.filter((symbol) => this.shouldIndexDependencySymbol(symbol));
|
|
675
|
-
const enrichmentRows = lspCoordinator
|
|
676
|
-
? await lspCoordinator.enrich({
|
|
677
|
-
filePath: declarationFile,
|
|
678
|
-
language: 'typescript',
|
|
679
|
-
source,
|
|
680
|
-
targets: declarationSymbols.map((symbol) => ({
|
|
681
|
-
line: symbol.startLine,
|
|
682
|
-
character: symbol.startCharacter ?? 0,
|
|
683
|
-
})),
|
|
684
|
-
})
|
|
685
|
-
: declarationSymbols.map(() => null);
|
|
686
|
-
for (let i = 0; i < declarationSymbols.length; i++) {
|
|
687
|
-
const symbol = declarationSymbols[i];
|
|
688
|
-
if (!symbol)
|
|
689
|
-
continue;
|
|
690
|
-
const metadata = enrichmentRows[i];
|
|
691
|
-
insertExternalSymbol.run(packageName, packageVersion, declarationFile, symbol.name, symbol.kind, symbol.signature, symbol.docComment ?? null, metadata?.resolvedTypeSignature ?? null, metadata?.resolvedReturnType ?? null, metadata?.definitionUri ?? null, metadata?.definitionPath ?? null);
|
|
692
|
-
}
|
|
693
|
-
}
|
|
694
|
-
}
|
|
695
|
-
}
|
|
696
|
-
createLspEnrichmentCoordinator() {
|
|
697
|
-
if (!this.lspSettings?.enabled) {
|
|
698
|
-
return null;
|
|
699
|
-
}
|
|
700
|
-
return new LspEnrichmentCoordinator(this.lspSettings, this.walkerConfig.rootDir);
|
|
701
|
-
}
|
|
702
|
-
async enrichProjectRefs(db, branch, files, lspCoordinator) {
|
|
703
|
-
if (!lspCoordinator || files.length === 0)
|
|
704
|
-
return;
|
|
705
|
-
const selectSymbols = db.prepare(`SELECT s.id, s.name, s.signature, s.start_line
|
|
706
|
-
FROM symbols s
|
|
707
|
-
JOIN files f ON f.id = s.file_id
|
|
708
|
-
WHERE f.path = ? AND f.branch = ?
|
|
709
|
-
ORDER BY s.id`);
|
|
710
|
-
const selectCallRefs = db.prepare(`SELECT sr.id, sr.call_line, sr.call_character
|
|
711
|
-
FROM symbol_refs sr
|
|
712
|
-
JOIN symbols s ON s.id = sr.caller_id
|
|
713
|
-
JOIN files f ON f.id = s.file_id
|
|
714
|
-
WHERE f.path = ? AND f.branch = ?
|
|
715
|
-
ORDER BY sr.id`);
|
|
716
|
-
const selectTypeRefs = db.prepare(`SELECT tr.id, tr.ref_line, tr.ref_character
|
|
717
|
-
FROM type_refs tr
|
|
718
|
-
JOIN files f ON f.id = tr.file_id
|
|
719
|
-
WHERE f.path = ? AND f.branch = ?
|
|
720
|
-
ORDER BY tr.id`);
|
|
721
|
-
const selectRelationships = db.prepare(`SELECT sr.id, sr.line, sr.character
|
|
722
|
-
FROM symbol_relationships sr
|
|
723
|
-
JOIN files f ON f.id = sr.file_id
|
|
724
|
-
WHERE f.path = ? AND f.branch = ? AND sr.line IS NOT NULL
|
|
725
|
-
ORDER BY sr.id`);
|
|
726
|
-
const updateSymbol = db.prepare(`UPDATE symbols
|
|
727
|
-
SET resolved_type_signature = ?, resolved_return_type = ?, definition_uri = ?, definition_path = ?
|
|
728
|
-
WHERE id = ?`);
|
|
729
|
-
const updateSymbolFts = db.prepare('UPDATE symbols_fts SET signature = ? WHERE rowid = ?');
|
|
730
|
-
const updateCallRef = db.prepare(`UPDATE symbol_refs
|
|
731
|
-
SET resolved_type_signature = ?, resolved_return_type = ?, definition_uri = ?, definition_path = ?
|
|
732
|
-
WHERE id = ?`);
|
|
733
|
-
const updateTypeRef = db.prepare(`UPDATE type_refs
|
|
734
|
-
SET resolved_type_signature = ?, definition_uri = ?, definition_path = ?
|
|
735
|
-
WHERE id = ?`);
|
|
736
|
-
const updateRelationship = db.prepare(`UPDATE symbol_relationships
|
|
737
|
-
SET definition_uri = ?, definition_path = ?
|
|
738
|
-
WHERE id = ?`);
|
|
739
|
-
for (const file of files) {
|
|
740
|
-
if (!file || !fs.existsSync(file.path))
|
|
741
|
-
continue;
|
|
742
|
-
let source;
|
|
743
|
-
try {
|
|
744
|
-
source = fs.readFileSync(file.path, 'utf8');
|
|
745
|
-
}
|
|
746
|
-
catch {
|
|
747
|
-
continue;
|
|
748
|
-
}
|
|
749
|
-
const tagged = [];
|
|
750
|
-
const symbols = selectSymbols.all(file.path, branch);
|
|
751
|
-
for (const s of symbols) {
|
|
752
|
-
tagged.push({ table: 'symbol', rowId: s.id, line: s.start_line, character: 0, name: s.name, signature: s.signature });
|
|
753
|
-
}
|
|
754
|
-
const callRefs = selectCallRefs.all(file.path, branch);
|
|
755
|
-
for (const cr of callRefs) {
|
|
756
|
-
tagged.push({ table: 'callRef', rowId: cr.id, line: cr.call_line, character: cr.call_character ?? 0 });
|
|
757
|
-
}
|
|
758
|
-
const typeRefs = selectTypeRefs.all(file.path, branch);
|
|
759
|
-
for (const tr of typeRefs) {
|
|
760
|
-
tagged.push({ table: 'typeRef', rowId: tr.id, line: tr.ref_line, character: tr.ref_character ?? 0 });
|
|
761
|
-
}
|
|
762
|
-
const relationships = selectRelationships.all(file.path, branch);
|
|
763
|
-
for (const r of relationships) {
|
|
764
|
-
tagged.push({ table: 'relationship', rowId: r.id, line: r.line, character: r.character ?? 0 });
|
|
765
|
-
}
|
|
766
|
-
if (tagged.length === 0)
|
|
767
|
-
continue;
|
|
768
|
-
const metadata = await lspCoordinator.enrich({
|
|
769
|
-
filePath: file.path,
|
|
770
|
-
language: file.language,
|
|
771
|
-
source,
|
|
772
|
-
targets: tagged.map(t => ({ line: t.line, character: t.character })),
|
|
773
|
-
});
|
|
774
|
-
for (let i = 0; i < tagged.length; i++) {
|
|
775
|
-
const tag = tagged[i];
|
|
776
|
-
const m = metadata[i];
|
|
777
|
-
if (!m)
|
|
778
|
-
continue;
|
|
779
|
-
switch (tag.table) {
|
|
780
|
-
case 'symbol':
|
|
781
|
-
updateSymbol.run(m.resolvedTypeSignature, m.resolvedReturnType, m.definitionUri, m.definitionPath, tag.rowId);
|
|
782
|
-
updateSymbolFts.run(buildStructuralEmbeddingText({
|
|
783
|
-
name: tag.name,
|
|
784
|
-
signature: tag.signature ?? null,
|
|
785
|
-
resolvedTypeSignature: m.resolvedTypeSignature,
|
|
786
|
-
resolvedReturnType: m.resolvedReturnType,
|
|
787
|
-
}), tag.rowId);
|
|
788
|
-
break;
|
|
789
|
-
case 'callRef':
|
|
790
|
-
updateCallRef.run(m.resolvedTypeSignature, m.resolvedReturnType, m.definitionUri, m.definitionPath, tag.rowId);
|
|
791
|
-
break;
|
|
792
|
-
case 'typeRef':
|
|
793
|
-
updateTypeRef.run(m.resolvedTypeSignature, m.definitionUri, m.definitionPath, tag.rowId);
|
|
794
|
-
break;
|
|
795
|
-
case 'relationship':
|
|
796
|
-
updateRelationship.run(m.definitionUri, m.definitionPath, tag.rowId);
|
|
797
|
-
break;
|
|
798
|
-
}
|
|
799
|
-
}
|
|
800
|
-
}
|
|
801
|
-
}
|
|
219
|
+
// ─── Private helpers (minimal — most logic lives in stages) ─────────────
|
|
802
220
|
resolveBranch() {
|
|
803
221
|
if (this.walkerConfig.branch)
|
|
804
222
|
return this.walkerConfig.branch;
|
|
@@ -806,245 +224,69 @@ export class IndexBuilder {
|
|
|
806
224
|
}
|
|
807
225
|
saveLastKnownHead(db) {
|
|
808
226
|
const headSha = this.readGitValue(['rev-parse', 'HEAD']);
|
|
809
|
-
if (headSha)
|
|
227
|
+
if (headSha)
|
|
810
228
|
setLoreMeta(db, LORE_META_LAST_HEAD_SHA, headSha);
|
|
811
|
-
}
|
|
812
|
-
}
|
|
813
|
-
saveDocsAutoNotesSetting(db) {
|
|
814
|
-
setLoreMeta(db, 'docs_auto_notes', this.docsAutoNotes ? '1' : '0');
|
|
815
229
|
}
|
|
816
230
|
readGitValue(args) {
|
|
817
231
|
try {
|
|
818
|
-
|
|
819
|
-
return value || undefined;
|
|
232
|
+
return execFileSync('git', ['-C', this.walkerConfig.rootDir, ...args], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] }).trim() || undefined;
|
|
820
233
|
}
|
|
821
234
|
catch {
|
|
822
235
|
return undefined;
|
|
823
236
|
}
|
|
824
237
|
}
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
if (!fs.existsSync(packageJsonPath))
|
|
828
|
-
return new Map();
|
|
829
|
-
const raw = fs.readFileSync(packageJsonPath, 'utf8');
|
|
830
|
-
const pkg = JSON.parse(raw);
|
|
831
|
-
const deps = new Map();
|
|
832
|
-
for (const section of [pkg.dependencies, pkg.devDependencies, pkg.peerDependencies]) {
|
|
833
|
-
if (!section)
|
|
834
|
-
continue;
|
|
835
|
-
for (const [name, version] of Object.entries(section)) {
|
|
836
|
-
if (!deps.has(name))
|
|
837
|
-
deps.set(name, version);
|
|
838
|
-
}
|
|
839
|
-
}
|
|
840
|
-
return deps;
|
|
841
|
-
}
|
|
842
|
-
readInstalledPackageVersion(packageDir) {
|
|
843
|
-
const packageJsonPath = path.join(packageDir, 'package.json');
|
|
844
|
-
if (!fs.existsSync(packageJsonPath))
|
|
845
|
-
return undefined;
|
|
846
|
-
const raw = fs.readFileSync(packageJsonPath, 'utf8');
|
|
847
|
-
const pkg = JSON.parse(raw);
|
|
848
|
-
return pkg.version;
|
|
849
|
-
}
|
|
850
|
-
collectDeclarationFiles(packageDir) {
|
|
851
|
-
const declarations = [];
|
|
852
|
-
const stack = [packageDir];
|
|
853
|
-
while (stack.length > 0) {
|
|
854
|
-
const currentDir = stack.pop();
|
|
855
|
-
if (!currentDir)
|
|
856
|
-
continue;
|
|
857
|
-
const entries = fs.readdirSync(currentDir, { withFileTypes: true });
|
|
858
|
-
for (const entry of entries) {
|
|
859
|
-
if (entry.name === 'node_modules')
|
|
860
|
-
continue;
|
|
861
|
-
const fullPath = path.join(currentDir, entry.name);
|
|
862
|
-
if (entry.isDirectory()) {
|
|
863
|
-
stack.push(fullPath);
|
|
864
|
-
continue;
|
|
865
|
-
}
|
|
866
|
-
if (entry.isFile() && fullPath.endsWith('.d.ts')) {
|
|
867
|
-
declarations.push(fullPath);
|
|
868
|
-
}
|
|
869
|
-
}
|
|
870
|
-
}
|
|
871
|
-
return declarations;
|
|
872
|
-
}
|
|
873
|
-
shouldIndexDependencySymbol(symbol) {
|
|
874
|
-
if (!isPublicDeclarationSurfaceSymbol(symbol))
|
|
875
|
-
return false;
|
|
876
|
-
if (symbol.declarationSurface)
|
|
877
|
-
return true;
|
|
878
|
-
return !this.hasImplementationBody(symbol);
|
|
879
|
-
}
|
|
880
|
-
hasImplementationBody(symbol) {
|
|
881
|
-
const node = symbol.astNode;
|
|
882
|
-
if (!node)
|
|
883
|
-
return false;
|
|
884
|
-
if (node.type === 'arrow_function' ||
|
|
885
|
-
node.type === 'function_expression' ||
|
|
886
|
-
node.type === 'generator_function') {
|
|
887
|
-
return true;
|
|
888
|
-
}
|
|
889
|
-
if (node.type === 'class_declaration' ||
|
|
890
|
-
node.type === 'interface_declaration' ||
|
|
891
|
-
node.type === 'type_alias_declaration') {
|
|
892
|
-
return false;
|
|
893
|
-
}
|
|
894
|
-
const bodyNode = node.childForFieldName('body');
|
|
895
|
-
if (!bodyNode)
|
|
896
|
-
return false;
|
|
897
|
-
return bodyNode.namedChildCount > 0 || bodyNode.text.trim() !== '';
|
|
898
|
-
}
|
|
899
|
-
loadBuildCheckpoint(db, branch, totalFiles) {
|
|
900
|
-
const raw = getLoreMeta(db, LORE_META_INDEX_CHECKPOINT);
|
|
901
|
-
if (!raw)
|
|
902
|
-
return 0;
|
|
238
|
+
gatherDbStats(db) {
|
|
239
|
+
let totalSymbols = 0;
|
|
903
240
|
try {
|
|
904
|
-
|
|
905
|
-
if (parsed.branch !== branch || parsed.rootDir !== this.walkerConfig.rootDir)
|
|
906
|
-
return 0;
|
|
907
|
-
const nextFileIndex = parsed.nextFileIndex ?? 0;
|
|
908
|
-
return Math.max(0, Math.min(totalFiles, nextFileIndex));
|
|
909
|
-
}
|
|
910
|
-
catch {
|
|
911
|
-
return 0;
|
|
912
|
-
}
|
|
913
|
-
}
|
|
914
|
-
saveBuildCheckpoint(db, branch, nextFileIndex, totalFiles) {
|
|
915
|
-
const checkpoint = {
|
|
916
|
-
branch,
|
|
917
|
-
rootDir: this.walkerConfig.rootDir,
|
|
918
|
-
totalFiles,
|
|
919
|
-
nextFileIndex,
|
|
920
|
-
updatedAt: Math.floor(Date.now() / 1000),
|
|
921
|
-
};
|
|
922
|
-
setLoreMeta(db, LORE_META_INDEX_CHECKPOINT, JSON.stringify(checkpoint));
|
|
923
|
-
}
|
|
924
|
-
/**
|
|
925
|
-
* Embed structural symbol signatures in batches and persist results to
|
|
926
|
-
* the `symbol_embeddings` vec0 virtual table.
|
|
927
|
-
*
|
|
928
|
-
* Also stores the embedding model name and dims in `lore_meta` and
|
|
929
|
-
* creates the vec0 tables if they don't exist yet.
|
|
930
|
-
*
|
|
931
|
-
* @param fileIds When provided, only embed symbols belonging to these file
|
|
932
|
-
* IDs (incremental mode). When omitted, embeds all symbols
|
|
933
|
-
* (full-build mode).
|
|
934
|
-
*/
|
|
935
|
-
async embedStructural(db, fileIds) {
|
|
936
|
-
const embedder = this.embedder;
|
|
937
|
-
setLoreMeta(db, 'embedding_model', embedder.modelName);
|
|
938
|
-
setLoreMeta(db, 'embedding_dims', String(embedder.dims));
|
|
939
|
-
createVec0Tables(db, embedder.dims);
|
|
940
|
-
// Build the query — scoped to specific files when doing an incremental update.
|
|
941
|
-
const baseQuery = `SELECT id, name, signature, resolved_type_signature, resolved_return_type
|
|
942
|
-
FROM symbols
|
|
943
|
-
WHERE (signature IS NOT NULL
|
|
944
|
-
OR resolved_type_signature IS NOT NULL
|
|
945
|
-
OR resolved_return_type IS NOT NULL)`;
|
|
946
|
-
let symbols;
|
|
947
|
-
if (fileIds && fileIds.length > 0) {
|
|
948
|
-
symbols = db
|
|
949
|
-
.prepare(`${baseQuery} AND file_id IN (${fileIds.map(() => '?').join(', ')})`)
|
|
950
|
-
.all(...fileIds);
|
|
951
|
-
}
|
|
952
|
-
else {
|
|
953
|
-
symbols = db.prepare(baseQuery).all();
|
|
954
|
-
}
|
|
955
|
-
const insertEmbed = db.prepare('INSERT OR REPLACE INTO symbol_embeddings(rowid, embedding) VALUES (CAST(? AS INTEGER), json(?))');
|
|
956
|
-
for (let i = 0; i < symbols.length; i += EMBED_BATCH_SIZE) {
|
|
957
|
-
const batch = symbols.slice(i, i + EMBED_BATCH_SIZE);
|
|
958
|
-
const texts = batch.map((symbol) => buildStructuralEmbeddingText({
|
|
959
|
-
name: symbol.name,
|
|
960
|
-
signature: symbol.signature,
|
|
961
|
-
resolvedTypeSignature: symbol.resolved_type_signature,
|
|
962
|
-
resolvedReturnType: symbol.resolved_return_type,
|
|
963
|
-
}));
|
|
964
|
-
const embeddings = await embedder.embed(texts);
|
|
965
|
-
db.transaction(() => {
|
|
966
|
-
for (let j = 0; j < batch.length; j++) {
|
|
967
|
-
const sym = batch[j];
|
|
968
|
-
if (sym)
|
|
969
|
-
insertEmbed.run(sym.id, JSON.stringify(embeddings[j]));
|
|
970
|
-
}
|
|
971
|
-
})();
|
|
241
|
+
totalSymbols = db.prepare('SELECT COUNT(*) AS cnt FROM symbols').get().cnt;
|
|
972
242
|
}
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
*
|
|
978
|
-
* @param docIds When provided, only embed sections belonging to these
|
|
979
|
-
* doc IDs (incremental mode). When omitted, embeds all
|
|
980
|
-
* sections (full-build mode).
|
|
981
|
-
*/
|
|
982
|
-
async embedDocumentation(db, docIds) {
|
|
983
|
-
const embedder = this.embedder;
|
|
984
|
-
db.exec(`
|
|
985
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS doc_section_embeddings USING vec0(
|
|
986
|
-
embedding FLOAT[${embedder.dims}]
|
|
987
|
-
);
|
|
988
|
-
`);
|
|
989
|
-
let sections;
|
|
990
|
-
if (docIds && docIds.length > 0) {
|
|
991
|
-
sections = db.prepare(`SELECT id, title, content
|
|
992
|
-
FROM doc_sections
|
|
993
|
-
WHERE doc_id IN (${docIds.map(() => '?').join(', ')})
|
|
994
|
-
ORDER BY id`).all(...docIds);
|
|
995
|
-
}
|
|
996
|
-
else {
|
|
997
|
-
sections = db.prepare(`SELECT id, title, content
|
|
998
|
-
FROM doc_sections
|
|
999
|
-
ORDER BY id`).all();
|
|
243
|
+
catch { /* */ }
|
|
244
|
+
let totalEdges = 0;
|
|
245
|
+
try {
|
|
246
|
+
totalEdges = db.prepare('SELECT COUNT(*) AS cnt FROM symbol_refs').get().cnt;
|
|
1000
247
|
}
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
const batch = sections.slice(i, i + EMBED_BATCH_SIZE);
|
|
1006
|
-
const texts = batch.map(section => section.content || section.title);
|
|
1007
|
-
const embeddings = await embedder.embed(texts);
|
|
1008
|
-
db.transaction(() => {
|
|
1009
|
-
for (let j = 0; j < batch.length; j++) {
|
|
1010
|
-
const section = batch[j];
|
|
1011
|
-
if (section) {
|
|
1012
|
-
insertEmbed.run(section.id, JSON.stringify(embeddings[j]));
|
|
1013
|
-
}
|
|
1014
|
-
}
|
|
1015
|
-
})();
|
|
248
|
+
catch { /* */ }
|
|
249
|
+
let totalDocs = 0;
|
|
250
|
+
try {
|
|
251
|
+
totalDocs = db.prepare('SELECT COUNT(*) AS cnt FROM docs').get().cnt;
|
|
1016
252
|
}
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
* Uses a `LEFT JOIN` against `commit_embeddings` to skip commits whose
|
|
1022
|
-
* embeddings already exist, so only newly-ingested commits are processed.
|
|
1023
|
-
*/
|
|
1024
|
-
async embedCommitMessages(db) {
|
|
1025
|
-
const embedder = this.embedder;
|
|
1026
|
-
// Only embed commits that don't already have an embedding row.
|
|
1027
|
-
const commits = db.prepare(`SELECT c.rowid, c.message
|
|
1028
|
-
FROM commits c
|
|
1029
|
-
LEFT JOIN commit_embeddings ce ON ce.rowid = c.rowid
|
|
1030
|
-
WHERE length(trim(c.message)) > 0
|
|
1031
|
-
AND ce.rowid IS NULL
|
|
1032
|
-
ORDER BY c.rowid`).all();
|
|
1033
|
-
if (commits.length === 0)
|
|
1034
|
-
return;
|
|
1035
|
-
const insertEmbed = db.prepare('INSERT OR REPLACE INTO commit_embeddings(rowid, embedding) VALUES (CAST(? AS INTEGER), json(?))');
|
|
1036
|
-
for (let i = 0; i < commits.length; i += EMBED_BATCH_SIZE) {
|
|
1037
|
-
const batch = commits.slice(i, i + EMBED_BATCH_SIZE);
|
|
1038
|
-
const embeddings = await embedder.embed(batch.map((commit) => commit.message));
|
|
1039
|
-
db.transaction(() => {
|
|
1040
|
-
for (let j = 0; j < batch.length; j++) {
|
|
1041
|
-
const commit = batch[j];
|
|
1042
|
-
if (commit) {
|
|
1043
|
-
insertEmbed.run(commit.rowid, JSON.stringify(embeddings[j]));
|
|
1044
|
-
}
|
|
1045
|
-
}
|
|
1046
|
-
})();
|
|
253
|
+
catch { /* */ }
|
|
254
|
+
let commitCount;
|
|
255
|
+
try {
|
|
256
|
+
commitCount = db.prepare('SELECT COUNT(*) AS cnt FROM commits').get().cnt;
|
|
1047
257
|
}
|
|
258
|
+
catch { /* */ }
|
|
259
|
+
return { totalSymbols, totalEdges, totalDocs, commitCount };
|
|
1048
260
|
}
|
|
1049
261
|
}
|
|
262
|
+
// ─── Trivial inline stages ────────────────────────────────────────────────────
|
|
263
|
+
// These are single-function-call stages that don't warrant their own files.
|
|
264
|
+
/**
 * Build the `symbol-resolution` stage, which resolves symbol edges for the
 * context's database. It must run after LspEnrichmentStage.
 */
function resolutionStage() {
    const execute = async (ctx) => {
        resolveSymbolEdges(ctx.db);
    };
    return { name: 'symbol-resolution', execute };
}
|
|
271
|
+
/**
 * Build the `test-map` stage, which refreshes test-to-source file mappings
 * for the context's database and branch.
 */
function testMapStage() {
    const execute = async (ctx) => {
        refreshTestMappings(ctx.db, ctx.branch);
    };
    return { name: 'test-map', execute };
}
|
|
278
|
+
/**
 * Build the `git-history` stage, which ingests git history for the project root.
 *
 * The stage is a no-op when `ctx.history` is falsy. When `ctx.history` is an
 * object it is forwarded as ingestion options; any other truthy value runs
 * the ingestion with no options.
 */
function historyStage() {
    const execute = async (ctx) => {
        if (!ctx.history) {
            return;
        }
        ctx.log.indexing('git history ingestion started');
        let opts;
        if (typeof ctx.history === 'object') {
            opts = ctx.history;
        }
        await ingestGitHistory(ctx.db, ctx.walkerConfig.rootDir, opts);
        ctx.log.indexing('git history ingestion complete');
    };
    return { name: 'git-history', execute };
}
|
|
1050
292
|
//# sourceMappingURL=index.js.map
|