@optave/codegraph 3.9.4 → 3.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/dist/cli/commands/watch.d.ts.map +1 -1
- package/dist/cli/commands/watch.js +2 -0
- package/dist/cli/commands/watch.js.map +1 -1
- package/dist/cli.js +24 -1
- package/dist/cli.js.map +1 -1
- package/dist/domain/graph/builder/context.d.ts +2 -0
- package/dist/domain/graph/builder/context.d.ts.map +1 -1
- package/dist/domain/graph/builder/context.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +13 -2
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +30 -4
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +129 -3
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.js +58 -26
- package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +54 -45
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +17 -0
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/journal.d.ts +15 -0
- package/dist/domain/graph/journal.d.ts.map +1 -1
- package/dist/domain/graph/journal.js +283 -28
- package/dist/domain/graph/journal.js.map +1 -1
- package/dist/domain/graph/watcher.d.ts +17 -0
- package/dist/domain/graph/watcher.d.ts.map +1 -1
- package/dist/domain/graph/watcher.js +23 -7
- package/dist/domain/graph/watcher.js.map +1 -1
- package/dist/domain/parser.d.ts +13 -4
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +174 -80
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/generator.d.ts.map +1 -1
- package/dist/domain/search/generator.js +28 -2
- package/dist/domain/search/generator.js.map +1 -1
- package/dist/domain/wasm-worker-entry.d.ts +24 -0
- package/dist/domain/wasm-worker-entry.d.ts.map +1 -0
- package/dist/domain/wasm-worker-entry.js +643 -0
- package/dist/domain/wasm-worker-entry.js.map +1 -0
- package/dist/domain/wasm-worker-pool.d.ts +59 -0
- package/dist/domain/wasm-worker-pool.d.ts.map +1 -0
- package/dist/domain/wasm-worker-pool.js +312 -0
- package/dist/domain/wasm-worker-pool.js.map +1 -0
- package/dist/domain/wasm-worker-protocol.d.ts +65 -0
- package/dist/domain/wasm-worker-protocol.d.ts.map +1 -0
- package/dist/domain/wasm-worker-protocol.js +13 -0
- package/dist/domain/wasm-worker-protocol.js.map +1 -0
- package/dist/extractors/javascript.js +146 -2
- package/dist/extractors/javascript.js.map +1 -1
- package/dist/features/boundaries.d.ts +2 -2
- package/dist/features/boundaries.d.ts.map +1 -1
- package/dist/features/boundaries.js +2 -31
- package/dist/features/boundaries.js.map +1 -1
- package/dist/features/snapshot.d.ts.map +1 -1
- package/dist/features/snapshot.js +99 -13
- package/dist/features/snapshot.js.map +1 -1
- package/dist/graph/algorithms/louvain.d.ts.map +1 -1
- package/dist/graph/algorithms/louvain.js +2 -4
- package/dist/graph/algorithms/louvain.js.map +1 -1
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +12 -2
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/shared/globs.d.ts +40 -0
- package/dist/shared/globs.d.ts.map +1 -0
- package/dist/shared/globs.js +126 -0
- package/dist/shared/globs.js.map +1 -0
- package/dist/types.d.ts +26 -1
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/package.json +7 -7
- package/src/cli/commands/watch.ts +2 -0
- package/src/cli.ts +31 -8
- package/src/domain/graph/builder/context.ts +2 -0
- package/src/domain/graph/builder/helpers.ts +53 -3
- package/src/domain/graph/builder/pipeline.ts +142 -3
- package/src/domain/graph/builder/stages/collect-files.ts +56 -26
- package/src/domain/graph/builder/stages/detect-changes.ts +57 -49
- package/src/domain/graph/builder/stages/finalize.ts +16 -0
- package/src/domain/graph/journal.ts +284 -27
- package/src/domain/graph/watcher.ts +29 -9
- package/src/domain/parser.ts +166 -73
- package/src/domain/search/generator.ts +34 -2
- package/src/domain/wasm-worker-entry.ts +788 -0
- package/src/domain/wasm-worker-pool.ts +330 -0
- package/src/domain/wasm-worker-protocol.ts +81 -0
- package/src/extractors/javascript.ts +149 -2
- package/src/features/boundaries.ts +2 -27
- package/src/features/snapshot.ts +93 -14
- package/src/graph/algorithms/louvain.ts +2 -4
- package/src/infrastructure/config.ts +12 -2
- package/src/shared/globs.ts +121 -0
- package/src/types.ts +26 -1
package/src/domain/parser.ts
CHANGED
|
@@ -13,6 +13,24 @@ import type {
|
|
|
13
13
|
LanguageRegistryEntry,
|
|
14
14
|
TypeMapEntry,
|
|
15
15
|
} from '../types.js';
|
|
16
|
+
import { disposeWasmWorkerPool, getWasmWorkerPool } from './wasm-worker-pool.js';
|
|
17
|
+
import type { WorkerAnalysisOpts } from './wasm-worker-protocol.js';
|
|
18
|
+
|
|
19
|
+
/** Default worker opts: run all analyses so output matches parseFilesFull. */
|
|
20
|
+
const FULL_ANALYSIS: WorkerAnalysisOpts = {
|
|
21
|
+
ast: true,
|
|
22
|
+
complexity: true,
|
|
23
|
+
cfg: true,
|
|
24
|
+
dataflow: true,
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
/** Extract-only opts: skip visitor walk for typeMap backfill / similar fast paths. */
|
|
28
|
+
const EXTRACT_ONLY: WorkerAnalysisOpts = {
|
|
29
|
+
ast: false,
|
|
30
|
+
complexity: false,
|
|
31
|
+
cfg: false,
|
|
32
|
+
dataflow: false,
|
|
33
|
+
};
|
|
16
34
|
|
|
17
35
|
// Re-export all extractors for backward compatibility
|
|
18
36
|
export {
|
|
@@ -262,7 +280,7 @@ function disposeMapEntries(entries: Iterable<[string, any]>, label: string): voi
|
|
|
262
280
|
}
|
|
263
281
|
}
|
|
264
282
|
|
|
265
|
-
export function disposeParsers(): void {
|
|
283
|
+
export async function disposeParsers(): Promise<void> {
|
|
266
284
|
if (_cachedParsers) {
|
|
267
285
|
disposeMapEntries(_cachedParsers, 'parser');
|
|
268
286
|
_cachedParsers = null;
|
|
@@ -276,6 +294,7 @@ export function disposeParsers(): void {
|
|
|
276
294
|
_initialized = false;
|
|
277
295
|
_allParsersLoaded = false;
|
|
278
296
|
_loadingPromises.clear();
|
|
297
|
+
await disposeWasmWorkerPool();
|
|
279
298
|
}
|
|
280
299
|
|
|
281
300
|
export function getParser(parsers: Map<string, Parser | null>, filePath: string): Parser | null {
|
|
@@ -286,33 +305,33 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
|
|
|
286
305
|
}
|
|
287
306
|
|
|
288
307
|
/**
|
|
289
|
-
*
|
|
290
|
-
*
|
|
291
|
-
*
|
|
308
|
+
* Backfill missing AST-analysis data (astNodes, dataflow, def.complexity,
|
|
309
|
+
* def.cfg) via the WASM worker pool for files that were parsed by the native
|
|
310
|
+
* engine but are missing one or more analyses.
|
|
311
|
+
*
|
|
312
|
+
* Historically this function populated `symbols._tree` so the main-thread
|
|
313
|
+
* visitor walk in `ast-analysis/engine.ts` could run. After the worker-isolation
|
|
314
|
+
* refactor (#965), the worker runs every visitor itself and returns pre-computed
|
|
315
|
+
* analysis data — `_tree` is never set on the main thread.
|
|
316
|
+
*
|
|
317
|
+
* Name is preserved for caller compatibility; the function now ensures
|
|
318
|
+
* *analysis data* rather than *trees*.
|
|
292
319
|
*/
|
|
293
320
|
export async function ensureWasmTrees(
|
|
294
321
|
fileSymbols: Map<string, any>,
|
|
295
322
|
rootDir: string,
|
|
296
323
|
): Promise<void> {
|
|
297
|
-
//
|
|
298
|
-
const
|
|
324
|
+
// Collect files that still need analysis data and are parseable by WASM.
|
|
325
|
+
const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
|
|
299
326
|
for (const [relPath, symbols] of fileSymbols) {
|
|
300
|
-
if (
|
|
301
|
-
|
|
302
|
-
}
|
|
327
|
+
if (symbols._tree) continue; // legacy path — leave existing trees alone
|
|
328
|
+
if (!_extToLang.has(path.extname(relPath).toLowerCase())) continue;
|
|
329
|
+
pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
|
|
303
330
|
}
|
|
304
|
-
if (
|
|
305
|
-
const parsers = await ensureParsersForFiles(filePaths);
|
|
331
|
+
if (pending.length === 0) return;
|
|
306
332
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
const ext = path.extname(relPath).toLowerCase();
|
|
310
|
-
const entry = _extToLang.get(ext);
|
|
311
|
-
if (!entry) continue;
|
|
312
|
-
const parser = parsers.get(entry.id);
|
|
313
|
-
if (!parser) continue;
|
|
314
|
-
|
|
315
|
-
const absPath = path.join(rootDir, relPath);
|
|
333
|
+
const pool = getWasmWorkerPool();
|
|
334
|
+
for (const { relPath, absPath, symbols } of pending) {
|
|
316
335
|
let code: string;
|
|
317
336
|
try {
|
|
318
337
|
code = fs.readFileSync(absPath, 'utf-8');
|
|
@@ -320,11 +339,45 @@ export async function ensureWasmTrees(
|
|
|
320
339
|
debug(`ensureWasmTrees: cannot read ${relPath}: ${(e as Error).message}`);
|
|
321
340
|
continue;
|
|
322
341
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
342
|
+
const output = await pool.parse(absPath, code, FULL_ANALYSIS);
|
|
343
|
+
if (!output) continue; // worker crashed or returned null — skip silently
|
|
344
|
+
mergeAnalysisData(symbols, output);
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* Merge pre-computed analysis data from a worker result onto existing symbols.
|
|
350
|
+
* Only fills gaps — never overwrites fields the caller already populated.
|
|
351
|
+
* Used to patch native-parsed symbols with worker-produced astNodes / dataflow /
|
|
352
|
+
* per-definition complexity and cfg.
|
|
353
|
+
*/
|
|
354
|
+
function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
|
|
355
|
+
if (!symbols._langId && worker._langId) symbols._langId = worker._langId;
|
|
356
|
+
if (!symbols._lineCount && worker._lineCount) symbols._lineCount = worker._lineCount;
|
|
357
|
+
if (!Array.isArray(symbols.astNodes) && Array.isArray(worker.astNodes)) {
|
|
358
|
+
symbols.astNodes = worker.astNodes;
|
|
359
|
+
}
|
|
360
|
+
if (!symbols.dataflow && worker.dataflow) symbols.dataflow = worker.dataflow;
|
|
361
|
+
if (worker.typeMap && worker.typeMap.size > 0) {
|
|
362
|
+
if (!symbols.typeMap || !(symbols.typeMap instanceof Map)) {
|
|
363
|
+
symbols.typeMap = new Map(worker.typeMap);
|
|
364
|
+
} else {
|
|
365
|
+
for (const [k, v] of worker.typeMap) {
|
|
366
|
+
if (!symbols.typeMap.has(k)) symbols.typeMap.set(k, v);
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
const existingDefs: any[] = Array.isArray(symbols.definitions) ? symbols.definitions : [];
|
|
371
|
+
const workerDefs: any[] = Array.isArray(worker.definitions) ? worker.definitions : [];
|
|
372
|
+
// Index existing defs by (kind, name, line) — mirrors engine.ts matching key.
|
|
373
|
+
const byKey = new Map<string, any>();
|
|
374
|
+
for (const d of existingDefs) byKey.set(`${d.kind}|${d.name}|${d.line}`, d);
|
|
375
|
+
for (const wd of workerDefs) {
|
|
376
|
+
const existing = byKey.get(`${wd.kind}|${wd.name}|${wd.line}`);
|
|
377
|
+
if (!existing) continue;
|
|
378
|
+
if (!existing.complexity && wd.complexity) existing.complexity = wd.complexity;
|
|
379
|
+
if ((!existing.cfg || !Array.isArray(existing.cfg.blocks)) && wd.cfg?.blocks) {
|
|
380
|
+
existing.cfg = wd.cfg;
|
|
328
381
|
}
|
|
329
382
|
}
|
|
330
383
|
}
|
|
@@ -338,6 +391,27 @@ export function isWasmAvailable(): boolean {
|
|
|
338
391
|
);
|
|
339
392
|
}
|
|
340
393
|
|
|
394
|
+
/**
|
|
395
|
+
* Return the set of lowercase file extensions whose WASM grammar is actually
|
|
396
|
+
* installed on disk. Used to scope engine-parity backfill to files that WASM
|
|
397
|
+
* can recover — languages without an installed grammar are skipped by both
|
|
398
|
+
* engines, so they don't represent a native-engine drop.
|
|
399
|
+
*
|
|
400
|
+
* Cached on first call; the grammars directory is shipped immutable.
|
|
401
|
+
*/
|
|
402
|
+
let _installedWasmExts: Set<string> | null = null;
|
|
403
|
+
export function getInstalledWasmExtensions(): Set<string> {
|
|
404
|
+
if (_installedWasmExts) return _installedWasmExts;
|
|
405
|
+
const exts = new Set<string>();
|
|
406
|
+
for (const entry of LANGUAGE_REGISTRY) {
|
|
407
|
+
if (fs.existsSync(grammarPath(entry.grammarFile))) {
|
|
408
|
+
for (const ext of entry.extensions) exts.add(ext.toLowerCase());
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
_installedWasmExts = exts;
|
|
412
|
+
return exts;
|
|
413
|
+
}
|
|
414
|
+
|
|
341
415
|
// ── Unified API ──────────────────────────────────────────────────────────────
|
|
342
416
|
|
|
343
417
|
function resolveEngine(opts: ParseEngineOpts = {}): ResolvedEngine {
|
|
@@ -721,23 +795,13 @@ async function backfillTypeMap(
|
|
|
721
795
|
return { typeMap: new Map(), backfilled: false };
|
|
722
796
|
}
|
|
723
797
|
}
|
|
724
|
-
const
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
}
|
|
730
|
-
return { typeMap: extracted.symbols.typeMap, backfilled: true };
|
|
731
|
-
} finally {
|
|
732
|
-
// Free the WASM tree to prevent memory accumulation across repeated builds
|
|
733
|
-
if (extracted?.tree && typeof extracted.tree.delete === 'function') {
|
|
734
|
-
try {
|
|
735
|
-
extracted.tree.delete();
|
|
736
|
-
} catch (e) {
|
|
737
|
-
debug(`backfillTypeMap: WASM tree cleanup failed: ${toErrorMessage(e)}`);
|
|
738
|
-
}
|
|
739
|
-
}
|
|
798
|
+
const pool = getWasmWorkerPool();
|
|
799
|
+
// Extract-only — no visitor walk, we only need the typeMap from this pass.
|
|
800
|
+
const output = await pool.parse(filePath, code, EXTRACT_ONLY);
|
|
801
|
+
if (!output || output.typeMap.size === 0) {
|
|
802
|
+
return { typeMap: new Map(), backfilled: false };
|
|
740
803
|
}
|
|
804
|
+
return { typeMap: output.typeMap, backfilled: true };
|
|
741
805
|
}
|
|
742
806
|
|
|
743
807
|
/**
|
|
@@ -765,7 +829,16 @@ function wasmExtractSymbols(
|
|
|
765
829
|
if (!entry) return null;
|
|
766
830
|
const query = _queryCache.get(entry.id) ?? undefined;
|
|
767
831
|
// Query (web-tree-sitter) is structurally compatible with TreeSitterQuery at runtime
|
|
768
|
-
|
|
832
|
+
let symbols: ExtractorOutput | null;
|
|
833
|
+
try {
|
|
834
|
+
symbols = entry.extractor(tree as any, filePath, query as any);
|
|
835
|
+
} catch (e: unknown) {
|
|
836
|
+
warn(`Extractor error in ${filePath}: ${(e as Error).message}`);
|
|
837
|
+
// Free WASM tree to prevent memory leak — web-tree-sitter trees are backed
|
|
838
|
+
// by WASM linear memory and are not garbage-collected automatically.
|
|
839
|
+
if (typeof (tree as any).delete === 'function') (tree as any).delete();
|
|
840
|
+
return null;
|
|
841
|
+
}
|
|
769
842
|
return symbols ? { symbols, tree, langId: entry.id } : null;
|
|
770
843
|
}
|
|
771
844
|
|
|
@@ -796,10 +869,9 @@ export async function parseFileAuto(
|
|
|
796
869
|
return patched;
|
|
797
870
|
}
|
|
798
871
|
|
|
799
|
-
// WASM path
|
|
800
|
-
const
|
|
801
|
-
|
|
802
|
-
return extracted ? extracted.symbols : null;
|
|
872
|
+
// WASM path — dispatch to isolated worker
|
|
873
|
+
const pool = getWasmWorkerPool();
|
|
874
|
+
return pool.parse(filePath, source, FULL_ANALYSIS);
|
|
803
875
|
}
|
|
804
876
|
|
|
805
877
|
/** Backfill typeMap via WASM for TS/TSX files parsed by the native engine. */
|
|
@@ -812,40 +884,44 @@ async function backfillTypeMapBatch(
|
|
|
812
884
|
);
|
|
813
885
|
if (tsFiles.length === 0) return;
|
|
814
886
|
|
|
815
|
-
const
|
|
887
|
+
const pool = getWasmWorkerPool();
|
|
816
888
|
for (const { filePath, relPath } of tsFiles) {
|
|
817
|
-
let
|
|
889
|
+
let code: string;
|
|
818
890
|
try {
|
|
819
|
-
|
|
820
|
-
extracted = wasmExtractSymbols(parsers, filePath, code);
|
|
821
|
-
if (extracted?.symbols && extracted.symbols.typeMap.size > 0) {
|
|
822
|
-
const symbols = result.get(relPath);
|
|
823
|
-
if (!symbols) continue;
|
|
824
|
-
symbols.typeMap = extracted.symbols.typeMap;
|
|
825
|
-
symbols._typeMapBackfilled = true;
|
|
826
|
-
}
|
|
891
|
+
code = fs.readFileSync(filePath, 'utf-8');
|
|
827
892
|
} catch (e) {
|
|
828
|
-
debug(`batchExtract:
|
|
829
|
-
|
|
830
|
-
if (extracted?.tree && typeof extracted.tree.delete === 'function') {
|
|
831
|
-
try {
|
|
832
|
-
extracted.tree.delete();
|
|
833
|
-
} catch (e) {
|
|
834
|
-
debug(`batchExtract: WASM tree cleanup failed: ${toErrorMessage(e)}`);
|
|
835
|
-
}
|
|
836
|
-
}
|
|
893
|
+
debug(`batchExtract: cannot read ${filePath}: ${toErrorMessage(e)}`);
|
|
894
|
+
continue;
|
|
837
895
|
}
|
|
896
|
+
const output = await pool.parse(filePath, code, EXTRACT_ONLY);
|
|
897
|
+
if (!output || output.typeMap.size === 0) continue;
|
|
898
|
+
const symbols = result.get(relPath);
|
|
899
|
+
if (!symbols) continue;
|
|
900
|
+
symbols.typeMap = output.typeMap;
|
|
901
|
+
symbols._typeMapBackfilled = true;
|
|
838
902
|
}
|
|
839
903
|
}
|
|
840
904
|
|
|
841
|
-
/**
|
|
905
|
+
/**
|
|
906
|
+
* Parse files via WASM engine, returning a Map<relPath, symbols>.
|
|
907
|
+
*
|
|
908
|
+
* Each file is dispatched to the WASM worker pool. The worker parses, extracts,
|
|
909
|
+
* and runs all AST analyses (complexity, CFG, dataflow, ast-store) in its own
|
|
910
|
+
* thread, returning fully pre-computed ExtractorOutput. V8 fatal errors from
|
|
911
|
+
* tree-sitter WASM (#965) kill only the worker — the pool skips the file and
|
|
912
|
+
* restarts the worker for the next one.
|
|
913
|
+
*
|
|
914
|
+
* `_tree` is NEVER set by this path. All downstream analyses operate on the
|
|
915
|
+
* pre-computed `astNodes` / `dataflow` / `def.complexity` / `def.cfg` fields.
|
|
916
|
+
*/
|
|
842
917
|
async function parseFilesWasm(
|
|
843
918
|
filePaths: string[],
|
|
844
919
|
rootDir: string,
|
|
845
920
|
): Promise<Map<string, ExtractorOutput>> {
|
|
846
921
|
const result = new Map<string, ExtractorOutput>();
|
|
847
|
-
const
|
|
922
|
+
const pool = getWasmWorkerPool();
|
|
848
923
|
for (const filePath of filePaths) {
|
|
924
|
+
if (!_extToLang.has(path.extname(filePath).toLowerCase())) continue;
|
|
849
925
|
let code: string;
|
|
850
926
|
try {
|
|
851
927
|
code = fs.readFileSync(filePath, 'utf-8');
|
|
@@ -853,13 +929,10 @@ async function parseFilesWasm(
|
|
|
853
929
|
warn(`Skipping ${path.relative(rootDir, filePath)}: ${(err as Error).message}`);
|
|
854
930
|
continue;
|
|
855
931
|
}
|
|
856
|
-
const
|
|
857
|
-
if (
|
|
932
|
+
const output = await pool.parse(filePath, code, FULL_ANALYSIS);
|
|
933
|
+
if (output) {
|
|
858
934
|
const relPath = path.relative(rootDir, filePath).split(path.sep).join('/');
|
|
859
|
-
|
|
860
|
-
extracted.symbols._langId = extracted.langId;
|
|
861
|
-
extracted.symbols._lineCount = code.split('\n').length;
|
|
862
|
-
result.set(relPath, extracted.symbols);
|
|
935
|
+
result.set(relPath, output);
|
|
863
936
|
}
|
|
864
937
|
}
|
|
865
938
|
return result;
|
|
@@ -884,8 +957,10 @@ export async function parseFilesAuto(
|
|
|
884
957
|
? native.parseFilesFull(filePaths, rootDir)
|
|
885
958
|
: native.parseFiles(filePaths, rootDir, true, true);
|
|
886
959
|
const needsTypeMap: { filePath: string; relPath: string }[] = [];
|
|
960
|
+
const nativeParsed = new Set<string>();
|
|
887
961
|
for (const r of nativeResults) {
|
|
888
962
|
if (!r) continue;
|
|
963
|
+
nativeParsed.add(r.file);
|
|
889
964
|
const patched = patchNativeResult(r);
|
|
890
965
|
const relPath = path.relative(rootDir, r.file).split(path.sep).join('/');
|
|
891
966
|
result.set(relPath, patched);
|
|
@@ -901,6 +976,24 @@ export async function parseFilesAuto(
|
|
|
901
976
|
if (needsTypeMap.length > 0) {
|
|
902
977
|
await backfillTypeMapBatch(needsTypeMap, result);
|
|
903
978
|
}
|
|
979
|
+
|
|
980
|
+
// Engine parity: native may silently drop files whose extensions are in
|
|
981
|
+
// SUPPORTED_EXTENSIONS (because a WASM grammar exists) but whose Rust
|
|
982
|
+
// extractor/grammar is missing or fails. WASM handles these — fall back so
|
|
983
|
+
// both engines process the same file set (#967). Restrict to installed WASM
|
|
984
|
+
// grammars so we don't warn about files that neither engine can parse.
|
|
985
|
+
const installedExts = getInstalledWasmExtensions();
|
|
986
|
+
const dropped = filePaths.filter(
|
|
987
|
+
(f) => !nativeParsed.has(f) && installedExts.has(path.extname(f).toLowerCase()),
|
|
988
|
+
);
|
|
989
|
+
if (dropped.length > 0) {
|
|
990
|
+
warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
|
|
991
|
+
const wasmResults = await parseFilesWasm(dropped, rootDir);
|
|
992
|
+
for (const [relPath, symbols] of wasmResults) {
|
|
993
|
+
result.set(relPath, symbols);
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
|
|
904
997
|
return result;
|
|
905
998
|
}
|
|
906
999
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import fs from 'node:fs';
|
|
2
2
|
import path from 'node:path';
|
|
3
|
-
import { closeDb, findDbPath, openDb } from '../../db/index.js';
|
|
3
|
+
import { closeDb, findDbPath, getBuildMeta, openDb } from '../../db/index.js';
|
|
4
4
|
import { warn } from '../../infrastructure/logger.js';
|
|
5
5
|
import { DbError } from '../../shared/errors.js';
|
|
6
6
|
import type { BetterSqlite3Database, NodeRow } from '../../types.js';
|
|
@@ -73,6 +73,21 @@ export async function buildEmbeddings(
|
|
|
73
73
|
const db = openDb(dbPath) as BetterSqlite3Database;
|
|
74
74
|
initEmbeddingsSchema(db);
|
|
75
75
|
|
|
76
|
+
// Prefer the repo root recorded at build time — embed may be invoked from a
|
|
77
|
+
// different cwd (e.g. `codegraph embed --db /abs/path/graph.db`) and the
|
|
78
|
+
// positional rootDir will be wrong in that case. For legacy DBs without
|
|
79
|
+
// root_dir metadata, fall back to `<dbParent>` only when the DB lives at
|
|
80
|
+
// the conventional `<root>/.codegraph/graph.db` layout — otherwise trust
|
|
81
|
+
// the caller-provided rootDir (which may be an explicit positional arg).
|
|
82
|
+
// `path.dirname(...)` is always non-empty (`'.'` at minimum), so the
|
|
83
|
+
// conventional-layout check is required to keep the rootDir path reachable.
|
|
84
|
+
const metaRoot = getBuildMeta(db, 'root_dir');
|
|
85
|
+
const resolvedDbPath = path.resolve(dbPath);
|
|
86
|
+
const dbDirName = path.basename(path.dirname(resolvedDbPath));
|
|
87
|
+
const dbParent =
|
|
88
|
+
dbDirName === '.codegraph' ? path.dirname(path.dirname(resolvedDbPath)) : undefined;
|
|
89
|
+
const resolvedRoot = metaRoot || dbParent || rootDir;
|
|
90
|
+
|
|
76
91
|
db.exec('DELETE FROM embeddings');
|
|
77
92
|
db.exec('DELETE FROM embedding_meta');
|
|
78
93
|
db.exec('DELETE FROM fts_index');
|
|
@@ -98,13 +113,17 @@ export async function buildEmbeddings(
|
|
|
98
113
|
const config = getModelConfig(modelKey);
|
|
99
114
|
const contextWindow = config.contextWindow;
|
|
100
115
|
let overflowCount = 0;
|
|
116
|
+
let filesRead = 0;
|
|
117
|
+
let filesSkipped = 0;
|
|
101
118
|
|
|
102
119
|
for (const [file, fileNodes] of byFile) {
|
|
103
|
-
const fullPath = path.isAbsolute(file) ? file : path.join(
|
|
120
|
+
const fullPath = path.isAbsolute(file) ? file : path.join(resolvedRoot, file);
|
|
104
121
|
let lines: string[];
|
|
105
122
|
try {
|
|
106
123
|
lines = fs.readFileSync(fullPath, 'utf-8').split('\n');
|
|
124
|
+
filesRead++;
|
|
107
125
|
} catch (err: unknown) {
|
|
126
|
+
filesSkipped++;
|
|
108
127
|
warn(`Cannot read ${file} for embeddings: ${(err as Error).message}`);
|
|
109
128
|
continue;
|
|
110
129
|
}
|
|
@@ -136,6 +155,19 @@ export async function buildEmbeddings(
|
|
|
136
155
|
);
|
|
137
156
|
}
|
|
138
157
|
|
|
158
|
+
// If there were symbols to embed but every file failed to read, the DB was
|
|
159
|
+
// almost certainly built from a different location than the current cwd.
|
|
160
|
+
// Surface this clearly instead of emitting a silent "Stored 0 embeddings".
|
|
161
|
+
if (byFile.size > 0 && filesRead === 0) {
|
|
162
|
+
closeDb(db);
|
|
163
|
+
throw new DbError(
|
|
164
|
+
`embed: could not read any of the ${filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
|
|
165
|
+
`Tried resolving against: ${resolvedRoot}\n` +
|
|
166
|
+
'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
|
|
167
|
+
{ file: dbPath },
|
|
168
|
+
);
|
|
169
|
+
}
|
|
170
|
+
|
|
139
171
|
console.log(`Embedding ${texts.length} symbols...`);
|
|
140
172
|
const { vectors, dim } = await embed(texts, modelKey);
|
|
141
173
|
|