@optave/codegraph 3.9.4 → 3.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +3 -2
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/rules/csharp.d.ts.map +1 -1
- package/dist/ast-analysis/rules/csharp.js +8 -1
- package/dist/ast-analysis/rules/csharp.js.map +1 -1
- package/dist/ast-analysis/rules/go.d.ts.map +1 -1
- package/dist/ast-analysis/rules/go.js +4 -1
- package/dist/ast-analysis/rules/go.js.map +1 -1
- package/dist/ast-analysis/rules/index.d.ts +6 -0
- package/dist/ast-analysis/rules/index.d.ts.map +1 -1
- package/dist/ast-analysis/rules/index.js +151 -4
- package/dist/ast-analysis/rules/index.js.map +1 -1
- package/dist/ast-analysis/rules/java.d.ts.map +1 -1
- package/dist/ast-analysis/rules/java.js +5 -1
- package/dist/ast-analysis/rules/java.js.map +1 -1
- package/dist/ast-analysis/rules/php.d.ts.map +1 -1
- package/dist/ast-analysis/rules/php.js +6 -1
- package/dist/ast-analysis/rules/php.js.map +1 -1
- package/dist/ast-analysis/rules/python.d.ts.map +1 -1
- package/dist/ast-analysis/rules/python.js +5 -1
- package/dist/ast-analysis/rules/python.js.map +1 -1
- package/dist/ast-analysis/rules/ruby.d.ts.map +1 -1
- package/dist/ast-analysis/rules/ruby.js +4 -1
- package/dist/ast-analysis/rules/ruby.js.map +1 -1
- package/dist/ast-analysis/rules/rust.d.ts.map +1 -1
- package/dist/ast-analysis/rules/rust.js +5 -1
- package/dist/ast-analysis/rules/rust.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts +2 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +129 -37
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/cli/commands/watch.d.ts.map +1 -1
- package/dist/cli/commands/watch.js +2 -0
- package/dist/cli/commands/watch.js.map +1 -1
- package/dist/cli.js +24 -1
- package/dist/cli.js.map +1 -1
- package/dist/domain/graph/builder/context.d.ts +2 -0
- package/dist/domain/graph/builder/context.d.ts.map +1 -1
- package/dist/domain/graph/builder/context.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +13 -2
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +30 -4
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +141 -3
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.js +58 -26
- package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +54 -45
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +17 -0
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/journal.d.ts +15 -0
- package/dist/domain/graph/journal.d.ts.map +1 -1
- package/dist/domain/graph/journal.js +283 -28
- package/dist/domain/graph/journal.js.map +1 -1
- package/dist/domain/graph/watcher.d.ts +17 -0
- package/dist/domain/graph/watcher.d.ts.map +1 -1
- package/dist/domain/graph/watcher.js +23 -7
- package/dist/domain/graph/watcher.js.map +1 -1
- package/dist/domain/parser.d.ts +53 -4
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +278 -80
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/generator.d.ts.map +1 -1
- package/dist/domain/search/generator.js +28 -2
- package/dist/domain/search/generator.js.map +1 -1
- package/dist/domain/search/models.js +1 -1
- package/dist/domain/wasm-worker-entry.d.ts +24 -0
- package/dist/domain/wasm-worker-entry.d.ts.map +1 -0
- package/dist/domain/wasm-worker-entry.js +644 -0
- package/dist/domain/wasm-worker-entry.js.map +1 -0
- package/dist/domain/wasm-worker-pool.d.ts +59 -0
- package/dist/domain/wasm-worker-pool.d.ts.map +1 -0
- package/dist/domain/wasm-worker-pool.js +312 -0
- package/dist/domain/wasm-worker-pool.js.map +1 -0
- package/dist/domain/wasm-worker-protocol.d.ts +65 -0
- package/dist/domain/wasm-worker-protocol.d.ts.map +1 -0
- package/dist/domain/wasm-worker-protocol.js +13 -0
- package/dist/domain/wasm-worker-protocol.js.map +1 -0
- package/dist/extractors/javascript.js +146 -2
- package/dist/extractors/javascript.js.map +1 -1
- package/dist/features/ast.d.ts.map +1 -1
- package/dist/features/ast.js +11 -9
- package/dist/features/ast.js.map +1 -1
- package/dist/features/boundaries.d.ts +2 -2
- package/dist/features/boundaries.d.ts.map +1 -1
- package/dist/features/boundaries.js +2 -31
- package/dist/features/boundaries.js.map +1 -1
- package/dist/features/snapshot.d.ts.map +1 -1
- package/dist/features/snapshot.js +99 -13
- package/dist/features/snapshot.js.map +1 -1
- package/dist/graph/algorithms/louvain.d.ts.map +1 -1
- package/dist/graph/algorithms/louvain.js +2 -4
- package/dist/graph/algorithms/louvain.js.map +1 -1
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +12 -2
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/shared/globs.d.ts +40 -0
- package/dist/shared/globs.d.ts.map +1 -0
- package/dist/shared/globs.js +126 -0
- package/dist/shared/globs.js.map +1 -0
- package/dist/types.d.ts +26 -1
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/package.json +7 -7
- package/src/ast-analysis/engine.ts +11 -1
- package/src/ast-analysis/rules/csharp.ts +8 -1
- package/src/ast-analysis/rules/go.ts +4 -1
- package/src/ast-analysis/rules/index.ts +181 -4
- package/src/ast-analysis/rules/java.ts +5 -1
- package/src/ast-analysis/rules/php.ts +6 -1
- package/src/ast-analysis/rules/python.ts +5 -1
- package/src/ast-analysis/rules/ruby.ts +4 -1
- package/src/ast-analysis/rules/rust.ts +5 -1
- package/src/ast-analysis/visitors/ast-store-visitor.ts +129 -34
- package/src/cli/commands/watch.ts +2 -0
- package/src/cli.ts +31 -8
- package/src/domain/graph/builder/context.ts +2 -0
- package/src/domain/graph/builder/helpers.ts +53 -3
- package/src/domain/graph/builder/pipeline.ts +162 -3
- package/src/domain/graph/builder/stages/collect-files.ts +56 -26
- package/src/domain/graph/builder/stages/detect-changes.ts +57 -49
- package/src/domain/graph/builder/stages/finalize.ts +16 -0
- package/src/domain/graph/journal.ts +284 -27
- package/src/domain/graph/watcher.ts +29 -9
- package/src/domain/parser.ts +288 -73
- package/src/domain/search/generator.ts +34 -2
- package/src/domain/search/models.ts +1 -1
- package/src/domain/wasm-worker-entry.ts +798 -0
- package/src/domain/wasm-worker-pool.ts +330 -0
- package/src/domain/wasm-worker-protocol.ts +81 -0
- package/src/extractors/javascript.ts +149 -2
- package/src/features/ast.ts +22 -9
- package/src/features/boundaries.ts +2 -27
- package/src/features/snapshot.ts +93 -14
- package/src/graph/algorithms/louvain.ts +2 -4
- package/src/infrastructure/config.ts +12 -2
- package/src/shared/globs.ts +121 -0
- package/src/types.ts +26 -1
|
@@ -7,7 +7,7 @@ import { DbError } from '../../shared/errors.js';
|
|
|
7
7
|
import { createParseTreeCache, getActiveEngine } from '../parser.js';
|
|
8
8
|
import { type IncrementalStmts, rebuildFile } from './builder/incremental.js';
|
|
9
9
|
import { appendChangeEvents, buildChangeEvent, diffSymbols } from './change-journal.js';
|
|
10
|
-
import {
|
|
10
|
+
import { appendJournalEntriesAndStampHeader } from './journal.js';
|
|
11
11
|
|
|
12
12
|
function shouldIgnorePath(filePath: string): boolean {
|
|
13
13
|
const parts = filePath.split(path.sep);
|
|
@@ -100,7 +100,7 @@ function writeJournalAndChangeEvents(rootDir: string, updates: RebuildResult[]):
|
|
|
100
100
|
deleted: r.deleted || false,
|
|
101
101
|
}));
|
|
102
102
|
try {
|
|
103
|
-
|
|
103
|
+
appendJournalEntriesAndStampHeader(rootDir, entries, Date.now());
|
|
104
104
|
} catch (e: unknown) {
|
|
105
105
|
debug(`Journal write failed (non-fatal): ${(e as Error).message}`);
|
|
106
106
|
}
|
|
@@ -165,8 +165,8 @@ interface WatcherContext {
|
|
|
165
165
|
}
|
|
166
166
|
|
|
167
167
|
/** Initialize DB, engine, cache, and statements for watch mode. */
|
|
168
|
-
function setupWatcher(rootDir: string, opts: { engine?: string }): WatcherContext {
|
|
169
|
-
const dbPath = path.join(rootDir, '.codegraph', 'graph.db');
|
|
168
|
+
function setupWatcher(rootDir: string, opts: { engine?: string; dbPath?: string }): WatcherContext {
|
|
169
|
+
const dbPath = opts.dbPath ?? path.join(rootDir, '.codegraph', 'graph.db');
|
|
170
170
|
if (!fs.existsSync(dbPath)) {
|
|
171
171
|
throw new DbError('No graph.db found. Run `codegraph build` first.', { file: dbPath });
|
|
172
172
|
}
|
|
@@ -274,17 +274,37 @@ function startNativeWatcher(ctx: WatcherContext): () => void {
|
|
|
274
274
|
return () => watcher.close();
|
|
275
275
|
}
|
|
276
276
|
|
|
277
|
+
/**
 * Turn a set of pending (not yet rebuilt) file paths into journal entries.
 *
 * Each entry carries the path relative to `rootDir` (passed through
 * `normalizePath`) and a `deleted` flag derived from an on-disk existence
 * check at call time.
 *
 * Why the existence check: the pending set holds plain path strings with no
 * event-type metadata, so a file removed during the watch session would
 * otherwise be journaled as "changed" and the next incremental build would
 * try to re-parse a file that no longer exists instead of dropping it from
 * the graph. This mirrors the deletion detection in `rebuildFile`
 * (see builder/incremental.ts).
 *
 * Exported for unit-testing; prefer `setupShutdownHandler` in production paths.
 *
 * @param rootDir - Project root that entry paths are made relative to.
 * @param pending - Paths awaiting a rebuild; consumed once, in iteration order.
 * @returns One `{ file, deleted }` entry per pending path, in the same order.
 */
export function buildFlushEntriesFromPending(
  rootDir: string,
  pending: Iterable<string>,
): Array<{ file: string; deleted: boolean }> {
  // Snapshot the iterable first so it is fully consumed before any fs access.
  const snapshot = Array.from(pending);
  const entries: Array<{ file: string; deleted: boolean }> = [];
  for (const pendingPath of snapshot) {
    const file = normalizePath(path.relative(rootDir, pendingPath));
    const stillOnDisk = fs.existsSync(pendingPath);
    entries.push({ file, deleted: !stillOnDisk });
  }
  return entries;
}
|
|
298
|
+
|
|
277
299
|
/** Register SIGINT handler to flush journal and clean up. */
|
|
278
300
|
function setupShutdownHandler(ctx: WatcherContext, cleanup: () => void): void {
|
|
279
301
|
process.once('SIGINT', () => {
|
|
280
302
|
info('Stopping watcher...');
|
|
281
303
|
cleanup();
|
|
282
304
|
if (ctx.pending.size > 0) {
|
|
283
|
-
const entries =
|
|
284
|
-
file: normalizePath(path.relative(ctx.rootDir, filePath)),
|
|
285
|
-
}));
|
|
305
|
+
const entries = buildFlushEntriesFromPending(ctx.rootDir, ctx.pending);
|
|
286
306
|
try {
|
|
287
|
-
|
|
307
|
+
appendJournalEntriesAndStampHeader(ctx.rootDir, entries, Date.now());
|
|
288
308
|
} catch (e: unknown) {
|
|
289
309
|
debug(`Journal flush on exit failed (non-fatal): ${(e as Error).message}`);
|
|
290
310
|
}
|
|
@@ -297,7 +317,7 @@ function setupShutdownHandler(ctx: WatcherContext, cleanup: () => void): void {
|
|
|
297
317
|
|
|
298
318
|
export async function watchProject(
|
|
299
319
|
rootDir: string,
|
|
300
|
-
opts: { engine?: string; poll?: boolean; pollInterval?: number } = {},
|
|
320
|
+
opts: { engine?: string; poll?: boolean; pollInterval?: number; dbPath?: string } = {},
|
|
301
321
|
): Promise<void> {
|
|
302
322
|
const ctx = setupWatcher(rootDir, opts);
|
|
303
323
|
|
package/src/domain/parser.ts
CHANGED
|
@@ -13,6 +13,24 @@ import type {
|
|
|
13
13
|
LanguageRegistryEntry,
|
|
14
14
|
TypeMapEntry,
|
|
15
15
|
} from '../types.js';
|
|
16
|
+
import { disposeWasmWorkerPool, getWasmWorkerPool } from './wasm-worker-pool.js';
|
|
17
|
+
import type { WorkerAnalysisOpts } from './wasm-worker-protocol.js';
|
|
18
|
+
|
|
19
|
+
/**
 * Worker options with every analysis pass switched on (AST store, complexity,
 * CFG, dataflow), so worker output matches parseFilesFull.
 */
const FULL_ANALYSIS: WorkerAnalysisOpts = { ast: true, complexity: true, cfg: true, dataflow: true };

/**
 * Worker options with every analysis pass switched off: extraction only, no
 * visitor walk. Intended for fast paths such as typeMap backfill.
 */
const EXTRACT_ONLY: WorkerAnalysisOpts = { ast: false, complexity: false, cfg: false, dataflow: false };
|
|
16
34
|
|
|
17
35
|
// Re-export all extractors for backward compatibility
|
|
18
36
|
export {
|
|
@@ -262,7 +280,7 @@ function disposeMapEntries(entries: Iterable<[string, any]>, label: string): voi
|
|
|
262
280
|
}
|
|
263
281
|
}
|
|
264
282
|
|
|
265
|
-
export function disposeParsers(): void {
|
|
283
|
+
export async function disposeParsers(): Promise<void> {
|
|
266
284
|
if (_cachedParsers) {
|
|
267
285
|
disposeMapEntries(_cachedParsers, 'parser');
|
|
268
286
|
_cachedParsers = null;
|
|
@@ -276,6 +294,7 @@ export function disposeParsers(): void {
|
|
|
276
294
|
_initialized = false;
|
|
277
295
|
_allParsersLoaded = false;
|
|
278
296
|
_loadingPromises.clear();
|
|
297
|
+
await disposeWasmWorkerPool();
|
|
279
298
|
}
|
|
280
299
|
|
|
281
300
|
export function getParser(parsers: Map<string, Parser | null>, filePath: string): Parser | null {
|
|
@@ -286,33 +305,33 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
|
|
|
286
305
|
}
|
|
287
306
|
|
|
288
307
|
/**
|
|
289
|
-
*
|
|
290
|
-
*
|
|
291
|
-
*
|
|
308
|
+
* Backfill missing AST-analysis data (astNodes, dataflow, def.complexity,
|
|
309
|
+
* def.cfg) via the WASM worker pool for files that were parsed by the native
|
|
310
|
+
* engine but are missing one or more analyses.
|
|
311
|
+
*
|
|
312
|
+
* Historically this function populated `symbols._tree` so the main-thread
|
|
313
|
+
* visitor walk in `ast-analysis/engine.ts` could run. After the worker-isolation
|
|
314
|
+
* refactor (#965), the worker runs every visitor itself and returns pre-computed
|
|
315
|
+
* analysis data — `_tree` is never set on the main thread.
|
|
316
|
+
*
|
|
317
|
+
* Name is preserved for caller compatibility; the function now ensures
|
|
318
|
+
* *analysis data* rather than *trees*.
|
|
292
319
|
*/
|
|
293
320
|
export async function ensureWasmTrees(
|
|
294
321
|
fileSymbols: Map<string, any>,
|
|
295
322
|
rootDir: string,
|
|
296
323
|
): Promise<void> {
|
|
297
|
-
//
|
|
298
|
-
const
|
|
324
|
+
// Collect files that still need analysis data and are parseable by WASM.
|
|
325
|
+
const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
|
|
299
326
|
for (const [relPath, symbols] of fileSymbols) {
|
|
300
|
-
if (
|
|
301
|
-
|
|
302
|
-
}
|
|
327
|
+
if (symbols._tree) continue; // legacy path — leave existing trees alone
|
|
328
|
+
if (!_extToLang.has(path.extname(relPath).toLowerCase())) continue;
|
|
329
|
+
pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
|
|
303
330
|
}
|
|
304
|
-
if (
|
|
305
|
-
const parsers = await ensureParsersForFiles(filePaths);
|
|
331
|
+
if (pending.length === 0) return;
|
|
306
332
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
const ext = path.extname(relPath).toLowerCase();
|
|
310
|
-
const entry = _extToLang.get(ext);
|
|
311
|
-
if (!entry) continue;
|
|
312
|
-
const parser = parsers.get(entry.id);
|
|
313
|
-
if (!parser) continue;
|
|
314
|
-
|
|
315
|
-
const absPath = path.join(rootDir, relPath);
|
|
333
|
+
const pool = getWasmWorkerPool();
|
|
334
|
+
for (const { relPath, absPath, symbols } of pending) {
|
|
316
335
|
let code: string;
|
|
317
336
|
try {
|
|
318
337
|
code = fs.readFileSync(absPath, 'utf-8');
|
|
@@ -320,11 +339,45 @@ export async function ensureWasmTrees(
|
|
|
320
339
|
debug(`ensureWasmTrees: cannot read ${relPath}: ${(e as Error).message}`);
|
|
321
340
|
continue;
|
|
322
341
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
342
|
+
const output = await pool.parse(absPath, code, FULL_ANALYSIS);
|
|
343
|
+
if (!output) continue; // worker crashed or returned null — skip silently
|
|
344
|
+
mergeAnalysisData(symbols, output);
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
/**
 * Copy analysis results produced by a worker onto an existing symbols object,
 * filling gaps only: a field the caller has already populated is never
 * replaced.
 *
 * Fields considered:
 * - `_langId`, `_lineCount`, `dataflow`: copied when the existing value is falsy.
 * - `astNodes`: copied when the existing value is not an array.
 * - `typeMap`: cloned wholesale when the existing value is not a `Map`;
 *   otherwise only keys missing from the existing map are added.
 * - per-definition `complexity` and `cfg`: copied onto the existing definition
 *   with the same (kind, name, line) key; worker definitions with no match are
 *   ignored.
 *
 * @param symbols - Existing (e.g. native-parsed) symbols; mutated in place.
 * @param worker - Worker-produced extractor output to take missing data from.
 */
function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
  // Matching key for definitions — mirrors the key used in engine.ts.
  const defKey = (def: any): string => `${def.kind}|${def.name}|${def.line}`;

  // ── File-level fields ──────────────────────────────────────────────────
  if (!symbols._langId && worker._langId) {
    symbols._langId = worker._langId;
  }
  if (!symbols._lineCount && worker._lineCount) {
    symbols._lineCount = worker._lineCount;
  }
  if (!Array.isArray(symbols.astNodes) && Array.isArray(worker.astNodes)) {
    symbols.astNodes = worker.astNodes;
  }
  if (!symbols.dataflow && worker.dataflow) {
    symbols.dataflow = worker.dataflow;
  }

  // ── typeMap: clone when absent/invalid, otherwise add missing keys ─────
  const incomingTypes = worker.typeMap;
  if (incomingTypes && incomingTypes.size > 0) {
    if (symbols.typeMap instanceof Map) {
      for (const [typeName, typeEntry] of incomingTypes) {
        if (symbols.typeMap.has(typeName)) continue;
        symbols.typeMap.set(typeName, typeEntry);
      }
    } else {
      symbols.typeMap = new Map(incomingTypes);
    }
  }

  // ── Per-definition complexity / cfg ────────────────────────────────────
  const localDefs: any[] = Array.isArray(symbols.definitions) ? symbols.definitions : [];
  const remoteDefs: any[] = Array.isArray(worker.definitions) ? worker.definitions : [];

  // On duplicate keys the later definition wins, as with sequential set().
  const localByKey = new Map<string, any>(
    localDefs.map((def): [string, any] => [defKey(def), def]),
  );

  for (const remote of remoteDefs) {
    const local = localByKey.get(defKey(remote));
    if (!local) continue;

    if (!local.complexity && remote.complexity) {
      local.complexity = remote.complexity;
    }

    const localHasCfgBlocks = Boolean(local.cfg) && Array.isArray(local.cfg.blocks);
    if (!localHasCfgBlocks && remote.cfg?.blocks) {
      local.cfg = remote.cfg;
    }
  }
}
|
|
@@ -338,6 +391,149 @@ export function isWasmAvailable(): boolean {
|
|
|
338
391
|
);
|
|
339
392
|
}
|
|
340
393
|
|
|
394
|
+
/**
 * Lowercased file extensions of every registered language whose WASM grammar
 * file exists on disk.
 *
 * Used to scope engine-parity backfill to files WASM can actually recover:
 * a language without an installed grammar is skipped by both engines, so it
 * does not count as a native-engine drop.
 *
 * The result is computed on the first call and memoized in
 * `_installedWasmExts` (the grammars directory is shipped immutable), so the
 * same `Set` instance is returned on every subsequent call.
 *
 * @returns Set of lowercase extensions for which a grammar file was found.
 */
let _installedWasmExts: Set<string> | null = null;
export function getInstalledWasmExtensions(): Set<string> {
  if (_installedWasmExts) return _installedWasmExts;

  const found = new Set<string>();
  for (const language of LANGUAGE_REGISTRY) {
    const grammarInstalled = fs.existsSync(grammarPath(language.grammarFile));
    if (!grammarInstalled) continue;
    for (const extension of language.extensions) {
      found.add(extension.toLowerCase());
    }
  }

  _installedWasmExts = found;
  return found;
}
|
|
414
|
+
|
|
415
|
+
/**
 * Lowercase file extensions the native Rust addon can parse.
 *
 * Mirrors `LanguageKind::from_extension` in
 * `crates/codegraph-core/src/parser_registry.rs`. It is used to explain why
 * the native orchestrator dropped a file: an extension outside this set is a
 * legitimate parser limit (no Rust extractor exists), whereas an extension
 * inside it points at a real native bug (parse/read/extract failure).
 *
 * Keep this list in sync with the Rust enum — the native addon is a separate
 * npm package, so JS has no runtime way to discover its language coverage.
 */
export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
  // JavaScript / TypeScript
  '.js', '.jsx', '.mjs', '.cjs', '.ts', '.tsx',
  // Python
  '.py', '.pyi',
  // Terraform / HCL
  '.tf', '.hcl',
  // Go, Rust, Java, C#
  '.go', '.rs', '.java', '.cs',
  // Ruby
  '.rb', '.rake', '.gemspec',
  // PHP
  '.php', '.phtml',
  // C / C++
  '.c', '.h', '.cpp', '.cc', '.cxx', '.hpp',
  // Kotlin, Swift, Scala
  '.kt', '.kts', '.swift', '.scala',
  // Shell
  '.sh', '.bash',
  // Elixir, Lua, Dart, Zig, Haskell
  '.ex', '.exs', '.lua', '.dart', '.zig', '.hs',
  // OCaml
  '.ml', '.mli',
]);
|
|
468
|
+
|
|
469
|
+
/**
 * Why the native orchestrator dropped a file.
 * - `unsupported-by-native`: the extension has no Rust extractor, which is a
 *   legitimate parser limit.
 * - `native-extractor-failure`: the extension is supported by native yet the
 *   file was still dropped, which points at a real bug (read error, parse
 *   failure, extractor crash).
 */
export type NativeDropReason = 'unsupported-by-native' | 'native-extractor-failure';

export interface NativeDropClassification {
  /** For each reason: extension → relative paths that landed in that bucket. */
  byReason: Record<NativeDropReason, Map<string, string[]>>;
  /** For each reason: total number of files. */
  totals: Record<NativeDropReason, number>;
}

/**
 * Bucket dropped files by drop reason and then by extension, so the caller can
 * log per-extension counts together with sample paths.
 *
 * The reason is decided purely from the lowercased extension: a member of
 * `NATIVE_SUPPORTED_EXTENSIONS` means `native-extractor-failure`, anything
 * else means `unsupported-by-native`. No I/O is performed, so the function can
 * be unit-tested independently of the build pipeline.
 *
 * @param relPaths - Relative paths of the files that were dropped.
 * @returns Paths grouped per reason and extension, plus per-reason totals.
 *   Paths keep their input order within each bucket.
 */
export function classifyNativeDrops(relPaths: Iterable<string>): NativeDropClassification {
  const classification: NativeDropClassification = {
    byReason: {
      'unsupported-by-native': new Map<string, string[]>(),
      'native-extractor-failure': new Map<string, string[]>(),
    },
    totals: {
      'unsupported-by-native': 0,
      'native-extractor-failure': 0,
    },
  };

  for (const relPath of relPaths) {
    const extension = path.extname(relPath).toLowerCase();

    let reason: NativeDropReason;
    if (NATIVE_SUPPORTED_EXTENSIONS.has(extension)) {
      reason = 'native-extractor-failure';
    } else {
      reason = 'unsupported-by-native';
    }

    const pathsByExt = classification.byReason[reason];
    const known = pathsByExt.get(extension);
    if (known) {
      known.push(relPath);
    } else {
      pathsByExt.set(extension, [relPath]);
    }
    classification.totals[reason] += 1;
  }

  return classification;
}
|
|
514
|
+
|
|
515
|
+
/**
 * Render `{ ext → paths[] }` as `ext (n: sample, sample, ...)` slices joined
 * with `; ` for use in a single log line.
 *
 * Output is capped at 3 sample paths per extension and 6 extensions in total
 * to keep warnings readable when many languages are dropped at once; anything
 * beyond a cap is summarised as `+N more` / `+N more extension(s)`.
 * Extensions are ordered by descending file count so the loudest offender
 * comes first; ties keep the map's insertion order (Array.prototype.sort is
 * stable).
 *
 * An empty-string key — which is what `path.extname` yields for extensionless
 * files such as `Makefile` — is shown as `(no extension)` rather than as a
 * blank label, so the slice stays legible.
 *
 * Pure function: the input map and its arrays are not modified.
 *
 * @param buckets - Extension → relative paths, e.g. one `byReason` bucket
 *   from `classifyNativeDrops`.
 * @returns The formatted summary, or an empty string for an empty map.
 */
export function formatDropExtensionSummary(buckets: Map<string, string[]>): string {
  const MAX_EXTS = 6;
  const MAX_SAMPLES = 3;
  const NO_EXT_LABEL = '(no extension)';

  // Sort a copy of the entries so the caller's map order is left untouched.
  const ranked = Array.from(buckets.entries()).sort((a, b) => b[1].length - a[1].length);

  const shown = ranked.slice(0, MAX_EXTS).map(([ext, paths]) => {
    const label = ext === '' ? NO_EXT_LABEL : ext;
    const sample = paths.slice(0, MAX_SAMPLES).join(', ');
    const more = paths.length > MAX_SAMPLES ? `, +${paths.length - MAX_SAMPLES} more` : '';
    return `${label} (${paths.length}: ${sample}${more})`;
  });

  if (ranked.length > MAX_EXTS) {
    shown.push(`+${ranked.length - MAX_EXTS} more extension(s)`);
  }
  return shown.join('; ');
}
|
|
536
|
+
|
|
341
537
|
// ── Unified API ──────────────────────────────────────────────────────────────
|
|
342
538
|
|
|
343
539
|
function resolveEngine(opts: ParseEngineOpts = {}): ResolvedEngine {
|
|
@@ -721,23 +917,13 @@ async function backfillTypeMap(
|
|
|
721
917
|
return { typeMap: new Map(), backfilled: false };
|
|
722
918
|
}
|
|
723
919
|
}
|
|
724
|
-
const
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
}
|
|
730
|
-
return { typeMap: extracted.symbols.typeMap, backfilled: true };
|
|
731
|
-
} finally {
|
|
732
|
-
// Free the WASM tree to prevent memory accumulation across repeated builds
|
|
733
|
-
if (extracted?.tree && typeof extracted.tree.delete === 'function') {
|
|
734
|
-
try {
|
|
735
|
-
extracted.tree.delete();
|
|
736
|
-
} catch (e) {
|
|
737
|
-
debug(`backfillTypeMap: WASM tree cleanup failed: ${toErrorMessage(e)}`);
|
|
738
|
-
}
|
|
739
|
-
}
|
|
920
|
+
const pool = getWasmWorkerPool();
|
|
921
|
+
// Extract-only — no visitor walk, we only need the typeMap from this pass.
|
|
922
|
+
const output = await pool.parse(filePath, code, EXTRACT_ONLY);
|
|
923
|
+
if (!output || output.typeMap.size === 0) {
|
|
924
|
+
return { typeMap: new Map(), backfilled: false };
|
|
740
925
|
}
|
|
926
|
+
return { typeMap: output.typeMap, backfilled: true };
|
|
741
927
|
}
|
|
742
928
|
|
|
743
929
|
/**
|
|
@@ -765,7 +951,16 @@ function wasmExtractSymbols(
|
|
|
765
951
|
if (!entry) return null;
|
|
766
952
|
const query = _queryCache.get(entry.id) ?? undefined;
|
|
767
953
|
// Query (web-tree-sitter) is structurally compatible with TreeSitterQuery at runtime
|
|
768
|
-
|
|
954
|
+
let symbols: ExtractorOutput | null;
|
|
955
|
+
try {
|
|
956
|
+
symbols = entry.extractor(tree as any, filePath, query as any);
|
|
957
|
+
} catch (e: unknown) {
|
|
958
|
+
warn(`Extractor error in ${filePath}: ${(e as Error).message}`);
|
|
959
|
+
// Free WASM tree to prevent memory leak — web-tree-sitter trees are backed
|
|
960
|
+
// by WASM linear memory and are not garbage-collected automatically.
|
|
961
|
+
if (typeof (tree as any).delete === 'function') (tree as any).delete();
|
|
962
|
+
return null;
|
|
963
|
+
}
|
|
769
964
|
return symbols ? { symbols, tree, langId: entry.id } : null;
|
|
770
965
|
}
|
|
771
966
|
|
|
@@ -796,10 +991,9 @@ export async function parseFileAuto(
|
|
|
796
991
|
return patched;
|
|
797
992
|
}
|
|
798
993
|
|
|
799
|
-
// WASM path
|
|
800
|
-
const
|
|
801
|
-
|
|
802
|
-
return extracted ? extracted.symbols : null;
|
|
994
|
+
// WASM path — dispatch to isolated worker
|
|
995
|
+
const pool = getWasmWorkerPool();
|
|
996
|
+
return pool.parse(filePath, source, FULL_ANALYSIS);
|
|
803
997
|
}
|
|
804
998
|
|
|
805
999
|
/** Backfill typeMap via WASM for TS/TSX files parsed by the native engine. */
|
|
@@ -812,40 +1006,44 @@ async function backfillTypeMapBatch(
|
|
|
812
1006
|
);
|
|
813
1007
|
if (tsFiles.length === 0) return;
|
|
814
1008
|
|
|
815
|
-
const
|
|
1009
|
+
const pool = getWasmWorkerPool();
|
|
816
1010
|
for (const { filePath, relPath } of tsFiles) {
|
|
817
|
-
let
|
|
1011
|
+
let code: string;
|
|
818
1012
|
try {
|
|
819
|
-
|
|
820
|
-
extracted = wasmExtractSymbols(parsers, filePath, code);
|
|
821
|
-
if (extracted?.symbols && extracted.symbols.typeMap.size > 0) {
|
|
822
|
-
const symbols = result.get(relPath);
|
|
823
|
-
if (!symbols) continue;
|
|
824
|
-
symbols.typeMap = extracted.symbols.typeMap;
|
|
825
|
-
symbols._typeMapBackfilled = true;
|
|
826
|
-
}
|
|
1013
|
+
code = fs.readFileSync(filePath, 'utf-8');
|
|
827
1014
|
} catch (e) {
|
|
828
|
-
debug(`batchExtract:
|
|
829
|
-
|
|
830
|
-
if (extracted?.tree && typeof extracted.tree.delete === 'function') {
|
|
831
|
-
try {
|
|
832
|
-
extracted.tree.delete();
|
|
833
|
-
} catch (e) {
|
|
834
|
-
debug(`batchExtract: WASM tree cleanup failed: ${toErrorMessage(e)}`);
|
|
835
|
-
}
|
|
836
|
-
}
|
|
1015
|
+
debug(`batchExtract: cannot read ${filePath}: ${toErrorMessage(e)}`);
|
|
1016
|
+
continue;
|
|
837
1017
|
}
|
|
1018
|
+
const output = await pool.parse(filePath, code, EXTRACT_ONLY);
|
|
1019
|
+
if (!output || output.typeMap.size === 0) continue;
|
|
1020
|
+
const symbols = result.get(relPath);
|
|
1021
|
+
if (!symbols) continue;
|
|
1022
|
+
symbols.typeMap = output.typeMap;
|
|
1023
|
+
symbols._typeMapBackfilled = true;
|
|
838
1024
|
}
|
|
839
1025
|
}
|
|
840
1026
|
|
|
841
|
-
/**
|
|
1027
|
+
/**
|
|
1028
|
+
* Parse files via WASM engine, returning a Map<relPath, symbols>.
|
|
1029
|
+
*
|
|
1030
|
+
* Each file is dispatched to the WASM worker pool. The worker parses, extracts,
|
|
1031
|
+
* and runs all AST analyses (complexity, CFG, dataflow, ast-store) in its own
|
|
1032
|
+
* thread, returning fully pre-computed ExtractorOutput. V8 fatal errors from
|
|
1033
|
+
* tree-sitter WASM (#965) kill only the worker — the pool skips the file and
|
|
1034
|
+
* restarts the worker for the next one.
|
|
1035
|
+
*
|
|
1036
|
+
* `_tree` is NEVER set by this path. All downstream analyses operate on the
|
|
1037
|
+
* pre-computed `astNodes` / `dataflow` / `def.complexity` / `def.cfg` fields.
|
|
1038
|
+
*/
|
|
842
1039
|
async function parseFilesWasm(
|
|
843
1040
|
filePaths: string[],
|
|
844
1041
|
rootDir: string,
|
|
845
1042
|
): Promise<Map<string, ExtractorOutput>> {
|
|
846
1043
|
const result = new Map<string, ExtractorOutput>();
|
|
847
|
-
const
|
|
1044
|
+
const pool = getWasmWorkerPool();
|
|
848
1045
|
for (const filePath of filePaths) {
|
|
1046
|
+
if (!_extToLang.has(path.extname(filePath).toLowerCase())) continue;
|
|
849
1047
|
let code: string;
|
|
850
1048
|
try {
|
|
851
1049
|
code = fs.readFileSync(filePath, 'utf-8');
|
|
@@ -853,13 +1051,10 @@ async function parseFilesWasm(
|
|
|
853
1051
|
warn(`Skipping ${path.relative(rootDir, filePath)}: ${(err as Error).message}`);
|
|
854
1052
|
continue;
|
|
855
1053
|
}
|
|
856
|
-
const
|
|
857
|
-
if (
|
|
1054
|
+
const output = await pool.parse(filePath, code, FULL_ANALYSIS);
|
|
1055
|
+
if (output) {
|
|
858
1056
|
const relPath = path.relative(rootDir, filePath).split(path.sep).join('/');
|
|
859
|
-
|
|
860
|
-
extracted.symbols._langId = extracted.langId;
|
|
861
|
-
extracted.symbols._lineCount = code.split('\n').length;
|
|
862
|
-
result.set(relPath, extracted.symbols);
|
|
1057
|
+
result.set(relPath, output);
|
|
863
1058
|
}
|
|
864
1059
|
}
|
|
865
1060
|
return result;
|
|
@@ -884,8 +1079,10 @@ export async function parseFilesAuto(
|
|
|
884
1079
|
? native.parseFilesFull(filePaths, rootDir)
|
|
885
1080
|
: native.parseFiles(filePaths, rootDir, true, true);
|
|
886
1081
|
const needsTypeMap: { filePath: string; relPath: string }[] = [];
|
|
1082
|
+
const nativeParsed = new Set<string>();
|
|
887
1083
|
for (const r of nativeResults) {
|
|
888
1084
|
if (!r) continue;
|
|
1085
|
+
nativeParsed.add(r.file);
|
|
889
1086
|
const patched = patchNativeResult(r);
|
|
890
1087
|
const relPath = path.relative(rootDir, r.file).split(path.sep).join('/');
|
|
891
1088
|
result.set(relPath, patched);
|
|
@@ -901,6 +1098,24 @@ export async function parseFilesAuto(
|
|
|
901
1098
|
if (needsTypeMap.length > 0) {
|
|
902
1099
|
await backfillTypeMapBatch(needsTypeMap, result);
|
|
903
1100
|
}
|
|
1101
|
+
|
|
1102
|
+
// Engine parity: native may silently drop files whose extensions are in
|
|
1103
|
+
// SUPPORTED_EXTENSIONS (because a WASM grammar exists) but whose Rust
|
|
1104
|
+
// extractor/grammar is missing or fails. WASM handles these — fall back so
|
|
1105
|
+
// both engines process the same file set (#967). Restrict to installed WASM
|
|
1106
|
+
// grammars so we don't warn about files that neither engine can parse.
|
|
1107
|
+
const installedExts = getInstalledWasmExtensions();
|
|
1108
|
+
const dropped = filePaths.filter(
|
|
1109
|
+
(f) => !nativeParsed.has(f) && installedExts.has(path.extname(f).toLowerCase()),
|
|
1110
|
+
);
|
|
1111
|
+
if (dropped.length > 0) {
|
|
1112
|
+
warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
|
|
1113
|
+
const wasmResults = await parseFilesWasm(dropped, rootDir);
|
|
1114
|
+
for (const [relPath, symbols] of wasmResults) {
|
|
1115
|
+
result.set(relPath, symbols);
|
|
1116
|
+
}
|
|
1117
|
+
}
|
|
1118
|
+
|
|
904
1119
|
return result;
|
|
905
1120
|
}
|
|
906
1121
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import fs from 'node:fs';
|
|
2
2
|
import path from 'node:path';
|
|
3
|
-
import { closeDb, findDbPath, openDb } from '../../db/index.js';
|
|
3
|
+
import { closeDb, findDbPath, getBuildMeta, openDb } from '../../db/index.js';
|
|
4
4
|
import { warn } from '../../infrastructure/logger.js';
|
|
5
5
|
import { DbError } from '../../shared/errors.js';
|
|
6
6
|
import type { BetterSqlite3Database, NodeRow } from '../../types.js';
|
|
@@ -73,6 +73,21 @@ export async function buildEmbeddings(
|
|
|
73
73
|
const db = openDb(dbPath) as BetterSqlite3Database;
|
|
74
74
|
initEmbeddingsSchema(db);
|
|
75
75
|
|
|
76
|
+
// Prefer the repo root recorded at build time — embed may be invoked from a
|
|
77
|
+
// different cwd (e.g. `codegraph embed --db /abs/path/graph.db`) and the
|
|
78
|
+
// positional rootDir will be wrong in that case. For legacy DBs without
|
|
79
|
+
// root_dir metadata, fall back to `<dbParent>` only when the DB lives at
|
|
80
|
+
// the conventional `<root>/.codegraph/graph.db` layout — otherwise trust
|
|
81
|
+
// the caller-provided rootDir (which may be an explicit positional arg).
|
|
82
|
+
// `path.dirname(...)` is always non-empty (`'.'` at minimum), so the
|
|
83
|
+
// conventional-layout check is required to keep the rootDir path reachable.
|
|
84
|
+
const metaRoot = getBuildMeta(db, 'root_dir');
|
|
85
|
+
const resolvedDbPath = path.resolve(dbPath);
|
|
86
|
+
const dbDirName = path.basename(path.dirname(resolvedDbPath));
|
|
87
|
+
const dbParent =
|
|
88
|
+
dbDirName === '.codegraph' ? path.dirname(path.dirname(resolvedDbPath)) : undefined;
|
|
89
|
+
const resolvedRoot = metaRoot || dbParent || rootDir;
|
|
90
|
+
|
|
76
91
|
db.exec('DELETE FROM embeddings');
|
|
77
92
|
db.exec('DELETE FROM embedding_meta');
|
|
78
93
|
db.exec('DELETE FROM fts_index');
|
|
@@ -98,13 +113,17 @@ export async function buildEmbeddings(
|
|
|
98
113
|
const config = getModelConfig(modelKey);
|
|
99
114
|
const contextWindow = config.contextWindow;
|
|
100
115
|
let overflowCount = 0;
|
|
116
|
+
let filesRead = 0;
|
|
117
|
+
let filesSkipped = 0;
|
|
101
118
|
|
|
102
119
|
for (const [file, fileNodes] of byFile) {
|
|
103
|
-
const fullPath = path.isAbsolute(file) ? file : path.join(
|
|
120
|
+
const fullPath = path.isAbsolute(file) ? file : path.join(resolvedRoot, file);
|
|
104
121
|
let lines: string[];
|
|
105
122
|
try {
|
|
106
123
|
lines = fs.readFileSync(fullPath, 'utf-8').split('\n');
|
|
124
|
+
filesRead++;
|
|
107
125
|
} catch (err: unknown) {
|
|
126
|
+
filesSkipped++;
|
|
108
127
|
warn(`Cannot read ${file} for embeddings: ${(err as Error).message}`);
|
|
109
128
|
continue;
|
|
110
129
|
}
|
|
@@ -136,6 +155,19 @@ export async function buildEmbeddings(
|
|
|
136
155
|
);
|
|
137
156
|
}
|
|
138
157
|
|
|
158
|
+
// If there were symbols to embed but every file failed to read, the DB was
|
|
159
|
+
// almost certainly built from a different location than the current cwd.
|
|
160
|
+
// Surface this clearly instead of emitting a silent "Stored 0 embeddings".
|
|
161
|
+
if (byFile.size > 0 && filesRead === 0) {
|
|
162
|
+
closeDb(db);
|
|
163
|
+
throw new DbError(
|
|
164
|
+
`embed: could not read any of the ${filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
|
|
165
|
+
`Tried resolving against: ${resolvedRoot}\n` +
|
|
166
|
+
'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
|
|
167
|
+
{ file: dbPath },
|
|
168
|
+
);
|
|
169
|
+
}
|
|
170
|
+
|
|
139
171
|
console.log(`Embedding ${texts.length} symbols...`);
|
|
140
172
|
const { vectors, dim } = await embed(texts, modelKey);
|
|
141
173
|
|
|
@@ -253,7 +253,7 @@ export async function embed(
|
|
|
253
253
|
}
|
|
254
254
|
|
|
255
255
|
if (texts.length > batchSize) {
|
|
256
|
-
process.
|
|
256
|
+
process.stderr.write(` Embedded ${Math.min(i + batchSize, texts.length)}/${texts.length}\r`);
|
|
257
257
|
}
|
|
258
258
|
}
|
|
259
259
|
|