@colbymchenry/codegraph-darwin-x64 1.0.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/dist/bin/codegraph.js +111 -11
- package/lib/dist/bin/codegraph.js.map +1 -1
- package/lib/dist/db/index.d.ts +22 -1
- package/lib/dist/db/index.d.ts.map +1 -1
- package/lib/dist/db/index.js +46 -1
- package/lib/dist/db/index.js.map +1 -1
- package/lib/dist/db/queries.d.ts +14 -0
- package/lib/dist/db/queries.d.ts.map +1 -1
- package/lib/dist/db/queries.js +25 -0
- package/lib/dist/db/queries.js.map +1 -1
- package/lib/dist/directory.d.ts +43 -0
- package/lib/dist/directory.d.ts.map +1 -1
- package/lib/dist/directory.js +121 -0
- package/lib/dist/directory.js.map +1 -1
- package/lib/dist/extraction/grammars.d.ts +11 -3
- package/lib/dist/extraction/grammars.d.ts.map +1 -1
- package/lib/dist/extraction/grammars.js +14 -5
- package/lib/dist/extraction/grammars.js.map +1 -1
- package/lib/dist/extraction/index.d.ts +6 -3
- package/lib/dist/extraction/index.d.ts.map +1 -1
- package/lib/dist/extraction/index.js +229 -62
- package/lib/dist/extraction/index.js.map +1 -1
- package/lib/dist/extraction/languages/c-cpp.d.ts.map +1 -1
- package/lib/dist/extraction/languages/c-cpp.js +47 -2
- package/lib/dist/extraction/languages/c-cpp.js.map +1 -1
- package/lib/dist/extraction/languages/csharp.d.ts.map +1 -1
- package/lib/dist/extraction/languages/csharp.js +20 -0
- package/lib/dist/extraction/languages/csharp.js.map +1 -1
- package/lib/dist/extraction/languages/dart.d.ts.map +1 -1
- package/lib/dist/extraction/languages/dart.js +22 -0
- package/lib/dist/extraction/languages/dart.js.map +1 -1
- package/lib/dist/extraction/languages/java.d.ts.map +1 -1
- package/lib/dist/extraction/languages/java.js +213 -9
- package/lib/dist/extraction/languages/java.js.map +1 -1
- package/lib/dist/extraction/languages/kotlin.d.ts.map +1 -1
- package/lib/dist/extraction/languages/kotlin.js +51 -0
- package/lib/dist/extraction/languages/kotlin.js.map +1 -1
- package/lib/dist/extraction/languages/scala.d.ts.map +1 -1
- package/lib/dist/extraction/languages/scala.js +19 -9
- package/lib/dist/extraction/languages/scala.js.map +1 -1
- package/lib/dist/extraction/parse-worker.js +4 -1
- package/lib/dist/extraction/parse-worker.js.map +1 -1
- package/lib/dist/extraction/tree-sitter-types.d.ts +13 -0
- package/lib/dist/extraction/tree-sitter-types.d.ts.map +1 -1
- package/lib/dist/extraction/tree-sitter.d.ts +119 -0
- package/lib/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/lib/dist/extraction/tree-sitter.js +890 -11
- package/lib/dist/extraction/tree-sitter.js.map +1 -1
- package/lib/dist/index.d.ts +33 -0
- package/lib/dist/index.d.ts.map +1 -1
- package/lib/dist/index.js +68 -7
- package/lib/dist/index.js.map +1 -1
- package/lib/dist/installer/index.d.ts.map +1 -1
- package/lib/dist/installer/index.js +33 -67
- package/lib/dist/installer/index.js.map +1 -1
- package/lib/dist/installer/instructions-template.d.ts +3 -3
- package/lib/dist/installer/instructions-template.d.ts.map +1 -1
- package/lib/dist/installer/instructions-template.js +4 -4
- package/lib/dist/installer/targets/claude.d.ts +18 -12
- package/lib/dist/installer/targets/claude.d.ts.map +1 -1
- package/lib/dist/installer/targets/claude.js +78 -6
- package/lib/dist/installer/targets/claude.js.map +1 -1
- package/lib/dist/installer/targets/shared.d.ts +12 -2
- package/lib/dist/installer/targets/shared.d.ts.map +1 -1
- package/lib/dist/installer/targets/shared.js +13 -12
- package/lib/dist/installer/targets/shared.js.map +1 -1
- package/lib/dist/installer/targets/types.d.ts +7 -0
- package/lib/dist/installer/targets/types.d.ts.map +1 -1
- package/lib/dist/mcp/daemon.d.ts.map +1 -1
- package/lib/dist/mcp/daemon.js +16 -0
- package/lib/dist/mcp/daemon.js.map +1 -1
- package/lib/dist/mcp/engine.d.ts.map +1 -1
- package/lib/dist/mcp/engine.js +8 -0
- package/lib/dist/mcp/engine.js.map +1 -1
- package/lib/dist/mcp/proxy.d.ts.map +1 -1
- package/lib/dist/mcp/proxy.js +14 -1
- package/lib/dist/mcp/proxy.js.map +1 -1
- package/lib/dist/mcp/server-instructions.d.ts +18 -14
- package/lib/dist/mcp/server-instructions.d.ts.map +1 -1
- package/lib/dist/mcp/server-instructions.js +57 -52
- package/lib/dist/mcp/server-instructions.js.map +1 -1
- package/lib/dist/mcp/session.d.ts.map +1 -1
- package/lib/dist/mcp/session.js +23 -18
- package/lib/dist/mcp/session.js.map +1 -1
- package/lib/dist/mcp/tools.d.ts +51 -1
- package/lib/dist/mcp/tools.d.ts.map +1 -1
- package/lib/dist/mcp/tools.js +585 -151
- package/lib/dist/mcp/tools.js.map +1 -1
- package/lib/dist/project-config.d.ts +36 -0
- package/lib/dist/project-config.d.ts.map +1 -0
- package/lib/dist/project-config.js +235 -0
- package/lib/dist/project-config.js.map +1 -0
- package/lib/dist/reasoning/config.d.ts +45 -0
- package/lib/dist/reasoning/config.d.ts.map +1 -0
- package/lib/dist/reasoning/config.js +171 -0
- package/lib/dist/reasoning/config.js.map +1 -0
- package/lib/dist/reasoning/credentials.d.ts +5 -0
- package/lib/dist/reasoning/credentials.d.ts.map +1 -0
- package/lib/dist/reasoning/credentials.js +83 -0
- package/lib/dist/reasoning/credentials.js.map +1 -0
- package/lib/dist/reasoning/login.d.ts +21 -0
- package/lib/dist/reasoning/login.d.ts.map +1 -0
- package/lib/dist/reasoning/login.js +85 -0
- package/lib/dist/reasoning/login.js.map +1 -0
- package/lib/dist/reasoning/reasoner.d.ts +43 -0
- package/lib/dist/reasoning/reasoner.d.ts.map +1 -0
- package/lib/dist/reasoning/reasoner.js +308 -0
- package/lib/dist/reasoning/reasoner.js.map +1 -0
- package/lib/dist/resolution/c-fnptr-synthesizer.d.ts +33 -0
- package/lib/dist/resolution/c-fnptr-synthesizer.d.ts.map +1 -0
- package/lib/dist/resolution/c-fnptr-synthesizer.js +352 -0
- package/lib/dist/resolution/c-fnptr-synthesizer.js.map +1 -0
- package/lib/dist/resolution/callback-synthesizer.d.ts +6 -1
- package/lib/dist/resolution/callback-synthesizer.d.ts.map +1 -1
- package/lib/dist/resolution/callback-synthesizer.js +1109 -1
- package/lib/dist/resolution/callback-synthesizer.js.map +1 -1
- package/lib/dist/resolution/frameworks/goframe.d.ts +41 -0
- package/lib/dist/resolution/frameworks/goframe.d.ts.map +1 -0
- package/lib/dist/resolution/frameworks/goframe.js +112 -0
- package/lib/dist/resolution/frameworks/goframe.js.map +1 -0
- package/lib/dist/resolution/frameworks/index.d.ts +1 -0
- package/lib/dist/resolution/frameworks/index.d.ts.map +1 -1
- package/lib/dist/resolution/frameworks/index.js +5 -1
- package/lib/dist/resolution/frameworks/index.js.map +1 -1
- package/lib/dist/resolution/frameworks/react.d.ts.map +1 -1
- package/lib/dist/resolution/frameworks/react.js +17 -60
- package/lib/dist/resolution/frameworks/react.js.map +1 -1
- package/lib/dist/resolution/goframe-synthesizer.d.ts +28 -0
- package/lib/dist/resolution/goframe-synthesizer.d.ts.map +1 -0
- package/lib/dist/resolution/goframe-synthesizer.js +158 -0
- package/lib/dist/resolution/goframe-synthesizer.js.map +1 -0
- package/lib/dist/resolution/name-matcher.d.ts.map +1 -1
- package/lib/dist/resolution/name-matcher.js +48 -8
- package/lib/dist/resolution/name-matcher.js.map +1 -1
- package/lib/dist/resolution/strip-comments.d.ts +1 -1
- package/lib/dist/resolution/strip-comments.d.ts.map +1 -1
- package/lib/dist/resolution/strip-comments.js +2 -0
- package/lib/dist/resolution/strip-comments.js.map +1 -1
- package/lib/dist/sync/watcher.d.ts +68 -1
- package/lib/dist/sync/watcher.d.ts.map +1 -1
- package/lib/dist/sync/watcher.js +212 -14
- package/lib/dist/sync/watcher.js.map +1 -1
- package/lib/dist/telemetry/index.d.ts +0 -3
- package/lib/dist/telemetry/index.d.ts.map +1 -1
- package/lib/dist/telemetry/index.js +4 -7
- package/lib/dist/telemetry/index.js.map +1 -1
- package/lib/dist/upgrade/index.d.ts.map +1 -1
- package/lib/dist/upgrade/index.js +40 -4
- package/lib/dist/upgrade/index.js.map +1 -1
- package/lib/dist/utils.d.ts +14 -1
- package/lib/dist/utils.d.ts.map +1 -1
- package/lib/dist/utils.js +20 -2
- package/lib/dist/utils.js.map +1 -1
- package/lib/node_modules/.package-lock.json +1 -1
- package/lib/package.json +2 -2
- package/package.json +1 -1
|
@@ -55,6 +55,7 @@ const crypto = __importStar(require("crypto"));
|
|
|
55
55
|
const child_process_1 = require("child_process");
|
|
56
56
|
const tree_sitter_1 = require("./tree-sitter");
|
|
57
57
|
const grammars_1 = require("./grammars");
|
|
58
|
+
const project_config_1 = require("../project-config");
|
|
58
59
|
const directory_1 = require("../directory");
|
|
59
60
|
const errors_1 = require("../errors");
|
|
60
61
|
const utils_1 = require("../utils");
|
|
@@ -65,6 +66,17 @@ const frameworks_1 = require("../resolution/frameworks");
|
|
|
65
66
|
* File reads are I/O-bound; batching overlaps I/O wait with CPU parse work.
|
|
66
67
|
*/
|
|
67
68
|
const FILE_IO_BATCH_SIZE = 10;
|
|
69
|
+
/**
|
|
70
|
+
* How many files the `sync()` reconcile processes between cooperative yields to
|
|
71
|
+
* the event loop. The reconcile runs two O(files) loops of synchronous `fs`
|
|
72
|
+
* calls (existsSync for removals, statSync for adds/mods); on a very large repo
|
|
73
|
+
* (~100k files) an un-yielded run wedges the main thread for minutes, which both
|
|
74
|
+
* trips the liveness watchdog (it SIGKILLs a process whose loop stops turning)
|
|
75
|
+
* and blocks the first MCP tool call behind the catch-up gate (issue #905).
|
|
76
|
+
* Yielding every N files keeps the socket, the watchdog heartbeat, and any
|
|
77
|
+
* concurrent read query responsive while the reconcile runs.
|
|
78
|
+
*/
|
|
79
|
+
const SYNC_RECONCILE_YIELD_INTERVAL = 1000;
|
|
68
80
|
// PARSER_RESET_INTERVAL moved to parse-worker.ts (runs in worker thread)
|
|
69
81
|
/**
|
|
70
82
|
* Maximum time (ms) to wait for a single file to parse in the worker thread.
|
|
@@ -240,6 +252,33 @@ function buildDefaultIgnore(rootDir) {
|
|
|
240
252
|
function defaultsOnlyIgnore() {
|
|
241
253
|
return (0, ignore_1.default)().add(DEFAULT_IGNORE_PATTERNS);
|
|
242
254
|
}
|
|
255
|
+
/**
|
|
256
|
+
* Matcher for the project's `codegraph.json` `includeIgnored` patterns — the
|
|
257
|
+
* explicit opt-in to index embedded git repos living inside gitignored
|
|
258
|
+
* directories (#622, #699). Returns `null` when the project opted in nothing,
|
|
259
|
+
* which is the zero-config DEFAULT: `.gitignore` is then fully respected and a
|
|
260
|
+
* gitignored directory (even one holding nested repos) is never walked or
|
|
261
|
+
* indexed (#970, #976). Built once per scan/sync/scope operation from the scan
|
|
262
|
+
* root and threaded down — never global, so multi-project daemons stay isolated.
|
|
263
|
+
*/
|
|
264
|
+
function loadIncludeIgnoredMatcher(rootDir) {
|
|
265
|
+
const patterns = (0, project_config_1.loadIncludeIgnoredPatterns)(rootDir);
|
|
266
|
+
return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
|
|
267
|
+
}
|
|
268
|
+
/**
|
|
269
|
+
* `git ls-files --directory` collapses a wholly-untracked/ignored directory into
|
|
270
|
+
* one entry — and when the command's own cwd is such a directory (the indexed
|
|
271
|
+
* root is itself a git-ignored subdir of an enclosing repo), git emits the
|
|
272
|
+
* literal `./` meaning "this entire directory". That sentinel is not a real
|
|
273
|
+
* nested path: feeding it to the `ignore` matcher throws ("path should be a
|
|
274
|
+
* `path.relative()`d string, but got "./""), which used to abort `buildScopeIgnore`
|
|
275
|
+
* and so break the MCP daemon's watcher/auto-sync on connect; and joining it back
|
|
276
|
+
* onto `repoDir` would just re-point at the cwd. Drop it wherever we consume
|
|
277
|
+
* `--directory` output. (#936)
|
|
278
|
+
*/
|
|
279
|
+
function isWholeCwdEntry(entry) {
|
|
280
|
+
return entry === './' || entry === '.' || entry === '';
|
|
281
|
+
}
|
|
243
282
|
/**
|
|
244
283
|
* List the gitignored DIRECTORIES of a repo (collapsed, trailing-slash form),
|
|
245
284
|
* relative to `repoDir`. These are invisible to every other `git ls-files` /
|
|
@@ -250,7 +289,7 @@ function defaultsOnlyIgnore() {
|
|
|
250
289
|
function listIgnoredDirs(repoDir) {
|
|
251
290
|
try {
|
|
252
291
|
const out = (0, child_process_1.execFileSync)('git', ['ls-files', '-z', '-o', '-i', '--exclude-standard', '--directory'], { cwd: repoDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
|
|
253
|
-
return out.split('\0').filter((e) => e.endsWith('/'));
|
|
292
|
+
return out.split('\0').filter((e) => e.endsWith('/') && !isWholeCwdEntry(e));
|
|
254
293
|
}
|
|
255
294
|
catch {
|
|
256
295
|
return [];
|
|
@@ -268,8 +307,10 @@ const EMBEDDED_REPO_SEARCH_ENTRIES = 2000;
|
|
|
268
307
|
* - A `.git` **file** is a pointer (`gitdir: …`). A git **worktree** points into
|
|
269
308
|
* the host repo's own `.git/worktrees/<name>`, so it is a second working view
|
|
270
309
|
* of a repo CodeGraph already indexes — indexing it just duplicates the whole
|
|
271
|
-
* graph N times; skip it (#848). A **submodule** points into
|
|
272
|
-
*
|
|
310
|
+
* graph N times; skip it (#848). A **submodule worktree** points into
|
|
311
|
+
* `.git/modules/<module>/worktrees/<name>` — same duplication, so skip it too
|
|
312
|
+
* (#945). A **submodule** checkout points into `.git/modules/<module>` (no
|
|
313
|
+
* `worktrees/` segment) and is distinct code, so index it as before.
|
|
273
314
|
*
|
|
274
315
|
* Returns `'none'` when there is no `.git` entry here.
|
|
275
316
|
*/
|
|
@@ -287,9 +328,12 @@ function classifyGitDir(absDir) {
|
|
|
287
328
|
return 'none';
|
|
288
329
|
try {
|
|
289
330
|
const gitdir = fs.readFileSync(path.join(absDir, '.git'), 'utf8').match(/^gitdir:\s*(.+)$/m)?.[1]?.trim();
|
|
290
|
-
// A
|
|
331
|
+
// A worktree's gitdir lives under some repo's `.git/worktrees/<name>` —
|
|
332
|
+
// either the top-level repo's (`.git/worktrees/`) or, for a worktree of a
|
|
333
|
+
// submodule, that submodule's gitdir (`.git/modules/<module>/worktrees/`).
|
|
334
|
+
// The optional `modules/<module>` segment covers the submodule case (#945).
|
|
291
335
|
// Match both separators so a Windows-style pointer is recognized too.
|
|
292
|
-
if (gitdir && /(^|[\\/])\.git[\\/]worktrees[\\/]/.test(gitdir))
|
|
336
|
+
if (gitdir && /(^|[\\/])\.git[\\/](modules[\\/][^\\/]+[\\/])?worktrees[\\/]/.test(gitdir))
|
|
293
337
|
return 'worktree';
|
|
294
338
|
}
|
|
295
339
|
catch {
|
|
@@ -398,9 +442,12 @@ function buildScopeIgnore(rootDir, embeddedRoots) {
|
|
|
398
442
|
}
|
|
399
443
|
/**
|
|
400
444
|
* Standalone discovery of every embedded repo root under `rootDir` (relative,
|
|
401
|
-
* trailing-slashed) —
|
|
402
|
-
* (#514)
|
|
403
|
-
*
|
|
445
|
+
* trailing-slashed) — the untracked kind (#193) always, and the gitignored kind
|
|
446
|
+
* (#514) only for directories the project opted in via `codegraph.json`
|
|
447
|
+
* `includeIgnored` (#622, #699); otherwise `.gitignore` is respected and they
|
|
448
|
+
* are not discovered (#970, #976). Recursive (an embedded repo can embed further
|
|
449
|
+
* repos). Returns [] for non-git roots: the filesystem walk handles nested repos
|
|
450
|
+
* there already.
|
|
404
451
|
*/
|
|
405
452
|
function discoverEmbeddedRepoRoots(rootDir) {
|
|
406
453
|
try {
|
|
@@ -411,18 +458,19 @@ function discoverEmbeddedRepoRoots(rootDir) {
|
|
|
411
458
|
}
|
|
412
459
|
const out = [];
|
|
413
460
|
const defaults = defaultsOnlyIgnore();
|
|
461
|
+
const includeIgnored = loadIncludeIgnoredMatcher(rootDir);
|
|
414
462
|
const visit = (repoAbs, prefix) => {
|
|
415
463
|
const candidates = [];
|
|
416
464
|
try {
|
|
417
465
|
const o = (0, child_process_1.execFileSync)('git', ['ls-files', '-z', '-o', '--exclude-standard', '--directory'], { cwd: repoAbs, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
|
|
418
466
|
for (const e of o.split('\0')) {
|
|
419
|
-
if (e.endsWith('/') && !defaults.ignores(e)) {
|
|
467
|
+
if (e.endsWith('/') && !isWholeCwdEntry(e) && !defaults.ignores(e)) {
|
|
420
468
|
candidates.push(...findNestedGitRepos(path.join(repoAbs, e), e));
|
|
421
469
|
}
|
|
422
470
|
}
|
|
423
471
|
}
|
|
424
472
|
catch { /* untracked listing failed — ignored-side discovery still runs */ }
|
|
425
|
-
candidates.push(...findIgnoredEmbeddedRepos(repoAbs));
|
|
473
|
+
candidates.push(...findIgnoredEmbeddedRepos(repoAbs, includeIgnored, prefix));
|
|
426
474
|
for (const rel of candidates) {
|
|
427
475
|
const full = (0, utils_1.normalizePath)(prefix + rel);
|
|
428
476
|
out.push(full);
|
|
@@ -433,16 +481,30 @@ function discoverEmbeddedRepoRoots(rootDir) {
|
|
|
433
481
|
return out;
|
|
434
482
|
}
|
|
435
483
|
/**
|
|
436
|
-
* Discover embedded repos hidden by `repoDir`'s OWN
|
|
437
|
-
* gitignored directory
|
|
438
|
-
*
|
|
484
|
+
* Discover embedded repos hidden by `repoDir`'s OWN gitignore rules: for each
|
|
485
|
+
* gitignored directory, search for nested `.git` roots. Returns repo paths
|
|
486
|
+
* relative to `repoDir`, trailing-slashed.
|
|
487
|
+
*
|
|
488
|
+
* OPT-IN ONLY. Walking into a gitignored directory contradicts what every other
|
|
489
|
+
* tool (and CodeGraph's own `git ls-files` foundation) does — `.gitignore`
|
|
490
|
+
* excludes. So this returns `[]` unless the project opted the directory in via
|
|
491
|
+
* `codegraph.json` `includeIgnored`; without that, a gitignored dir — including
|
|
492
|
+
* a huge reference/data dir full of nested clones — is left untouched (#970,
|
|
493
|
+
* #976). When opted in, it restores the super-repo-of-clones behavior (#622,
|
|
494
|
+
* #699). `prefix` is the scan-root-relative path of `repoDir`, so a pattern like
|
|
495
|
+
* `services/` opts that whole subtree in at any recursion depth. Built-in
|
|
496
|
+
* default excludes (`node_modules`, …) are always skipped.
|
|
439
497
|
*/
|
|
440
|
-
function findIgnoredEmbeddedRepos(repoDir) {
|
|
498
|
+
function findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix) {
|
|
499
|
+
if (!includeIgnored)
|
|
500
|
+
return [];
|
|
441
501
|
const defaults = defaultsOnlyIgnore();
|
|
442
502
|
const repos = [];
|
|
443
503
|
for (const dir of listIgnoredDirs(repoDir)) {
|
|
444
504
|
if (defaults.ignores(dir))
|
|
445
505
|
continue;
|
|
506
|
+
if (!includeIgnored.ignores((0, utils_1.normalizePath)(prefix + dir)))
|
|
507
|
+
continue;
|
|
446
508
|
repos.push(...findNestedGitRepos(path.join(repoDir, dir), dir));
|
|
447
509
|
}
|
|
448
510
|
return repos;
|
|
@@ -458,12 +520,15 @@ function findIgnoredEmbeddedRepos(repoDir) {
|
|
|
458
520
|
* skips them entirely, and untracked output reports them only as an opaque
|
|
459
521
|
* "subdir/" entry (trailing slash) rather than expanding their files. Each
|
|
460
522
|
* embedded repo is its own git boundary, so we re-run `git ls-files` inside it.
|
|
461
|
-
* (See issue #193.) GITIGNORED embedded repos are invisible even to that
|
|
462
|
-
*
|
|
463
|
-
*
|
|
464
|
-
*
|
|
523
|
+
* (See issue #193.) GITIGNORED embedded repos are invisible even to that; they
|
|
524
|
+
* are discovered separately via `findIgnoredEmbeddedRepos` (#514) but ONLY for
|
|
525
|
+
* directories the project opted in through `codegraph.json` `includeIgnored`
|
|
526
|
+
* (`includeIgnored` here, threaded from the scan root) — by default `.gitignore`
|
|
527
|
+
* is respected and they stay out (#970, #976). Every embedded repo root (however
|
|
528
|
+
* found) is recorded in `embeddedRoots` so callers can exempt its files from the
|
|
529
|
+
* parent's own gitignore rules.
|
|
465
530
|
*/
|
|
466
|
-
function collectGitFiles(repoDir, prefix, files, embeddedRoots) {
|
|
531
|
+
function collectGitFiles(repoDir, prefix, files, embeddedRoots, includeIgnored = null) {
|
|
467
532
|
const gitOpts = { cwd: repoDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true };
|
|
468
533
|
// Tracked files. --recurse-submodules pulls in files from active submodules,
|
|
469
534
|
// which the index would otherwise represent only as a commit pointer.
|
|
@@ -497,18 +562,20 @@ function collectGitFiles(repoDir, prefix, files, embeddedRoots) {
|
|
|
497
562
|
// it's a duplicate working view of an already-indexed repo (#848).
|
|
498
563
|
if (classifyGitDir(childDir) === 'embedded' && !defaultsOnlyIgnore().ignores(rel)) {
|
|
499
564
|
embeddedRoots?.add((0, utils_1.normalizePath)(prefix + rel));
|
|
500
|
-
collectGitFiles(childDir, prefix + rel, files, embeddedRoots);
|
|
565
|
+
collectGitFiles(childDir, prefix + rel, files, embeddedRoots, includeIgnored);
|
|
501
566
|
}
|
|
502
567
|
continue;
|
|
503
568
|
}
|
|
504
569
|
files.add((0, utils_1.normalizePath)(prefix + rel));
|
|
505
570
|
}
|
|
506
571
|
// Embedded repos hidden by THIS repo's ignore rules (`/packages/` in a
|
|
507
|
-
// super-repo .gitignore) never appear in any listing above
|
|
508
|
-
//
|
|
509
|
-
|
|
572
|
+
// super-repo .gitignore) never appear in any listing above. By default they
|
|
573
|
+
// stay hidden — `.gitignore` is respected (#970, #976). They are recursed into
|
|
574
|
+
// only when the project opted the directory in via `codegraph.json`
|
|
575
|
+
// `includeIgnored` (#622, #699), which `findIgnoredEmbeddedRepos` enforces.
|
|
576
|
+
for (const rel of findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix)) {
|
|
510
577
|
embeddedRoots?.add((0, utils_1.normalizePath)(prefix + rel));
|
|
511
|
-
collectGitFiles(path.join(repoDir, rel), prefix + rel, files, embeddedRoots);
|
|
578
|
+
collectGitFiles(path.join(repoDir, rel), prefix + rel, files, embeddedRoots, includeIgnored);
|
|
512
579
|
}
|
|
513
580
|
}
|
|
514
581
|
/**
|
|
@@ -536,7 +603,7 @@ function getGitVisibleFiles(rootDir) {
|
|
|
536
603
|
}
|
|
537
604
|
const files = new Set();
|
|
538
605
|
const embeddedRoots = new Set();
|
|
539
|
-
collectGitFiles(rootDir, '', files, embeddedRoots);
|
|
606
|
+
collectGitFiles(rootDir, '', files, embeddedRoots, loadIncludeIgnoredMatcher(rootDir));
|
|
540
607
|
// Apply built-in default ignores uniformly — to tracked files too, since
|
|
541
608
|
// committing a dependency/build dir doesn't make it project code. A
|
|
542
609
|
// `.gitignore` negation (e.g. `!vendor/`) is the explicit opt-in. (issue #407)
|
|
@@ -554,26 +621,42 @@ function getGitVisibleFiles(rootDir) {
|
|
|
554
621
|
* Use `git status` to detect changed files instead of scanning every file.
|
|
555
622
|
* Returns null on failure so callers fall back to full scan.
|
|
556
623
|
*
|
|
557
|
-
* Recurses into embedded repos —
|
|
558
|
-
*
|
|
559
|
-
* kind (#514: they never appear in the parent's status at all)
|
|
560
|
-
*
|
|
561
|
-
*
|
|
562
|
-
*
|
|
563
|
-
*
|
|
624
|
+
* Recurses into embedded repos — the untracked kind (#193: the parent's status
|
|
625
|
+
* collapses them to an opaque `?? subdir/` entry) always, and the gitignored
|
|
626
|
+
* kind (#514: they never appear in the parent's status at all) only for
|
|
627
|
+
* directories opted in via `codegraph.json` `includeIgnored` (#622, #699) —
|
|
628
|
+
* running `git status` inside each, so changes in a multi-repo workspace sync
|
|
629
|
+
* without a full rescan. By default a gitignored dir is left alone, matching the
|
|
630
|
+
* full-index scan (#970, #976). Deleting an ENTIRE embedded repo dir is the one
|
|
631
|
+
* case this cannot see (the child status that would report the deletions is gone
|
|
632
|
+
* with it); a full `codegraph index` reconciles that.
|
|
564
633
|
*/
|
|
565
634
|
function getGitChangedFiles(rootDir) {
|
|
566
635
|
try {
|
|
567
636
|
const changes = { modified: [], added: [], deleted: [] };
|
|
568
|
-
|
|
637
|
+
// Custom extension → language overrides from the project's codegraph.json,
|
|
638
|
+
// so change detection sees the same custom-extension files the full index does.
|
|
639
|
+
const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
|
|
640
|
+
collectGitStatus(rootDir, '', changes, overrides, loadIncludeIgnoredMatcher(rootDir));
|
|
569
641
|
return changes;
|
|
570
642
|
}
|
|
571
643
|
catch {
|
|
572
644
|
return null;
|
|
573
645
|
}
|
|
574
646
|
}
|
|
575
|
-
function collectGitStatus(repoDir, prefix, out) {
|
|
647
|
+
function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null) {
|
|
576
648
|
const output = (0, child_process_1.execFileSync)('git', ['status', '--porcelain', '--no-renames'], { cwd: repoDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
|
|
649
|
+
// This repo's own ignore rules — built-in defaults (#407) plus its .gitignore.
|
|
650
|
+
// Change detection must exclude the SAME files the full index does, but git
|
|
651
|
+
// status hides neither: it ignores nothing for *tracked* paths, and the
|
|
652
|
+
// built-in defaults aren't gitignore at all. Without this filter a committed
|
|
653
|
+
// vendor/ dir, or a tracked file under a .gitignored dir, surfaces here as a
|
|
654
|
+
// change — so `codegraph status` (which reads getChangedFiles) reports a
|
|
655
|
+
// pending edit the full index never tracks and `sync` never clears. Matching
|
|
656
|
+
// repo-relative `rel` at each recursion level mirrors getGitVisibleFiles'
|
|
657
|
+
// ScopeIgnore: every embedded repo is judged by ITS OWN rules, never the
|
|
658
|
+
// parent's. (#766)
|
|
659
|
+
const ig = buildDefaultIgnore(repoDir);
|
|
577
660
|
const untrackedDirs = [];
|
|
578
661
|
for (const line of output.split('\n')) {
|
|
579
662
|
if (line.length < 4)
|
|
@@ -587,29 +670,38 @@ function collectGitStatus(repoDir, prefix, out) {
|
|
|
587
670
|
continue;
|
|
588
671
|
}
|
|
589
672
|
const filePath = (0, utils_1.normalizePath)(prefix + rel);
|
|
590
|
-
|
|
591
|
-
|
|
673
|
+
if (!(0, grammars_1.isSourceFile)(filePath, overrides))
|
|
674
|
+
continue;
|
|
675
|
+
if (statusCode.includes('D')) {
|
|
676
|
+
// Deletions stay unfiltered: getChangedFiles acts on one only when the
|
|
677
|
+
// path is already tracked in the DB, where removal is always correct — and
|
|
678
|
+
// that lets a newly-excluded dir's stale rows clean themselves up. (#766)
|
|
679
|
+
out.deleted.push(filePath);
|
|
680
|
+
continue;
|
|
681
|
+
}
|
|
682
|
+
// Added (`??`) / modified files inside an excluded dir must not enter the
|
|
683
|
+
// index — match against the repo-relative path, same as the full scan. (#766)
|
|
684
|
+
if (ig.ignores(rel))
|
|
592
685
|
continue;
|
|
593
686
|
if (statusCode === '??') {
|
|
594
687
|
out.added.push(filePath);
|
|
595
688
|
}
|
|
596
|
-
else if (statusCode.includes('D')) {
|
|
597
|
-
out.deleted.push(filePath);
|
|
598
|
-
}
|
|
599
689
|
else {
|
|
600
690
|
// M, MM, AM, A (staged), etc. — treat as modified
|
|
601
691
|
out.modified.push(filePath);
|
|
602
692
|
}
|
|
603
693
|
}
|
|
604
694
|
// Recurse embedded repos found under untracked dirs (at the dir itself or
|
|
605
|
-
// nested deeper)
|
|
695
|
+
// nested deeper). Gitignored dirs are walked only for the directories the
|
|
696
|
+
// project opted in via `includeIgnored`; by default `.gitignore` is respected
|
|
697
|
+
// and they are left alone (#970, #976), mirroring the full-index scan.
|
|
606
698
|
for (const rel of untrackedDirs) {
|
|
607
699
|
for (const repoRel of findNestedGitRepos(path.join(repoDir, rel), rel)) {
|
|
608
|
-
collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out);
|
|
700
|
+
collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides, includeIgnored);
|
|
609
701
|
}
|
|
610
702
|
}
|
|
611
|
-
for (const rel of findIgnoredEmbeddedRepos(repoDir)) {
|
|
612
|
-
collectGitStatus(path.join(repoDir, rel), prefix + rel, out);
|
|
703
|
+
for (const rel of findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix)) {
|
|
704
|
+
collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides, includeIgnored);
|
|
613
705
|
}
|
|
614
706
|
}
|
|
615
707
|
/**
|
|
@@ -620,13 +712,15 @@ function collectGitStatus(repoDir, prefix, out) {
|
|
|
620
712
|
* projects, falls back to a filesystem walk that parses .gitignore itself.
|
|
621
713
|
*/
|
|
622
714
|
function scanDirectory(rootDir, onProgress) {
|
|
715
|
+
// Custom extension → language overrides from the project's codegraph.json.
|
|
716
|
+
const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
|
|
623
717
|
// Fast path: use git to get all visible files (respects .gitignore everywhere)
|
|
624
718
|
const gitFiles = getGitVisibleFiles(rootDir);
|
|
625
719
|
if (gitFiles) {
|
|
626
720
|
const files = [];
|
|
627
721
|
let count = 0;
|
|
628
722
|
for (const filePath of gitFiles) {
|
|
629
|
-
if ((0, grammars_1.isSourceFile)(filePath)) {
|
|
723
|
+
if ((0, grammars_1.isSourceFile)(filePath, overrides)) {
|
|
630
724
|
files.push(filePath);
|
|
631
725
|
count++;
|
|
632
726
|
onProgress?.(count, filePath);
|
|
@@ -642,12 +736,14 @@ function scanDirectory(rootDir, onProgress) {
|
|
|
642
736
|
* allowing worker threads to receive and render progress messages.
|
|
643
737
|
*/
|
|
644
738
|
async function scanDirectoryAsync(rootDir, onProgress) {
|
|
739
|
+
// Custom extension → language overrides from the project's codegraph.json.
|
|
740
|
+
const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
|
|
645
741
|
const gitFiles = getGitVisibleFiles(rootDir);
|
|
646
742
|
if (gitFiles) {
|
|
647
743
|
const files = [];
|
|
648
744
|
let count = 0;
|
|
649
745
|
for (const filePath of gitFiles) {
|
|
650
|
-
if ((0, grammars_1.isSourceFile)(filePath)) {
|
|
746
|
+
if ((0, grammars_1.isSourceFile)(filePath, overrides)) {
|
|
651
747
|
files.push(filePath);
|
|
652
748
|
count++;
|
|
653
749
|
onProgress?.(count, filePath);
|
|
@@ -668,6 +764,8 @@ function scanDirectoryWalk(rootDir, onProgress) {
|
|
|
668
764
|
const files = [];
|
|
669
765
|
let count = 0;
|
|
670
766
|
const visitedDirs = new Set();
|
|
767
|
+
// Custom extension → language overrides from the project's codegraph.json.
|
|
768
|
+
const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
|
|
671
769
|
const loadIgnore = (dir) => {
|
|
672
770
|
const giPath = path.join(dir, '.gitignore');
|
|
673
771
|
if (!fs.existsSync(giPath))
|
|
@@ -734,7 +832,7 @@ function scanDirectoryWalk(rootDir, onProgress) {
|
|
|
734
832
|
}
|
|
735
833
|
}
|
|
736
834
|
else if (stat.isFile()) {
|
|
737
|
-
if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath)) {
|
|
835
|
+
if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath, overrides)) {
|
|
738
836
|
files.push(relativePath);
|
|
739
837
|
count++;
|
|
740
838
|
onProgress?.(count, relativePath);
|
|
@@ -752,7 +850,7 @@ function scanDirectoryWalk(rootDir, onProgress) {
|
|
|
752
850
|
}
|
|
753
851
|
}
|
|
754
852
|
else if (entry.isFile()) {
|
|
755
|
-
if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath)) {
|
|
853
|
+
if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath, overrides)) {
|
|
756
854
|
files.push(relativePath);
|
|
757
855
|
count++;
|
|
758
856
|
onProgress?.(count, relativePath);
|
|
@@ -866,6 +964,10 @@ class ExtractionOrchestrator {
|
|
|
866
964
|
let filesErrored = 0;
|
|
867
965
|
let totalNodes = 0;
|
|
868
966
|
let totalEdges = 0;
|
|
967
|
+
// Custom extension → language overrides from the project's codegraph.json.
|
|
968
|
+
// Threaded into language detection so custom-extension files load the right
|
|
969
|
+
// grammar and store under the mapped language.
|
|
970
|
+
const overrides = (0, project_config_1.loadExtensionOverrides)(this.rootDir);
|
|
869
971
|
const log = verbose
|
|
870
972
|
? (msg) => { console.log(`[worker] ${msg}`); }
|
|
871
973
|
: (_msg) => { };
|
|
@@ -915,7 +1017,7 @@ class ExtractionOrchestrator {
|
|
|
915
1017
|
});
|
|
916
1018
|
await new Promise(resolve => setImmediate(resolve));
|
|
917
1019
|
// Detect needed languages and load grammars in the parse worker
|
|
918
|
-
const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f)))];
|
|
1020
|
+
const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f, undefined, overrides)))];
|
|
919
1021
|
// .h files default to 'c' but may be C++ — ensure cpp grammar is loaded when c is needed
|
|
920
1022
|
if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
|
|
921
1023
|
neededLanguages.push('cpp');
|
|
@@ -1014,9 +1116,13 @@ class ExtractionOrchestrator {
|
|
|
1014
1116
|
w.terminate().catch(() => { });
|
|
1015
1117
|
}
|
|
1016
1118
|
async function requestParse(filePath, content) {
|
|
1119
|
+
// Resolve the language on the main thread (where the project's
|
|
1120
|
+
// codegraph.json overrides are loaded) and hand it to the worker, so the
|
|
1121
|
+
// worker never needs the override map itself.
|
|
1122
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1017
1123
|
if (!WorkerClass) {
|
|
1018
1124
|
// In-process fallback
|
|
1019
|
-
return (0, tree_sitter_1.extractFromSource)(filePath, content,
|
|
1125
|
+
return (0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames);
|
|
1020
1126
|
}
|
|
1021
1127
|
// Recycle the worker before the next parse if we've hit the threshold.
|
|
1022
1128
|
// This destroys the WASM linear memory (which can grow but never shrink)
|
|
@@ -1041,7 +1147,7 @@ class ExtractionOrchestrator {
|
|
|
1041
1147
|
worker.terminate().catch(() => { });
|
|
1042
1148
|
}, timeoutMs);
|
|
1043
1149
|
pendingParses.set(id, { resolve, reject, timer });
|
|
1044
|
-
worker.postMessage({ type: 'parse', id, filePath, content, frameworkNames });
|
|
1150
|
+
worker.postMessage({ type: 'parse', id, filePath, content, frameworkNames, language });
|
|
1045
1151
|
});
|
|
1046
1152
|
}
|
|
1047
1153
|
for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
|
|
@@ -1063,7 +1169,10 @@ class ExtractionOrchestrator {
|
|
|
1063
1169
|
// Read files in parallel (with path validation before any I/O)
|
|
1064
1170
|
const fileContents = await Promise.all(batch.map(async (fp) => {
|
|
1065
1171
|
try {
|
|
1066
|
-
|
|
1172
|
+
// Indexing read: follow in-root symlinks the directory walk already
|
|
1173
|
+
// descended into (the `../` guard still applies) so files reached
|
|
1174
|
+
// via an in-root symlink-to-outside still index (#935).
|
|
1175
|
+
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, fp, { allowSymlinkEscape: true });
|
|
1067
1176
|
if (!fullPath) {
|
|
1068
1177
|
(0, errors_1.logWarn)('Path traversal blocked in batch reader', { filePath: fp });
|
|
1069
1178
|
return { filePath: fp, content: null, stats: null, error: new Error('Path traversal blocked') };
|
|
@@ -1147,7 +1256,7 @@ class ExtractionOrchestrator {
|
|
|
1147
1256
|
processed++;
|
|
1148
1257
|
// Store in database on main thread (SQLite is not thread-safe)
|
|
1149
1258
|
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
1150
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content);
|
|
1259
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1151
1260
|
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
1152
1261
|
}
|
|
1153
1262
|
if (result.errors.length > 0) {
|
|
@@ -1169,7 +1278,7 @@ class ExtractionOrchestrator {
|
|
|
1169
1278
|
// Files with no symbols but no errors (yaml, twig, properties) are
|
|
1170
1279
|
// tracked at the file level — count them as indexed so the CLI
|
|
1171
1280
|
// doesn't misleadingly report "No files found to index".
|
|
1172
|
-
const lang = (0, grammars_1.detectLanguage)(filePath, content);
|
|
1281
|
+
const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1173
1282
|
if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
|
|
1174
1283
|
filesIndexed++;
|
|
1175
1284
|
}
|
|
@@ -1222,7 +1331,7 @@ class ExtractionOrchestrator {
|
|
|
1222
1331
|
continue;
|
|
1223
1332
|
}
|
|
1224
1333
|
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
1225
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content);
|
|
1334
|
+
const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
|
|
1226
1335
|
const stats = await fsp.stat(path.join(this.rootDir, filePath));
|
|
1227
1336
|
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
1228
1337
|
const idx = errors.indexOf(errEntry);
|
|
@@ -1270,7 +1379,7 @@ class ExtractionOrchestrator {
|
|
|
1270
1379
|
continue;
|
|
1271
1380
|
}
|
|
1272
1381
|
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
1273
|
-
const language = (0, grammars_1.detectLanguage)(filePath, fullContent);
|
|
1382
|
+
const language = (0, grammars_1.detectLanguage)(filePath, fullContent, overrides);
|
|
1274
1383
|
const stats = await fsp.stat(path.join(this.rootDir, filePath));
|
|
1275
1384
|
this.storeExtractionResult(filePath, fullContent, language, stats, result);
|
|
1276
1385
|
const idx = errors.indexOf(errEntry);
|
|
@@ -1350,7 +1459,8 @@ class ExtractionOrchestrator {
|
|
|
1350
1459
|
* Index a single file
|
|
1351
1460
|
*/
|
|
1352
1461
|
async indexFile(relativePath) {
|
|
1353
|
-
|
|
1462
|
+
// Indexing read: follow in-root symlinks (the `../` guard still applies), #935.
|
|
1463
|
+
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, relativePath, { allowSymlinkEscape: true });
|
|
1354
1464
|
if (!fullPath) {
|
|
1355
1465
|
return {
|
|
1356
1466
|
nodes: [],
|
|
@@ -1390,8 +1500,8 @@ class ExtractionOrchestrator {
|
|
|
1390
1500
|
* Used by the parallel batch reader to avoid redundant file I/O.
|
|
1391
1501
|
*/
|
|
1392
1502
|
async indexFileWithContent(relativePath, content, stats) {
|
|
1393
|
-
// Prevent
|
|
1394
|
-
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, relativePath);
|
|
1503
|
+
// Prevent `../` traversal; follow in-root symlinks like the directory walk (#935).
|
|
1504
|
+
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, relativePath, { allowSymlinkEscape: true });
|
|
1395
1505
|
if (!fullPath) {
|
|
1396
1506
|
(0, errors_1.logWarn)('Path traversal blocked in indexFileWithContent', { relativePath });
|
|
1397
1507
|
return {
|
|
@@ -1419,8 +1529,8 @@ class ExtractionOrchestrator {
|
|
|
1419
1529
|
durationMs: 0,
|
|
1420
1530
|
};
|
|
1421
1531
|
}
|
|
1422
|
-
// Detect language
|
|
1423
|
-
const language = (0, grammars_1.detectLanguage)(relativePath, content);
|
|
1532
|
+
// Detect language (honoring the project's codegraph.json extension overrides)
|
|
1533
|
+
const language = (0, grammars_1.detectLanguage)(relativePath, content, (0, project_config_1.loadExtensionOverrides)(this.rootDir));
|
|
1424
1534
|
if (!(0, grammars_1.isLanguageSupported)(language)) {
|
|
1425
1535
|
return {
|
|
1426
1536
|
nodes: [],
|
|
@@ -1451,6 +1561,25 @@ class ExtractionOrchestrator {
|
|
|
1451
1561
|
if (existingFile && existingFile.contentHash === contentHash) {
|
|
1452
1562
|
return; // No changes
|
|
1453
1563
|
}
|
|
1564
|
+
// Snapshot incoming cross-file edges BEFORE deleting this file's nodes.
|
|
1565
|
+
// `deleteFile` cascades to delete every edge whose source OR target is a
|
|
1566
|
+
// node in this file (edges.FK ... ON DELETE CASCADE). Edges whose SOURCE is
|
|
1567
|
+
// in this file are re-emitted by the extractor below, but edges whose SOURCE
|
|
1568
|
+
// is in a *different* (unchanged) file are not — they would be silently
|
|
1569
|
+
// dropped, which is issue #899: re-indexing a callee file severs `calls`/
|
|
1570
|
+
// `references` edges from callers that import it via module-attribute
|
|
1571
|
+
// access (`pkg.mod.fn(...)`).
|
|
1572
|
+
//
|
|
1573
|
+
// We snapshot the edge plus the target node's (name, kind) so we can
|
|
1574
|
+
// re-resolve to the re-indexed target's NEW id. Node ids are
|
|
1575
|
+
// `sha256(filePath:kind:name:line)`, so any line shift in the callee file
|
|
1576
|
+
// (e.g. a docstring-only edit above the symbol) changes every target id and
|
|
1577
|
+
// a naive re-insert by old id would silently drop every edge. Matching by
|
|
1578
|
+
// (filePath, kind, name) is stable across line shifts; if the symbol was
|
|
1579
|
+
// renamed/removed, no match is found and the edge stays dropped (correct).
|
|
1580
|
+
const crossFileIncomingEdges = existingFile
|
|
1581
|
+
? this.queries.getCrossFileIncomingEdgesWithTarget(filePath)
|
|
1582
|
+
: [];
|
|
1454
1583
|
// Delete existing data for this file
|
|
1455
1584
|
if (existingFile) {
|
|
1456
1585
|
this.queries.deleteFile(filePath);
|
|
@@ -1471,6 +1600,31 @@ class ExtractionOrchestrator {
|
|
|
1471
1600
|
this.queries.insertEdges(validEdges);
|
|
1472
1601
|
}
|
|
1473
1602
|
}
|
|
1603
|
+
// Re-insert cross-file incoming edges snapshotted before the delete,
|
|
1604
|
+
// re-resolving each edge's target to the re-indexed node's new id by
|
|
1605
|
+
// (filePath, kind, name). Node ids include the source line, so any line
|
|
1606
|
+
// shift in the callee file (e.g. a docstring-only edit above the symbol)
|
|
1607
|
+
// changes every target id and a naive re-insert by old id would drop them
|
|
1608
|
+
// all. `insertEdges` still filters to endpoints that exist, so edges whose
|
|
1609
|
+
// caller (source) was deleted, or whose callee (target) was renamed/removed
|
|
1610
|
+
// during the re-index (no match in `newNodesByKindName`), are dropped. This
|
|
1611
|
+
// closes the #899 edge-drop on `sync`.
|
|
1612
|
+
if (crossFileIncomingEdges.length > 0) {
|
|
1613
|
+
const newNodesByKindName = new Map();
|
|
1614
|
+
for (const n of validNodes) {
|
|
1615
|
+
newNodesByKindName.set(`${n.kind}\0${n.name}`, n.id);
|
|
1616
|
+
}
|
|
1617
|
+
const reinserted = [];
|
|
1618
|
+
for (const e of crossFileIncomingEdges) {
|
|
1619
|
+
const newTargetId = newNodesByKindName.get(`${e.targetKind}\0${e.targetName}`);
|
|
1620
|
+
if (newTargetId) {
|
|
1621
|
+
reinserted.push({ source: e.source, target: newTargetId, kind: e.kind, metadata: e.metadata, line: e.line, column: e.column, provenance: e.provenance });
|
|
1622
|
+
}
|
|
1623
|
+
}
|
|
1624
|
+
if (reinserted.length > 0) {
|
|
1625
|
+
this.queries.insertEdges(reinserted);
|
|
1626
|
+
}
|
|
1627
|
+
}
|
|
1474
1628
|
// Insert unresolved references in batch with denormalized filePath/language
|
|
1475
1629
|
if (result.unresolvedReferences.length > 0) {
|
|
1476
1630
|
const insertedIds = new Set(validNodes.map((n) => n.id));
|
|
@@ -1530,7 +1684,7 @@ class ExtractionOrchestrator {
|
|
|
1530
1684
|
// whether or not the project uses git, and crucially also catches committed
|
|
1531
1685
|
// changes from `git pull`/`checkout`/`merge`/`rebase` — which `git status`
|
|
1532
1686
|
// cannot see, because the working tree is clean afterward.
|
|
1533
|
-
const currentFiles =
|
|
1687
|
+
const currentFiles = await scanDirectoryAsync(this.rootDir);
|
|
1534
1688
|
filesChecked = currentFiles.length;
|
|
1535
1689
|
const currentSet = new Set(currentFiles);
|
|
1536
1690
|
const trackedFiles = this.queries.getAllFiles();
|
|
@@ -1541,14 +1695,26 @@ class ExtractionOrchestrator {
|
|
|
1541
1695
|
// Removals: tracked in the DB but no longer a present source file. Check the
|
|
1542
1696
|
// filesystem directly — `scanDirectory` (via `git ls-files`) still lists a
|
|
1543
1697
|
// file deleted from disk but not yet staged, so set membership alone misses it.
|
|
1698
|
+
// `reconcileChecks` drives the cooperative yield shared with the adds/mods loop
|
|
1699
|
+
// below (see SYNC_RECONCILE_YIELD_INTERVAL / issue #905).
|
|
1700
|
+
let reconcileChecks = 0;
|
|
1544
1701
|
for (const tracked of trackedFiles) {
|
|
1545
1702
|
if (!currentSet.has(tracked.path) || !fs.existsSync(path.join(this.rootDir, tracked.path))) {
|
|
1546
1703
|
this.queries.deleteFile(tracked.path);
|
|
1547
1704
|
filesRemoved++;
|
|
1548
1705
|
}
|
|
1706
|
+
if (++reconcileChecks % SYNC_RECONCILE_YIELD_INTERVAL === 0) {
|
|
1707
|
+
await new Promise((resolve) => setImmediate(resolve));
|
|
1708
|
+
}
|
|
1549
1709
|
}
|
|
1550
1710
|
// Adds / modifications.
|
|
1551
1711
|
for (const filePath of currentFiles) {
|
|
1712
|
+
// Same cooperative yield as the removals loop — this is the other O(files)
|
|
1713
|
+
// synchronous-stat loop that wedges the main thread on a large repo (#905).
|
|
1714
|
+
// Yield at the top of the body so the `continue` fast-paths below still hit it.
|
|
1715
|
+
if (++reconcileChecks % SYNC_RECONCILE_YIELD_INTERVAL === 0) {
|
|
1716
|
+
await new Promise((resolve) => setImmediate(resolve));
|
|
1717
|
+
}
|
|
1552
1718
|
const fullPath = path.join(this.rootDir, filePath);
|
|
1553
1719
|
const tracked = trackedMap.get(filePath);
|
|
1554
1720
|
// Cheap pre-filter: an already-indexed file whose size AND mtime both match
|
|
@@ -1591,7 +1757,8 @@ class ExtractionOrchestrator {
|
|
|
1591
1757
|
}
|
|
1592
1758
|
// Load only grammars needed for changed files
|
|
1593
1759
|
if (filesToIndex.length > 0) {
|
|
1594
|
-
const
|
|
1760
|
+
const overrides = (0, project_config_1.loadExtensionOverrides)(this.rootDir);
|
|
1761
|
+
const neededLanguages = [...new Set(filesToIndex.map((f) => (0, grammars_1.detectLanguage)(f, undefined, overrides)))];
|
|
1595
1762
|
// .h files default to 'c' but may be C++ — ensure cpp grammar is loaded
|
|
1596
1763
|
if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
|
|
1597
1764
|
neededLanguages.push('cpp');
|