gitnexus 1.6.6-rc.25 → 1.6.6-rc.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/dist/cli/analyze.d.ts +2 -0
- package/dist/cli/analyze.js +75 -2
- package/dist/cli/index.js +6 -0
- package/dist/cli/wiki.js +19 -0
- package/dist/core/group/extractors/http-route-extractor.d.ts +1 -0
- package/dist/core/group/extractors/http-route-extractor.js +23 -12
- package/dist/core/ingestion/languages/typescript/captures.js +59 -10
- package/dist/core/ingestion/parsing-processor.js +61 -7
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +312 -74
- package/dist/core/ingestion/pipeline.d.ts +50 -0
- package/dist/core/ingestion/workers/parse-worker.js +54 -11
- package/dist/core/ingestion/workers/quarantine.d.ts +45 -0
- package/dist/core/ingestion/workers/quarantine.js +38 -0
- package/dist/core/ingestion/workers/worker-pool.d.ts +184 -2
- package/dist/core/ingestion/workers/worker-pool.js +814 -72
- package/dist/core/run-analyze.d.ts +8 -0
- package/dist/core/run-analyze.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -359,6 +359,16 @@ npx gitnexus analyze
|
|
|
359
359
|
|
|
360
360
|
For repositories with very large source files, `GITNEXUS_WORKER_SUB_BATCH_MAX_BYTES` controls the worker job byte budget. The default is **8388608 bytes (8 MB)**.
|
|
361
361
|
|
|
362
|
+
### Worker pool resilience tuning
|
|
363
|
+
|
|
364
|
+
Three environment variables expose the pool's resilience layers (respawn budget, cumulative-timeout cap, circuit breaker). The defaults are tuned for typical repositories; raise them when an analyze run legitimately needs more retries, or lower them to fail fast on a known-bad shape.
|
|
365
|
+
|
|
366
|
+
| Variable | Default | Effect |
|
|
367
|
+
| ------------------------------------------------- | ------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
|
|
368
|
+
| `GITNEXUS_WORKER_MAX_RESPAWNS_PER_SLOT` | `3` | Max replacement spawns per slot before the slot is dropped from the active rotation. |
|
|
369
|
+
| `GITNEXUS_WORKER_MAX_CUMULATIVE_TIMEOUT_MS` | `5 × subBatchTimeoutMs` | Total retry wall-time budget per job before quarantining. Bounds exponentially-growing retry waits. |
|
|
370
|
+
| `GITNEXUS_WORKER_CONSECUTIVE_FAILURE_THRESHOLD` | `max(3, poolSize)` | Per-slot consecutive deaths before the pool's circuit breaker trips. After tripping, dispatches require a fresh pool. |
|
|
371
|
+
|
|
362
372
|
## Privacy
|
|
363
373
|
|
|
364
374
|
- All processing happens locally on your machine
|
package/dist/cli/analyze.d.ts
CHANGED
|
@@ -69,6 +69,8 @@ export interface AnalyzeOptions {
|
|
|
69
69
|
maxFileSize?: string;
|
|
70
70
|
/** Override worker sub-batch idle timeout in seconds. */
|
|
71
71
|
workerTimeout?: string;
|
|
72
|
+
/** Parse worker pool size; 0 disables workers (sequential fallback). */
|
|
73
|
+
workers?: string;
|
|
72
74
|
embeddingThreads?: string;
|
|
73
75
|
embeddingBatchSize?: string;
|
|
74
76
|
embeddingSubBatchSize?: string;
|
package/dist/cli/analyze.js
CHANGED
|
@@ -133,10 +133,46 @@ function ensureHeap() {
|
|
|
133
133
|
` (Windows: set NODE_OPTIONS=--max-old-space-size=24576 && gitnexus analyze [your-args])\n` +
|
|
134
134
|
` If this persists, it may be a native crash unrelated to heap size.\n`, { recoveryHint: 'heap-oom-respawn' });
|
|
135
135
|
}
|
|
136
|
-
|
|
136
|
+
const status = typeof e === 'object' && e !== null && 'status' in e && typeof e.status === 'number'
|
|
137
|
+
? e.status
|
|
138
|
+
: 1;
|
|
139
|
+
process.exitCode = status;
|
|
137
140
|
}
|
|
138
141
|
return true;
|
|
139
142
|
}
|
|
143
|
+
/**
|
|
144
|
+
* GITNEXUS_* env vars that `analyzeCommand` writes for backward-compatible
|
|
145
|
+
* downstream consumption. Snapshotted at function entry and restored in the
|
|
146
|
+
* finally block so that programmatic callers (tests, long-running hosts)
|
|
147
|
+
* don't see leaked state across invocations. `GITNEXUS_WORKER_POOL_SIZE` is
|
|
148
|
+
* NOT in this list: that knob is threaded through `runFullAnalysis` options
|
|
149
|
+
* (see `workerPoolSize` plumbing) so the CLI never has to mutate `process.env`
|
|
150
|
+
* for it in the first place.
|
|
151
|
+
*/
|
|
152
|
+
const ANALYZE_CLI_ENV_KEYS = [
|
|
153
|
+
'GITNEXUS_VERBOSE',
|
|
154
|
+
'GITNEXUS_MAX_FILE_SIZE',
|
|
155
|
+
'GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS',
|
|
156
|
+
'GITNEXUS_EMBEDDING_THREADS',
|
|
157
|
+
'GITNEXUS_EMBEDDING_BATCH_SIZE',
|
|
158
|
+
'GITNEXUS_EMBEDDING_SUB_BATCH_SIZE',
|
|
159
|
+
'GITNEXUS_EMBEDDING_DEVICE',
|
|
160
|
+
];
|
|
161
|
+
/**
 * Capture the current value of every key in `ANALYZE_CLI_ENV_KEYS`.
 *
 * Keys that are not set in `process.env` are still recorded, with the value
 * `undefined`, so the restore step can tell "was unset" apart from "was set".
 *
 * @returns {Record<string, string | undefined>} key → value at call time.
 */
const snapshotAnalyzeEnv = () => Object.fromEntries(ANALYZE_CLI_ENV_KEYS.map((key) => [key, process.env[key]]));
|
|
167
|
+
/**
 * Put every key in `ANALYZE_CLI_ENV_KEYS` back to the value recorded in `snap`.
 *
 * A key recorded as `undefined` is deleted rather than assigned: assigning
 * `undefined` to a `process.env` entry would store the string "undefined".
 *
 * @param {Record<string, string | undefined>} snap snapshot taken at entry.
 */
const restoreAnalyzeEnv = (snap) => {
    ANALYZE_CLI_ENV_KEYS.forEach((key) => {
        const saved = snap[key];
        if (saved !== undefined) {
            process.env[key] = saved;
            return;
        }
        delete process.env[key];
    });
};
|
|
140
176
|
/**
|
|
141
177
|
* Whether the post-index skill step should run.
|
|
142
178
|
*
|
|
@@ -159,6 +195,22 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
159
195
|
// async error that escapes the try/catch below (#1169) surfaces with
|
|
160
196
|
// a stack trace and a non-zero exit code instead of a silent exit 0.
|
|
161
197
|
installFatalHandlers();
|
|
198
|
+
// Snapshot the GITNEXUS_* env vars that the impl writes for downstream
|
|
199
|
+
// consumption, so they don't leak across `analyzeCommand` invocations in
|
|
200
|
+
// programmatic callers (tests, long-running hosts). `process.exit(0)` on
|
|
201
|
+
// the success path bypasses `finally` — intentional: when the process is
|
|
202
|
+
// exiting, restoration is moot. For early-return paths (validation
|
|
203
|
+
// errors) and the alreadyUpToDate fast path the finally restores the
|
|
204
|
+
// pre-call values.
|
|
205
|
+
const envSnap = snapshotAnalyzeEnv();
|
|
206
|
+
try {
|
|
207
|
+
await analyzeCommandImpl(inputPath, options);
|
|
208
|
+
}
|
|
209
|
+
finally {
|
|
210
|
+
restoreAnalyzeEnv(envSnap);
|
|
211
|
+
}
|
|
212
|
+
};
|
|
213
|
+
const analyzeCommandImpl = async (inputPath, options) => {
|
|
162
214
|
if (options?.verbose) {
|
|
163
215
|
process.env.GITNEXUS_VERBOSE = '1';
|
|
164
216
|
}
|
|
@@ -174,6 +226,23 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
174
226
|
}
|
|
175
227
|
process.env.GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS = String(Math.round(workerTimeoutSeconds * 1000));
|
|
176
228
|
}
|
|
229
|
+
// `--workers` is threaded through `runFullAnalysis` options → PipelineOptions
|
|
230
|
+
// → createWorkerPool, intentionally bypassing the GITNEXUS_WORKER_POOL_SIZE
|
|
231
|
+
// env channel so this CLI surface never mutates `process.env` for pool size.
|
|
232
|
+
// Tests can therefore re-invoke analyzeCommand with different --workers
|
|
233
|
+
// values back-to-back and observe the value they passed, not whatever the
|
|
234
|
+
// previous call leaked.
|
|
235
|
+
let workerPoolSize;
|
|
236
|
+
if (options?.workers !== undefined) {
|
|
237
|
+
const parsedWorkers = Number(options.workers);
|
|
238
|
+
if (!Number.isInteger(parsedWorkers) || parsedWorkers < 0) {
|
|
239
|
+
cliError(' --workers must be a non-negative integer. ' +
|
|
240
|
+
'Pass 0 to disable the worker pool (sequential fallback).\n');
|
|
241
|
+
process.exitCode = 1;
|
|
242
|
+
return;
|
|
243
|
+
}
|
|
244
|
+
workerPoolSize = parsedWorkers;
|
|
245
|
+
}
|
|
177
246
|
// Parse `--embeddings [limit]`: `true` → default cap, string → numeric cap
|
|
178
247
|
// (0 disables the cap entirely). Validated up here so failures match the
|
|
179
248
|
// sibling-validation pattern (exit before bar.start() — otherwise
|
|
@@ -394,6 +463,10 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
394
463
|
// be able to accept the duplicate name without also paying the
|
|
395
464
|
// cost of a full pipeline re-index. See #829 review round 2.
|
|
396
465
|
allowDuplicateName: options?.allowDuplicateName,
|
|
466
|
+
// Worker pool size threaded from --workers, replacing the previous
|
|
467
|
+
// GITNEXUS_WORKER_POOL_SIZE env mutation. `undefined` defers to the
|
|
468
|
+
// env / auto-formula fallback inside the pipeline.
|
|
469
|
+
workerPoolSize,
|
|
397
470
|
}, {
|
|
398
471
|
onProgress: (_phase, percent, message) => {
|
|
399
472
|
updateBar(percent, message);
|
|
@@ -515,7 +588,7 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
515
588
|
// eslint-disable-next-line no-console -- restoring after intentional progress-bar routing
|
|
516
589
|
console.error = origError;
|
|
517
590
|
bar.stop();
|
|
518
|
-
const msg = err.message
|
|
591
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
519
592
|
// Registry name-collision from --name (#829) — surface as an
|
|
520
593
|
// actionable error rather than a generic stack-trace.
|
|
521
594
|
if (err instanceof RegistryNameCollisionError) {
|
package/dist/cli/index.js
CHANGED
|
@@ -38,6 +38,7 @@ program
|
|
|
38
38
|
.option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
|
|
39
39
|
.option('--max-file-size <kb>', 'Skip files larger than this (KB). Default: 512. Hard cap: 32768 (tree-sitter limit).')
|
|
40
40
|
.option('--worker-timeout <seconds>', 'Worker sub-batch idle timeout before retry/fallback. Default: 30.')
|
|
41
|
+
.option('--workers <n>', 'Parse worker pool size. Default: cores-1 capped at 16. Pass 0 to disable workers (sequential).')
|
|
41
42
|
.option('--embedding-threads <n>', 'Limit local ONNX embedding CPU threads')
|
|
42
43
|
.option('--embedding-batch-size <n>', 'Number of nodes per embedding batch')
|
|
43
44
|
.option('--embedding-sub-batch-size <n>', 'Number of chunks per embedding model call')
|
|
@@ -47,6 +48,11 @@ program
|
|
|
47
48
|
' GITNEXUS_MAX_FILE_SIZE=N Override large-file skip threshold (KB). Default 512, max 32768.\n' +
|
|
48
49
|
' GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS=N Worker idle timeout in milliseconds. Default 30000.\n' +
|
|
49
50
|
' GITNEXUS_WORKER_SUB_BATCH_MAX_BYTES=N Worker job byte budget. Default 8388608.\n' +
|
|
51
|
+
' GITNEXUS_WORKER_POOL_SIZE=N Parse worker count override. Default cores-1 capped at 16.\n' +
|
|
52
|
+
' GITNEXUS_PARSE_CHUNK_CONCURRENCY=N Concurrent in-flight parse chunks. Default 2.\n' +
|
|
53
|
+
' GITNEXUS_WORKER_MAX_RESPAWNS_PER_SLOT=N Max replacement spawns per slot before drop. Default 3.\n' +
|
|
54
|
+
' GITNEXUS_WORKER_MAX_CUMULATIVE_TIMEOUT_MS=N Total retry wall-time per job. Default 5x sub-batch timeout.\n' +
|
|
55
|
+
' GITNEXUS_WORKER_CONSECUTIVE_FAILURE_THRESHOLD=N Per-slot deaths to trip circuit breaker. Default max(3, poolSize).\n' +
|
|
50
56
|
' GITNEXUS_EMBEDDING_THREADS=N Limit local ONNX CPU threads for --embeddings.\n' +
|
|
51
57
|
' GITNEXUS_SEMANTIC_EXACT_SCAN_LIMIT=N Max embedding chunks for exact-scan fallback. Default 10000.\n' +
|
|
52
58
|
'\nTip: `.gitnexusignore` supports `.gitignore`-style negation. Add e.g.\n' +
|
package/dist/cli/wiki.js
CHANGED
|
@@ -80,6 +80,25 @@ function prompt(question, hide = false) {
|
|
|
80
80
|
});
|
|
81
81
|
}
|
|
82
82
|
/**
 * Public entry point for the wiki command.
 *
 * Delegates to `wikiCommandImpl` and guarantees that `GITNEXUS_VERBOSE` is
 * returned to its entry value afterwards, whether the impl resolves or throws.
 * The impl sets that variable when `options.verbose` is on; restoring it here
 * keeps back-to-back calls in a long-running host from inheriting verbose
 * state from an earlier invocation.
 *
 * @param inputPath forwarded unchanged to `wikiCommandImpl`.
 * @param options forwarded unchanged to `wikiCommandImpl`.
 */
export const wikiCommand = async (inputPath, options) => {
    const verboseAtEntry = process.env.GITNEXUS_VERBOSE;
    const restoreVerbose = () => {
        if (verboseAtEntry !== undefined) {
            process.env.GITNEXUS_VERBOSE = verboseAtEntry;
            return;
        }
        // Was unset at entry: delete instead of assigning, so the variable
        // does not come back as the literal string "undefined".
        delete process.env.GITNEXUS_VERBOSE;
    };
    try {
        await wikiCommandImpl(inputPath, options);
    }
    finally {
        restoreVerbose();
    }
};
|
|
101
|
+
const wikiCommandImpl = async (inputPath, options) => {
|
|
83
102
|
// Set verbose mode globally for cursor-client to pick up
|
|
84
103
|
if (options?.verbose) {
|
|
85
104
|
process.env.GITNEXUS_VERBOSE = '1';
|
|
@@ -15,12 +15,14 @@ import { getPluginForFile, HTTP_SCAN_GLOB } from './http-patterns/index.js';
|
|
|
15
15
|
* the preferred path because the graph has richer symbol metadata
|
|
16
16
|
* (real uids, class/method structure, etc.).
|
|
17
17
|
*
|
|
18
|
-
* 2. **Source-scan
|
|
19
|
-
* the per-language plugin registry in `./http-patterns/`. Used
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
18
|
+
* 2. **Source-scan supplement (Strategy B)** — parse files directly with
|
|
19
|
+
* the per-language plugin registry in `./http-patterns/`. Used to
|
|
20
|
+
* fill gaps when graph extraction only covers part of a polyglot repo
|
|
21
|
+
* (e.g. Java graph routes plus Go source-scan routes). Graph entries
|
|
22
|
+
* remain authoritative for duplicate contract IDs because they carry
|
|
23
|
+
* richer symbol metadata. Each plugin owns its tree-sitter grammar
|
|
24
|
+
* and query sources — this orchestrator imports NO grammars or query
|
|
25
|
+
* strings.
|
|
24
26
|
*
|
|
25
27
|
* Adding a new language for Strategy B is a one-file edit in
|
|
26
28
|
* `http-patterns/index.ts`: register a new `HttpLanguagePlugin` and
|
|
@@ -175,13 +177,11 @@ export class HttpRouteExtractor {
|
|
|
175
177
|
return scannedFiles;
|
|
176
178
|
};
|
|
177
179
|
const graphProviders = dbExecutor != null ? await this.extractProvidersGraph(dbExecutor, getDetections) : [];
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
180
|
+
// Source scan always runs to capture routes in languages/files not covered
|
|
181
|
+
// by graph edges; the glob and per-file parse results are cached above.
|
|
182
|
+
const providers = this.mergeGraphAndSourceContracts(graphProviders, this.extractProvidersSourceScan(await getScannedFiles(), getDetections));
|
|
181
183
|
const graphConsumers = dbExecutor != null ? await this.extractConsumersGraph(dbExecutor, getDetections) : [];
|
|
182
|
-
const consumers = graphConsumers.
|
|
183
|
-
? graphConsumers
|
|
184
|
-
: this.extractConsumersSourceScan(await getScannedFiles(), getDetections);
|
|
184
|
+
const consumers = this.mergeGraphAndSourceContracts(graphConsumers, this.extractConsumersSourceScan(await getScannedFiles(), getDetections));
|
|
185
185
|
return [...providers, ...consumers];
|
|
186
186
|
}
|
|
187
187
|
async scanFiles(repoPath) {
|
|
@@ -427,4 +427,15 @@ export class HttpRouteExtractor {
|
|
|
427
427
|
}
|
|
428
428
|
return out;
|
|
429
429
|
}
|
|
430
|
+
mergeGraphAndSourceContracts(graphContracts, sourceContracts) {
|
|
431
|
+
const seenContractIds = new Set(graphContracts.map((c) => c.contractId));
|
|
432
|
+
const out = [...graphContracts];
|
|
433
|
+
for (const contract of sourceContracts) {
|
|
434
|
+
if (seenContractIds.has(contract.contractId))
|
|
435
|
+
continue;
|
|
436
|
+
seenContractIds.add(contract.contractId);
|
|
437
|
+
out.push(contract);
|
|
438
|
+
}
|
|
439
|
+
return out;
|
|
440
|
+
}
|
|
430
441
|
}
|
|
@@ -53,7 +53,7 @@ const CALL_TAGS = [
|
|
|
53
53
|
'@reference.call.member',
|
|
54
54
|
'@reference.call.constructor',
|
|
55
55
|
];
|
|
56
|
-
function
|
|
56
|
+
function pickFirstCapture(grouped, tags) {
|
|
57
57
|
for (const tag of tags) {
|
|
58
58
|
const cap = grouped[tag];
|
|
59
59
|
if (cap !== undefined)
|
|
@@ -61,6 +61,14 @@ function pickFirstDefined(grouped, tags) {
|
|
|
61
61
|
}
|
|
62
62
|
return undefined;
|
|
63
63
|
}
|
|
64
|
+
/**
 * Return the first entry of `grouped` whose key appears in `tags`, trying the
 * tags in order. Only `undefined` counts as "missing".
 *
 * @param grouped map from capture tag to node.
 * @param tags tags to probe, highest priority first.
 * @returns the first defined entry, or `undefined` when no tag matches.
 */
function pickFirstNode(grouped, tags) {
    let found;
    for (const tag of tags) {
        found = grouped[tag];
        if (found !== undefined) {
            break;
        }
    }
    return found;
}
|
|
64
72
|
/**
|
|
65
73
|
* Drop `@reference.read.member` matches whose underlying `member_expression`
|
|
66
74
|
* is NOT actually a read context:
|
|
@@ -102,6 +110,33 @@ function shouldEmitReadMember(memberNode) {
|
|
|
102
110
|
return true;
|
|
103
111
|
}
|
|
104
112
|
}
|
|
113
|
+
/**
 * Walks the parent chain from `node` (inclusive), returning the first node
 * whose `type` equals `type`, or null when the chain ends without a match.
 * Faster than `findNodeAtRange` when the caller already holds the anchor
 * node — avoids re-scanning the tree from the root.
 *
 * A missing start node and a missing `parent` link are both treated as the
 * end of the chain, whether they are `null` or `undefined`, so a detached
 * node yields null instead of throwing.
 *
 * @param node start node, or null/undefined.
 * @param {string} type node type to look for.
 * @returns the matching node, or null.
 */
function findSelfOrAncestorOfType(node, type) {
    // `!= null` deliberately stops on both null and undefined.
    for (let current = node; current != null; current = current.parent) {
        if (current.type === type) {
            return current;
        }
    }
    return null;
}
|
|
127
|
+
/**
 * Walks the parent chain from `node` (inclusive), returning the first node
 * whose `type` is one of `types`, or null when the chain ends without a
 * match. Plural form of {@link findSelfOrAncestorOfType}.
 *
 * A missing start node and a missing `parent` link are both treated as the
 * end of the chain, whether they are `null` or `undefined`, so a detached
 * node yields null instead of throwing.
 *
 * @param node start node, or null/undefined.
 * @param {readonly string[] | ReadonlySet<string>} types accepted node types;
 *   an array (checked with `includes`) or a Set (checked with `has`).
 * @returns the matching node, or null.
 */
function findSelfOrAncestorOfTypes(node, types) {
    // Pick the membership test once, outside the walk.
    const isWanted = typeof types.has === 'function'
        ? (candidate) => types.has(candidate)
        : (candidate) => types.includes(candidate);
    // `!= null` deliberately stops on both null and undefined.
    for (let current = node; current != null; current = current.parent) {
        if (isWanted(current.type)) {
            return current;
        }
    }
    return null;
}
|
|
105
140
|
export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
106
141
|
// Skip the parse when the caller (parse phase's scopeTreeCache) already
|
|
107
142
|
// produced a Tree for this source. Cache miss = re-parse, same as before.
|
|
@@ -135,9 +170,11 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
135
170
|
// `@`; we put it back so the central extractor's prefix lookups
|
|
136
171
|
// (`@scope.`, `@declaration.`, …) work.
|
|
137
172
|
const grouped = {};
|
|
173
|
+
const groupedNodes = {};
|
|
138
174
|
for (const c of m.captures) {
|
|
139
175
|
const tag = '@' + c.name;
|
|
140
176
|
grouped[tag] = nodeToCapture(tag, c.node);
|
|
177
|
+
groupedNodes[tag] = c.node;
|
|
141
178
|
}
|
|
142
179
|
if (Object.keys(grouped).length === 0)
|
|
143
180
|
continue;
|
|
@@ -148,7 +185,11 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
148
185
|
// the file-level dependency.
|
|
149
186
|
if (grouped['@import.statement'] !== undefined) {
|
|
150
187
|
const stmtCapture = grouped['@import.statement'];
|
|
151
|
-
const stmtNode =
|
|
188
|
+
const stmtNode = findSelfOrAncestorOfTypes(groupedNodes['@import.statement'], [
|
|
189
|
+
'import_statement',
|
|
190
|
+
'export_statement',
|
|
191
|
+
]) ??
|
|
192
|
+
findNodeAtRange(tree.rootNode, stmtCapture.range, 'import_statement') ??
|
|
152
193
|
findNodeAtRange(tree.rootNode, stmtCapture.range, 'export_statement');
|
|
153
194
|
if (stmtNode !== null) {
|
|
154
195
|
const decomposed = splitImportStatement(stmtNode);
|
|
@@ -166,7 +207,8 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
166
207
|
// `splitDynamicImport` branch consumes.
|
|
167
208
|
if (grouped['@import.dynamic'] !== undefined) {
|
|
168
209
|
const dynCapture = grouped['@import.dynamic'];
|
|
169
|
-
const callNode =
|
|
210
|
+
const callNode = findSelfOrAncestorOfType(groupedNodes['@import.dynamic'], 'call_expression') ??
|
|
211
|
+
findNodeAtRange(tree.rootNode, dynCapture.range, 'call_expression');
|
|
170
212
|
if (callNode !== null) {
|
|
171
213
|
const decomposed = splitImportStatement(callNode);
|
|
172
214
|
for (const d of decomposed)
|
|
@@ -180,7 +222,8 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
180
222
|
// we rely on this emit-side filter so the query stays simple.
|
|
181
223
|
if (grouped['@reference.read.member'] !== undefined) {
|
|
182
224
|
const anchor = grouped['@reference.read.member'];
|
|
183
|
-
const memberNode =
|
|
225
|
+
const memberNode = findSelfOrAncestorOfType(groupedNodes['@reference.read.member'], 'member_expression') ??
|
|
226
|
+
findNodeAtRange(tree.rootNode, anchor.range, 'member_expression');
|
|
184
227
|
if (memberNode === null || !shouldEmitReadMember(memberNode)) {
|
|
185
228
|
continue;
|
|
186
229
|
}
|
|
@@ -190,9 +233,10 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
190
233
|
// overloads — TypeScript supports overload signatures via
|
|
191
234
|
// function_signature, so `parameterTypes` is populated when
|
|
192
235
|
// available.
|
|
193
|
-
const declAnchor =
|
|
236
|
+
const declAnchor = pickFirstCapture(grouped, FUNCTION_DECL_TAGS);
|
|
237
|
+
const declAnchorNode = pickFirstNode(groupedNodes, FUNCTION_DECL_TAGS);
|
|
194
238
|
if (declAnchor !== undefined) {
|
|
195
|
-
const fnNode = findFunctionNode(tree.rootNode, declAnchor.range);
|
|
239
|
+
const fnNode = findFunctionNode(tree.rootNode, declAnchor.range, declAnchorNode);
|
|
196
240
|
if (fnNode !== null) {
|
|
197
241
|
const arity = computeTsArityMetadata(fnNode);
|
|
198
242
|
if (arity.parameterCount !== undefined) {
|
|
@@ -224,9 +268,11 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
224
268
|
// calls to disambiguate by props-arity, a JSX-aware arity
|
|
225
269
|
// synthesizer would need to count `jsx_attribute` children of the
|
|
226
270
|
// opening tag instead of `arguments`.
|
|
227
|
-
const callAnchor =
|
|
271
|
+
const callAnchor = pickFirstCapture(grouped, CALL_TAGS);
|
|
272
|
+
const callAnchorNode = pickFirstNode(groupedNodes, CALL_TAGS);
|
|
228
273
|
if (callAnchor !== undefined && grouped['@reference.arity'] === undefined) {
|
|
229
|
-
const callNode =
|
|
274
|
+
const callNode = findSelfOrAncestorOfTypes(callAnchorNode, ['call_expression', 'new_expression']) ??
|
|
275
|
+
findNodeAtRange(tree.rootNode, callAnchor.range, 'call_expression') ??
|
|
230
276
|
findNodeAtRange(tree.rootNode, callAnchor.range, 'new_expression');
|
|
231
277
|
if (callNode !== null) {
|
|
232
278
|
const argList = callNode.childForFieldName('arguments');
|
|
@@ -247,7 +293,7 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
247
293
|
// lookup instead of synthesis — covered by `tsReceiverBinding`.
|
|
248
294
|
const scopeFnAnchor = grouped['@scope.function'];
|
|
249
295
|
if (scopeFnAnchor !== undefined) {
|
|
250
|
-
const fnNode = findFunctionNode(tree.rootNode, scopeFnAnchor.range);
|
|
296
|
+
const fnNode = findFunctionNode(tree.rootNode, scopeFnAnchor.range, groupedNodes['@scope.function']);
|
|
251
297
|
if (fnNode !== null) {
|
|
252
298
|
const synth = synthesizeTsReceiverBinding(fnNode);
|
|
253
299
|
if (synth !== null)
|
|
@@ -464,7 +510,10 @@ function inferArgType(argNode) {
|
|
|
464
510
|
* The `@scope.function` anchor range covers the whole node, but the
|
|
465
511
|
* tag alone doesn't identify which node type among the many TS
|
|
466
512
|
* function-likes. */
|
|
467
|
-
function findFunctionNode(rootNode, range) {
|
|
513
|
+
function findFunctionNode(rootNode, range, anchorNode) {
|
|
514
|
+
const fromAnchor = findSelfOrAncestorOfTypes(anchorNode, FUNCTION_NODE_TYPES);
|
|
515
|
+
if (fromAnchor !== null)
|
|
516
|
+
return fromAnchor;
|
|
468
517
|
for (const nodeType of FUNCTION_NODE_TYPES) {
|
|
469
518
|
const n = findNodeAtRange(rootNode, range, nodeType);
|
|
470
519
|
if (n !== null)
|
|
@@ -652,6 +652,13 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, sco
|
|
|
652
652
|
// ============================================================================
|
|
653
653
|
// Public API
|
|
654
654
|
// ============================================================================
|
|
655
|
+
/**
|
|
656
|
+
* Per-`WorkerPool` log-dedup state for quarantine reporting. Keyed on the
|
|
657
|
+
* pool instance so multiple concurrent pools (test fixtures, future
|
|
658
|
+
* multi-pool callers) each get their own seen-set. WeakMap entries vanish
|
|
659
|
+
* when the pool is garbage-collected.
|
|
660
|
+
*/
|
|
661
|
+
const loggedQuarantineByPool = new WeakMap();
|
|
655
662
|
export const processParsing = async (graph, files, symbolTable, astCache,
|
|
656
663
|
/**
|
|
657
664
|
* Persistent tree cache (separate from `astCache`, which the caller
|
|
@@ -684,14 +691,61 @@ outRawResults) => {
|
|
|
684
691
|
// a repo crosses the worker-pool threshold.
|
|
685
692
|
logger.warn(`[scope-resolution prof] worker pool engaged for ${files.length} files — cross-phase tree cache will be empty; scope-resolution re-parses.`);
|
|
686
693
|
}
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
+
// U20 design pivot: the worker pool's resilience layers
|
|
695
|
+
// (respawn budget, circuit breaker, quarantine, slot-attribution,
|
|
696
|
+
// cumulative timeout) are the SOLE contract for handling worker
|
|
697
|
+
// failures. There is no sequential-parser fallback for either
|
|
698
|
+
// partial quarantine or full pool failure — the operator must see
|
|
699
|
+
// a clear hard signal when workers can't recover, instead of a
|
|
700
|
+
// silently-degraded graph from a possibly-crashing main-thread
|
|
701
|
+
// sequential parser. A failing tree-sitter native binding that
|
|
702
|
+
// quarantined a worker would, under the previous design, re-trigger
|
|
703
|
+
// the same SIGSEGV on the main thread; we avoid that risk entirely.
|
|
704
|
+
//
|
|
705
|
+
// - Partial quarantine: the file is missing from this run's graph;
|
|
706
|
+
// the per-chunk warn log below surfaces it; U2's chunk-cache
|
|
707
|
+
// write-guard in parse-impl.ts keeps the chunk uncached so the
|
|
708
|
+
// next analyze gets a cache miss and a fresh pool retries.
|
|
709
|
+
// - Full pool failure: `WorkerPoolDispatchError` propagates from
|
|
710
|
+
// `processParsingWithWorkers` up through this function. The
|
|
711
|
+
// analyze run errors out instead of falling back to sequential.
|
|
712
|
+
const data = await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, reportProgress, outRawResults);
|
|
713
|
+
// Session-scoped quarantine (worker-pool resilience Layer 3): surface
|
|
714
|
+
// any files this pool has decided are unsafe for workers so the
|
|
715
|
+
// operator can see what was skipped. The pool already filtered them
|
|
716
|
+
// out of dispatch; we only need to log + progress-report. Quarantine
|
|
717
|
+
// is session-scoped per pool instance — a fresh `createWorkerPool`
|
|
718
|
+
// call clears it.
|
|
719
|
+
//
|
|
720
|
+
// Dedup: log full path list only for entries newly quarantined since
|
|
721
|
+
// the previous dispatch on the same pool. The per-chunk progress
|
|
722
|
+
// message still surfaces the count for UX continuity, but the
|
|
723
|
+
// structured `quarantinedFiles` payload is only emitted when there
|
|
724
|
+
// is new signal — prevents O(quarantine × chunks) log spam.
|
|
725
|
+
const quarantineSnapshot = workerPool.getQuarantinedPaths?.() ?? [];
|
|
726
|
+
const quarantineSet = new Set(quarantineSnapshot);
|
|
727
|
+
if (quarantineSet.size > 0) {
|
|
728
|
+
const quarantinedInChunk = files.filter((file) => quarantineSet.has(file.path));
|
|
729
|
+
if (quarantinedInChunk.length > 0) {
|
|
730
|
+
const seenForPool = loggedQuarantineByPool.get(workerPool) ?? new Set();
|
|
731
|
+
const newlyQuarantined = quarantinedInChunk
|
|
732
|
+
.map((file) => file.path)
|
|
733
|
+
.filter((p) => !seenForPool.has(p));
|
|
734
|
+
for (const p of newlyQuarantined)
|
|
735
|
+
seenForPool.add(p);
|
|
736
|
+
loggedQuarantineByPool.set(workerPool, seenForPool);
|
|
737
|
+
if (newlyQuarantined.length > 0) {
|
|
738
|
+
logger.warn({
|
|
739
|
+
newlyQuarantined,
|
|
740
|
+
cumulativeQuarantine: quarantineSet.size,
|
|
741
|
+
chunkSkipped: quarantinedInChunk.length,
|
|
742
|
+
}, `Worker quarantine: ${newlyQuarantined.length} new file(s) skipped this chunk ` +
|
|
743
|
+
`(${quarantinedInChunk.length} skipped total, ${quarantineSet.size} cumulative).`);
|
|
744
|
+
}
|
|
745
|
+
reportProgress?.(lastProgress, files.length, `${quarantinedInChunk.length} worker-quarantined file(s) skipped`);
|
|
746
|
+
}
|
|
694
747
|
}
|
|
748
|
+
return data;
|
|
695
749
|
}
|
|
696
750
|
// Fallback: sequential parsing (no pre-extracted data)
|
|
697
751
|
await processParsingSequential(graph, files, symbolTable, astCache, scopeTreeCache, reportProgress);
|