gitnexus 1.6.6-rc.25 → 1.6.6-rc.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -359,6 +359,16 @@ npx gitnexus analyze
359
359
 
360
360
  For repositories with very large source files, `GITNEXUS_WORKER_SUB_BATCH_MAX_BYTES` controls the worker job byte budget. The default is **8388608 bytes (8 MB)**.
361
361
 
362
+ ### Worker pool resilience tuning
363
+
364
+ Three env vars expose the pool's resilience layers (respawn budget, cumulative-timeout cap, circuit breaker). Defaults are tuned for typical repos; raise them when an analyze run legitimately needs more retries, or lower them to fail fast on a known-bad shape.
365
+
366
+ | Variable | Default | Effect |
367
+ | ------------------------------------------------- | ------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
368
+ | `GITNEXUS_WORKER_MAX_RESPAWNS_PER_SLOT` | `3` | Max replacement spawns per slot before the slot is dropped from the active rotation. |
369
+ | `GITNEXUS_WORKER_MAX_CUMULATIVE_TIMEOUT_MS` | `5 × subBatchTimeoutMs` | Total retry wall-time budget per job before quarantining. Bounds exponentially-growing retry waits. |
370
+ | `GITNEXUS_WORKER_CONSECUTIVE_FAILURE_THRESHOLD` | `max(3, poolSize)` | Per-slot consecutive deaths before the pool's circuit breaker trips. After tripping, dispatches require a fresh pool. |
371
+
362
372
  ## Privacy
363
373
 
364
374
  - All processing happens locally on your machine
@@ -69,6 +69,8 @@ export interface AnalyzeOptions {
69
69
  maxFileSize?: string;
70
70
  /** Override worker sub-batch idle timeout in seconds. */
71
71
  workerTimeout?: string;
72
+ /** Parse worker pool size; 0 disables workers (sequential fallback). */
73
+ workers?: string;
72
74
  embeddingThreads?: string;
73
75
  embeddingBatchSize?: string;
74
76
  embeddingSubBatchSize?: string;
@@ -133,10 +133,46 @@ function ensureHeap() {
133
133
  ` (Windows: set NODE_OPTIONS=--max-old-space-size=24576 && gitnexus analyze [your-args])\n` +
134
134
  ` If this persists, it may be a native crash unrelated to heap size.\n`, { recoveryHint: 'heap-oom-respawn' });
135
135
  }
136
- process.exitCode = e.status ?? 1;
136
+ const status = typeof e === 'object' && e !== null && 'status' in e && typeof e.status === 'number'
137
+ ? e.status
138
+ : 1;
139
+ process.exitCode = status;
137
140
  }
138
141
  return true;
139
142
  }
143
+ /**
144
+ * GITNEXUS_* env vars that `analyzeCommand` writes for backward-compatible
145
+ * downstream consumption. Snapshotted at function entry and restored in the
146
+ * finally block so that programmatic callers (tests, long-running hosts)
147
+ * don't see leaked state across invocations. `GITNEXUS_WORKER_POOL_SIZE` is
148
+ * NOT in this list: that knob is threaded through `runFullAnalysis` options
149
+ * (see `workerPoolSize` plumbing) so the CLI never has to mutate `process.env`
150
+ * for it in the first place.
151
+ */
152
+ const ANALYZE_CLI_ENV_KEYS = [
153
+ 'GITNEXUS_VERBOSE',
154
+ 'GITNEXUS_MAX_FILE_SIZE',
155
+ 'GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS',
156
+ 'GITNEXUS_EMBEDDING_THREADS',
157
+ 'GITNEXUS_EMBEDDING_BATCH_SIZE',
158
+ 'GITNEXUS_EMBEDDING_SUB_BATCH_SIZE',
159
+ 'GITNEXUS_EMBEDDING_DEVICE',
160
+ ];
161
+ const snapshotAnalyzeEnv = () => {
162
+ const snap = {};
163
+ for (const k of ANALYZE_CLI_ENV_KEYS)
164
+ snap[k] = process.env[k];
165
+ return snap;
166
+ };
167
+ const restoreAnalyzeEnv = (snap) => {
168
+ for (const k of ANALYZE_CLI_ENV_KEYS) {
169
+ const v = snap[k];
170
+ if (v === undefined)
171
+ delete process.env[k];
172
+ else
173
+ process.env[k] = v;
174
+ }
175
+ };
140
176
  /**
141
177
  * Whether the post-index skill step should run.
142
178
  *
@@ -159,6 +195,22 @@ export const analyzeCommand = async (inputPath, options) => {
159
195
  // async error that escapes the try/catch below (#1169) surfaces with
160
196
  // a stack trace and a non-zero exit code instead of a silent exit 0.
161
197
  installFatalHandlers();
198
+ // Snapshot the GITNEXUS_* env vars that the impl writes for downstream
199
+ // consumption, so they don't leak across `analyzeCommand` invocations in
200
+ // programmatic callers (tests, long-running hosts). `process.exit(0)` on
201
+ // the success path bypasses `finally` — intentional: when the process is
202
+ // exiting, restoration is moot. For early-return paths (validation
203
+ // errors) and the alreadyUpToDate fast path the finally restores the
204
+ // pre-call values.
205
+ const envSnap = snapshotAnalyzeEnv();
206
+ try {
207
+ await analyzeCommandImpl(inputPath, options);
208
+ }
209
+ finally {
210
+ restoreAnalyzeEnv(envSnap);
211
+ }
212
+ };
213
+ const analyzeCommandImpl = async (inputPath, options) => {
162
214
  if (options?.verbose) {
163
215
  process.env.GITNEXUS_VERBOSE = '1';
164
216
  }
@@ -174,6 +226,23 @@ export const analyzeCommand = async (inputPath, options) => {
174
226
  }
175
227
  process.env.GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS = String(Math.round(workerTimeoutSeconds * 1000));
176
228
  }
229
+ // `--workers` is threaded through `runFullAnalysis` options → PipelineOptions
230
+ // → createWorkerPool, intentionally bypassing the GITNEXUS_WORKER_POOL_SIZE
231
+ // env channel so this CLI surface never mutates `process.env` for pool size.
232
+ // Tests can therefore re-invoke analyzeCommand with different --workers
233
+ // values back-to-back and observe the value they passed, not whatever the
234
+ // previous call leaked.
235
+ let workerPoolSize;
236
+ if (options?.workers !== undefined) {
237
+ const parsedWorkers = Number(options.workers);
238
+ if (!Number.isInteger(parsedWorkers) || parsedWorkers < 0) {
239
+ cliError(' --workers must be a non-negative integer. ' +
240
+ 'Pass 0 to disable the worker pool (sequential fallback).\n');
241
+ process.exitCode = 1;
242
+ return;
243
+ }
244
+ workerPoolSize = parsedWorkers;
245
+ }
177
246
  // Parse `--embeddings [limit]`: `true` → default cap, string → numeric cap
178
247
  // (0 disables the cap entirely). Validated up here so failures match the
179
248
  // sibling-validation pattern (exit before bar.start() — otherwise
@@ -394,6 +463,10 @@ export const analyzeCommand = async (inputPath, options) => {
394
463
  // be able to accept the duplicate name without also paying the
395
464
  // cost of a full pipeline re-index. See #829 review round 2.
396
465
  allowDuplicateName: options?.allowDuplicateName,
466
+ // Worker pool size threaded from --workers, replacing the previous
467
+ // GITNEXUS_WORKER_POOL_SIZE env mutation. `undefined` defers to the
468
+ // env / auto-formula fallback inside the pipeline.
469
+ workerPoolSize,
397
470
  }, {
398
471
  onProgress: (_phase, percent, message) => {
399
472
  updateBar(percent, message);
@@ -515,7 +588,7 @@ export const analyzeCommand = async (inputPath, options) => {
515
588
  // eslint-disable-next-line no-console -- restoring after intentional progress-bar routing
516
589
  console.error = origError;
517
590
  bar.stop();
518
- const msg = err.message || String(err);
591
+ const msg = err instanceof Error ? err.message : String(err);
519
592
  // Registry name-collision from --name (#829) — surface as an
520
593
  // actionable error rather than a generic stack-trace.
521
594
  if (err instanceof RegistryNameCollisionError) {
package/dist/cli/index.js CHANGED
@@ -38,6 +38,7 @@ program
38
38
  .option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
39
39
  .option('--max-file-size <kb>', 'Skip files larger than this (KB). Default: 512. Hard cap: 32768 (tree-sitter limit).')
40
40
  .option('--worker-timeout <seconds>', 'Worker sub-batch idle timeout before retry/fallback. Default: 30.')
41
+ .option('--workers <n>', 'Parse worker pool size. Default: cores-1 capped at 16. Pass 0 to disable workers (sequential).')
41
42
  .option('--embedding-threads <n>', 'Limit local ONNX embedding CPU threads')
42
43
  .option('--embedding-batch-size <n>', 'Number of nodes per embedding batch')
43
44
  .option('--embedding-sub-batch-size <n>', 'Number of chunks per embedding model call')
@@ -47,6 +48,11 @@ program
47
48
  ' GITNEXUS_MAX_FILE_SIZE=N Override large-file skip threshold (KB). Default 512, max 32768.\n' +
48
49
  ' GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS=N Worker idle timeout in milliseconds. Default 30000.\n' +
49
50
  ' GITNEXUS_WORKER_SUB_BATCH_MAX_BYTES=N Worker job byte budget. Default 8388608.\n' +
51
+ ' GITNEXUS_WORKER_POOL_SIZE=N Parse worker count override. Default cores-1 capped at 16.\n' +
52
+ ' GITNEXUS_PARSE_CHUNK_CONCURRENCY=N Concurrent in-flight parse chunks. Default 2.\n' +
53
+ ' GITNEXUS_WORKER_MAX_RESPAWNS_PER_SLOT=N Max replacement spawns per slot before drop. Default 3.\n' +
54
+ ' GITNEXUS_WORKER_MAX_CUMULATIVE_TIMEOUT_MS=N Total retry wall-time per job. Default 5x sub-batch timeout.\n' +
55
+ ' GITNEXUS_WORKER_CONSECUTIVE_FAILURE_THRESHOLD=N Per-slot deaths to trip circuit breaker. Default max(3, poolSize).\n' +
50
56
  ' GITNEXUS_EMBEDDING_THREADS=N Limit local ONNX CPU threads for --embeddings.\n' +
51
57
  ' GITNEXUS_SEMANTIC_EXACT_SCAN_LIMIT=N Max embedding chunks for exact-scan fallback. Default 10000.\n' +
52
58
  '\nTip: `.gitnexusignore` supports `.gitignore`-style negation. Add e.g.\n' +
package/dist/cli/wiki.js CHANGED
@@ -80,6 +80,25 @@ function prompt(question, hide = false) {
80
80
  });
81
81
  }
82
82
  export const wikiCommand = async (inputPath, options) => {
83
+ // Snapshot GITNEXUS_VERBOSE at entry — wikiCommand mutates it (the impl
84
+ // below) so cursor-client (process.env-driven) sees the right value during
85
+ // this run. Restored in finally so back-to-back wiki calls in long-running
86
+ // hosts don't leak verbose state from one invocation to the next. Pairs
87
+ // with the same snapshot/restore pattern in `analyzeCommand`.
88
+ const originalVerbose = process.env.GITNEXUS_VERBOSE;
89
+ try {
90
+ await wikiCommandImpl(inputPath, options);
91
+ }
92
+ finally {
93
+ if (originalVerbose === undefined) {
94
+ delete process.env.GITNEXUS_VERBOSE;
95
+ }
96
+ else {
97
+ process.env.GITNEXUS_VERBOSE = originalVerbose;
98
+ }
99
+ }
100
+ };
101
+ const wikiCommandImpl = async (inputPath, options) => {
83
102
  // Set verbose mode globally for cursor-client to pick up
84
103
  if (options?.verbose) {
85
104
  process.env.GITNEXUS_VERBOSE = '1';
@@ -18,4 +18,5 @@ export declare class HttpRouteExtractor implements ContractExtractor {
18
18
  private extractConsumersGraph;
19
19
  private extractConsumersSourceScan;
20
20
  private dedupeContracts;
21
+ private mergeGraphAndSourceContracts;
21
22
  }
@@ -15,12 +15,14 @@ import { getPluginForFile, HTTP_SCAN_GLOB } from './http-patterns/index.js';
15
15
  * the preferred path because the graph has richer symbol metadata
16
16
  * (real uids, class/method structure, etc.).
17
17
  *
18
- * 2. **Source-scan fallback (Strategy B)** — parse files directly with
19
- * the per-language plugin registry in `./http-patterns/`. Used when
20
- * the graph has no routes/fetches for this repo (e.g. a repo that
21
- * hasn't been indexed yet, or whose indexer doesn't know the
22
- * framework). Each plugin owns its tree-sitter grammar and query
23
- * sources — this orchestrator imports NO grammars or query strings.
18
+ * 2. **Source-scan supplement (Strategy B)** — parse files directly with
19
+ * the per-language plugin registry in `./http-patterns/`. Used to
20
+ * fill gaps when graph extraction only covers part of a polyglot repo
21
+ * (e.g. Java graph routes plus Go source-scan routes). Graph entries
22
+ * remain authoritative for duplicate contract IDs because they carry
23
+ * richer symbol metadata. Each plugin owns its tree-sitter grammar
24
+ * and query sources — this orchestrator imports NO grammars or query
25
+ * strings.
24
26
  *
25
27
  * Adding a new language for Strategy B is a one-file edit in
26
28
  * `http-patterns/index.ts`: register a new `HttpLanguagePlugin` and
@@ -175,13 +177,11 @@ export class HttpRouteExtractor {
175
177
  return scannedFiles;
176
178
  };
177
179
  const graphProviders = dbExecutor != null ? await this.extractProvidersGraph(dbExecutor, getDetections) : [];
178
- const providers = graphProviders.length > 0
179
- ? graphProviders
180
- : this.extractProvidersSourceScan(await getScannedFiles(), getDetections);
180
+ // Source scan always runs to capture routes in languages/files not covered
181
+ // by graph edges; the glob and per-file parse results are cached above.
182
+ const providers = this.mergeGraphAndSourceContracts(graphProviders, this.extractProvidersSourceScan(await getScannedFiles(), getDetections));
181
183
  const graphConsumers = dbExecutor != null ? await this.extractConsumersGraph(dbExecutor, getDetections) : [];
182
- const consumers = graphConsumers.length > 0
183
- ? graphConsumers
184
- : this.extractConsumersSourceScan(await getScannedFiles(), getDetections);
184
+ const consumers = this.mergeGraphAndSourceContracts(graphConsumers, this.extractConsumersSourceScan(await getScannedFiles(), getDetections));
185
185
  return [...providers, ...consumers];
186
186
  }
187
187
  async scanFiles(repoPath) {
@@ -427,4 +427,15 @@ export class HttpRouteExtractor {
427
427
  }
428
428
  return out;
429
429
  }
430
+ mergeGraphAndSourceContracts(graphContracts, sourceContracts) {
431
+ const seenContractIds = new Set(graphContracts.map((c) => c.contractId));
432
+ const out = [...graphContracts];
433
+ for (const contract of sourceContracts) {
434
+ if (seenContractIds.has(contract.contractId))
435
+ continue;
436
+ seenContractIds.add(contract.contractId);
437
+ out.push(contract);
438
+ }
439
+ return out;
440
+ }
430
441
  }
@@ -53,7 +53,7 @@ const CALL_TAGS = [
53
53
  '@reference.call.member',
54
54
  '@reference.call.constructor',
55
55
  ];
56
- function pickFirstDefined(grouped, tags) {
56
+ function pickFirstCapture(grouped, tags) {
57
57
  for (const tag of tags) {
58
58
  const cap = grouped[tag];
59
59
  if (cap !== undefined)
@@ -61,6 +61,14 @@ function pickFirstDefined(grouped, tags) {
61
61
  }
62
62
  return undefined;
63
63
  }
64
+ function pickFirstNode(grouped, tags) {
65
+ for (const tag of tags) {
66
+ const node = grouped[tag];
67
+ if (node !== undefined)
68
+ return node;
69
+ }
70
+ return undefined;
71
+ }
64
72
  /**
65
73
  * Drop `@reference.read.member` matches whose underlying `member_expression`
66
74
  * is NOT actually a read context:
@@ -102,6 +110,33 @@ function shouldEmitReadMember(memberNode) {
102
110
  return true;
103
111
  }
104
112
  }
113
+ /** Walks the parent chain from `node` (inclusive), returning the first node
114
+ * whose type matches, or null. Faster than `findNodeAtRange` when the caller
115
+ * already holds the anchor node — avoids re-scanning the tree from the root. */
116
+ function findSelfOrAncestorOfType(node, type) {
117
+ if (node === undefined)
118
+ return null;
119
+ let current = node;
120
+ while (current !== null) {
121
+ if (current.type === type)
122
+ return current;
123
+ current = current.parent;
124
+ }
125
+ return null;
126
+ }
127
+ /** Walks the parent chain from `node` (inclusive), returning the first node
128
+ * whose type is in the set, or null. Plural form of {@link findSelfOrAncestorOfType}. */
129
+ function findSelfOrAncestorOfTypes(node, types) {
130
+ if (node === undefined)
131
+ return null;
132
+ let current = node;
133
+ while (current !== null) {
134
+ if (types.includes(current.type))
135
+ return current;
136
+ current = current.parent;
137
+ }
138
+ return null;
139
+ }
105
140
  export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
106
141
  // Skip the parse when the caller (parse phase's scopeTreeCache) already
107
142
  // produced a Tree for this source. Cache miss = re-parse, same as before.
@@ -135,9 +170,11 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
135
170
  // `@`; we put it back so the central extractor's prefix lookups
136
171
  // (`@scope.`, `@declaration.`, …) work.
137
172
  const grouped = {};
173
+ const groupedNodes = {};
138
174
  for (const c of m.captures) {
139
175
  const tag = '@' + c.name;
140
176
  grouped[tag] = nodeToCapture(tag, c.node);
177
+ groupedNodes[tag] = c.node;
141
178
  }
142
179
  if (Object.keys(grouped).length === 0)
143
180
  continue;
@@ -148,7 +185,11 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
148
185
  // the file-level dependency.
149
186
  if (grouped['@import.statement'] !== undefined) {
150
187
  const stmtCapture = grouped['@import.statement'];
151
- const stmtNode = findNodeAtRange(tree.rootNode, stmtCapture.range, 'import_statement') ??
188
+ const stmtNode = findSelfOrAncestorOfTypes(groupedNodes['@import.statement'], [
189
+ 'import_statement',
190
+ 'export_statement',
191
+ ]) ??
192
+ findNodeAtRange(tree.rootNode, stmtCapture.range, 'import_statement') ??
152
193
  findNodeAtRange(tree.rootNode, stmtCapture.range, 'export_statement');
153
194
  if (stmtNode !== null) {
154
195
  const decomposed = splitImportStatement(stmtNode);
@@ -166,7 +207,8 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
166
207
  // `splitDynamicImport` branch consumes.
167
208
  if (grouped['@import.dynamic'] !== undefined) {
168
209
  const dynCapture = grouped['@import.dynamic'];
169
- const callNode = findNodeAtRange(tree.rootNode, dynCapture.range, 'call_expression');
210
+ const callNode = findSelfOrAncestorOfType(groupedNodes['@import.dynamic'], 'call_expression') ??
211
+ findNodeAtRange(tree.rootNode, dynCapture.range, 'call_expression');
170
212
  if (callNode !== null) {
171
213
  const decomposed = splitImportStatement(callNode);
172
214
  for (const d of decomposed)
@@ -180,7 +222,8 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
180
222
  // we rely on this emit-side filter so the query stays simple.
181
223
  if (grouped['@reference.read.member'] !== undefined) {
182
224
  const anchor = grouped['@reference.read.member'];
183
- const memberNode = findNodeAtRange(tree.rootNode, anchor.range, 'member_expression');
225
+ const memberNode = findSelfOrAncestorOfType(groupedNodes['@reference.read.member'], 'member_expression') ??
226
+ findNodeAtRange(tree.rootNode, anchor.range, 'member_expression');
184
227
  if (memberNode === null || !shouldEmitReadMember(memberNode)) {
185
228
  continue;
186
229
  }
@@ -190,9 +233,10 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
190
233
  // overloads — TypeScript supports overload signatures via
191
234
  // function_signature, so `parameterTypes` is populated when
192
235
  // available.
193
- const declAnchor = pickFirstDefined(grouped, FUNCTION_DECL_TAGS);
236
+ const declAnchor = pickFirstCapture(grouped, FUNCTION_DECL_TAGS);
237
+ const declAnchorNode = pickFirstNode(groupedNodes, FUNCTION_DECL_TAGS);
194
238
  if (declAnchor !== undefined) {
195
- const fnNode = findFunctionNode(tree.rootNode, declAnchor.range);
239
+ const fnNode = findFunctionNode(tree.rootNode, declAnchor.range, declAnchorNode);
196
240
  if (fnNode !== null) {
197
241
  const arity = computeTsArityMetadata(fnNode);
198
242
  if (arity.parameterCount !== undefined) {
@@ -224,9 +268,11 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
224
268
  // calls to disambiguate by props-arity, a JSX-aware arity
225
269
  // synthesizer would need to count `jsx_attribute` children of the
226
270
  // opening tag instead of `arguments`.
227
- const callAnchor = pickFirstDefined(grouped, CALL_TAGS);
271
+ const callAnchor = pickFirstCapture(grouped, CALL_TAGS);
272
+ const callAnchorNode = pickFirstNode(groupedNodes, CALL_TAGS);
228
273
  if (callAnchor !== undefined && grouped['@reference.arity'] === undefined) {
229
- const callNode = findNodeAtRange(tree.rootNode, callAnchor.range, 'call_expression') ??
274
+ const callNode = findSelfOrAncestorOfTypes(callAnchorNode, ['call_expression', 'new_expression']) ??
275
+ findNodeAtRange(tree.rootNode, callAnchor.range, 'call_expression') ??
230
276
  findNodeAtRange(tree.rootNode, callAnchor.range, 'new_expression');
231
277
  if (callNode !== null) {
232
278
  const argList = callNode.childForFieldName('arguments');
@@ -247,7 +293,7 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
247
293
  // lookup instead of synthesis — covered by `tsReceiverBinding`.
248
294
  const scopeFnAnchor = grouped['@scope.function'];
249
295
  if (scopeFnAnchor !== undefined) {
250
- const fnNode = findFunctionNode(tree.rootNode, scopeFnAnchor.range);
296
+ const fnNode = findFunctionNode(tree.rootNode, scopeFnAnchor.range, groupedNodes['@scope.function']);
251
297
  if (fnNode !== null) {
252
298
  const synth = synthesizeTsReceiverBinding(fnNode);
253
299
  if (synth !== null)
@@ -464,7 +510,10 @@ function inferArgType(argNode) {
464
510
  * The `@scope.function` anchor range covers the whole node, but the
465
511
  * tag alone doesn't identify which node type among the many TS
466
512
  * function-likes. */
467
- function findFunctionNode(rootNode, range) {
513
+ function findFunctionNode(rootNode, range, anchorNode) {
514
+ const fromAnchor = findSelfOrAncestorOfTypes(anchorNode, FUNCTION_NODE_TYPES);
515
+ if (fromAnchor !== null)
516
+ return fromAnchor;
468
517
  for (const nodeType of FUNCTION_NODE_TYPES) {
469
518
  const n = findNodeAtRange(rootNode, range, nodeType);
470
519
  if (n !== null)
@@ -652,6 +652,13 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, sco
652
652
  // ============================================================================
653
653
  // Public API
654
654
  // ============================================================================
655
+ /**
656
+ * Per-`WorkerPool` log-dedup state for quarantine reporting. Keyed on the
657
+ * pool instance so multiple concurrent pools (test fixtures, future
658
+ * multi-pool callers) each get their own seen-set. WeakMap entries vanish
659
+ * when the pool is garbage-collected.
660
+ */
661
+ const loggedQuarantineByPool = new WeakMap();
655
662
  export const processParsing = async (graph, files, symbolTable, astCache,
656
663
  /**
657
664
  * Persistent tree cache (separate from `astCache`, which the caller
@@ -684,14 +691,61 @@ outRawResults) => {
684
691
  // a repo crosses the worker-pool threshold.
685
692
  logger.warn(`[scope-resolution prof] worker pool engaged for ${files.length} files — cross-phase tree cache will be empty; scope-resolution re-parses.`);
686
693
  }
687
- try {
688
- return await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, reportProgress, outRawResults);
689
- }
690
- catch (err) {
691
- const message = err instanceof Error ? err.message : String(err);
692
- logger.warn({ message }, 'Worker pool parsing stopped; continuing with sequential parser:');
693
- reportProgress?.(lastProgress, files.length, `Sequential fallback after worker issue: ${message}`);
694
+ // U20 design pivot: the worker pool's resilience layers
695
+ // (respawn budget, circuit breaker, quarantine, slot-attribution,
696
+ // cumulative timeout) are the SOLE contract for handling worker
697
+ // failures. There is no sequential-parser fallback for either
698
+ // partial quarantine or full pool failure the operator must see
699
+ // a clear hard signal when workers can't recover, instead of a
700
+ // silently-degraded graph from a possibly-crashing main-thread
701
+ // sequential parser. A failing tree-sitter native binding that
702
+ // quarantined a worker would, under the previous design, re-trigger
703
+ // the same SIGSEGV on the main thread; we avoid that risk entirely.
704
+ //
705
+ // - Partial quarantine: the file is missing from this run's graph;
706
+ // the per-chunk warn log below surfaces it; U2's chunk-cache
707
+ // write-guard in parse-impl.ts keeps the chunk uncached so the
708
+ // next analyze gets a cache miss and a fresh pool retries.
709
+ // - Full pool failure: `WorkerPoolDispatchError` propagates from
710
+ // `processParsingWithWorkers` up through this function. The
711
+ // analyze run errors out instead of falling back to sequential.
712
+ const data = await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, reportProgress, outRawResults);
713
+ // Session-scoped quarantine (worker-pool resilience Layer 3): surface
714
+ // any files this pool has decided are unsafe for workers so the
715
+ // operator can see what was skipped. The pool already filtered them
716
+ // out of dispatch; we only need to log + progress-report. Quarantine
717
+ // is session-scoped per pool instance — a fresh `createWorkerPool`
718
+ // call clears it.
719
+ //
720
+ // Dedup: log full path list only for entries newly quarantined since
721
+ // the previous dispatch on the same pool. The per-chunk progress
722
+ // message still surfaces the count for UX continuity, but the
723
+ // structured `quarantinedFiles` payload is only emitted when there
724
+ // is new signal — prevents O(quarantine × chunks) log spam.
725
+ const quarantineSnapshot = workerPool.getQuarantinedPaths?.() ?? [];
726
+ const quarantineSet = new Set(quarantineSnapshot);
727
+ if (quarantineSet.size > 0) {
728
+ const quarantinedInChunk = files.filter((file) => quarantineSet.has(file.path));
729
+ if (quarantinedInChunk.length > 0) {
730
+ const seenForPool = loggedQuarantineByPool.get(workerPool) ?? new Set();
731
+ const newlyQuarantined = quarantinedInChunk
732
+ .map((file) => file.path)
733
+ .filter((p) => !seenForPool.has(p));
734
+ for (const p of newlyQuarantined)
735
+ seenForPool.add(p);
736
+ loggedQuarantineByPool.set(workerPool, seenForPool);
737
+ if (newlyQuarantined.length > 0) {
738
+ logger.warn({
739
+ newlyQuarantined,
740
+ cumulativeQuarantine: quarantineSet.size,
741
+ chunkSkipped: quarantinedInChunk.length,
742
+ }, `Worker quarantine: ${newlyQuarantined.length} new file(s) skipped this chunk ` +
743
+ `(${quarantinedInChunk.length} skipped total, ${quarantineSet.size} cumulative).`);
744
+ }
745
+ reportProgress?.(lastProgress, files.length, `${quarantinedInChunk.length} worker-quarantined file(s) skipped`);
746
+ }
694
747
  }
748
+ return data;
695
749
  }
696
750
  // Fallback: sequential parsing (no pre-extracted data)
697
751
  await processParsingSequential(graph, files, symbolTable, astCache, scopeTreeCache, reportProgress);