gitnexus 1.6.6-rc.30 → 1.6.6-rc.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/dist/cli/analyze.js +255 -21
  2. package/dist/core/ingestion/filesystem-walker.js +18 -4
  3. package/dist/core/ingestion/pipeline-phases/parse-impl.js +61 -24
  4. package/dist/core/ingestion/workers/worker-pool.d.ts +4 -0
  5. package/dist/core/ingestion/workers/worker-pool.js +14 -1
  6. package/dist/core/lbug/lbug-adapter.js +183 -39
  7. package/dist/core/lbug/pool-adapter.js +137 -1
  8. package/dist/core/lbug/sidecar-recovery.d.ts +68 -0
  9. package/dist/core/lbug/sidecar-recovery.js +277 -0
  10. package/dist/server/api.js +5 -0
  11. package/package.json +1 -1
  12. package/scripts/build.js +20 -4
  13. package/web/assets/{agent-Warpie3K.js → agent-CBcds30d.js} +101 -76
  14. package/web/assets/{architectureDiagram-UL44E2DR-BVMgctd1.js → architectureDiagram-UL44E2DR-dIoPPr6x.js} +1 -1
  15. package/web/assets/{chunk-LCXTWHL2-D0Ojsf12.js → chunk-LCXTWHL2-B8hbjKUm.js} +1 -1
  16. package/web/assets/{chunk-RG4AUYOV-CGOYR7mA.js → chunk-RG4AUYOV-EfsAenro.js} +1 -1
  17. package/web/assets/{classDiagram-KGZ6W3CR-BWNNFh9P.js → classDiagram-KGZ6W3CR-_hSUwNQJ.js} +1 -1
  18. package/web/assets/{classDiagram-v2-72OJOZXJ-D6den_PS.js → classDiagram-v2-72OJOZXJ-C0NcgLqj.js} +1 -1
  19. package/web/assets/{diagram-3NCE3AQN-fpPDz4AJ.js → diagram-3NCE3AQN-CYrNJJUh.js} +1 -1
  20. package/web/assets/{diagram-GF46GFSD-CBCK4dz-.js → diagram-GF46GFSD-56NpS1jw.js} +1 -1
  21. package/web/assets/{diagram-QXG6HAR7-Dp7brFCQ.js → diagram-QXG6HAR7-DwXkFq_r.js} +1 -1
  22. package/web/assets/{diagram-WEQXMOUZ-CcNoPnrZ.js → diagram-WEQXMOUZ-C6BTq9za.js} +1 -1
  23. package/web/assets/{erDiagram-L5TCEMPS-C7gqztHL.js → erDiagram-L5TCEMPS-BcEjYsUQ.js} +1 -1
  24. package/web/assets/{flowDiagram-H6V6AXG4-AhdoRoMl.js → flowDiagram-H6V6AXG4-DWAVIV6V.js} +1 -1
  25. package/web/assets/{index-DHb4KmNb.js → index-Czp-OFT-.js} +5 -5
  26. package/web/assets/index-nSZgUaIx.css +2 -0
  27. package/web/assets/{infoDiagram-3YFTVSEB-mP4ELRFj.js → infoDiagram-3YFTVSEB-ui-e52GZ.js} +1 -1
  28. package/web/assets/{ishikawaDiagram-BNXS4ZKH-CPiYSQcK.js → ishikawaDiagram-BNXS4ZKH-DGimV4zg.js} +1 -1
  29. package/web/assets/{kanban-definition-75IXJCU3-EI3ocXjG.js → kanban-definition-75IXJCU3-BOyfgvKL.js} +1 -1
  30. package/web/assets/{mindmap-definition-2TDM6QVE-293vIByN.js → mindmap-definition-2TDM6QVE-Ba3QrYSU.js} +1 -1
  31. package/web/assets/{pieDiagram-CU6KROY3-DhdftcYl.js → pieDiagram-CU6KROY3-DMFBXNrM.js} +1 -1
  32. package/web/assets/{requirementDiagram-JXO7QTGE-DdPq7kYS.js → requirementDiagram-JXO7QTGE-bS4xboSz.js} +1 -1
  33. package/web/assets/{sequenceDiagram-VS2MUI6T-CuIcQieZ.js → sequenceDiagram-VS2MUI6T-BqKET_2i.js} +1 -1
  34. package/web/assets/{stateDiagram-7D4R322I-CjVShw_t.js → stateDiagram-7D4R322I-DP9kvX2i.js} +1 -1
  35. package/web/assets/{stateDiagram-v2-36443NZ5-D9CkWeqa.js → stateDiagram-v2-36443NZ5-DB-cZ1VL.js} +1 -1
  36. package/web/assets/{timeline-definition-O6YCAMPW-Dzybzo6D.js → timeline-definition-O6YCAMPW-DNScSOi7.js} +1 -1
  37. package/web/assets/{vennDiagram-MWXL3ELB-CapSiCmK.js → vennDiagram-MWXL3ELB-Bd1zTNWW.js} +1 -1
  38. package/web/assets/{wardleyDiagram-CUQ6CDDI-CohjGBRu.js → wardleyDiagram-CUQ6CDDI-DqCDQKFt.js} +1 -1
  39. package/web/assets/{xychartDiagram-N2JHSOCM-CiYjF6Jz.js → xychartDiagram-N2JHSOCM-B8Cje_Ei.js} +1 -1
  40. package/web/index.html +2 -2
  41. package/web/assets/index-BleGLU8S.css +0 -2
@@ -8,7 +8,7 @@
8
8
  * skill generation (--skills), summary output, and process.exit().
9
9
  */
10
10
  import path from 'path';
11
- import { execFileSync } from 'child_process';
11
+ import { spawn } from 'child_process';
12
12
  import v8 from 'v8';
13
13
  import cliProgress from 'cli-progress';
14
14
  import { closeLbug } from '../core/lbug/lbug-adapter.js';
@@ -29,6 +29,7 @@ import { isHfDownloadFailure } from '../core/embeddings/hf-env.js';
29
29
  // previous behaviour silently swallowed stack traces and made #1169
30
30
  // indistinguishable from a no-op success on Windows.
31
31
  const realStderrWrite = process.stderr.write.bind(process.stderr);
32
+ const realStdoutWrite = process.stdout.write.bind(process.stdout);
32
33
  const writeFatalToStderr = (label, err) => {
33
34
  const isErr = err instanceof Error;
34
35
  const message = isErr ? err.message : String(err);
@@ -67,14 +68,212 @@ const HEAP_FLAG = `--max-old-space-size=${RESPAWN_HEAP_MB}`;
67
68
  /** Increase default stack size (KB) to prevent stack overflow on deep class hierarchies. */
68
69
  const STACK_KB = 4096;
69
70
  const STACK_FLAG = `--stack-size=${STACK_KB}`;
71
+ const RESPAWN_OUTPUT_TAIL_CHARS = 1024 * 1024;
72
+ const RESPAWN_PROGRESS_ENV = 'GITNEXUS_RESPAWN_PROGRESS_TTY';
73
+ const terminalColumns = () => {
74
+ const parsed = Number(process.env.COLUMNS);
75
+ return Number.isFinite(parsed) && parsed > 0 ? Math.floor(parsed) : 80;
76
+ };
77
+ const ANSI_ESCAPE_PATTERN = /\x1B(?:\[[0-?]*[ -/]*[@-~]|\][^\x07]*(?:\x07|\x1B\\)|[PX^_][\s\S]*?\x1B\\|[78]|[@-Z\\-_])/y;
78
+ const splitGraphemes = (text) => {
79
+ const Segmenter = Intl.Segmenter;
80
+ if (Segmenter) {
81
+ return Array.from(new Segmenter(undefined, { granularity: 'grapheme' }).segment(text), (s) => s.segment);
82
+ }
83
+ return Array.from(text);
84
+ };
85
+ const isZeroWidthCodePoint = (codePoint) => codePoint === 0x200d ||
86
+ (codePoint >= 0x0300 && codePoint <= 0x036f) ||
87
+ (codePoint >= 0x1ab0 && codePoint <= 0x1aff) ||
88
+ (codePoint >= 0x1dc0 && codePoint <= 0x1dff) ||
89
+ (codePoint >= 0x20d0 && codePoint <= 0x20ff) ||
90
+ (codePoint >= 0xfe00 && codePoint <= 0xfe0f) ||
91
+ (codePoint >= 0xfe20 && codePoint <= 0xfe2f);
92
+ const isWideCodePoint = (codePoint) => codePoint >= 0x1100 &&
93
+ (codePoint <= 0x115f ||
94
+ codePoint === 0x2329 ||
95
+ codePoint === 0x232a ||
96
+ (codePoint >= 0x2e80 && codePoint <= 0xa4cf && codePoint !== 0x303f) ||
97
+ (codePoint >= 0xac00 && codePoint <= 0xd7a3) ||
98
+ (codePoint >= 0xf900 && codePoint <= 0xfaff) ||
99
+ (codePoint >= 0xfe10 && codePoint <= 0xfe19) ||
100
+ (codePoint >= 0xfe30 && codePoint <= 0xfe6f) ||
101
+ (codePoint >= 0xff00 && codePoint <= 0xff60) ||
102
+ (codePoint >= 0xffe0 && codePoint <= 0xffe6) ||
103
+ (codePoint >= 0x1f300 && codePoint <= 0x1faff) ||
104
+ (codePoint >= 0x20000 && codePoint <= 0x3fffd));
105
+ const visibleColumns = (text) => {
106
+ let columns = 0;
107
+ for (const char of Array.from(text)) {
108
+ const codePoint = char.codePointAt(0);
109
+ if (codePoint === undefined || isZeroWidthCodePoint(codePoint))
110
+ continue;
111
+ columns += isWideCodePoint(codePoint) ? 2 : 1;
112
+ }
113
+ return columns;
114
+ };
115
+ const readAnsiEscapeAt = (text, index) => {
116
+ ANSI_ESCAPE_PATTERN.lastIndex = index;
117
+ return ANSI_ESCAPE_PATTERN.exec(text)?.[0];
118
+ };
119
+ const truncateAnsiToColumns = (text, maxColumns) => {
120
+ if (!Number.isFinite(maxColumns) || maxColumns <= 0)
121
+ return '';
122
+ let output = '';
123
+ let columns = 0;
124
+ let index = 0;
125
+ while (index < text.length) {
126
+ const escape = readAnsiEscapeAt(text, index);
127
+ if (escape) {
128
+ output += escape;
129
+ index += escape.length;
130
+ continue;
131
+ }
132
+ const nextEscapeIndex = text.indexOf('\x1B', index);
133
+ const plainEnd = nextEscapeIndex === -1 ? text.length : nextEscapeIndex;
134
+ const plainText = text.slice(index, plainEnd);
135
+ for (const segment of splitGraphemes(plainText)) {
136
+ const width = visibleColumns(segment);
137
+ if (width > 0 && columns + width > maxColumns)
138
+ return output;
139
+ output += segment;
140
+ columns += width;
141
+ }
142
+ index = plainEnd;
143
+ }
144
+ return output;
145
+ };
146
+ const createAnsiPipeTerminal = (stream) => {
147
+ let linewrap = true;
148
+ let dy = 0;
149
+ const write = (s) => {
150
+ stream.write(s);
151
+ };
152
+ const moveVertical = (delta) => {
153
+ if (delta > 0)
154
+ write(`\x1B[${delta}B`);
155
+ else if (delta < 0)
156
+ write(`\x1B[${Math.abs(delta)}A`);
157
+ };
158
+ return {
159
+ cursorSave: () => write('\x1B7'),
160
+ cursorRestore: () => write('\x1B8'),
161
+ cursor: (enabled) => write(enabled ? '\x1B[?25h' : '\x1B[?25l'),
162
+ lineWrapping: (enabled) => {
163
+ linewrap = enabled;
164
+ write(enabled ? '\x1B[?7h' : '\x1B[?7l');
165
+ },
166
+ cursorTo: (x = null, y = null) => {
167
+ if (typeof y === 'number' && typeof x === 'number') {
168
+ write(`\x1B[${y + 1};${x + 1}H`);
169
+ return;
170
+ }
171
+ if (typeof x === 'number') {
172
+ write(x === 0 ? '\r' : `\x1B[${x + 1}G`);
173
+ }
174
+ },
175
+ cursorRelative: (dx = null, nextDy = null) => {
176
+ if (typeof dx === 'number' && dx !== 0) {
177
+ write(dx > 0 ? `\x1B[${dx}C` : `\x1B[${Math.abs(dx)}D`);
178
+ }
179
+ if (typeof nextDy === 'number' && nextDy !== 0) {
180
+ dy += nextDy;
181
+ moveVertical(nextDy);
182
+ }
183
+ },
184
+ cursorRelativeReset: () => {
185
+ moveVertical(-dy);
186
+ write('\r');
187
+ dy = 0;
188
+ },
189
+ clearRight: () => write('\x1B[0K'),
190
+ clearLine: () => write('\x1B[2K'),
191
+ clearBottom: () => write('\x1B[0J'),
192
+ newline: () => {
193
+ write('\n');
194
+ dy++;
195
+ },
196
+ write: (s, rawWrite = false) => {
197
+ const width = terminalColumns();
198
+ write(linewrap && rawWrite === false ? truncateAnsiToColumns(s, width) : s);
199
+ },
200
+ isTTY: () => true,
201
+ getWidth: terminalColumns,
202
+ };
203
+ };
204
+ const shouldBridgeRespawnProgressTty = () => process.stderr.isTTY === true || process.stdout.isTTY === true;
205
+ const appendOutputTail = (tail, chunk) => {
206
+ const text = Buffer.isBuffer(chunk)
207
+ ? chunk.toString('utf8')
208
+ : typeof chunk === 'string'
209
+ ? chunk
210
+ : String(chunk ?? '');
211
+ if (!text)
212
+ return tail;
213
+ const next = tail + text;
214
+ return next.length > RESPAWN_OUTPUT_TAIL_CHARS ? next.slice(-RESPAWN_OUTPUT_TAIL_CHARS) : next;
215
+ };
216
+ /**
217
+ * Run the respawned analyzer while teeing child output through to the parent
218
+ * and keeping a bounded tail for crash classification.
219
+ *
220
+ * `execFileSync(..., { stdio: 'inherit' })` preserved live progress but hid
221
+ * stderr/stdout from the parent on abnormal exits. That made every
222
+ * SIGABRT/status-134 child look like an output-less V8 heap OOM, even when the
223
+ * terminal had already shown a native crash such as
224
+ * `libc++abi: ... Napi::Error`. Piped streams plus an explicit tee keeps the UX
225
+ * and gives `childProcessLikelyOom` the evidence it needs.
226
+ */
227
+ const runRespawnedAnalyze = (args, env) => new Promise((resolve) => {
228
+ let stdout = '';
229
+ let stderr = '';
230
+ let settled = false;
231
+ const finish = (exit) => {
232
+ if (settled)
233
+ return;
234
+ settled = true;
235
+ resolve(exit);
236
+ };
237
+ const child = spawn(process.execPath, [...args], {
238
+ stdio: ['inherit', 'pipe', 'pipe'],
239
+ env,
240
+ });
241
+ child.stdout?.on('data', (chunk) => {
242
+ stdout = appendOutputTail(stdout, chunk);
243
+ realStdoutWrite(chunk);
244
+ });
245
+ child.stderr?.on('data', (chunk) => {
246
+ stderr = appendOutputTail(stderr, chunk);
247
+ realStderrWrite(chunk);
248
+ });
249
+ child.on('error', (err) => {
250
+ finish({
251
+ status: 1,
252
+ signal: null,
253
+ stdout,
254
+ stderr,
255
+ message: err instanceof Error ? err.message : String(err),
256
+ });
257
+ });
258
+ child.on('close', (status, signal) => {
259
+ finish({
260
+ status,
261
+ signal,
262
+ stdout,
263
+ stderr,
264
+ message: `Command failed: ${process.execPath} ${args.join(' ')}`,
265
+ });
266
+ });
267
+ });
70
268
  /**
71
269
  * Heuristic for "child re-exec likely died from V8 OOM".
72
270
  *
73
- * Platform-independent detection is best-effort: V8/Node usually emit
74
- * stable heap-exhaustion phrases in stderr/message across Linux/macOS/Windows
75
- * (for example "JavaScript heap out of memory" or "Reached heap limit"),
76
- * while some environments only expose status/signal (e.g. 134/SIGABRT).
77
- * We combine both text signatures and process-exit signatures.
271
+ * Platform-independent detection is best-effort: V8/Node usually emit stable
272
+ * heap-exhaustion phrases in stderr/message across Linux/macOS/Windows (for
273
+ * example "JavaScript heap out of memory" or "Reached heap limit"). When the
274
+ * child produced no output at all, we still treat status 134/SIGABRT as likely
275
+ * heap OOM. If stderr/stdout contains a native crash diagnostic, the output
276
+ * evidence wins and we do not print heap guidance.
78
277
  */
79
278
  const childProcessLikelyOom = (err) => {
80
279
  if (!err || typeof err !== 'object')
@@ -97,6 +296,23 @@ const childProcessLikelyOom = (err) => {
97
296
  return false;
98
297
  return e.status === 134 || e.signal === 'SIGABRT';
99
298
  };
299
+ const childProcessLikelyNativeAbort = (err) => {
300
+ if (!err || typeof err !== 'object')
301
+ return false;
302
+ const e = err;
303
+ const hasNativeAbortSignature = (v) => {
304
+ const text = (Buffer.isBuffer(v) ? v.toString('utf8') : typeof v === 'string' ? v : '').toLowerCase();
305
+ if (!text)
306
+ return false;
307
+ return (text.includes('napi::error') ||
308
+ text.includes('libc++abi: terminating') ||
309
+ text.includes('abort trap') ||
310
+ text.includes('native stack') ||
311
+ text.includes('native worker') ||
312
+ text.includes('native binding'));
313
+ };
314
+ return [e.message, e.stderr, e.stdout].some((v) => hasNativeAbortSignature(v));
315
+ };
100
316
  const forceHeapOOMForTestIfEnabled = () => {
101
317
  if (process.env.GITNEXUS_TEST_FORCE_HEAP_OOM !== '1')
102
318
  return;
@@ -107,7 +323,7 @@ const forceHeapOOMForTestIfEnabled = () => {
107
323
  chunks.push('x'.repeat(1024 * 1024));
108
324
  };
109
325
  /** Re-exec the process with a 16GB heap and larger stack if we're currently below that. */
110
- function ensureHeap() {
326
+ async function ensureHeap() {
111
327
  const nodeOpts = process.env.NODE_OPTIONS || '';
112
328
  if (nodeOpts.includes('--max-old-space-size'))
113
329
  return false;
@@ -119,23 +335,30 @@ function ensureHeap() {
119
335
  const cliFlags = [HEAP_FLAG];
120
336
  if (!nodeOpts.includes('--stack-size'))
121
337
  cliFlags.push(STACK_FLAG);
122
- try {
123
- execFileSync(process.execPath, [...cliFlags, ...process.argv.slice(1)], {
124
- stdio: 'inherit',
125
- env: { ...process.env, NODE_OPTIONS: `${nodeOpts} ${HEAP_FLAG}`.trim() },
126
- });
127
- }
128
- catch (e) {
129
- if (childProcessLikelyOom(e)) {
338
+ const childArgs = [...cliFlags, ...process.argv.slice(1)];
339
+ const childEnv = {
340
+ ...process.env,
341
+ NODE_OPTIONS: `${nodeOpts} ${HEAP_FLAG}`.trim(),
342
+ };
343
+ if (shouldBridgeRespawnProgressTty())
344
+ childEnv[RESPAWN_PROGRESS_ENV] = '1';
345
+ const childExit = await runRespawnedAnalyze(childArgs, childEnv);
346
+ if (childExit.status !== 0 || childExit.signal) {
347
+ if (childProcessLikelyOom(childExit)) {
130
348
  cliError(` Analysis likely ran out of memory.\n` +
131
349
  ` Retry with a larger heap if your machine allows it:\n` +
132
350
  ` NODE_OPTIONS="--max-old-space-size=24576" gitnexus analyze [your-args]\n` +
133
351
  ` (Windows: set NODE_OPTIONS=--max-old-space-size=24576 && gitnexus analyze [your-args])\n` +
134
352
  ` If this persists, it may be a native crash unrelated to heap size.\n`, { recoveryHint: 'heap-oom-respawn' });
135
353
  }
136
- const status = typeof e === 'object' && e !== null && 'status' in e && typeof e.status === 'number'
137
- ? e.status
138
- : 1;
354
+ else if (childProcessLikelyNativeAbort(childExit)) {
355
+ cliError(` Analysis aborted in a native worker or native binding path.\n` +
356
+ ` Try one of these recovery paths:\n` +
357
+ ` gitnexus analyze --workers 0\n` +
358
+ ` npm uninstall -g gitnexus && npm install -g gitnexus@latest\n` +
359
+ ` Use Node 22 LTS if you are on a newer non-LTS runtime.\n`, { recoveryHint: 'native-worker-abort' });
360
+ }
361
+ const status = typeof childExit.status === 'number' && childExit.status !== 0 ? childExit.status : 1;
139
362
  process.exitCode = status;
140
363
  }
141
364
  return true;
@@ -157,6 +380,7 @@ const ANALYZE_CLI_ENV_KEYS = [
157
380
  'GITNEXUS_EMBEDDING_BATCH_SIZE',
158
381
  'GITNEXUS_EMBEDDING_SUB_BATCH_SIZE',
159
382
  'GITNEXUS_EMBEDDING_DEVICE',
383
+ 'GITNEXUS_ANALYZE_PROGRESS_ACTIVE',
160
384
  ];
161
385
  const snapshotAnalyzeEnv = () => {
162
386
  const snap = {};
@@ -188,7 +412,7 @@ const restoreAnalyzeEnv = (snap) => {
188
412
  */
189
413
  export const shouldGenerateCommunitySkillFiles = (options, pipelineResult) => Boolean(options?.skills && pipelineResult && !options?.indexOnly);
190
414
  export const analyzeCommand = async (inputPath, options) => {
191
- if (ensureHeap())
415
+ if (await ensureHeap())
192
416
  return;
193
417
  forceHeapOOMForTestIfEnabled();
194
418
  // Install fatal handlers immediately after re-exec resolution so any
@@ -362,7 +586,7 @@ const analyzeCommandImpl = async (inputPath, options) => {
362
586
  console.log(`${maxFileSizeBanner}\n`);
363
587
  }
364
588
  // ── CLI progress bar setup ─────────────────────────────────────────
365
- const bar = new cliProgress.SingleBar({
589
+ const barOptions = {
366
590
  format: ' {bar} {percentage}% | {phase}',
367
591
  barCompleteChar: '\u2588',
368
592
  barIncompleteChar: '\u2591',
@@ -371,7 +595,16 @@ const analyzeCommandImpl = async (inputPath, options) => {
371
595
  autopadding: true,
372
596
  clearOnComplete: false,
373
597
  stopOnComplete: false,
374
- }, cliProgress.Presets.shades_grey);
598
+ };
599
+ if (process.env[RESPAWN_PROGRESS_ENV] === '1' && process.stderr.isTTY !== true) {
600
+ // Heap respawn pipes stderr so the parent can classify native/OOM crashes.
601
+ // The parent was a real TTY when it opted into this env var, so forward
602
+ // ANSI cursor controls through the pipe instead of cli-progress' non-TTY
603
+ // newline mode. That keeps one-line redraw UX while retaining stderr tail
604
+ // capture for diagnostics.
605
+ barOptions.terminal = createAnsiPipeTerminal(process.stderr);
606
+ }
607
+ const bar = new cliProgress.SingleBar(barOptions, cliProgress.Presets.shades_grey);
375
608
  bar.start(100, 0, { phase: 'Initializing...' });
376
609
  // Graceful SIGINT handling. Pino's default destination is `sync: false`
377
610
  // (buffered) — flush before exit so in-flight records reach stderr.
@@ -413,6 +646,7 @@ const analyzeCommandImpl = async (inputPath, options) => {
413
646
  console.warn = barLog;
414
647
  // eslint-disable-next-line no-console -- intentional console-routing for progress bar UX
415
648
  console.error = barLog;
649
+ process.env.GITNEXUS_ANALYZE_PROGRESS_ACTIVE = '1';
416
650
  // Track elapsed time per phase
417
651
  let lastPhaseLabel = 'Initializing...';
418
652
  let phaseStart = Date.now();
@@ -6,6 +6,20 @@ import { glob } from 'glob';
6
6
  import { createIgnoreFilter } from '../../config/ignore-service.js';
7
7
  import { logger } from '../logger.js';
8
8
  const READ_CONCURRENCY = 32;
9
+ const ANALYZE_PROGRESS_ACTIVE_ENV = 'GITNEXUS_ANALYZE_PROGRESS_ACTIVE';
10
+ const warnLargeFileSkip = (message) => {
11
+ if (process.env[ANALYZE_PROGRESS_ACTIVE_ENV] === '1') {
12
+ // analyze.ts routes console.warn through the progress bar logger while
13
+ // the bar is active. Emitting the operator-facing large-file notice there
14
+ // avoids raw pino NDJSON corrupting the one-line progress display in the
15
+ // heap-respawn child, whose stderr is intentionally piped for crash
16
+ // classification.
17
+ // eslint-disable-next-line no-console -- intentionally routed by analyze progress UI
18
+ console.warn(message);
19
+ return;
20
+ }
21
+ logger.warn(message);
22
+ };
9
23
  /**
10
24
  * Phase 1: Scan repository — stat files to get paths + sizes, no content loaded.
11
25
  * Memory: ~10MB for 100K files vs ~1GB+ with content.
@@ -50,7 +64,7 @@ export const walkRepositoryPaths = async (repoPath, onProgress) => {
50
64
  const isDefault = maxFileSizeBytes === DEFAULT_MAX_FILE_SIZE_BYTES;
51
65
  const isOverrideUnset = !process.env.GITNEXUS_MAX_FILE_SIZE;
52
66
  const suffix = isDefault ? ', likely generated/vendored' : '';
53
- logger.warn(` Skipped ${skippedLarge} large files (>${maxFileSizeBytes / 1024}KB${suffix})`);
67
+ warnLargeFileSkip(` Skipped ${skippedLarge} large files (>${maxFileSizeBytes / 1024}KB${suffix})`);
54
68
  // Always show at least the first few paths so users can diagnose why
55
69
  // edges are missing from a specific file (issue #1659). The full list is
56
70
  // gated behind GITNEXUS_VERBOSE=1 to avoid flooding output on repos with
@@ -61,17 +75,17 @@ export const walkRepositoryPaths = async (repoPath, onProgress) => {
61
75
  const showAll = isVerboseIngestionEnabled() || skippedLargePaths.length <= SKIPPED_PREVIEW_CAP;
62
76
  const preview = showAll ? skippedLargePaths : skippedLargePaths.slice(0, SKIPPED_PREVIEW_CAP);
63
77
  for (const p of preview) {
64
- logger.warn(` - ${p}`);
78
+ warnLargeFileSkip(` - ${p}`);
65
79
  }
66
80
  if (!showAll) {
67
81
  const remaining = skippedLargePaths.length - SKIPPED_PREVIEW_CAP;
68
- logger.warn(` ...and ${remaining} more (set GITNEXUS_VERBOSE=1 to list them all)`);
82
+ warnLargeFileSkip(` ...and ${remaining} more (set GITNEXUS_VERBOSE=1 to list them all)`);
69
83
  }
70
84
  // Only hint about the env var when the user has not set it at all. An
71
85
  // explicit GITNEXUS_MAX_FILE_SIZE=512 happens to resolve to the same
72
86
  // bytes as the default but the operator clearly already knows the knob.
73
87
  if (isDefault && isOverrideUnset) {
74
- logger.warn(` Set GITNEXUS_MAX_FILE_SIZE=<KB> to include files above the default cap.`);
88
+ warnLargeFileSkip(` Set GITNEXUS_MAX_FILE_SIZE=<KB> to include files above the default cap.`);
75
89
  }
76
90
  }
77
91
  return entries;
@@ -24,7 +24,7 @@ import { createASTCache } from '../ast-cache.js';
24
24
  import { getLanguageFromFilename } from '../../../_shared/index.js';
25
25
  import { readFileContents } from '../filesystem-walker.js';
26
26
  import { isLanguageAvailable } from '../../tree-sitter/parser-loader.js';
27
- import { createWorkerPool } from '../workers/worker-pool.js';
27
+ import { createWorkerPool, WorkerPoolInitializationError } from '../workers/worker-pool.js';
28
28
  import { extractFetchCallsFromFiles } from '../call-processor.js';
29
29
  import fs from 'node:fs';
30
30
  import path from 'node:path';
@@ -158,18 +158,24 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
158
158
  const MIN_FILES_FOR_WORKERS = options?.workerThresholdsForTest?.minFiles ?? 15;
159
159
  const MIN_BYTES_FOR_WORKERS = options?.workerThresholdsForTest?.minBytes ?? 512 * 1024;
160
160
  const totalBytes = parseableScanned.reduce((s, f) => s + f.size, 0);
161
- // Create worker pool once, reuse across chunks.
161
+ // Create worker pool lazily, reuse across cache-miss chunks.
162
162
  //
163
163
  // `workerPoolSize === 0` is a programmatic equivalent of `skipWorkers:
164
164
  // true` per the `PipelineOptions.workerPoolSize` contract. Short-
165
- // circuiting here avoids constructing a useless pool that rejects
166
- // every dispatch (with a `Worker pool parsing stopped` warn log per
167
- // chunk) just to fall back to the sequential path via the error
168
- // catch the gate honors the docstring directly.
169
- let workerPool;
170
- if (!options?.skipWorkers &&
165
+ // circuiting here avoids constructing a useless pool. The pool is
166
+ // intentionally NOT created before parse-cache lookup: a warm-cache
167
+ // all-hit run should replay cached worker output without loading
168
+ // parse-worker.js or any tree-sitter/N-API native bindings.
169
+ const shouldUseWorkers = !options?.skipWorkers &&
171
170
  options?.workerPoolSize !== 0 &&
172
- (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS)) {
171
+ (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS);
172
+ let workerPool;
173
+ let workerPoolDisabled = false;
174
+ const getOrCreateWorkerPool = () => {
175
+ if (!shouldUseWorkers || workerPoolDisabled)
176
+ return undefined;
177
+ if (workerPool)
178
+ return workerPool;
173
179
  try {
174
180
  // U20.U3 test-only injection: integration tests pass a custom
175
181
  // worker script URL via `workerUrlForTest` (mirrors the
@@ -188,11 +194,14 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
188
194
  }
189
195
  }
190
196
  workerPool = createWorkerPool(workerUrl, options?.workerPoolSize);
197
+ return workerPool;
191
198
  }
192
199
  catch (err) {
200
+ workerPoolDisabled = true;
193
201
  logger.warn({ err: err.message }, 'Worker pool creation failed, using sequential fallback:');
202
+ return undefined;
194
203
  }
195
- }
204
+ };
196
205
  let filesParsedSoFar = 0;
197
206
  // Two caches with different lifetimes:
198
207
  // - `astCache` (chunk-local, cleared between chunks) — call /
@@ -300,12 +309,19 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
300
309
  // operators running `gitnexus analyze --verbose` in production
301
310
  // never saw the log (M3 from PR #1693 review).
302
311
  const chunkStartMs = verboseThroughputLog ? Date.now() : null;
303
- const chunkContents = await chunkContentPromises[chunkIdx];
312
+ const chunkContentPromise = chunkContentPromises[chunkIdx];
313
+ if (!chunkContentPromise) {
314
+ throw new Error(`Missing prefetched parse chunk ${chunkIdx + 1}/${numChunks}`);
315
+ }
316
+ const chunkContents = await chunkContentPromise;
304
317
  chunkContentPromises[chunkIdx] = undefined; // release the in-memory copy
305
318
  startChunkPrefetch(chunkIdx + parseChunkConcurrency);
306
- const chunkFiles = chunkPaths
307
- .filter((p) => chunkContents.has(p))
308
- .map((p) => ({ path: p, content: chunkContents.get(p) }));
319
+ const chunkFiles = [];
320
+ for (const p of chunkPaths) {
321
+ const content = chunkContents.get(p);
322
+ if (content !== undefined)
323
+ chunkFiles.push({ path: p, content });
324
+ }
309
325
  // Compute the chunk's content-hash signature (if cache available).
310
326
  let chunkHash = null;
311
327
  if (parseCache) {
@@ -316,7 +332,7 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
316
332
  chunkHash = computeChunkHash(entries);
317
333
  }
318
334
  let chunkWorkerData;
319
- const cachedRaw = chunkHash ? parseCache.entries.get(chunkHash) : undefined;
335
+ const cachedRaw = chunkHash && parseCache ? parseCache.entries.get(chunkHash) : undefined;
320
336
  // Track every chunk hash we touched so the orchestrator can
321
337
  // prune stale entries (chunks whose composition no longer
322
338
  // corresponds to a live chunk in the current scan) before saving.
@@ -328,7 +344,7 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
328
344
  chunkCacheHits++;
329
345
  chunkWorkerData = mergeChunkResults(graph, symbolTable, cachedRaw);
330
346
  if (isDev) {
331
- logger.info(`📦 parse-cache HIT: chunk ${chunkIdx + 1}/${numChunks} (${chunkFiles.length} files, ${chunkHash.slice(0, 8)})`);
347
+ logger.info(`📦 parse-cache HIT: chunk ${chunkIdx + 1}/${numChunks} (${chunkFiles.length} files, ${chunkHash?.slice(0, 8) ?? 'unknown'})`);
332
348
  }
333
349
  // Progress update so UI advances even on a cache hit.
334
350
  const cachedFiles = chunkFiles.length;
@@ -352,7 +368,7 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
352
368
  // them under the chunk hash for the next run.
353
369
  chunkCacheMisses++;
354
370
  const rawResults = [];
355
- chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, scopeTreeCache, (current, _total, filePath) => {
371
+ const progressForChunk = (current, _total, filePath) => {
356
372
  const globalCurrent = filesParsedSoFar + current;
357
373
  // Parse phase covers 20-70 (M2). Deferred extraction handles 70-95.
358
374
  const parsingProgress = 20 + (globalCurrent / totalParseable) * 50;
@@ -367,10 +383,28 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
367
383
  nodesCreated: graph.nodeCount,
368
384
  },
369
385
  });
370
- }, workerPool,
371
- // Capture raw results only when we have a cache to write to —
372
- // otherwise we'd retain extra arrays for nothing.
373
- parseCache && chunkHash ? rawResults : undefined);
386
+ };
387
+ const activeWorkerPool = getOrCreateWorkerPool();
388
+ try {
389
+ chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, scopeTreeCache, progressForChunk, activeWorkerPool,
390
+ // Capture raw results only when we have a cache to write to —
391
+ // otherwise we'd retain extra arrays for nothing.
392
+ parseCache && chunkHash && activeWorkerPool ? rawResults : undefined);
393
+ }
394
+ catch (err) {
395
+ if (!(err instanceof WorkerPoolInitializationError))
396
+ throw err;
397
+ logger.warn({
398
+ err: err.message,
399
+ readinessFailures: err.readinessFailures,
400
+ }, 'Worker pool initialization failed, using sequential fallback:');
401
+ rawResults.length = 0;
402
+ workerPoolDisabled = true;
403
+ const failedPool = workerPool;
404
+ workerPool = undefined;
405
+ await failedPool?.terminate().catch(() => undefined);
406
+ chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, scopeTreeCache, progressForChunk, undefined, undefined);
407
+ }
374
408
  // Persist the raw results for this chunk hash. Sequential path
375
409
  // doesn't populate rawResults (it writes directly to graph), so
376
410
  // small repos without worker pool simply don't cache. That's fine.
@@ -686,9 +720,12 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
686
720
  const cachedSequentialChunkFiles = [];
687
721
  for (const chunkPaths of sequentialChunkPaths) {
688
722
  const chunkContents = await readFileContents(repoPath, chunkPaths);
689
- const chunkFiles = chunkPaths
690
- .filter((p) => chunkContents.has(p))
691
- .map((p) => ({ path: p, content: chunkContents.get(p) }));
723
+ const chunkFiles = [];
724
+ for (const p of chunkPaths) {
725
+ const content = chunkContents.get(p);
726
+ if (content !== undefined)
727
+ chunkFiles.push({ path: p, content });
728
+ }
692
729
  cachedSequentialChunkFiles.push(chunkFiles);
693
730
  astCache = createASTCache(chunkFiles.length);
694
731
  const sequentialHeritage = await extractExtractedHeritageFromFiles(chunkFiles, astCache);
@@ -149,6 +149,10 @@ export declare class WorkerPoolDispatchError extends Error {
149
149
  readonly quarantinedPaths: readonly string[];
150
150
  constructor(message: string, quarantinedPaths?: readonly string[]);
151
151
  }
152
+ export declare class WorkerPoolInitializationError extends WorkerPoolDispatchError {
153
+ readonly readinessFailures: readonly string[];
154
+ constructor(message: string, quarantinedPaths?: readonly string[], readinessFailures?: readonly string[]);
155
+ }
152
156
  interface ResolvedWorkerPoolOptions {
153
157
  subBatchSize: number;
154
158
  subBatchMaxBytes: number;
@@ -77,6 +77,14 @@ export class WorkerPoolDispatchError extends Error {
77
77
  this.quarantinedPaths = quarantinedPaths;
78
78
  }
79
79
  }
80
+ export class WorkerPoolInitializationError extends WorkerPoolDispatchError {
81
+ readinessFailures;
82
+ constructor(message, quarantinedPaths = [], readinessFailures = []) {
83
+ super(message, quarantinedPaths);
84
+ this.name = 'WorkerPoolInitializationError';
85
+ this.readinessFailures = readinessFailures;
86
+ }
87
+ }
80
88
  /**
81
89
  * Max files to send to a worker in a single postMessage.
82
90
  * Keeps structured-clone memory bounded per sub-batch.
@@ -342,6 +350,7 @@ export const createWorkerPool = (workerUrl, poolSize, options) => {
342
350
  // 1100+ LOC of pool plumbing. Public worker-pool API is unchanged —
343
351
  // `getQuarantinedPaths()` still returns the same defensive copy.
344
352
  const quarantine = createQuarantine();
353
+ const initialReadinessFailures = [];
345
354
  // Per-slot consecutive-failure counter (F6): replaces the prior pool-wide
346
355
  // scalar so a chronically-failing slot trips the breaker on its own
347
356
  // failure streak instead of being masked by another slot's successes.
@@ -385,6 +394,7 @@ export const createWorkerPool = (workerUrl, poolSize, options) => {
385
394
  await waitForWorkerReady(w);
386
395
  }
387
396
  catch (err) {
397
+ initialReadinessFailures.push(err instanceof Error ? err.message : String(err));
388
398
  logger.warn({
389
399
  workerIndex: i,
390
400
  err: err instanceof Error ? err.message : String(err),
@@ -412,7 +422,10 @@ export const createWorkerPool = (workerUrl, poolSize, options) => {
412
422
  if (items.length === 0)
413
423
  return [];
414
424
  if (activeSlots.size === 0) {
415
- throw new WorkerPoolDispatchError('Worker pool has no active workers', []);
425
+ const detail = initialReadinessFailures.length > 0
426
+ ? ` after initial ready handshake: ${initialReadinessFailures.join('; ')}`
427
+ : '';
428
+ throw new WorkerPoolInitializationError(`Worker pool has no active workers${detail}`, [], initialReadinessFailures);
416
429
  }
417
430
  // Layer 3: filter out quarantined paths so a known-bad file never reaches
418
431
  // a worker again this pool lifetime. The caller queries