gitnexus 1.6.8-rc.8 → 1.6.8-rc.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -147,6 +147,12 @@ interface LanguageProviderConfig {
147
147
  * `undefined` when no constraints exist / the node isn't a templated
148
148
  * function. Languages without SFINAE / concept semantics leave this
149
149
  * undefined and the disambiguation is a pass-through.
150
+ *
151
+ * Cloneability contract: the returned payload crosses the worker boundary
152
+ * via structured clone, so it MUST be structured-clone-safe (no functions,
153
+ * symbols, or tree-sitter `SyntaxNode`s — only plain data). Wrap the return
154
+ * with `assertCloneable` from `workers/clone-safety.ts` so a future leak is a
155
+ * compile error at the source instead of a runtime DataCloneError (#2143).
150
156
  */
151
157
  readonly extractTemplateConstraints?: (definitionNode: SyntaxNode) => unknown;
152
158
  /** Override the default node label for definition.function captures.
@@ -268,8 +274,12 @@ interface LanguageProviderConfig {
268
274
  * disk store WITHOUT a main-thread re-parse. The main thread restores them
269
275
  * via the matching `ScopeResolver.applyCaptureSideChannel` hook.
270
276
  *
271
- * MUST return plain data (objects / arrays / primitives) so it round-trips
272
- * through `JSON.stringify` + the parsedfile-store interning reviver.
277
+ * Cloneability contract: MUST return plain data (objects / arrays /
278
+ * primitives — no functions, symbols, or tree-sitter `SyntaxNode`s) so it
279
+ * survives BOTH the worker→main structured clone AND `JSON.stringify` + the
280
+ * parsedfile-store interning reviver. Wrap the return with `assertCloneable`
281
+ * from `workers/clone-safety.ts` so a future non-serializable leak is a
282
+ * compile error at the source instead of a runtime DataCloneError (#2143).
273
283
  *
274
284
  * Default: undefined (provider has no capture-time module-level side effects).
275
285
  */
@@ -38,7 +38,8 @@ import { cCallConfig, cppCallConfig } from '../call-extractors/configs/c-cpp.js'
38
38
  import { stripUeMacros } from '../cpp-ue-preprocessor.js';
39
39
  import { emitCScopeCaptures, interpretCImport, interpretCTypeBinding, cArityCompatibility, cBindingScopeFor, cImportOwningScope, cReceiverBinding, collectCStaticLinkageSideChannel, } from './c/index.js';
40
40
  import { emitCppScopeCaptures, interpretCppImport, interpretCppTypeBinding, cppArityCompatibility, cppBindingScopeFor, cppImportOwningScope, cppReceiverBinding, collectCppCaptureSideChannel, } from './cpp/index.js';
41
- import { extractCppTemplateConstraints } from './cpp/constraint-extractor.js';
41
+ import { extractCppTemplateConstraints, } from './cpp/constraint-extractor.js';
42
+ import { assertCloneable } from '../workers/clone-safety.js';
42
43
  const C_BUILT_INS = new Set([
43
44
  'printf',
44
45
  'fprintf',
@@ -358,7 +359,10 @@ export const cProvider = defineLanguage({
358
359
  // `static` functions look non-file-local on the main thread and leak into
359
360
  // cross-file global free-call resolution / wildcard imports. See
360
361
  // `c/capture-side-channel.ts`.
361
- collectCaptureSideChannel: collectCStaticLinkageSideChannel,
362
+ // `assertCloneable` is a runtime identity; it makes a future non-serializable
363
+ // value in the side-channel payload a compile error here, at the source, rather
364
+ // than a DataCloneError at the worker boundary (#2143).
365
+ collectCaptureSideChannel: (filePath) => assertCloneable(collectCStaticLinkageSideChannel(filePath)),
362
366
  interpretImport: interpretCImport,
363
367
  interpretTypeBinding: interpretCTypeBinding,
364
368
  bindingScopeFor: cBindingScopeFor,
@@ -431,7 +435,7 @@ export const cppProvider = defineLanguage({
431
435
  // just populated for this file into plain data on `ParsedFile.captureSideChannel`,
432
436
  // so the main thread can restore them via `applyCaptureSideChannel` WITHOUT a
433
437
  // re-parse (#1983). See `cpp/capture-side-channel.ts`.
434
- collectCaptureSideChannel: collectCppCaptureSideChannel,
438
+ collectCaptureSideChannel: (filePath) => assertCloneable(collectCppCaptureSideChannel(filePath)),
435
439
  interpretImport: interpretCppImport,
436
440
  interpretTypeBinding: interpretCppTypeBinding,
437
441
  bindingScopeFor: cppBindingScopeFor,
@@ -482,5 +486,8 @@ function extractCppTemplateConstraintsForProvider(definitionNode) {
482
486
  }
483
487
  break;
484
488
  }
485
- return extractCppTemplateConstraints(templateDecl, declarator);
489
+ // Guard the boundary at the source: a future non-cloneable member of the
490
+ // constraint payload becomes a compile error here, not a runtime
491
+ // DataCloneError at the worker post (#2143).
492
+ return assertCloneable(extractCppTemplateConstraints(templateDecl, declarator));
486
493
  }
@@ -10,6 +10,7 @@ import { SupportedLanguages } from '../../../_shared/index.js';
10
10
  import { createClassExtractor } from '../class-extractors/generic.js';
11
11
  import { kotlinClassConfig } from '../class-extractors/configs/jvm.js';
12
12
  import { defineLanguage } from '../language-provider.js';
13
+ import { assertCloneable } from '../workers/clone-safety.js';
13
14
  import { kotlinTypeConfig } from '../type-extractors/jvm.js';
14
15
  import { kotlinExportChecker } from '../export-detection.js';
15
16
  import { createImportResolver } from '../import-resolvers/resolver-factory.js';
@@ -166,7 +167,10 @@ export const kotlinProvider = defineLanguage({
166
167
  // so the main thread can restore them via `applyCaptureSideChannel` WITHOUT a
167
168
  // re-parse (#1983). Without this, companion/static dispatch emits no CALLS
168
169
  // edges on the worker path. See `kotlin/capture-side-channel.ts`.
169
- collectCaptureSideChannel: collectKotlinCaptureSideChannel,
170
+ // `assertCloneable` is a runtime identity; it makes a future non-serializable
171
+ // value in the side-channel payload a compile error here, at the source, rather
172
+ // than a DataCloneError at the worker boundary (#2143).
173
+ collectCaptureSideChannel: (filePath) => assertCloneable(collectKotlinCaptureSideChannel(filePath)),
170
174
  interpretImport: interpretKotlinImport,
171
175
  interpretTypeBinding: interpretKotlinTypeBinding,
172
176
  bindingScopeFor: kotlinBindingScopeFor,
@@ -144,6 +144,27 @@ chunkHash) => {
144
144
  .join(', ');
145
145
  logger.warn(` Skipped unsupported languages: ${summary}`);
146
146
  }
147
+ // Clone-safety telemetry (#2112): files whose parse output carried a value
148
+ // the structured-clone algorithm couldn't serialize across the worker
149
+ // boundary. The worker sanitized/dropped the offending value so the run
150
+ // could complete; surface the (rare) data loss so it's visible and the
151
+ // offending extractor can be fixed at source.
152
+ const skippedPaths = [];
153
+ for (const result of chunkResults) {
154
+ for (const entry of result.skippedPaths ?? [])
155
+ skippedPaths.push(entry);
156
+ }
157
+ if (skippedPaths.length > 0) {
158
+ // Keep the per-file reason ("stripped N non-serializable value(s) from nodes: …" /
159
+ // "dropped non-serializable parsedFiles entry") — it distinguishes a
160
+ // recoverable strip from a whole-record drop, which a path-only line loses.
161
+ const shown = skippedPaths
162
+ .slice(0, 10)
163
+ .map((e) => `${e.path} (${e.reason})`)
164
+ .join(', ');
165
+ const more = skippedPaths.length > 10 ? ` …and ${skippedPaths.length - 10} more` : '';
166
+ logger.warn(` Sanitized ${skippedPaths.length} file(s) with non-serializable parse output: ${shown}${more}`);
167
+ }
147
168
  onFileProgress?.(total, total, 'done');
148
169
  return chunkResults;
149
170
  };
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Structured-clone safety for the worker result boundary (#2112).
3
+ *
4
+ * A parse worker delivers its accumulated result to the main thread via
5
+ * `parentPort.postMessage(...)`. Node serializes that payload with the
6
+ * structured-clone algorithm SYNCHRONOUSLY on the worker thread, and it
7
+ * THROWS a `DataCloneError` the instant it meets a value it can't serialize —
8
+ * a function, a symbol, a Promise, a WeakMap, etc. The reporter of #2112 hit
9
+ * exactly this: a node record whose `properties` carried an own-enumerable
10
+ * value pointing at a native function (`function toString() { [native code] }
11
+ * could not be cloned`). One such value aborted the entire parse phase,
12
+ * because the worker re-posts the throw as `{type:'error'}` which the pool
13
+ * counts as a worker death — and under `GITNEXUS_WORKER_POOL_SIZE=1` the same
14
+ * graph re-throws on every respawn until the slot's budget is exhausted.
15
+ *
16
+ * This module is the safety net. It runs ONLY after a real clone failure on
17
+ * the fast-path post (zero overhead on healthy runs), and rewrites the
18
+ * boundary-crossing arrays so the result becomes cloneable: a non-cloneable
19
+ * value inside a plain extraction record is dropped (the record is otherwise
20
+ * kept — strictly-missing data, never wrong), and a `ParsedFile` that can't be
21
+ * made cloneable is dropped whole so scope-resolution re-derives it on the
22
+ * main thread (where there is no clone boundary) with intact edge data.
23
+ *
24
+ * Language-neutral by construction: it keys on value shape and field name
25
+ * only, never on a language (AGENTS.md shared-pipeline rule). The strip
26
+ * semantics mirror what the store path's `JSON.stringify` already silently
27
+ * drops, so store / no-store / cold / warm runs converge on the same graph.
28
+ */
29
+ /** A file whose parse result was sanitized or dropped at the clone boundary. */
30
+ export interface SkippedPath {
31
+ /** Best-effort source path of the offending record (or `(unknown)`). */
32
+ path: string;
33
+ /** Human-readable reason, e.g. "stripped 1 non-serializable value(s) from nodes: properties.toString". */
34
+ reason: string;
35
+ }
36
+ /**
37
+ * True iff `value` survives Node's structured-clone algorithm (the same
38
+ * algorithm `postMessage` uses). This is the authoritative probe — it matches
39
+ * the real failure exactly, including Map/Set/Date/RegExp/TypedArray support,
40
+ * so it never false-positives on the `Scope` Maps that clone fine.
41
+ */
42
+ export declare function isStructuredCloneable(value: unknown): boolean;
43
+ /** The leaf values the structured-clone algorithm copies verbatim. */
44
+ type CloneablePrimitive = undefined | null | boolean | number | bigint | string;
45
+ /**
46
+ * Maps `T` to itself when every value reachable from it is structured-clone
47
+ * safe, and to a type containing `never` at the first offending property
48
+ * otherwise. A function or symbol — the values `postMessage` rejects — becomes
49
+ * `never`, so a struct carrying one is no longer assignable to its own
50
+ * `Cloneable<T>` and `assertCloneable` rejects it, naming the bad key.
51
+ *
52
+ * Implemented as a homomorphic mapped type (`{ [K in keyof T]: … }`) so it
53
+ * preserves `interface` shapes and `readonly` modifiers and works WITHOUT
54
+ * requiring the payload types to carry an index signature — sidestepping the
55
+ * "closed interface is not assignable to a recursive index-signature type" wall
56
+ * that blocked the value-typed-`Cloneable` approach (#2143). `Map`/`Set`/array
57
+ * containers recurse into their element types; `Date`/`RegExp` are clone-safe
58
+ * leaves.
59
+ */
60
+ /** True iff `T` is `any` (the canonical `IsAny` probe: only `any` satisfies `0 extends 1 & T`). */
61
+ type IsAny<T> = 0 extends 1 & T ? true : false;
62
+ export type Cloneable<T> = IsAny<T> extends true ? never : T extends CloneablePrimitive | Date | RegExp ? T : T extends (...args: never[]) => unknown ? never : T extends symbol ? never : T extends ReadonlyMap<infer K, infer V> ? ReadonlyMap<Cloneable<K>, Cloneable<V>> : T extends ReadonlySet<infer U> ? ReadonlySet<Cloneable<U>> : T extends readonly (infer U)[] ? T extends unknown[] ? Cloneable<U>[] : readonly Cloneable<U>[] : T extends object ? {
63
+ [K in keyof T]: Cloneable<T[K]>;
64
+ } : never;
65
+ /**
66
+ * Identity at runtime (zero cost — returns its argument unchanged); a
67
+ * compile-time assertion that `value` is structured-clone safe. Wrap a
68
+ * producer that feeds an `unknown` worker-result sink:
69
+ *
70
+ * collectCaptureSideChannel: (filePath) => assertCloneable(collectFoo(filePath))
71
+ *
72
+ * If `collectFoo`'s return type ever gains a non-cloneable member (a function, a
73
+ * `SyntaxNode`, …) the call fails to compile, pointing at the offending key.
74
+ *
75
+ * The parameter is a conditional type rather than an `extends Cloneable<T>`
76
+ * constraint because a self-referential constraint (`T extends Cloneable<T>`)
77
+ * is a "circular constraint" error in TypeScript. For a clone-safe `T` the
78
+ * parameter resolves to `T` (call type-checks as a plain identity); for an
79
+ * unsafe `T` it resolves to `Cloneable<T>` (which has `never` at the bad key),
80
+ * so the argument is rejected.
81
+ */
82
+ export declare function assertCloneable<T>(value: T extends Cloneable<T> ? T : Cloneable<T>): T;
83
+ export interface MakeCloneSafeOptions {
84
+ /**
85
+ * Array field names whose offending elements are DROPPED whole rather than
86
+ * stripped in place (e.g. `parsedFiles` — its `captureSideChannel` drives
87
+ * edge resolution, so a stripped-and-delivered file would ship WRONG edges;
88
+ * dropping it lets scope-resolution re-derive it on the main thread).
89
+ */
90
+ dropWholeElement: ReadonlySet<string>;
91
+ /** Field names to skip entirely (e.g. the `skippedPaths` field itself). */
92
+ skipFields?: ReadonlySet<string>;
93
+ /** Keys to probe for a file path when attributing a skip. */
94
+ pathKeys?: readonly string[];
95
+ }
96
+ /**
97
+ * Make a worker result's boundary-crossing array fields structured-cloneable,
98
+ * mutating `result` in place. Only arrays that actually contain a
99
+ * non-cloneable value are rewritten; everything else keeps referential
100
+ * identity. Returns the list of affected file paths for reporting.
101
+ *
102
+ * Call this after ANY failure of the fast-path post — a `DataCloneError`, OR a
103
+ * throwing getter's own error surfaced by structuredClone (the caller in
104
+ * `post-result.ts` recovers on any throw, not only `DataCloneError`).
105
+ */
106
+ export declare function makeWorkerResultCloneSafe(result: Record<string, unknown>, options: MakeCloneSafeOptions): {
107
+ skipped: SkippedPath[];
108
+ };
109
+ export {};
@@ -0,0 +1,465 @@
1
+ /**
2
+ * Structured-clone safety for the worker result boundary (#2112).
3
+ *
4
+ * A parse worker delivers its accumulated result to the main thread via
5
+ * `parentPort.postMessage(...)`. Node serializes that payload with the
6
+ * structured-clone algorithm SYNCHRONOUSLY on the worker thread, and it
7
+ * THROWS a `DataCloneError` the instant it meets a value it can't serialize —
8
+ * a function, a symbol, a Promise, a WeakMap, etc. The reporter of #2112 hit
9
+ * exactly this: a node record whose `properties` carried an own-enumerable
10
+ * value pointing at a native function (`function toString() { [native code] }
11
+ * could not be cloned`). One such value aborted the entire parse phase,
12
+ * because the worker re-posts the throw as `{type:'error'}` which the pool
13
+ * counts as a worker death — and under `GITNEXUS_WORKER_POOL_SIZE=1` the same
14
+ * graph re-throws on every respawn until the slot's budget is exhausted.
15
+ *
16
+ * This module is the safety net. It runs ONLY after a real clone failure on
17
+ * the fast-path post (zero overhead on healthy runs), and rewrites the
18
+ * boundary-crossing arrays so the result becomes cloneable: a non-cloneable
19
+ * value inside a plain extraction record is dropped (the record is otherwise
20
+ * kept — strictly-missing data, never wrong), and a `ParsedFile` that can't be
21
+ * made cloneable is dropped whole so scope-resolution re-derives it on the
22
+ * main thread (where there is no clone boundary) with intact edge data.
23
+ *
24
+ * Language-neutral by construction: it keys on value shape and field name
25
+ * only, never on a language (AGENTS.md shared-pipeline rule). The strip
26
+ * semantics mirror what the store path's `JSON.stringify` already silently
27
+ * drops, so store / no-store / cold / warm runs converge on the same graph.
28
+ */
29
+ /**
30
+ * True iff `value` survives Node's structured-clone algorithm (the same
31
+ * algorithm `postMessage` uses). This is the authoritative probe — it matches
32
+ * the real failure exactly, including Map/Set/Date/RegExp/TypedArray support,
33
+ * so it never false-positives on the `Scope` Maps that clone fine.
34
+ */
35
+ export function isStructuredCloneable(value) {
36
+ try {
37
+ structuredClone(value);
38
+ return true;
39
+ }
40
+ catch {
41
+ return false;
42
+ }
43
+ }
44
+ /**
45
+ * Identity at runtime (zero cost — returns its argument unchanged); a
46
+ * compile-time assertion that `value` is structured-clone safe. Wrap a
47
+ * producer that feeds an `unknown` worker-result sink:
48
+ *
49
+ * collectCaptureSideChannel: (filePath) => assertCloneable(collectFoo(filePath))
50
+ *
51
+ * If `collectFoo`'s return type ever gains a non-cloneable member (a function, a
52
+ * `SyntaxNode`, …) the call fails to compile, pointing at the offending key.
53
+ *
54
+ * The parameter is a conditional type rather than an `extends Cloneable<T>`
55
+ * constraint because a self-referential constraint (`T extends Cloneable<T>`)
56
+ * is a "circular constraint" error in TypeScript. For a clone-safe `T` the
57
+ * parameter resolves to `T` (call type-checks as a plain identity); for an
58
+ * unsafe `T` it resolves to `Cloneable<T>` (which has `never` at the bad key),
59
+ * so the argument is rejected.
60
+ */
61
+ export function assertCloneable(value) {
62
+ return value;
63
+ }
64
+ /**
65
+ * Recursion cap for the module's own traversal. An over-deep subtree is treated
66
+ * as non-cloneable rather than recursing to a stack overflow — without this, a
67
+ * deeply-nested record would throw `RangeError` inside the sanitizer and (since
68
+ * the recovery path is the safety net) re-arm the very cascade #2112 fixes. Set
69
+ * far below the observed ~3000-frame overflow and far above any real
70
+ * parse-result record (extraction records are shallow plain data). Note: this
71
+ * caps the module's recursion only; `structuredClone`'s own internal recursion
72
+ * (the `isStructuredCloneable` probe of non-plain objects) is bounded by that
73
+ * helper's catch-all, which turns a probe-side `RangeError` into a
74
+ * non-cloneable verdict — so do not narrow that catch.
75
+ */
76
+ const MAX_CLONE_DEPTH = 200;
77
+ /**
78
+ * True iff `key` is a canonical array-index string (`"0"`, `"1"`, … `< 2^32-1`)
79
+ * — i.e. one of the slots the numeric index loop already visits. Everything
80
+ * else returned by `Object.keys(array)` is a NON-index own-enumerable property
81
+ * (`arr.meta = …`), which the structured-clone algorithm ALSO serializes (and
82
+ * throws on if non-cloneable). The array branches of `containsNonCloneable` and
83
+ * `stripNonCloneable` use this to scan those extra keys in lockstep.
84
+ */
85
+ function isArrayIndexKey(key) {
86
+ const n = Number(key);
87
+ return Number.isInteger(n) && n >= 0 && n < 4294967295 && String(n) === key;
88
+ }
89
+ /**
90
+ * Non-allocating scan: returns true on the FIRST value structured-clone would
91
+ * reject. Used to decide whether an array (or element) needs rewriting at all,
92
+ * so clean arrays keep their referential identity and pay no copy cost.
93
+ */
94
+ function containsNonCloneable(value, seen, depth = 0) {
95
+ const t = typeof value;
96
+ if (t === 'function' || t === 'symbol')
97
+ return true;
98
+ if (value === null || t !== 'object')
99
+ return false;
100
+ // Depth bound: treat an over-deep subtree as non-cloneable (the element is
101
+ // then stripped/dropped) instead of overflowing the stack.
102
+ if (depth >= MAX_CLONE_DEPTH)
103
+ return true;
104
+ const obj = value;
105
+ // Cycles clone fine; don't recurse into one twice.
106
+ if (seen.has(obj))
107
+ return false;
108
+ // Structured-clone-native containers carry no non-cloneable payload of their
109
+ // own; their *contents* still need scanning (a Map value could be a fn).
110
+ if (obj instanceof Date || obj instanceof RegExp)
111
+ return false;
112
+ // Buffers/views usually clone, but a DETACHED one is rejected by
113
+ // structuredClone — probe rather than wave it through. No byteLength
114
+ // heuristic: a legitimately empty `new Uint8Array(0)` also has byteLength 0
115
+ // yet clones fine, so a length check would false-positive.
116
+ if (obj instanceof ArrayBuffer || ArrayBuffer.isView(obj))
117
+ return !isStructuredCloneable(obj);
118
+ seen.add(obj);
119
+ if (Array.isArray(obj)) {
120
+ for (let i = 0; i < obj.length; i++) {
121
+ if (containsNonCloneable(obj[i], seen, depth + 1))
122
+ return true;
123
+ }
124
+ // structuredClone also serializes an array's NON-index own-enumerable
125
+ // properties and throws on a non-cloneable one — scan them too (lockstep
126
+ // with stripNonCloneable's array branch; see isArrayIndexKey).
127
+ for (const key of Object.keys(obj)) {
128
+ if (isArrayIndexKey(key))
129
+ continue;
130
+ let child;
131
+ try {
132
+ child = obj[key];
133
+ }
134
+ catch {
135
+ return true; // a throwing getter can't be serialized either
136
+ }
137
+ if (containsNonCloneable(child, seen, depth + 1))
138
+ return true;
139
+ }
140
+ return false;
141
+ }
142
+ if (obj instanceof Map) {
143
+ for (const [k, v] of obj) {
144
+ if (containsNonCloneable(k, seen, depth + 1) || containsNonCloneable(v, seen, depth + 1))
145
+ return true;
146
+ }
147
+ return false;
148
+ }
149
+ if (obj instanceof Set) {
150
+ for (const v of obj) {
151
+ if (containsNonCloneable(v, seen, depth + 1))
152
+ return true;
153
+ }
154
+ return false;
155
+ }
156
+ // A non-plain object (Promise, WeakMap, class instance with internal slots)
157
+ // that structured clone can't handle: detect via the authoritative probe.
158
+ // Plain objects fall through to a property scan (cheap, no allocation).
159
+ const proto = Object.getPrototypeOf(obj);
160
+ if (proto !== Object.prototype && proto !== null) {
161
+ if (!isStructuredCloneable(obj))
162
+ return true;
163
+ return false;
164
+ }
165
+ for (const key of Object.keys(obj)) {
166
+ let child;
167
+ try {
168
+ child = obj[key];
169
+ }
170
+ catch {
171
+ // A getter that throws can't be serialized either — treat as non-cloneable.
172
+ return true;
173
+ }
174
+ if (containsNonCloneable(child, seen, depth + 1))
175
+ return true;
176
+ }
177
+ return false;
178
+ }
179
+ /** Record a strip at `path` (root → `(root)`); keeps the count + key path in sync. */
180
+ function recordStrip(ctx, path) {
181
+ ctx.stripped++;
182
+ ctx.keys.push(path === '' ? '(root)' : path);
183
+ }
184
+ /**
185
+ * Deep-copy `value`, replacing any value structured-clone would reject with
186
+ * `undefined` (which clones fine). Preserves primitives, arrays, plain
187
+ * objects, and the structured-clone-native containers (Date, RegExp, Map,
188
+ * Set, ArrayBuffer, TypedArray). Rebuilds only what it must — clean leaves are
189
+ * returned by reference. `path` is the dotted key path of `value` (for the
190
+ * diagnostic record).
191
+ */
192
+ function stripNonCloneable(value, ctx, depth = 0, path = '') {
193
+ const t = typeof value;
194
+ if (t === 'function' || t === 'symbol') {
195
+ recordStrip(ctx, path);
196
+ return undefined;
197
+ }
198
+ if (value === null || t !== 'object')
199
+ return value;
200
+ // Depth bound (mirrors containsNonCloneable): drop an over-deep subtree to
201
+ // `undefined` (itself cloneable, and a legal property value / array element)
202
+ // rather than overflowing the stack.
203
+ if (depth >= MAX_CLONE_DEPTH) {
204
+ recordStrip(ctx, path);
205
+ return undefined;
206
+ }
207
+ const obj = value;
208
+ // Memoized? Return the SAME stripped copy (preserves DAG shape; terminates
209
+ // cycles by returning the in-progress copy inserted before recursing below).
210
+ if (ctx.seen.has(obj))
211
+ return ctx.seen.get(obj);
212
+ // Leaf-like values: returned by reference, but still memoize the decision so
213
+ // a second alias resolves identically.
214
+ if (obj instanceof Date || obj instanceof RegExp) {
215
+ ctx.seen.set(obj, value);
216
+ return value;
217
+ }
218
+ if (obj instanceof ArrayBuffer || ArrayBuffer.isView(obj)) {
219
+ // Keep a live buffer/view (even an empty one); drop a detached one, which
220
+ // structuredClone rejects. The probe is exact — no byteLength heuristic.
221
+ if (!isStructuredCloneable(obj)) {
222
+ recordStrip(ctx, path);
223
+ ctx.seen.set(obj, undefined);
224
+ return undefined;
225
+ }
226
+ ctx.seen.set(obj, value);
227
+ return value;
228
+ }
229
+ // Containers: allocate the empty copy, memoize it BEFORE recursing, then fill
230
+ // — so a cycle/alias that re-enters gets this in-progress copy.
231
+ if (Array.isArray(obj)) {
232
+ const out = [];
233
+ ctx.seen.set(obj, out);
234
+ for (let i = 0; i < obj.length; i++)
235
+ out.push(stripNonCloneable(obj[i], ctx, depth + 1, `${path}[${i}]`));
236
+ // Carry NON-index own-enumerable props through the same strip (lockstep
237
+ // with containsNonCloneable): structuredClone serializes them, so a
238
+ // non-cloneable one must be stripped rather than left to throw on re-post.
239
+ for (const key of Object.keys(obj)) {
240
+ if (isArrayIndexKey(key))
241
+ continue;
242
+ const childPath = `${path}.${key}`;
243
+ let child;
244
+ try {
245
+ child = obj[key];
246
+ }
247
+ catch {
248
+ recordStrip(ctx, childPath);
249
+ continue;
250
+ }
251
+ out[key] = stripNonCloneable(child, ctx, depth + 1, childPath);
252
+ }
253
+ return out;
254
+ }
255
+ if (obj instanceof Map) {
256
+ // Scope limit (acceptable): object keys aren't identity-preserved across
257
+ // stripping. Parse-result Maps are primitive-keyed, so this never bites.
258
+ const out = new Map();
259
+ ctx.seen.set(obj, out);
260
+ for (const [k, v] of obj)
261
+ out.set(stripNonCloneable(k, ctx, depth + 1, `${path}<key>`), stripNonCloneable(v, ctx, depth + 1, `${path}<map>`));
262
+ return out;
263
+ }
264
+ if (obj instanceof Set) {
265
+ const out = new Set();
266
+ ctx.seen.set(obj, out);
267
+ for (const v of obj)
268
+ out.add(stripNonCloneable(v, ctx, depth + 1, `${path}<set>`));
269
+ return out;
270
+ }
271
+ const proto = Object.getPrototypeOf(obj);
272
+ if (proto !== Object.prototype && proto !== null) {
273
+ // Non-plain object that the probe already flagged as non-cloneable and
274
+ // that we can't safely reconstruct (Promise, WeakMap, class instance with
275
+ // internal slots). Drop it whole — memoize the decision so aliases agree.
276
+ if (!isStructuredCloneable(obj)) {
277
+ recordStrip(ctx, path);
278
+ ctx.seen.set(obj, undefined);
279
+ return undefined;
280
+ }
281
+ ctx.seen.set(obj, value);
282
+ return value;
283
+ }
284
+ const out = {};
285
+ ctx.seen.set(obj, out);
286
+ for (const key of Object.keys(obj)) {
287
+ const childPath = path === '' ? key : `${path}.${key}`;
288
+ let child;
289
+ try {
290
+ child = obj[key];
291
+ }
292
+ catch {
293
+ // A getter that throws is non-serializable — drop the property.
294
+ recordStrip(ctx, childPath);
295
+ continue;
296
+ }
297
+ out[key] = stripNonCloneable(child, ctx, depth + 1, childPath);
298
+ }
299
+ return out;
300
+ }
301
+ /** Keys checked (top-level and one level deep) to attribute a record to a file. */
302
+ const DEFAULT_PATH_KEYS = ['filePath', 'path', 'file'];
303
+ /** Read `obj[key]`, returning undefined if the access throws (throwing getter / Proxy trap). */
304
+ function safeGet(obj, key) {
305
+ try {
306
+ return obj[key];
307
+ }
308
+ catch {
309
+ return undefined;
310
+ }
311
+ }
312
+ /** Read a path key off a child object (one level deep); never throws. */
313
+ function pathFromChild(child, pathKeys) {
314
+ if (child === null || typeof child !== 'object')
315
+ return undefined;
316
+ const crec = child;
317
+ for (const pk of pathKeys) {
318
+ const v = safeGet(crec, pk);
319
+ if (typeof v === 'string')
320
+ return v;
321
+ }
322
+ return undefined;
323
+ }
324
+ /**
325
+ * Best-effort source-path extraction for reporting; never throws. Reads are
326
+ * defensive (a throwing getter / Proxy trap on a path-attribution key must not
327
+ * escape and abandon the sanitize — it would re-arm the fail-closed cascade).
328
+ */
329
+ function findFilePath(element, pathKeys) {
330
+ if (element === null || typeof element !== 'object')
331
+ return undefined;
332
+ const rec = element;
333
+ // Top level first — a ParsedFile carries `filePath` here.
334
+ for (const key of pathKeys) {
335
+ const v = safeGet(rec, key);
336
+ if (typeof v === 'string')
337
+ return v;
338
+ }
339
+ // Known child next — a ParsedNode carries its path at `properties.filePath`.
340
+ // Prefer it over the generic sweep so attribution is deterministic when a
341
+ // sibling child also happens to carry a path-like key.
342
+ const fromProps = pathFromChild(safeGet(rec, 'properties'), pathKeys);
343
+ if (fromProps !== undefined)
344
+ return fromProps;
345
+ // Generic one-level sweep as the fallback for other shapes.
346
+ let keys;
347
+ try {
348
+ keys = Object.keys(rec);
349
+ }
350
+ catch {
351
+ return undefined; // a Proxy ownKeys trap that throws — give up on attribution
352
+ }
353
+ for (const key of keys) {
354
+ if (key === 'properties')
355
+ continue; // already checked above
356
+ const fromChild = pathFromChild(safeGet(rec, key), pathKeys);
357
+ if (fromChild !== undefined)
358
+ return fromChild;
359
+ }
360
+ return undefined;
361
+ }
362
+ /**
363
+ * Make a worker result's boundary-crossing array fields structured-cloneable,
364
+ * mutating `result` in place. Only arrays that actually contain a
365
+ * non-cloneable value are rewritten; everything else keeps referential
366
+ * identity. Returns the list of affected file paths for reporting.
367
+ *
368
+ * Call this after ANY failure of the fast-path post — a `DataCloneError`, OR a
369
+ * throwing getter's own error surfaced by structuredClone (the caller in
370
+ * `post-result.ts` recovers on any throw, not only `DataCloneError`).
371
+ */
372
+ export function makeWorkerResultCloneSafe(result, options) {
373
+ const pathKeys = options.pathKeys ?? DEFAULT_PATH_KEYS;
374
+ const skipped = [];
375
+ for (const field of Object.keys(result)) {
376
+ if (options.skipFields?.has(field))
377
+ continue;
378
+ const value = result[field];
379
+ if (!Array.isArray(value))
380
+ continue;
381
+ const dropWhole = options.dropWholeElement.has(field);
382
+ // `out` is built lazily — only once a dirty element appears — by copying the
383
+ // clean prefix, so a fully-clean array is never rebuilt and keeps its
384
+ // referential identity (no field reassignment). A dirty element is scanned
385
+ // (containsNonCloneable) and then stripped (stripNonCloneable): two passes,
386
+ // deliberately. The non-allocating pre-scan is exactly what lets CLEAN
387
+ // elements stay by reference (zero-copy) — replacing it with an
388
+ // always-allocating strip would regress that. This whole path is
389
+ // failure-path-only (the fast post already threw), so the second pass over
390
+ // the rare dirty element is acceptable.
391
+ let out = null;
392
+ for (let i = 0; i < value.length; i++) {
393
+ const element = value[i];
394
+ try {
395
+ if (!containsNonCloneable(element, new WeakSet())) {
396
+ if (out)
397
+ out.push(element);
398
+ continue;
399
+ }
400
+ if (!out)
401
+ out = value.slice(0, i); // first dirty element: copy clean prefix
402
+ const path = findFilePath(element, pathKeys) ?? '(unknown)';
403
+ if (dropWhole) {
404
+ skipped.push({ path, reason: `dropped non-serializable ${field} entry` });
405
+ continue;
406
+ }
407
+ const ctx = { stripped: 0, seen: new Map(), keys: [] };
408
+ const cleaned = stripNonCloneable(element, ctx);
409
+ // Last-resort guard: if stripping functions/symbols still left something
410
+ // structured-clone rejects, drop the element rather than re-throw.
411
+ if (isStructuredCloneable(cleaned)) {
412
+ out.push(cleaned);
413
+ // Name the offending key path(s) so the leak is locatable from the log
414
+ // (e.g. "from nodes: properties.toString") — not just the array field.
415
+ const at = ctx.keys.slice(0, 3).join(', ');
416
+ const more = ctx.keys.length > 3 ? `, …+${ctx.keys.length - 3}` : '';
417
+ skipped.push({
418
+ path,
419
+ reason: `stripped ${ctx.stripped} non-serializable value(s) from ${field}: ${at}${more}`,
420
+ });
421
+ }
422
+ else {
423
+ skipped.push({ path, reason: `dropped unsalvageable ${field} entry` });
424
+ }
425
+ }
426
+ catch {
427
+ // A throw DURING this element's scan/strip — a Proxy with a throwing
428
+ // `getPrototypeOf`/`ownKeys` trap reached by Object.getPrototypeOf /
429
+ // Object.keys, or any other structural-enumeration throw. Drop the
430
+ // element rather than let the throw escape to postResultCloneSafe's
431
+ // fail-closed {type:'error'} (which under POOL_SIZE=1 re-arms the
432
+ // cascade this net prevents). One pathological element can't sink the
433
+ // whole result.
434
+ if (!out)
435
+ out = value.slice(0, i);
436
+ skipped.push({ path: '(unknown)', reason: `dropped ${field} entry (sanitizer error)` });
437
+ }
438
+ }
439
+ if (out)
440
+ result[field] = out;
441
+ }
442
+ // Final safety gate. The loop above only rewrites ARRAY fields, so a future
443
+ // non-array result sink (a nested object / Map) — or an array field whose own
444
+ // non-index property the element loop didn't reach — could still hold a
445
+ // non-cloneable value and throw on the re-post. Make "the returned result is
446
+ // structured-cloneable" a hard postcondition: strip any remaining offending
447
+ // field in place. Failure-path-only and a no-op once the result is already
448
+ // clean (the per-field probe short-circuits every clean field).
449
+ if (!isStructuredCloneable(result)) {
450
+ for (const field of Object.keys(result)) {
451
+ if (options.skipFields?.has(field))
452
+ continue;
453
+ if (isStructuredCloneable(result[field]))
454
+ continue;
455
+ const ctx = { stripped: 0, seen: new Map(), keys: [] };
456
+ result[field] = stripNonCloneable(result[field], ctx);
457
+ const at = ctx.keys.slice(0, 3).join(', ');
458
+ skipped.push({
459
+ path: '(result)',
460
+ reason: `stripped ${ctx.stripped} non-serializable value(s) from ${field}${at ? `: ${at}` : ''}`,
461
+ });
462
+ }
463
+ }
464
+ return { skipped };
465
+ }
@@ -1,4 +1,5 @@
1
1
  import { SupportedLanguages } from '../../../_shared/index.js';
2
+ import type { SkippedPath } from './clone-safety.js';
2
3
  import type { ExtractedRouterInclude, ExtractedRouterImport, ExtractedRouterModuleAlias } from '../route-extractors/fastapi-router-bindings.js';
3
4
  import { type MixedChainStep } from '../utils/call-analysis.js';
4
5
  import type { ConstructorBinding } from '../type-env.js';
@@ -201,6 +202,15 @@ export interface ParseWorkerResult {
201
202
  */
202
203
  parsedFiles: ParsedFile[];
203
204
  skippedLanguages: Record<string, number>;
205
+ /**
206
+ * Files whose parse output carried a value the structured-clone algorithm
207
+ * couldn't serialize across the worker boundary (#2112). The clone-safety
208
+ * net stripped or dropped the offending value so the result could be
209
+ * delivered; these paths are surfaced to the operator so the (rare) data
210
+ * loss is visible. Optional for cache backward compatibility — older cache
211
+ * entries predate the field; consumers must guard with `?? []`.
212
+ */
213
+ skippedPaths?: SkippedPath[];
204
214
  fileCount: number;
205
215
  }
206
216
  export interface ParseWorkerInput {
@@ -17,6 +17,8 @@ import { getProvider } from '../languages/index.js';
17
17
  import { getTreeSitterBufferSize, getTreeSitterContentByteLength, TREE_SITTER_MAX_BUFFER, } from '../constants.js';
18
18
  import { ARRAY_METHOD_HOC_BLOCKLIST_SET, DEFAULT_EXPORT_IDENTIFIER_BLOCKLIST_SET, deriveDefaultExportHocName, } from '../ts-js-hoc-utils.js';
19
19
  import { parseSourceSafe } from '../../tree-sitter/safe-parse.js';
20
+ import { postResultCloneSafe } from './post-result.js';
21
+ import { mergeResult } from './result-merge.js';
20
22
  // ── Worker grammar loading — enforcement boundary (#2091/#2093, #2101) ───────
21
23
  // The worker maintains its own grammar table (the guarded `_require`s below +
22
24
  // `languageMap`) and intentionally does NOT consult the runtime
@@ -1793,41 +1795,8 @@ let accumulated = {
1793
1795
  fileCount: 0,
1794
1796
  };
1795
1797
  let cumulativeProcessed = 0;
1796
- // Use a loop instead of push(...spread) to avoid hitting V8's argument limit
1797
- // when merging large result sets (push(...arr) calls apply() under the hood
1798
- // and blows the stack when arr has >~65k elements).
1799
- const appendAll = (target, src) => {
1800
- for (let i = 0; i < src.length; i++)
1801
- target.push(src[i]);
1802
- };
1803
- const mergeResult = (target, src) => {
1804
- appendAll(target.nodes, src.nodes);
1805
- appendAll(target.relationships, src.relationships);
1806
- appendAll(target.symbols, src.symbols);
1807
- appendAll(target.calls, src.calls);
1808
- appendAll(target.assignments, src.assignments);
1809
- appendAll(target.routes, src.routes);
1810
- appendAll(target.fetchCalls, src.fetchCalls);
1811
- appendAll(target.fetchWrapperDefs, src.fetchWrapperDefs);
1812
- appendAll(target.decoratorRoutes, src.decoratorRoutes);
1813
- if (src.routerIncludes)
1814
- appendAll(target.routerIncludes, src.routerIncludes);
1815
- if (src.routerImports)
1816
- appendAll(target.routerImports, src.routerImports);
1817
- if (src.routerModuleAliases) {
1818
- target.routerModuleAliases ??= [];
1819
- appendAll(target.routerModuleAliases, src.routerModuleAliases);
1820
- }
1821
- appendAll(target.toolDefs, src.toolDefs);
1822
- appendAll(target.ormQueries, src.ormQueries);
1823
- appendAll(target.constructorBindings, src.constructorBindings);
1824
- appendAll(target.fileScopeBindings, src.fileScopeBindings);
1825
- appendAll(target.parsedFiles, src.parsedFiles);
1826
- for (const [lang, count] of Object.entries(src.skippedLanguages)) {
1827
- target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count;
1828
- }
1829
- target.fileCount += src.fileCount;
1830
- };
1798
+ // `mergeResult` (+ its `appendAll`) lives in ./result-merge.ts (extracted so it
1799
+ // can be unit-tested without importing this entry module).
1831
1800
  // Signal the pool that worker-side initialization (parser imports, language
1832
1801
  // grammars, type-env setup, all helper modules) is complete and the message
1833
1802
  // handler below is about to be attached. The pool's `waitForWorkerReady`
@@ -1920,7 +1889,7 @@ parentPort.on('message', (msg) => {
1920
1889
  accumulated.parsedFiles = [];
1921
1890
  }
1922
1891
  }
1923
- parentPort.postMessage({ type: 'result', data: accumulated });
1892
+ postResultCloneSafe(accumulated);
1924
1893
  // Reset for potential reuse
1925
1894
  accumulated = {
1926
1895
  nodes: [],
@@ -0,0 +1,22 @@
1
+ import type { ParseWorkerResult } from './parse-worker.js';
2
+ /**
3
+ * Deliver the accumulated result to the pool, surviving a non-cloneable value
4
+ * (#2112). Fast path: post as-is — on a healthy result this is the only thing
5
+ * that runs, so clone-safety adds zero overhead to normal runs. If structured
6
+ * clone rejects the payload (a function/symbol leaked into an extraction
7
+ * record — the reporter's case was a node `properties` value pointing at a
8
+ * native `toString`), rewrite the boundary-crossing arrays so the result is
9
+ * cloneable, record the affected paths on `result.skippedPaths`, warn the
10
+ * operator naming the offending field + file (so the still-unpinned leak is
11
+ * diagnosable from logs and fixable at source), and re-post.
12
+ *
13
+ * Recovery is attempted for ANY first-post failure, not only a `DataCloneError`.
14
+ * structuredClone invokes getters, and a getter that THROWS surfaces its own
15
+ * error (a `RangeError`, etc.) — NOT a `DataCloneError` (confirmed against a
16
+ * real MessageChannel). Gating recovery on `DataCloneError` let such a throw
17
+ * re-throw past the sanitizer and re-arm, under `POOL_SIZE=1`, the worker-death
18
+ * cascade this net prevents. The recovery path is wrapped in its own try/catch
19
+ * so a still-uncloneable re-post fails closed to a primitive-only
20
+ * `{type:'error'}` DELIBERATELY rather than escaping the worker.
21
+ */
22
+ export declare function postResultCloneSafe(result: ParseWorkerResult): void;
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Worker → main result delivery with clone-safety (#2112).
3
+ *
4
+ * Extracted from `parse-worker.ts` into its own side-effect-free module so it
5
+ * can be imported and exercised directly (the parse worker is an entry module:
6
+ * importing it would construct the parser, post `ready`, and attach the real
7
+ * message handler). The integration test imports `postResultCloneSafe` from
8
+ * here to cover the production wiring end to end rather than re-implementing it.
9
+ */
10
+ import { parentPort } from 'node:worker_threads';
11
+ import { makeWorkerResultCloneSafe } from './clone-safety.js';
12
+ /**
13
+ * Strict mode (opt-in via `GITNEXUS_STRICT_CLONE=1`, inherited by workers). When
14
+ * on, a clone failure THROWS with the offending key path instead of silently
15
+ * sanitizing + delivering — so a leak introduced by a future provider/extractor
16
+ * change fails LOUDLY (in CI / dev) at its origin rather than being quietly
17
+ * stripped in production. The silent-recovery behavior is exactly what hid the
18
+ * original #2112 leak; strict mode removes the silence where we want loudness.
19
+ * Off in production, where the net's job is to keep the run alive.
20
+ */
21
+ const STRICT_CLONE = process.env.GITNEXUS_STRICT_CLONE === '1';
22
/**
 * Deliver the accumulated result to the pool, surviving a non-cloneable value
 * (#2112).
 *
 * Fast path: post the result as-is. On a healthy result this is the only work
 * done, so clone-safety adds no overhead to normal runs.
 *
 * Recovery path: if the first post throws for ANY reason — a `DataCloneError`
 * from a leaked function/symbol, or the error of a getter that throws while
 * structured clone walks the payload — the result is sanitized in place via
 * `makeWorkerResultCloneSafe`, the affected entries are appended to
 * `result.skippedPaths`, a `{type:'warning'}` naming a sample of them is
 * posted, and the result is re-posted. Recovery is deliberately NOT gated on
 * `DataCloneError`: a throwing getter surfaces its own error type.
 *
 * Fail-closed path: anything that throws during recovery (including the
 * deliberate strict-mode throw) is reported as a primitive-only
 * `{type:'error'}` message instead of escaping the worker.
 *
 * @param result Accumulated parse-worker result. Mutated in place on the
 *   recovery path (sanitized fields, `skippedPaths` extended).
 * @throws {Error} If called where `parentPort` is null (i.e. not inside a
 *   worker thread). Previously this surfaced as an opaque null-dereference
 *   `TypeError` thrown from the fallback handler, after the result had already
 *   been sanitized for nothing.
 */
export function postResultCloneSafe(result) {
    // Resolve the port once. The original guarded only the warning post; the
    // result / error posts dereferenced `parentPort` unconditionally.
    const port = parentPort;
    if (!port) {
        throw new Error('postResultCloneSafe: parentPort is null — must be called from a worker thread');
    }
    try {
        port.postMessage({ type: 'result', data: result });
        return;
    }
    catch {
        // Fall through to recovery on ANY failure (DataCloneError OR a throwing
        // getter's own error). A healthy post returned above and never reaches here.
    }
    try {
        // `parsedFiles` entries are dropped whole rather than stripped;
        // `skippedPaths` is excluded from sanitizing (it is the net's own output).
        const { skipped } = makeWorkerResultCloneSafe(result, {
            dropWholeElement: new Set(['parsedFiles']),
            skipFields: new Set(['skippedPaths']),
        });
        if (skipped.length > 0) {
            if (STRICT_CLONE) {
                // Surface the leak loudly with its exact key path(s) instead of
                // delivering a sanitized result. Routes to the catch below → a
                // primitive-only {type:'error'} the pool reports, failing CI.
                const detail = skipped.map((s) => `${s.path}: ${s.reason}`).join('; ');
                throw new Error(`GITNEXUS_STRICT_CLONE: worker result was not structured-cloneable — ${detail}`);
            }
            result.skippedPaths = [...(result.skippedPaths ?? []), ...skipped];
            // Cap the log line at five entries; the full list travels on the result.
            const sample = skipped
                .slice(0, 5)
                .map((s) => `${s.path} (${s.reason})`)
                .join('; ');
            const more = skipped.length > 5 ? ` …and ${skipped.length - 5} more` : '';
            port.postMessage({
                type: 'warning',
                message: `Sanitized ${skipped.length} file(s) with non-serializable parse output before delivery: ${sample}${more}`,
            });
        }
        port.postMessage({ type: 'result', data: result });
    }
    catch (err) {
        // Only strings cross the boundary here, so this post cannot itself fail
        // structured clone.
        const e = err instanceof Error ? err : new Error(String(err));
        port.postMessage({ type: 'error', error: e.message, errorStack: e.stack });
    }
}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Merge of accumulated parse-worker results (sub-batch result → the conceptual
3
+ * job's running accumulator).
4
+ *
5
+ * Extracted from `parse-worker.ts` into this side-effect-free module so the
6
+ * merge can be imported and unit-tested directly — the parse worker is an entry
7
+ * module (importing it constructs the parser, posts `ready`, and attaches the
8
+ * real MessagePort handler), so a main-thread test cannot import a helper out of
9
+ * it. Mirrors the `post-result.ts` extraction.
10
+ *
11
+ * `import type` of `ParseWorkerResult` is erased at runtime, so there is no
12
+ * import cycle with `parse-worker.ts` (which imports this module's runtime).
13
+ */
14
+ import type { ParseWorkerResult } from './parse-worker.js';
15
+ /**
16
+ * Merge `src` into `target` in place: append every boundary-crossing array,
17
+ * sum the per-language skip counts, union the clone-safety `skippedPaths`, and
18
+ * add the file count.
19
+ */
20
+ export declare const mergeResult: (target: ParseWorkerResult, src: ParseWorkerResult) => void;
@@ -0,0 +1,43 @@
1
// Append element-by-element instead of `target.push(...src)`: spreading passes
// every element as a call argument, which hits V8's argument limit and throws
// a RangeError once `src` is large (tens of thousands of elements).
const appendAll = (target, src) => {
    for (let i = 0; i < src.length; i++)
        target.push(src[i]);
};
/**
 * Merge `src` into `target` in place: append every boundary-crossing array,
 * sum the per-language skip counts, union the clone-safety `skippedPaths`, and
 * add the file count.
 *
 * `routerIncludes` / `routerImports` are appended only when present on `src`;
 * `routerModuleAliases` and `skippedPaths` are additionally created on
 * `target` on demand. `skippedPaths` is left absent on `target` when `src`
 * contributes none.
 *
 * @param target Accumulator that receives the merged data (mutated).
 * @param src Result to fold into `target` (not mutated).
 */
export const mergeResult = (target, src) => {
    appendAll(target.nodes, src.nodes);
    appendAll(target.relationships, src.relationships);
    appendAll(target.symbols, src.symbols);
    appendAll(target.calls, src.calls);
    appendAll(target.assignments, src.assignments);
    appendAll(target.routes, src.routes);
    appendAll(target.fetchCalls, src.fetchCalls);
    appendAll(target.fetchWrapperDefs, src.fetchWrapperDefs);
    appendAll(target.decoratorRoutes, src.decoratorRoutes);
    if (src.routerIncludes)
        appendAll(target.routerIncludes, src.routerIncludes);
    if (src.routerImports)
        appendAll(target.routerImports, src.routerImports);
    if (src.routerModuleAliases) {
        target.routerModuleAliases ??= [];
        appendAll(target.routerModuleAliases, src.routerModuleAliases);
    }
    appendAll(target.toolDefs, src.toolDefs);
    appendAll(target.ormQueries, src.ormQueries);
    appendAll(target.constructorBindings, src.constructorBindings);
    appendAll(target.fileScopeBindings, src.fileScopeBindings);
    appendAll(target.parsedFiles, src.parsedFiles);
    for (const [lang, count] of Object.entries(src.skippedLanguages)) {
        target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count;
    }
    if (src.skippedPaths && src.skippedPaths.length > 0) {
        // Same loop-append as every other array: a spread push here would
        // reintroduce the argument-limit failure on a large skip list.
        target.skippedPaths ??= [];
        appendAll(target.skippedPaths, src.skippedPaths);
    }
    target.fileCount += src.fileCount;
};
@@ -273,6 +273,12 @@ export declare function resolveAutoPoolSize(): number;
273
273
  * time spent across all attempts/splits/retries. When the budget is
274
274
  * exhausted, the pool surfaces the in-flight path via `WorkerPoolDispatchError`
275
275
  * instead of letting timeouts compound indefinitely.
276
+ *
277
+ * Upstream of these layers, the parse worker self-sanitizes a result that the
278
+ * structured-clone algorithm can't serialize (#2112) — stripping or dropping
279
+ * the offending value and reporting the affected paths on the result — so a
280
+ * single non-cloneable value can't masquerade as a worker death and exhaust a
281
+ * slot's respawn budget here.
276
282
  */
277
283
  export declare const createWorkerPool: (workerUrl: URL, poolSize?: number, options?: WorkerPoolOptions) => WorkerPool;
278
284
  export {};
@@ -523,6 +523,12 @@ function createJobs(items, maxItems, maxBytes, timeoutMs, chunkHash) {
523
523
  * time spent across all attempts/splits/retries. When the budget is
524
524
  * exhausted, the pool surfaces the in-flight path via `WorkerPoolDispatchError`
525
525
  * instead of letting timeouts compound indefinitely.
526
+ *
527
+ * Upstream of these layers, the parse worker self-sanitizes a result that the
528
+ * structured-clone algorithm can't serialize (#2112) — stripping or dropping
529
+ * the offending value and reporting the affected paths on the result — so a
530
+ * single non-cloneable value can't masquerade as a worker death and exhaust a
531
+ * slot's respawn budget here.
526
532
  */
527
533
  export const createWorkerPool = (workerUrl, poolSize, options) => {
528
534
  // Validate worker script exists before spawning to prevent uncaught
@@ -1313,14 +1319,19 @@ export const createWorkerPool = (workerUrl, poolSize, options) => {
1313
1319
  if (settled || stopped)
1314
1320
  return;
1315
1321
  // Native postMessage delivers POJO directly via Node's
1316
- // structured clone. V8 deserialization failures (malformed
1317
- // frame, non-cloneable value) surface as a `messageerror`
1318
- // event handled below — they never reach this handler. The
1319
- // only thing we need to guard for here is a worker that
1320
- // sends a message without a `type` discriminant (a bug in
1321
- // the worker, not a wire-format issue): without the guard
1322
- // `null.type` would throw a TypeError out of the
1323
- // EventEmitter listener → uncaughtException on the main
1322
+ // structured clone. Two distinct clone failure modes exist,
1323
+ // and NEITHER reaches this handler: (1) a SENDER-side
1324
+ // non-cloneable value (a function/symbol that leaked into the
1325
+ // result) throws a synchronous `DataCloneError` on the
1326
+ // worker's own postMessage — the parse worker self-sanitizes
1327
+ // such results before delivery (#2112) and falls back to a
1328
+ // primitive-only `{type:'error'}` if it still can't serialize;
1329
+ // (2) a RECEIVER-side deserialization failure surfaces as a
1330
+ // `messageerror` event handled below. The only thing THIS
1331
+ // handler guards is a worker that sends a message without a
1332
+ // `type` discriminant (a worker bug, not a wire-format issue):
1333
+ // without the guard `null.type` would throw a TypeError out of
1334
+ // the EventEmitter listener → uncaughtException on the main
1324
1335
  // thread.
1325
1336
  const msg = raw;
1326
1337
  if (msg === null || typeof msg !== 'object' || typeof msg.type !== 'string') {
@@ -1417,12 +1428,15 @@ export const createWorkerPool = (workerUrl, poolSize, options) => {
1417
1428
  `Likely OOM or native addon failure${inFlightSuffix}.`, excludes);
1418
1429
  }
1419
1430
  };
1420
- // `messageerror` fires when V8 fails to deserialize a postMessage
1421
- // payload (e.g., the worker tries to send a non-cloneable value
1422
- // back, or structured-clone hits an unsupported shape). The worker
1423
- // stays ALIVE but the message is lost without this handler the
1424
- // pool would sit on the dropped message until the idle timeout
1425
- // expires. Treat it as worker death so the resilience layers fire:
1431
+ // `messageerror` fires when V8 fails to DESERIALIZE a postMessage
1432
+ // payload on THIS (receiver) side a value that serialized on the
1433
+ // worker but can't be reconstructed here. (A non-cloneable value on
1434
+ // the SENDER side instead throws a synchronous DataCloneError on the
1435
+ // worker's own postMessage; that path is caught and sanitized
1436
+ // worker-side (#2112) and never arrives here.) The worker stays ALIVE
1437
+ // but the message is lost — without this handler the pool would sit on
1438
+ // the dropped message until the idle timeout expires. Treat it as
1439
+ // worker death so the resilience layers fire:
1426
1440
  // requeue the remainder via `recoverAndResume`, attribute the
1427
1441
  // in-flight file from the `starting-file` signal (if observed),
1428
1442
  // and let the per-slot respawn budget and circuit breaker decide
@@ -0,0 +1,58 @@
1
+ /**
2
+ * JSON-safe projection of `AnalyzeResult` for the analyze-worker → parent IPC
3
+ * boundary (#2112 boundary audit; #2135).
4
+ *
5
+ * The forked analyze worker (`analyze-worker.ts`) reports completion to the
6
+ * parent over `child_process` IPC, which uses Node's DEFAULT `'json'`
7
+ * serialization — `api.ts` forks the worker with no `serialization:` option, so
8
+ * the channel runs `JSON.stringify`/`JSON.parse`, NOT V8 structured clone.
9
+ *
10
+ * `AnalyzeResult.pipelineResult` is populated on every successful analysis
11
+ * (`run-analyze.ts`) and carries `pipelineResult.graph` — the live
12
+ * `KnowledgeGraph` closure object. Sending the raw result across this channel is
13
+ * wrong three ways:
14
+ * 1. Waste — the graph's `nodes`/`relationships` getters force-materialize the
15
+ * ENTIRE graph into two arrays, then JSON-stringify them, on every analyze.
16
+ * On a large repo (the #2112 scenario) that is a multi-hundred-MB
17
+ * stringify+parse whose result is immediately discarded.
18
+ * 2. Silent corruption — the graph's methods are own function properties;
19
+ * `JSON.stringify` drops them with no error, so a `pipelineResult.graph`
20
+ * that survived the wire is a data-only husk whose `forEachNode(...)` throws
21
+ * "is not a function" far from the cause.
22
+ * 3. Conditional crash — a BigInt or circular reference anywhere in the
23
+ * payload makes `process.send` throw `TypeError` synchronously; the throw is
24
+ * caught in the worker and re-sent as `{type:'error'}`, turning a
25
+ * SUCCESSFUL analysis (DB already written) into a reported FAILURE. This is
26
+ * the #2112 failure family on the server path, and — unlike the parse-worker
27
+ * result boundary — it has no clone-safety net.
28
+ *
29
+ * The parent (`api.ts`) reads only `result.repoName`; `pipelineResult`'s real
30
+ * consumers (CLI skill generation) call `runFullAnalysis` in-process and never
31
+ * cross this fork. So the worker sends an explicit allowlist of the scalar
32
+ * fields, JSON-safe by construction.
33
+ */
34
+ import type { AnalyzeResult } from '../core/run-analyze.js';
35
+ /**
36
+ * The JSON-safe subset of `AnalyzeResult` that crosses the analyze-worker IPC
37
+ * boundary. A `Pick` allowlist — NOT `Omit<…, 'pipelineResult'>`. With `Pick`
38
+ * the allowlist IS the type, so the projection is exhaustive by construction:
39
+ * `projectAnalyzeResultForIpc`'s return literal must name exactly these keys
40
+ * (omitting one is a compile error), and a new field added to `AnalyzeResult`
41
+ * is simply absent from the wire until it is *deliberately* added here. `Omit`
42
+ * couldn't give that guarantee — it kept every other field, including OPTIONAL
43
+ * ones (e.g. `isPrimaryBranch?`), so an optional non-serializable field could be
44
+ * advertised by the type yet silently dropped by the runtime allowlist.
45
+ *
46
+ * `isPrimaryBranch` is intentionally excluded: the parent (`api.ts`) reads only
47
+ * `repoName`, and nothing consumes `isPrimaryBranch` across this fork (its CLI
48
+ * consumer calls `runFullAnalysis` in-process). Add a field here only when a
49
+ * server-side IPC consumer actually needs it — and only if it is JSON-safe.
50
+ */
51
+ export type AnalyzeResultIpc = Pick<AnalyzeResult, 'repoName' | 'repoPath' | 'stats' | 'alreadyUpToDate' | 'ftsRepairedOnly' | 'ftsSkipped'>;
52
+ /**
53
+ * Project an `AnalyzeResult` down to the JSON-safe fields the parent consumes,
54
+ * dropping `pipelineResult` (the live `KnowledgeGraph`) and any other field not
55
+ * in the `AnalyzeResultIpc` allowlist. The return literal is exhaustive over
56
+ * `AnalyzeResultIpc` (a missing key is a compile error).
57
+ */
58
+ export declare function projectAnalyzeResultForIpc(result: AnalyzeResult): AnalyzeResultIpc;
@@ -0,0 +1,16 @@
1
/**
 * Build the JSON-safe payload sent to the parent over the analyze-worker IPC
 * channel.
 *
 * Only the six allowlisted scalar fields are copied; everything else on
 * `result` — notably `pipelineResult` — is left behind, so the returned object
 * never carries it across the fork.
 *
 * @param result The full analysis result produced in the worker.
 * @returns A fresh object holding exactly `repoName`, `repoPath`, `stats`,
 *   `alreadyUpToDate`, `ftsRepairedOnly` and `ftsSkipped` (in that order).
 */
export function projectAnalyzeResultForIpc(result) {
    const { repoName, repoPath, stats, alreadyUpToDate, ftsRepairedOnly, ftsSkipped } = result;
    return { repoName, repoPath, stats, alreadyUpToDate, ftsRepairedOnly, ftsSkipped };
}
@@ -11,6 +11,7 @@
11
11
  * Child -> Parent: { type: 'error', message: string }
12
12
  */
13
13
  import { runFullAnalysis } from '../core/run-analyze.js';
14
+ import { projectAnalyzeResultForIpc } from './analyze-worker-ipc.js';
14
15
  import { closeLbug } from '../core/lbug/lbug-adapter.js';
15
16
  function send(msg) {
16
17
  process.send?.(msg);
@@ -48,7 +49,12 @@ process.on('message', async (msg) => {
48
49
  send({ type: 'progress', phase: 'log', percent: -1, message });
49
50
  },
50
51
  });
51
- send({ type: 'complete', result });
52
+ // Send a JSON-safe projection, NOT the raw result: the IPC channel is
53
+ // default-JSON serialization and `result.pipelineResult` carries the live
54
+ // KnowledgeGraph (wasteful to materialize, silently corrupted by JSON, and
55
+ // a BigInt/circular value would throw and mis-report this success as a
56
+ // failure). See analyze-worker-ipc.ts.
57
+ send({ type: 'complete', result: projectAnalyzeResultForIpc(result) });
52
58
  }
53
59
  catch (err) {
54
60
  send({ type: 'error', message: err?.message || 'Analysis failed' });
@@ -149,6 +149,9 @@ export const slimParseWorkerResultsForCache = (chunkResults) => {
149
149
  assignments: [],
150
150
  constructorBindings: [],
151
151
  parsedFiles: [],
152
+ // #2112: a clone-safety skip list is per-run telemetry, not graph data —
153
+ // replay ignores it. Drop it so it doesn't bloat the cached shard.
154
+ skippedPaths: [],
152
155
  });
153
156
  }
154
157
  return slim;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.8-rc.8",
3
+ "version": "1.6.8-rc.9",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",