gitnexus 1.6.8-rc.8 → 1.6.8-rc.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/ingestion/language-provider.d.ts +12 -2
- package/dist/core/ingestion/languages/c-cpp.js +11 -4
- package/dist/core/ingestion/languages/kotlin.js +5 -1
- package/dist/core/ingestion/parsing-processor.js +21 -0
- package/dist/core/ingestion/workers/clone-safety.d.ts +109 -0
- package/dist/core/ingestion/workers/clone-safety.js +465 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +10 -0
- package/dist/core/ingestion/workers/parse-worker.js +5 -36
- package/dist/core/ingestion/workers/post-result.d.ts +22 -0
- package/dist/core/ingestion/workers/post-result.js +87 -0
- package/dist/core/ingestion/workers/result-merge.d.ts +20 -0
- package/dist/core/ingestion/workers/result-merge.js +43 -0
- package/dist/core/ingestion/workers/worker-pool.d.ts +6 -0
- package/dist/core/ingestion/workers/worker-pool.js +28 -14
- package/dist/server/analyze-worker-ipc.d.ts +58 -0
- package/dist/server/analyze-worker-ipc.js +16 -0
- package/dist/server/analyze-worker.js +7 -1
- package/dist/storage/parse-cache.js +3 -0
- package/package.json +1 -1
|
@@ -147,6 +147,12 @@ interface LanguageProviderConfig {
|
|
|
147
147
|
* `undefined` when no constraints exist / the node isn't a templated
|
|
148
148
|
* function. Languages without SFINAE / concept semantics leave this
|
|
149
149
|
* undefined and the disambiguation is a pass-through.
|
|
150
|
+
*
|
|
151
|
+
* Cloneability contract: the returned payload crosses the worker boundary
|
|
152
|
+
* via structured clone, so it MUST be structured-clone-safe (no functions,
|
|
153
|
+
* symbols, or tree-sitter `SyntaxNode`s — only plain data). Wrap the return
|
|
154
|
+
* with `assertCloneable` from `workers/clone-safety.ts` so a future leak is a
|
|
155
|
+
* compile error at the source instead of a runtime DataCloneError (#2143).
|
|
150
156
|
*/
|
|
151
157
|
readonly extractTemplateConstraints?: (definitionNode: SyntaxNode) => unknown;
|
|
152
158
|
/** Override the default node label for definition.function captures.
|
|
@@ -268,8 +274,12 @@ interface LanguageProviderConfig {
|
|
|
268
274
|
* disk store WITHOUT a main-thread re-parse. The main thread restores them
|
|
269
275
|
* via the matching `ScopeResolver.applyCaptureSideChannel` hook.
|
|
270
276
|
*
|
|
271
|
-
* MUST return plain data (objects / arrays /
|
|
272
|
-
*
|
|
277
|
+
* Cloneability contract: MUST return plain data (objects / arrays /
|
|
278
|
+
* primitives — no functions, symbols, or tree-sitter `SyntaxNode`s) so it
|
|
279
|
+
* survives BOTH the worker→main structured clone AND `JSON.stringify` + the
|
|
280
|
+
* parsedfile-store interning reviver. Wrap the return with `assertCloneable`
|
|
281
|
+
* from `workers/clone-safety.ts` so a future non-serializable leak is a
|
|
282
|
+
* compile error at the source instead of a runtime DataCloneError (#2143).
|
|
273
283
|
*
|
|
274
284
|
* Default: undefined (provider has no capture-time module-level side effects).
|
|
275
285
|
*/
|
|
@@ -38,7 +38,8 @@ import { cCallConfig, cppCallConfig } from '../call-extractors/configs/c-cpp.js'
|
|
|
38
38
|
import { stripUeMacros } from '../cpp-ue-preprocessor.js';
|
|
39
39
|
import { emitCScopeCaptures, interpretCImport, interpretCTypeBinding, cArityCompatibility, cBindingScopeFor, cImportOwningScope, cReceiverBinding, collectCStaticLinkageSideChannel, } from './c/index.js';
|
|
40
40
|
import { emitCppScopeCaptures, interpretCppImport, interpretCppTypeBinding, cppArityCompatibility, cppBindingScopeFor, cppImportOwningScope, cppReceiverBinding, collectCppCaptureSideChannel, } from './cpp/index.js';
|
|
41
|
-
import { extractCppTemplateConstraints } from './cpp/constraint-extractor.js';
|
|
41
|
+
import { extractCppTemplateConstraints, } from './cpp/constraint-extractor.js';
|
|
42
|
+
import { assertCloneable } from '../workers/clone-safety.js';
|
|
42
43
|
const C_BUILT_INS = new Set([
|
|
43
44
|
'printf',
|
|
44
45
|
'fprintf',
|
|
@@ -358,7 +359,10 @@ export const cProvider = defineLanguage({
|
|
|
358
359
|
// `static` functions look non-file-local on the main thread and leak into
|
|
359
360
|
// cross-file global free-call resolution / wildcard imports. See
|
|
360
361
|
// `c/capture-side-channel.ts`.
|
|
361
|
-
|
|
362
|
+
// `assertCloneable` is a runtime identity; it makes a future non-serializable
|
|
363
|
+
// value in the side-channel payload a compile error here, at the source, rather
|
|
364
|
+
// than a DataCloneError at the worker boundary (#2143).
|
|
365
|
+
collectCaptureSideChannel: (filePath) => assertCloneable(collectCStaticLinkageSideChannel(filePath)),
|
|
362
366
|
interpretImport: interpretCImport,
|
|
363
367
|
interpretTypeBinding: interpretCTypeBinding,
|
|
364
368
|
bindingScopeFor: cBindingScopeFor,
|
|
@@ -431,7 +435,7 @@ export const cppProvider = defineLanguage({
|
|
|
431
435
|
// just populated for this file into plain data on `ParsedFile.captureSideChannel`,
|
|
432
436
|
// so the main thread can restore them via `applyCaptureSideChannel` WITHOUT a
|
|
433
437
|
// re-parse (#1983). See `cpp/capture-side-channel.ts`.
|
|
434
|
-
collectCaptureSideChannel: collectCppCaptureSideChannel,
|
|
438
|
+
collectCaptureSideChannel: (filePath) => assertCloneable(collectCppCaptureSideChannel(filePath)),
|
|
435
439
|
interpretImport: interpretCppImport,
|
|
436
440
|
interpretTypeBinding: interpretCppTypeBinding,
|
|
437
441
|
bindingScopeFor: cppBindingScopeFor,
|
|
@@ -482,5 +486,8 @@ function extractCppTemplateConstraintsForProvider(definitionNode) {
|
|
|
482
486
|
}
|
|
483
487
|
break;
|
|
484
488
|
}
|
|
485
|
-
|
|
489
|
+
// Guard the boundary at the source: a future non-cloneable member of the
|
|
490
|
+
// constraint payload becomes a compile error here, not a runtime
|
|
491
|
+
// DataCloneError at the worker post (#2143).
|
|
492
|
+
return assertCloneable(extractCppTemplateConstraints(templateDecl, declarator));
|
|
486
493
|
}
|
|
@@ -10,6 +10,7 @@ import { SupportedLanguages } from '../../../_shared/index.js';
|
|
|
10
10
|
import { createClassExtractor } from '../class-extractors/generic.js';
|
|
11
11
|
import { kotlinClassConfig } from '../class-extractors/configs/jvm.js';
|
|
12
12
|
import { defineLanguage } from '../language-provider.js';
|
|
13
|
+
import { assertCloneable } from '../workers/clone-safety.js';
|
|
13
14
|
import { kotlinTypeConfig } from '../type-extractors/jvm.js';
|
|
14
15
|
import { kotlinExportChecker } from '../export-detection.js';
|
|
15
16
|
import { createImportResolver } from '../import-resolvers/resolver-factory.js';
|
|
@@ -166,7 +167,10 @@ export const kotlinProvider = defineLanguage({
|
|
|
166
167
|
// so the main thread can restore them via `applyCaptureSideChannel` WITHOUT a
|
|
167
168
|
// re-parse (#1983). Without this, companion/static dispatch emits no CALLS
|
|
168
169
|
// edges on the worker path. See `kotlin/capture-side-channel.ts`.
|
|
169
|
-
|
|
170
|
+
// `assertCloneable` is a runtime identity; it makes a future non-serializable
|
|
171
|
+
// value in the side-channel payload a compile error here, at the source, rather
|
|
172
|
+
// than a DataCloneError at the worker boundary (#2143).
|
|
173
|
+
collectCaptureSideChannel: (filePath) => assertCloneable(collectKotlinCaptureSideChannel(filePath)),
|
|
170
174
|
interpretImport: interpretKotlinImport,
|
|
171
175
|
interpretTypeBinding: interpretKotlinTypeBinding,
|
|
172
176
|
bindingScopeFor: kotlinBindingScopeFor,
|
|
@@ -144,6 +144,27 @@ chunkHash) => {
|
|
|
144
144
|
.join(', ');
|
|
145
145
|
logger.warn(` Skipped unsupported languages: ${summary}`);
|
|
146
146
|
}
|
|
147
|
+
// Clone-safety telemetry (#2112): files whose parse output carried a value
|
|
148
|
+
// the structured-clone algorithm couldn't serialize across the worker
|
|
149
|
+
// boundary. The worker sanitized/dropped the offending value so the run
|
|
150
|
+
// could complete; surface the (rare) data loss so it's visible and the
|
|
151
|
+
// offending extractor can be fixed at source.
|
|
152
|
+
const skippedPaths = [];
|
|
153
|
+
for (const result of chunkResults) {
|
|
154
|
+
for (const entry of result.skippedPaths ?? [])
|
|
155
|
+
skippedPaths.push(entry);
|
|
156
|
+
}
|
|
157
|
+
if (skippedPaths.length > 0) {
|
|
158
|
+
// Keep the per-file reason ("stripped N value(s) from nodes" /
|
|
159
|
+
// "dropped non-serializable parsedFiles entry") — it distinguishes a
|
|
160
|
+
// recoverable strip from a whole-record drop, which a path-only line loses.
|
|
161
|
+
const shown = skippedPaths
|
|
162
|
+
.slice(0, 10)
|
|
163
|
+
.map((e) => `${e.path} (${e.reason})`)
|
|
164
|
+
.join(', ');
|
|
165
|
+
const more = skippedPaths.length > 10 ? ` …and ${skippedPaths.length - 10} more` : '';
|
|
166
|
+
logger.warn(` Sanitized ${skippedPaths.length} file(s) with non-serializable parse output: ${shown}${more}`);
|
|
167
|
+
}
|
|
147
168
|
onFileProgress?.(total, total, 'done');
|
|
148
169
|
return chunkResults;
|
|
149
170
|
};
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured-clone safety for the worker result boundary (#2112).
|
|
3
|
+
*
|
|
4
|
+
* A parse worker delivers its accumulated result to the main thread via
|
|
5
|
+
* `parentPort.postMessage(...)`. Node serializes that payload with the
|
|
6
|
+
* structured-clone algorithm SYNCHRONOUSLY on the worker thread, and it
|
|
7
|
+
* THROWS a `DataCloneError` the instant it meets a value it can't serialize —
|
|
8
|
+
* a function, a symbol, a Promise, a WeakMap, etc. The reporter of #2112 hit
|
|
9
|
+
* exactly this: a node record whose `properties` carried an own-enumerable
|
|
10
|
+
* value pointing at a native function (`function toString() { [native code] }
|
|
11
|
+
* could not be cloned`). One such value aborted the entire parse phase,
|
|
12
|
+
* because the worker re-posts the throw as `{type:'error'}` which the pool
|
|
13
|
+
* counts as a worker death — and under `GITNEXUS_WORKER_POOL_SIZE=1` the same
|
|
14
|
+
* graph re-throws on every respawn until the slot's budget is exhausted.
|
|
15
|
+
*
|
|
16
|
+
* This module is the safety net. It runs ONLY after a real clone failure on
|
|
17
|
+
* the fast-path post (zero overhead on healthy runs), and rewrites the
|
|
18
|
+
* boundary-crossing arrays so the result becomes cloneable: a non-cloneable
|
|
19
|
+
* value inside a plain extraction record is dropped (the record is otherwise
|
|
20
|
+
* kept — strictly-missing data, never wrong), and a `ParsedFile` that can't be
|
|
21
|
+
* made cloneable is dropped whole so scope-resolution re-derives it on the
|
|
22
|
+
* main thread (where there is no clone boundary) with intact edge data.
|
|
23
|
+
*
|
|
24
|
+
* Language-neutral by construction: it keys on value shape and field name
|
|
25
|
+
* only, never on a language (AGENTS.md shared-pipeline rule). The strip
|
|
26
|
+
* semantics mirror what the store path's `JSON.stringify` already silently
|
|
27
|
+
* drops, so store / no-store / cold / warm runs converge on the same graph.
|
|
28
|
+
*/
|
|
29
|
+
/** A file whose parse result was sanitized or dropped at the clone boundary. */
|
|
30
|
+
export interface SkippedPath {
|
|
31
|
+
/** Best-effort source path of the offending record (or `(unknown)`). */
|
|
32
|
+
path: string;
|
|
33
|
+
/** Human-readable reason, e.g. "dropped 1 non-serializable value from nodes". */
|
|
34
|
+
reason: string;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* True iff `value` survives Node's structured-clone algorithm (the same
|
|
38
|
+
* algorithm `postMessage` uses). This is the authoritative probe — it matches
|
|
39
|
+
* the real failure exactly, including Map/Set/Date/RegExp/TypedArray support,
|
|
40
|
+
* so it never false-positives on the `Scope` Maps that clone fine.
|
|
41
|
+
*/
|
|
42
|
+
export declare function isStructuredCloneable(value: unknown): boolean;
|
|
43
|
+
/** The leaf values the structured-clone algorithm copies verbatim. */
type CloneablePrimitive = undefined | null | boolean | number | bigint | string;
/** True iff `T` is `any` (the canonical `IsAny` probe: only `any` satisfies `0 extends 1 & T`). */
type IsAny<T> = 0 extends 1 & T ? true : false;
/**
 * Maps `T` to itself when every value reachable from it is structured-clone
 * safe, and to a type containing `never` at the first offending property
 * otherwise. A function or symbol — the values `postMessage` rejects — becomes
 * `never`, so a struct carrying one is no longer assignable to its own
 * `Cloneable<T>` and `assertCloneable` rejects it, naming the bad key.
 *
 * Resolution order (first match wins):
 *  - `any` → `never` (an `any` member would otherwise defeat the check);
 *  - primitives, `Date`, `RegExp` → unchanged (clone-safe leaves);
 *  - functions and symbols → `never`;
 *  - `Map`/`Set` (mutable or readonly) → `ReadonlyMap`/`ReadonlySet` of the
 *    recursively mapped key/value/element types;
 *  - arrays → an array of the mapped element type, staying `readonly` when the
 *    input array type is readonly;
 *  - any other object → a homomorphic mapped type over its own keys;
 *  - anything left over → `never`.
 *
 * Implemented as a homomorphic mapped type (`{ [K in keyof T]: … }`) so it
 * preserves `interface` shapes and `readonly` modifiers and works WITHOUT
 * requiring the payload types to carry an index signature — sidestepping the
 * "closed interface is not assignable to a recursive index-signature type" wall
 * that blocked the value-typed-`Cloneable` approach (#2143).
 */
export type Cloneable<T> = IsAny<T> extends true
    ? never
    : T extends CloneablePrimitive | Date | RegExp
        ? T
        : T extends (...args: never[]) => unknown
            ? never
            : T extends symbol
                ? never
                : T extends ReadonlyMap<infer K, infer V>
                    ? ReadonlyMap<Cloneable<K>, Cloneable<V>>
                    : T extends ReadonlySet<infer U>
                        ? ReadonlySet<Cloneable<U>>
                        : T extends readonly (infer U)[]
                            ? T extends unknown[]
                                ? Cloneable<U>[]
                                : readonly Cloneable<U>[]
                            : T extends object
                                ? {
                                    [K in keyof T]: Cloneable<T[K]>;
                                }
                                : never;
|
|
65
|
+
/**
|
|
66
|
+
* Identity at runtime (zero cost — returns its argument unchanged); a
|
|
67
|
+
* compile-time assertion that `value` is structured-clone safe. Wrap a
|
|
68
|
+
* producer that feeds an `unknown` worker-result sink:
|
|
69
|
+
*
|
|
70
|
+
* collectCaptureSideChannel: (filePath) => assertCloneable(collectFoo(filePath))
|
|
71
|
+
*
|
|
72
|
+
* If `collectFoo`'s return type ever gains a non-cloneable member (a function, a
|
|
73
|
+
* `SyntaxNode`, …) the call fails to compile, pointing at the offending key.
|
|
74
|
+
*
|
|
75
|
+
* The parameter is a conditional type rather than an `extends Cloneable<T>`
|
|
76
|
+
* constraint because a self-referential constraint (`T extends Cloneable<T>`)
|
|
77
|
+
* is a "circular constraint" error in TypeScript. For a clone-safe `T` the
|
|
78
|
+
* parameter resolves to `T` (call type-checks as a plain identity); for an
|
|
79
|
+
* unsafe `T` it resolves to `Cloneable<T>` (which has `never` at the bad key),
|
|
80
|
+
* so the argument is rejected.
|
|
81
|
+
*/
|
|
82
|
+
export declare function assertCloneable<T>(value: T extends Cloneable<T> ? T : Cloneable<T>): T;
|
|
83
|
+
export interface MakeCloneSafeOptions {
|
|
84
|
+
/**
|
|
85
|
+
* Array field names whose offending elements are DROPPED whole rather than
|
|
86
|
+
* stripped in place (e.g. `parsedFiles` — its `captureSideChannel` drives
|
|
87
|
+
* edge resolution, so a stripped-and-delivered file would ship WRONG edges;
|
|
88
|
+
* dropping it lets scope-resolution re-derive it on the main thread).
|
|
89
|
+
*/
|
|
90
|
+
dropWholeElement: ReadonlySet<string>;
|
|
91
|
+
/** Field names to skip entirely (e.g. the `skippedPaths` field itself). */
|
|
92
|
+
skipFields?: ReadonlySet<string>;
|
|
93
|
+
/** Keys to probe for a file path when attributing a skip. */
|
|
94
|
+
pathKeys?: readonly string[];
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* Make a worker result's boundary-crossing array fields structured-cloneable,
|
|
98
|
+
* mutating `result` in place. Only arrays that actually contain a
|
|
99
|
+
* non-cloneable value are rewritten; everything else keeps referential
|
|
100
|
+
* identity. Returns the list of affected file paths for reporting.
|
|
101
|
+
*
|
|
102
|
+
* Call this after ANY failure of the fast-path post — a `DataCloneError`, OR a
|
|
103
|
+
* throwing getter's own error surfaced by structuredClone (the caller in
|
|
104
|
+
* `post-result.ts` recovers on any throw, not only `DataCloneError`).
|
|
105
|
+
*/
|
|
106
|
+
export declare function makeWorkerResultCloneSafe(result: Record<string, unknown>, options: MakeCloneSafeOptions): {
|
|
107
|
+
skipped: SkippedPath[];
|
|
108
|
+
};
|
|
109
|
+
export {};
|
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured-clone safety for the worker result boundary (#2112).
|
|
3
|
+
*
|
|
4
|
+
* A parse worker delivers its accumulated result to the main thread via
|
|
5
|
+
* `parentPort.postMessage(...)`. Node serializes that payload with the
|
|
6
|
+
* structured-clone algorithm SYNCHRONOUSLY on the worker thread, and it
|
|
7
|
+
* THROWS a `DataCloneError` the instant it meets a value it can't serialize —
|
|
8
|
+
* a function, a symbol, a Promise, a WeakMap, etc. The reporter of #2112 hit
|
|
9
|
+
* exactly this: a node record whose `properties` carried an own-enumerable
|
|
10
|
+
* value pointing at a native function (`function toString() { [native code] }
|
|
11
|
+
* could not be cloned`). One such value aborted the entire parse phase,
|
|
12
|
+
* because the worker re-posts the throw as `{type:'error'}` which the pool
|
|
13
|
+
* counts as a worker death — and under `GITNEXUS_WORKER_POOL_SIZE=1` the same
|
|
14
|
+
* graph re-throws on every respawn until the slot's budget is exhausted.
|
|
15
|
+
*
|
|
16
|
+
* This module is the safety net. It runs ONLY after a real clone failure on
|
|
17
|
+
* the fast-path post (zero overhead on healthy runs), and rewrites the
|
|
18
|
+
* boundary-crossing arrays so the result becomes cloneable: a non-cloneable
|
|
19
|
+
* value inside a plain extraction record is dropped (the record is otherwise
|
|
20
|
+
* kept — strictly-missing data, never wrong), and a `ParsedFile` that can't be
|
|
21
|
+
* made cloneable is dropped whole so scope-resolution re-derives it on the
|
|
22
|
+
* main thread (where there is no clone boundary) with intact edge data.
|
|
23
|
+
*
|
|
24
|
+
* Language-neutral by construction: it keys on value shape and field name
|
|
25
|
+
* only, never on a language (AGENTS.md shared-pipeline rule). The strip
|
|
26
|
+
* semantics mirror what the store path's `JSON.stringify` already silently
|
|
27
|
+
* drops, so store / no-store / cold / warm runs converge on the same graph.
|
|
28
|
+
*/
|
|
29
|
+
/**
 * Probe whether `value` can be copied by the global `structuredClone`.
 *
 * The probe simply attempts the clone and discards the copy. ANY throw from
 * that attempt — not only a `DataCloneError` — is reported as "not cloneable",
 * so callers can rely on this function never throwing.
 *
 * @param value  Arbitrary value to probe.
 * @returns `true` when `structuredClone(value)` completes, `false` when it throws.
 */
export function isStructuredCloneable(value) {
    let cloneable = true;
    try {
        structuredClone(value);
    }
    catch {
        // Deliberately broad: every failure mode means "cannot cross the boundary".
        cloneable = false;
    }
    return cloneable;
}
|
|
44
|
+
/**
 * Runtime identity function: hands back exactly the value it was given.
 *
 * It performs no inspection or copying at runtime. Its purpose is the
 * compile-time check carried by its TypeScript declaration, so it is meant to
 * wrap a producer whose result feeds an `unknown` worker-result sink, e.g.
 *
 *   collectCaptureSideChannel: (filePath) => assertCloneable(collectFoo(filePath))
 *
 * @param value  The payload to pass through.
 * @returns The same `value`, unchanged (same reference for objects).
 */
export function assertCloneable(value) {
    const passthrough = value;
    return passthrough;
}
|
|
64
|
+
/**
|
|
65
|
+
* Recursion cap for the module's own traversal. An over-deep subtree is treated
|
|
66
|
+
* as non-cloneable rather than recursing to a stack overflow — without this, a
|
|
67
|
+
* deeply-nested record would throw `RangeError` inside the sanitizer and (since
|
|
68
|
+
* the recovery path is the safety net) re-arm the very cascade #2112 fixes. Set
|
|
69
|
+
* far below the observed ~3000-frame overflow and far above any real
|
|
70
|
+
* parse-result record (extraction records are shallow plain data). Note: this
|
|
71
|
+
* caps the module's recursion only; `structuredClone`'s own internal recursion
|
|
72
|
+
* (the `isStructuredCloneable` probe of non-plain objects) is bounded by that
|
|
73
|
+
* helper's catch-all, which turns a probe-side `RangeError` into a
|
|
74
|
+
* non-cloneable verdict — so do not narrow that catch.
|
|
75
|
+
*/
|
|
76
|
+
const MAX_CLONE_DEPTH = 200;
|
|
77
|
+
/**
 * Decide whether `key` is a canonical array-index string (`"0"`, `"1"`, …,
 * strictly below 4294967295).
 *
 * A key qualifies only when it parses to a non-negative integer under that
 * bound AND converting the number back to a string reproduces the key exactly,
 * which rules out spellings such as `"01"`, `"1.0"`, `"1e3"` or `"-0"`.
 *
 * @param key  A property name, as returned by `Object.keys`.
 * @returns `true` for a canonical index key, `false` for any other property name.
 */
function isArrayIndexKey(key) {
    const parsed = Number(key);
    if (!Number.isInteger(parsed))
        return false;
    if (parsed < 0 || parsed >= 4294967295)
        return false;
    // Round-trip check rejects non-canonical spellings of an otherwise valid index.
    return String(parsed) === key;
}
|
|
89
|
+
/**
 * Read-only scan: returns true on the FIRST value structured-clone would
 * reject. Used to decide whether an array (or element) needs rewriting at all,
 * so clean arrays keep their referential identity and pay no copy cost.
 *
 * Never lets a property read escape as an exception: a throwing accessor —
 * on a plain-object key, a non-index array key, OR an indexed array slot — is
 * reported as non-cloneable instead.
 *
 * @param value  Value to inspect.
 * @param seen   Set-like collection (`has`/`add`) of objects already entered;
 *               an object found in it is not scanned again.
 * @param depth  Current recursion depth (callers normally omit it).
 * @returns `true` if a non-cloneable value was found (or the depth cap was
 *          reached), `false` otherwise.
 */
function containsNonCloneable(value, seen, depth = 0) {
    const t = typeof value;
    if (t === 'function' || t === 'symbol')
        return true;
    if (value === null || t !== 'object')
        return false;
    // Depth bound: treat an over-deep subtree as non-cloneable (the element is
    // then stripped/dropped) instead of overflowing the stack.
    if (depth >= MAX_CLONE_DEPTH)
        return true;
    const obj = value;
    // Cycles clone fine; don't recurse into one twice.
    if (seen.has(obj))
        return false;
    // Date / RegExp are treated as clone-safe leaves.
    if (obj instanceof Date || obj instanceof RegExp)
        return false;
    // Buffers/views usually clone, but a DETACHED one is rejected by
    // structuredClone — probe rather than wave it through. No byteLength
    // heuristic: a legitimately empty `new Uint8Array(0)` also has byteLength 0
    // yet clones fine, so a length check would false-positive.
    if (obj instanceof ArrayBuffer || ArrayBuffer.isView(obj))
        return !isStructuredCloneable(obj);
    seen.add(obj);
    if (Array.isArray(obj)) {
        for (let i = 0; i < obj.length; i++) {
            let element;
            try {
                element = obj[i];
            }
            catch {
                // An indexed slot backed by a throwing accessor can't be
                // serialized either; report it instead of letting the throw
                // escape the scan.
                return true;
            }
            if (containsNonCloneable(element, seen, depth + 1))
                return true;
        }
        // structuredClone also serializes an array's NON-index own-enumerable
        // properties and throws on a non-cloneable one — scan them too (lockstep
        // with stripNonCloneable's array branch; see isArrayIndexKey).
        for (const key of Object.keys(obj)) {
            if (isArrayIndexKey(key))
                continue;
            let child;
            try {
                child = obj[key];
            }
            catch {
                return true; // a throwing getter can't be serialized either
            }
            if (containsNonCloneable(child, seen, depth + 1))
                return true;
        }
        return false;
    }
    if (obj instanceof Map) {
        // A Map's keys and values both cross the boundary — scan both.
        for (const [k, v] of obj) {
            if (containsNonCloneable(k, seen, depth + 1) || containsNonCloneable(v, seen, depth + 1))
                return true;
        }
        return false;
    }
    if (obj instanceof Set) {
        for (const v of obj) {
            if (containsNonCloneable(v, seen, depth + 1))
                return true;
        }
        return false;
    }
    // A non-plain object (Promise, WeakMap, class instance with internal slots)
    // is not walked property-by-property: the authoritative probe decides.
    // Plain objects fall through to a property scan.
    const proto = Object.getPrototypeOf(obj);
    if (proto !== Object.prototype && proto !== null)
        return !isStructuredCloneable(obj);
    for (const key of Object.keys(obj)) {
        let child;
        try {
            child = obj[key];
        }
        catch {
            // A getter that throws can't be serialized either — treat as non-cloneable.
            return true;
        }
        if (containsNonCloneable(child, seen, depth + 1))
            return true;
    }
    return false;
}
|
|
179
|
+
/** Record a strip at `path` (root → `(root)`); keeps the count + key path in sync. */
|
|
180
|
+
function recordStrip(ctx, path) {
|
|
181
|
+
ctx.stripped++;
|
|
182
|
+
ctx.keys.push(path === '' ? '(root)' : path);
|
|
183
|
+
}
|
|
184
|
+
/**
 * Write `child` onto `target` as an OWN enumerable data property.
 *
 * Plain assignment is used for every key except `__proto__`: assigning to
 * that name on an ordinary object/array runs the inherited prototype setter
 * (replacing the prototype or silently doing nothing) instead of creating the
 * property, so it is defined explicitly.
 */
function assignOwnDataProperty(target, key, child) {
    if (key === '__proto__') {
        Object.defineProperty(target, key, {
            value: child,
            writable: true,
            enumerable: true,
            configurable: true,
        });
        return;
    }
    target[key] = child;
}
/**
 * Deep-copy `value`, replacing any value structured-clone would reject with
 * `undefined` (which clones fine). Preserves primitives, arrays, plain
 * objects, and the structured-clone-native containers (Date, RegExp, Map,
 * Set, ArrayBuffer, TypedArray). Date/RegExp, live buffers/views and cloneable
 * non-plain objects are returned by reference; arrays, Maps, Sets and plain
 * objects are rebuilt.
 *
 * Property reads are defensive everywhere: a throwing accessor (plain-object
 * key, non-index array key, or indexed array slot) is recorded as a strip
 * rather than propagated.
 *
 * @param value  Value to sanitize.
 * @param ctx    Mutable strip context: `seen` (Map from an original object to
 *               its sanitized counterpart), plus the `stripped` counter and
 *               `keys` list maintained through `recordStrip`.
 * @param depth  Current recursion depth (callers normally omit it).
 * @param path   Dotted key path of `value`, used only for the diagnostic record.
 * @returns The sanitized value, or `undefined` where `value` itself had to be dropped.
 */
function stripNonCloneable(value, ctx, depth = 0, path = '') {
    const t = typeof value;
    if (t === 'function' || t === 'symbol') {
        recordStrip(ctx, path);
        return undefined;
    }
    if (value === null || t !== 'object')
        return value;
    // Depth bound (mirrors containsNonCloneable): drop an over-deep subtree to
    // `undefined` (itself cloneable, and a legal property value / array element)
    // rather than overflowing the stack.
    if (depth >= MAX_CLONE_DEPTH) {
        recordStrip(ctx, path);
        return undefined;
    }
    const obj = value;
    // Memoized? Return the SAME stripped copy (preserves DAG shape; terminates
    // cycles by returning the in-progress copy inserted before recursing below).
    if (ctx.seen.has(obj))
        return ctx.seen.get(obj);
    // Leaf-like values: returned by reference, but still memoize the decision so
    // a second alias resolves identically.
    if (obj instanceof Date || obj instanceof RegExp) {
        ctx.seen.set(obj, value);
        return value;
    }
    if (obj instanceof ArrayBuffer || ArrayBuffer.isView(obj)) {
        // Keep a live buffer/view (even an empty one); drop a detached one, which
        // structuredClone rejects. The probe is exact — no byteLength heuristic.
        if (!isStructuredCloneable(obj)) {
            recordStrip(ctx, path);
            ctx.seen.set(obj, undefined);
            return undefined;
        }
        ctx.seen.set(obj, value);
        return value;
    }
    // Containers: allocate the empty copy, memoize it BEFORE recursing, then fill
    // — so a cycle/alias that re-enters gets this in-progress copy.
    if (Array.isArray(obj)) {
        const out = [];
        ctx.seen.set(obj, out);
        for (let i = 0; i < obj.length; i++) {
            const elementPath = `${path}[${i}]`;
            let element;
            try {
                element = obj[i];
            }
            catch {
                // Indexed slot backed by a throwing accessor: keep the slot so
                // later indices don't shift, but drop the unreadable value.
                recordStrip(ctx, elementPath);
                out.push(undefined);
                continue;
            }
            out.push(stripNonCloneable(element, ctx, depth + 1, elementPath));
        }
        // Carry NON-index own-enumerable props through the same strip (lockstep
        // with containsNonCloneable): structuredClone serializes them, so a
        // non-cloneable one must be stripped rather than left to throw on re-post.
        for (const key of Object.keys(obj)) {
            if (isArrayIndexKey(key))
                continue;
            // Same root handling as the plain-object branch: no leading dot.
            const childPath = path === '' ? key : `${path}.${key}`;
            let child;
            try {
                child = obj[key];
            }
            catch {
                recordStrip(ctx, childPath);
                continue;
            }
            assignOwnDataProperty(out, key, stripNonCloneable(child, ctx, depth + 1, childPath));
        }
        return out;
    }
    if (obj instanceof Map) {
        // Scope limit (acceptable): object keys aren't identity-preserved across
        // stripping. Parse-result Maps are primitive-keyed, so this never bites.
        const out = new Map();
        ctx.seen.set(obj, out);
        for (const [k, v] of obj)
            out.set(stripNonCloneable(k, ctx, depth + 1, `${path}<key>`), stripNonCloneable(v, ctx, depth + 1, `${path}<map>`));
        return out;
    }
    if (obj instanceof Set) {
        const out = new Set();
        ctx.seen.set(obj, out);
        for (const v of obj)
            out.add(stripNonCloneable(v, ctx, depth + 1, `${path}<set>`));
        return out;
    }
    const proto = Object.getPrototypeOf(obj);
    if (proto !== Object.prototype && proto !== null) {
        // Non-plain object (Promise, WeakMap, class instance with internal
        // slots): it is never reconstructed. If the probe rejects it, drop it
        // whole; otherwise keep it by reference. Memoize either decision so
        // aliases agree.
        if (!isStructuredCloneable(obj)) {
            recordStrip(ctx, path);
            ctx.seen.set(obj, undefined);
            return undefined;
        }
        ctx.seen.set(obj, value);
        return value;
    }
    const out = {};
    ctx.seen.set(obj, out);
    for (const key of Object.keys(obj)) {
        const childPath = path === '' ? key : `${path}.${key}`;
        let child;
        try {
            child = obj[key];
        }
        catch {
            // A getter that throws is non-serializable — drop the property.
            recordStrip(ctx, childPath);
            continue;
        }
        // Not `out[key] = …`: an own `__proto__` key must stay an own property.
        assignOwnDataProperty(out, key, stripNonCloneable(child, ctx, depth + 1, childPath));
    }
    return out;
}
|
|
301
|
+
/** Keys checked (top-level and one level deep) to attribute a record to a file. */
|
|
302
|
+
const DEFAULT_PATH_KEYS = ['filePath', 'path', 'file'];
|
|
303
|
+
/**
 * Exception-free property read.
 *
 * @param obj  Object (or any value) to read from.
 * @param key  Property name to read.
 * @returns `obj[key]`, or `undefined` when evaluating that access throws for
 *          any reason (e.g. a throwing getter or Proxy trap).
 */
function safeGet(obj, key) {
    let result;
    try {
        result = obj[key];
    }
    catch {
        result = undefined;
    }
    return result;
}
|
|
312
|
+
/**
 * Look for a path-like string directly on `child`. The keys in `pathKeys` are
 * tried in order and the first one whose value is a string wins. Returns
 * `undefined` when `child` is not an object or no key holds a string. Reads go
 * through `safeGet`, so a throwing accessor is treated as "no value".
 */
function pathFromChild(child, pathKeys) {
    const isObject = typeof child === 'object' && child !== null;
    if (isObject) {
        for (const candidateKey of pathKeys) {
            const candidate = safeGet(child, candidateKey);
            if (typeof candidate === 'string') {
                return candidate;
            }
        }
    }
    return undefined;
}
|
|
324
|
+
/**
 * Best-effort source-path lookup used for reporting; never throws.
 *
 * Search order:
 *   1. the path keys on `element` itself (a ParsedFile carries `filePath` here);
 *   2. the path keys on `element.properties` (a ParsedNode carries its path at
 *      `properties.filePath`) — checked before the generic sweep so attribution
 *      is deterministic when a sibling child also carries a path-like key;
 *   3. the path keys on every other own enumerable child, one level deep.
 *
 * Every read is defensive: a throwing getter / Proxy trap on an attribution key
 * must not escape and abandon the sanitize.
 *
 * @param element  record to attribute; non-objects yield `undefined`
 * @param pathKeys candidate key names, tried in order
 * @returns the first string found, or `undefined`
 */
function findFilePath(element, pathKeys) {
    if (typeof element !== 'object' || element === null) {
        return undefined;
    }
    // Step 1 — the element's own path keys.
    const direct = pathFromChild(element, pathKeys);
    if (direct !== undefined) {
        return direct;
    }
    // Step 2 — the well-known `properties` child.
    const viaProperties = pathFromChild(safeGet(element, 'properties'), pathKeys);
    if (viaProperties !== undefined) {
        return viaProperties;
    }
    // Step 3 — generic one-level sweep over the remaining children.
    let ownKeys;
    try {
        ownKeys = Object.keys(element);
    }
    catch {
        // A Proxy `ownKeys` trap that throws — give up on attribution.
        return undefined;
    }
    for (const name of ownKeys) {
        if (name !== 'properties') { // `properties` was already checked in step 2
            const nested = pathFromChild(safeGet(element, name), pathKeys);
            if (nested !== undefined) {
                return nested;
            }
        }
    }
    return undefined;
}
|
|
362
|
+
/**
 * Rewrite a worker result in place so it can be posted through structured
 * clone, and report what had to be removed.
 *
 * Phase 1 walks every own array field of `result` (except `skipFields`). An
 * array is only replaced when at least one of its elements holds a
 * non-cloneable value; clean arrays — and clean elements inside a rewritten
 * array — keep their identity. A dirty element is either dropped outright
 * (fields listed in `options.dropWholeElement`) or replaced by a stripped copy;
 * if the stripped copy still fails the clone probe, or the scan/strip itself
 * throws, the element is dropped.
 *
 * Phase 2 is a final gate: if the result as a whole still fails the clone
 * probe, every remaining offending field (array or not) is replaced by its
 * stripped form.
 *
 * Intended to run after ANY failure of the fast-path post — a
 * `DataCloneError`, or a throwing getter's own error surfaced by
 * structuredClone.
 *
 * @param result  worker result object; mutated in place
 * @param options `dropWholeElement` (required, `has(field)`), optional
 *                `skipFields` (`has(field)`), optional `pathKeys` (defaults to
 *                DEFAULT_PATH_KEYS)
 * @returns `{ skipped }` — one `{ path, reason }` entry per dropped/stripped item
 */
export function makeWorkerResultCloneSafe(result, options) {
    const pathKeys = options.pathKeys ?? DEFAULT_PATH_KEYS;
    const skipped = [];
    // ── Phase 1: per-element sanitize of array fields ────────────────────────
    for (const field of Object.keys(result)) {
        if (options.skipFields?.has(field)) {
            continue;
        }
        const entries = result[field];
        if (!Array.isArray(entries)) {
            continue;
        }
        const dropsWholeEntry = options.dropWholeElement.has(field);
        // `rebuilt` stays null until the first dirty element shows up; at that
        // point it is seeded with the clean prefix. A fully clean array is
        // therefore never copied and the field is never reassigned. Dirty
        // elements are scanned and then stripped (two passes) on purpose: the
        // non-allocating scan is what lets clean elements stay by reference,
        // and this whole path only runs after the fast post already failed.
        let rebuilt = null;
        for (let index = 0; index < entries.length; index += 1) {
            const item = entries[index];
            try {
                const dirty = containsNonCloneable(item, new WeakSet());
                if (!dirty) {
                    // Clean element: carried over by reference once a rebuild started.
                    rebuilt?.push(item);
                }
                else {
                    rebuilt ??= entries.slice(0, index); // first dirty element: copy clean prefix
                    const path = findFilePath(item, pathKeys) ?? '(unknown)';
                    if (dropsWholeEntry) {
                        skipped.push({ path, reason: `dropped non-serializable ${field} entry` });
                    }
                    else {
                        const stripCtx = { stripped: 0, seen: new Map(), keys: [] };
                        const cleaned = stripNonCloneable(item, stripCtx);
                        // Last-resort guard: if the stripped copy is still rejected by
                        // the clone probe, drop the element rather than re-throw.
                        if (!isStructuredCloneable(cleaned)) {
                            skipped.push({ path, reason: `dropped unsalvageable ${field} entry` });
                        }
                        else {
                            rebuilt.push(cleaned);
                            // Name the offending key path(s) so the leak can be located
                            // from the log, not just the array field.
                            const at = stripCtx.keys.slice(0, 3).join(', ');
                            const more = stripCtx.keys.length > 3 ? `, …+${stripCtx.keys.length - 3}` : '';
                            skipped.push({
                                path,
                                reason: `stripped ${stripCtx.stripped} non-serializable value(s) from ${field}: ${at}${more}`,
                            });
                        }
                    }
                }
            }
            catch {
                // The scan/strip of this one element threw (e.g. a Proxy trap hit
                // during structural enumeration). Drop just this element so a
                // single pathological entry cannot sink the whole result.
                rebuilt ??= entries.slice(0, index);
                skipped.push({ path: '(unknown)', reason: `dropped ${field} entry (sanitizer error)` });
            }
        }
        if (rebuilt) {
            result[field] = rebuilt;
        }
    }
    // ── Phase 2: final safety gate ───────────────────────────────────────────
    // Phase 1 only rewrites array ELEMENTS, so a non-array field — or an array's
    // own non-index property — could still hold a non-cloneable value. Nothing
    // to do when the result already passes the probe.
    if (isStructuredCloneable(result)) {
        return { skipped };
    }
    for (const field of Object.keys(result)) {
        if (options.skipFields?.has(field) || isStructuredCloneable(result[field])) {
            continue;
        }
        const stripCtx = { stripped: 0, seen: new Map(), keys: [] };
        result[field] = stripNonCloneable(result[field], stripCtx);
        const at = stripCtx.keys.slice(0, 3).join(', ');
        skipped.push({
            path: '(result)',
            reason: `stripped ${stripCtx.stripped} non-serializable value(s) from ${field}${at ? `: ${at}` : ''}`,
        });
    }
    return { skipped };
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { SupportedLanguages } from '../../../_shared/index.js';
|
|
2
|
+
import type { SkippedPath } from './clone-safety.js';
|
|
2
3
|
import type { ExtractedRouterInclude, ExtractedRouterImport, ExtractedRouterModuleAlias } from '../route-extractors/fastapi-router-bindings.js';
|
|
3
4
|
import { type MixedChainStep } from '../utils/call-analysis.js';
|
|
4
5
|
import type { ConstructorBinding } from '../type-env.js';
|
|
@@ -201,6 +202,15 @@ export interface ParseWorkerResult {
|
|
|
201
202
|
*/
|
|
202
203
|
parsedFiles: ParsedFile[];
|
|
203
204
|
skippedLanguages: Record<string, number>;
|
|
205
|
+
/**
|
|
206
|
+
* Files whose parse output carried a value the structured-clone algorithm
|
|
207
|
+
* couldn't serialize across the worker boundary (#2112). The clone-safety
|
|
208
|
+
* net stripped or dropped the offending value so the result could be
|
|
209
|
+
* delivered; these paths are surfaced to the operator so the (rare) data
|
|
210
|
+
* loss is visible. Optional for cache backward compatibility — older cache
|
|
211
|
+
* entries predate the field; consumers must guard with `?? []`.
|
|
212
|
+
*/
|
|
213
|
+
skippedPaths?: SkippedPath[];
|
|
204
214
|
fileCount: number;
|
|
205
215
|
}
|
|
206
216
|
export interface ParseWorkerInput {
|
|
@@ -17,6 +17,8 @@ import { getProvider } from '../languages/index.js';
|
|
|
17
17
|
import { getTreeSitterBufferSize, getTreeSitterContentByteLength, TREE_SITTER_MAX_BUFFER, } from '../constants.js';
|
|
18
18
|
import { ARRAY_METHOD_HOC_BLOCKLIST_SET, DEFAULT_EXPORT_IDENTIFIER_BLOCKLIST_SET, deriveDefaultExportHocName, } from '../ts-js-hoc-utils.js';
|
|
19
19
|
import { parseSourceSafe } from '../../tree-sitter/safe-parse.js';
|
|
20
|
+
import { postResultCloneSafe } from './post-result.js';
|
|
21
|
+
import { mergeResult } from './result-merge.js';
|
|
20
22
|
// ── Worker grammar loading — enforcement boundary (#2091/#2093, #2101) ───────
|
|
21
23
|
// The worker maintains its own grammar table (the guarded `_require`s below +
|
|
22
24
|
// `languageMap`) and intentionally does NOT consult the runtime
|
|
@@ -1793,41 +1795,8 @@ let accumulated = {
|
|
|
1793
1795
|
fileCount: 0,
|
|
1794
1796
|
};
|
|
1795
1797
|
let cumulativeProcessed = 0;
|
|
1796
|
-
//
|
|
1797
|
-
//
|
|
1798
|
-
// and blows the stack when arr has >~65k elements).
|
|
1799
|
-
const appendAll = (target, src) => {
|
|
1800
|
-
for (let i = 0; i < src.length; i++)
|
|
1801
|
-
target.push(src[i]);
|
|
1802
|
-
};
|
|
1803
|
-
const mergeResult = (target, src) => {
|
|
1804
|
-
appendAll(target.nodes, src.nodes);
|
|
1805
|
-
appendAll(target.relationships, src.relationships);
|
|
1806
|
-
appendAll(target.symbols, src.symbols);
|
|
1807
|
-
appendAll(target.calls, src.calls);
|
|
1808
|
-
appendAll(target.assignments, src.assignments);
|
|
1809
|
-
appendAll(target.routes, src.routes);
|
|
1810
|
-
appendAll(target.fetchCalls, src.fetchCalls);
|
|
1811
|
-
appendAll(target.fetchWrapperDefs, src.fetchWrapperDefs);
|
|
1812
|
-
appendAll(target.decoratorRoutes, src.decoratorRoutes);
|
|
1813
|
-
if (src.routerIncludes)
|
|
1814
|
-
appendAll(target.routerIncludes, src.routerIncludes);
|
|
1815
|
-
if (src.routerImports)
|
|
1816
|
-
appendAll(target.routerImports, src.routerImports);
|
|
1817
|
-
if (src.routerModuleAliases) {
|
|
1818
|
-
target.routerModuleAliases ??= [];
|
|
1819
|
-
appendAll(target.routerModuleAliases, src.routerModuleAliases);
|
|
1820
|
-
}
|
|
1821
|
-
appendAll(target.toolDefs, src.toolDefs);
|
|
1822
|
-
appendAll(target.ormQueries, src.ormQueries);
|
|
1823
|
-
appendAll(target.constructorBindings, src.constructorBindings);
|
|
1824
|
-
appendAll(target.fileScopeBindings, src.fileScopeBindings);
|
|
1825
|
-
appendAll(target.parsedFiles, src.parsedFiles);
|
|
1826
|
-
for (const [lang, count] of Object.entries(src.skippedLanguages)) {
|
|
1827
|
-
target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count;
|
|
1828
|
-
}
|
|
1829
|
-
target.fileCount += src.fileCount;
|
|
1830
|
-
};
|
|
1798
|
+
// `mergeResult` (+ its `appendAll`) lives in ./result-merge.ts (extracted so it
|
|
1799
|
+
// can be unit-tested without importing this entry module).
|
|
1831
1800
|
// Signal the pool that worker-side initialization (parser imports, language
|
|
1832
1801
|
// grammars, type-env setup, all helper modules) is complete and the message
|
|
1833
1802
|
// handler below is about to be attached. The pool's `waitForWorkerReady`
|
|
@@ -1920,7 +1889,7 @@ parentPort.on('message', (msg) => {
|
|
|
1920
1889
|
accumulated.parsedFiles = [];
|
|
1921
1890
|
}
|
|
1922
1891
|
}
|
|
1923
|
-
|
|
1892
|
+
postResultCloneSafe(accumulated);
|
|
1924
1893
|
// Reset for potential reuse
|
|
1925
1894
|
accumulated = {
|
|
1926
1895
|
nodes: [],
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { ParseWorkerResult } from './parse-worker.js';
|
|
2
|
+
/**
|
|
3
|
+
* Deliver the accumulated result to the pool, surviving a non-cloneable value
|
|
4
|
+
* (#2112). Fast path: post as-is — on a healthy result this is the only thing
|
|
5
|
+
* that runs, so clone-safety adds zero overhead to normal runs. If structured
|
|
6
|
+
* clone rejects the payload (a function/symbol leaked into an extraction
|
|
7
|
+
* record — the reporter's case was a node `properties` value pointing at a
|
|
8
|
+
* native `toString`), rewrite the boundary-crossing arrays so the result is
|
|
9
|
+
* cloneable, record the affected paths on `result.skippedPaths`, warn the
|
|
10
|
+
* operator naming the offending field + file (so the still-unpinned leak is
|
|
11
|
+
* diagnosable from logs and fixable at source), and re-post.
|
|
12
|
+
*
|
|
13
|
+
* Recovery is attempted for ANY first-post failure, not only a `DataCloneError`.
|
|
14
|
+
* structuredClone invokes getters, and a getter that THROWS surfaces its own
|
|
15
|
+
* error (a `RangeError`, etc.) — NOT a `DataCloneError` (confirmed against a
|
|
16
|
+
* real MessageChannel). Gating recovery on `DataCloneError` let such a throw
|
|
17
|
+
* re-throw past the sanitizer and re-arm, under `POOL_SIZE=1`, the worker-death
|
|
18
|
+
* cascade this net prevents. The recovery path is wrapped in its own try/catch
|
|
19
|
+
* so a still-uncloneable re-post fails closed to a primitive-only
|
|
20
|
+
* `{type:'error'}` DELIBERATELY rather than escaping the worker.
|
|
21
|
+
*/
|
|
22
|
+
export declare function postResultCloneSafe(result: ParseWorkerResult): void;
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker → main result delivery with clone-safety (#2112).
|
|
3
|
+
*
|
|
4
|
+
* Extracted from `parse-worker.ts` into its own side-effect-free module so it
|
|
5
|
+
* can be imported and exercised directly (the parse worker is an entry module:
|
|
6
|
+
* importing it would construct the parser, post `ready`, and attach the real
|
|
7
|
+
* message handler). The integration test imports `postResultCloneSafe` from
|
|
8
|
+
* here to cover the production wiring end to end rather than re-implementing it.
|
|
9
|
+
*/
|
|
10
|
+
import { parentPort } from 'node:worker_threads';
|
|
11
|
+
import { makeWorkerResultCloneSafe } from './clone-safety.js';
|
|
12
|
+
/**
|
|
13
|
+
* Strict mode (opt-in via `GITNEXUS_STRICT_CLONE=1`, inherited by workers). When
|
|
14
|
+
* on, a clone failure THROWS with the offending key path instead of silently
|
|
15
|
+
* sanitizing + delivering — so a leak introduced by a future provider/extractor
|
|
16
|
+
* change fails LOUDLY (in CI / dev) at its origin rather than being quietly
|
|
17
|
+
* stripped in production. The silent-recovery behavior is exactly what hid the
|
|
18
|
+
* original #2112 leak; strict mode removes the silence where we want loudness.
|
|
19
|
+
* Off in production, where the net's job is to keep the run alive.
|
|
20
|
+
*/
|
|
21
|
+
// Read once when this module is evaluated; only the exact string '1' enables it.
const STRICT_CLONE = process.env.GITNEXUS_STRICT_CLONE === '1';
|
|
22
|
+
/**
 * Post the accumulated result to the pool, surviving a non-cloneable value
 * (#2112).
 *
 * 1. Fast path — post the result as-is. On a healthy result nothing else runs.
 * 2. Recovery — entered on ANY failure of the first post, not only a
 *    `DataCloneError`: structuredClone invokes getters, and a getter that
 *    throws surfaces its own error type. The result is sanitized in place via
 *    `makeWorkerResultCloneSafe` (`parsedFiles` entries are dropped whole,
 *    `skippedPaths` is left untouched), the affected paths are appended to
 *    `result.skippedPaths`, a `{type:'warning'}` naming up to five of them is
 *    posted, and the result is re-posted.
 * 3. Fail closed — anything thrown during recovery (including the strict-mode
 *    error raised when `STRICT_CLONE` is on and something had to be sanitized)
 *    is reported as a primitive-only `{type:'error'}` message instead of
 *    escaping the worker.
 *
 * @param result accumulated parse-worker result; may be mutated by recovery
 */
export function postResultCloneSafe(result) {
    // ── 1. Fast path ─────────────────────────────────────────────────────────
    let delivered = false;
    try {
        parentPort.postMessage({ type: 'result', data: result });
        delivered = true;
    }
    catch {
        // Swallowed on purpose: every first-post failure routes to recovery.
    }
    if (delivered) {
        return;
    }
    // ── 2. Recovery ──────────────────────────────────────────────────────────
    try {
        const report = makeWorkerResultCloneSafe(result, {
            dropWholeElement: new Set(['parsedFiles']),
            skipFields: new Set(['skippedPaths']),
        });
        const skipped = report.skipped;
        const skippedCount = skipped.length;
        if (skippedCount > 0) {
            if (STRICT_CLONE) {
                // Strict mode: surface the leak with its key path(s) instead of
                // delivering a sanitized result. Lands in the catch below.
                const detail = skipped.map((entry) => `${entry.path}: ${entry.reason}`).join('; ');
                throw new Error(`GITNEXUS_STRICT_CLONE: worker result was not structured-cloneable — ${detail}`);
            }
            result.skippedPaths = [...(result.skippedPaths ?? []), ...skipped];
            const preview = skipped.slice(0, 5);
            const sample = preview.map((entry) => `${entry.path} (${entry.reason})`).join('; ');
            let more = '';
            if (skippedCount > 5) {
                more = ` …and ${skippedCount - 5} more`;
            }
            if (parentPort) {
                parentPort.postMessage({
                    type: 'warning',
                    message: `Sanitized ${skippedCount} file(s) with non-serializable parse output before delivery: ${sample}${more}`,
                });
            }
        }
        parentPort.postMessage({ type: 'result', data: result });
    }
    catch (err) {
        // ── 3. Fail closed with a primitive-only payload ─────────────────────
        const failure = err instanceof Error ? err : new Error(String(err));
        parentPort.postMessage({ type: 'error', error: failure.message, errorStack: failure.stack });
    }
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Merge of accumulated parse-worker results (sub-batch result → the conceptual
|
|
3
|
+
* job's running accumulator).
|
|
4
|
+
*
|
|
5
|
+
* Extracted from `parse-worker.ts` into this side-effect-free module so the
|
|
6
|
+
* merge can be imported and unit-tested directly — the parse worker is an entry
|
|
7
|
+
* module (importing it constructs the parser, posts `ready`, and attaches the
|
|
8
|
+
* real MessagePort handler), so a main-thread test cannot import a helper out of
|
|
9
|
+
* it. Mirrors the `post-result.ts` extraction.
|
|
10
|
+
*
|
|
11
|
+
* `import type` of `ParseWorkerResult` is erased at runtime, so there is no
|
|
12
|
+
* import cycle with `parse-worker.ts` (which imports this module's runtime).
|
|
13
|
+
*/
|
|
14
|
+
import type { ParseWorkerResult } from './parse-worker.js';
|
|
15
|
+
/**
|
|
16
|
+
* Merge `src` into `target` in place: append every boundary-crossing array,
|
|
17
|
+
* sum the per-language skip counts, union the clone-safety `skippedPaths`, and
|
|
18
|
+
* add the file count.
|
|
19
|
+
*/
|
|
20
|
+
export declare const mergeResult: (target: ParseWorkerResult, src: ParseWorkerResult) => void;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// Use a loop instead of push(...spread) to avoid hitting V8's argument limit
// when merging large result sets (push(...arr) calls apply() under the hood
// and blows the stack when arr has >~65k elements).
const appendAll = (target, src) => {
    for (let i = 0; i < src.length; i++)
        target.push(src[i]);
};
/**
 * Merge `src` into `target` in place: append every boundary-crossing array,
 * sum the per-language skip counts, union the clone-safety `skippedPaths`, and
 * add the file count.
 *
 * `routerIncludes` / `routerImports` are appended only when present on `src`;
 * `routerModuleAliases` and `skippedPaths` are additionally created on
 * `target` on demand, so a target without them stays without them until a
 * source actually contributes entries.
 *
 * @param target running accumulator; mutated
 * @param src    result to fold in; not mutated
 */
export const mergeResult = (target, src) => {
    appendAll(target.nodes, src.nodes);
    appendAll(target.relationships, src.relationships);
    appendAll(target.symbols, src.symbols);
    appendAll(target.calls, src.calls);
    appendAll(target.assignments, src.assignments);
    appendAll(target.routes, src.routes);
    appendAll(target.fetchCalls, src.fetchCalls);
    appendAll(target.fetchWrapperDefs, src.fetchWrapperDefs);
    appendAll(target.decoratorRoutes, src.decoratorRoutes);
    if (src.routerIncludes)
        appendAll(target.routerIncludes, src.routerIncludes);
    if (src.routerImports)
        appendAll(target.routerImports, src.routerImports);
    if (src.routerModuleAliases) {
        target.routerModuleAliases ??= [];
        appendAll(target.routerModuleAliases, src.routerModuleAliases);
    }
    appendAll(target.toolDefs, src.toolDefs);
    appendAll(target.ormQueries, src.ormQueries);
    appendAll(target.constructorBindings, src.constructorBindings);
    appendAll(target.fileScopeBindings, src.fileScopeBindings);
    appendAll(target.parsedFiles, src.parsedFiles);
    for (const [lang, count] of Object.entries(src.skippedLanguages)) {
        target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count;
    }
    if (src.skippedPaths && src.skippedPaths.length > 0) {
        // Go through appendAll like every other array: a spread into push()
        // passes each entry as a call argument and throws a RangeError once the
        // list is large enough, which would turn a recovered result into a
        // merge failure.
        target.skippedPaths ??= [];
        appendAll(target.skippedPaths, src.skippedPaths);
    }
    target.fileCount += src.fileCount;
};
|
|
@@ -273,6 +273,12 @@ export declare function resolveAutoPoolSize(): number;
|
|
|
273
273
|
* time spent across all attempts/splits/retries. When the budget is
|
|
274
274
|
* exhausted, the pool surfaces the in-flight path via `WorkerPoolDispatchError`
|
|
275
275
|
* instead of letting timeouts compound indefinitely.
|
|
276
|
+
*
|
|
277
|
+
* Upstream of these layers, the parse worker self-sanitizes a result that the
|
|
278
|
+
* structured-clone algorithm can't serialize (#2112) — stripping or dropping
|
|
279
|
+
* the offending value and reporting the affected paths on the result — so a
|
|
280
|
+
* single non-cloneable value can't masquerade as a worker death and exhaust a
|
|
281
|
+
* slot's respawn budget here.
|
|
276
282
|
*/
|
|
277
283
|
export declare const createWorkerPool: (workerUrl: URL, poolSize?: number, options?: WorkerPoolOptions) => WorkerPool;
|
|
278
284
|
export {};
|
|
@@ -523,6 +523,12 @@ function createJobs(items, maxItems, maxBytes, timeoutMs, chunkHash) {
|
|
|
523
523
|
* time spent across all attempts/splits/retries. When the budget is
|
|
524
524
|
* exhausted, the pool surfaces the in-flight path via `WorkerPoolDispatchError`
|
|
525
525
|
* instead of letting timeouts compound indefinitely.
|
|
526
|
+
*
|
|
527
|
+
* Upstream of these layers, the parse worker self-sanitizes a result that the
|
|
528
|
+
* structured-clone algorithm can't serialize (#2112) — stripping or dropping
|
|
529
|
+
* the offending value and reporting the affected paths on the result — so a
|
|
530
|
+
* single non-cloneable value can't masquerade as a worker death and exhaust a
|
|
531
|
+
* slot's respawn budget here.
|
|
526
532
|
*/
|
|
527
533
|
export const createWorkerPool = (workerUrl, poolSize, options) => {
|
|
528
534
|
// Validate worker script exists before spawning to prevent uncaught
|
|
@@ -1313,14 +1319,19 @@ export const createWorkerPool = (workerUrl, poolSize, options) => {
|
|
|
1313
1319
|
if (settled || stopped)
|
|
1314
1320
|
return;
|
|
1315
1321
|
// Native postMessage delivers POJO directly via Node's
|
|
1316
|
-
// structured clone.
|
|
1317
|
-
//
|
|
1318
|
-
//
|
|
1319
|
-
//
|
|
1320
|
-
//
|
|
1321
|
-
//
|
|
1322
|
-
// `
|
|
1323
|
-
//
|
|
1322
|
+
// structured clone. Two distinct clone failure modes exist,
|
|
1323
|
+
// and NEITHER reaches this handler: (1) a SENDER-side
|
|
1324
|
+
// non-cloneable value (a function/symbol that leaked into the
|
|
1325
|
+
// result) throws a synchronous `DataCloneError` on the
|
|
1326
|
+
// worker's own postMessage — the parse worker self-sanitizes
|
|
1327
|
+
// such results before delivery (#2112) and falls back to a
|
|
1328
|
+
// primitive-only `{type:'error'}` if it still can't serialize;
|
|
1329
|
+
// (2) a RECEIVER-side deserialization failure surfaces as a
|
|
1330
|
+
// `messageerror` event handled below. The only thing THIS
|
|
1331
|
+
// handler guards is a worker that sends a message without a
|
|
1332
|
+
// `type` discriminant (a worker bug, not a wire-format issue):
|
|
1333
|
+
// without the guard `null.type` would throw a TypeError out of
|
|
1334
|
+
// the EventEmitter listener → uncaughtException on the main
|
|
1324
1335
|
// thread.
|
|
1325
1336
|
const msg = raw;
|
|
1326
1337
|
if (msg === null || typeof msg !== 'object' || typeof msg.type !== 'string') {
|
|
@@ -1417,12 +1428,15 @@ export const createWorkerPool = (workerUrl, poolSize, options) => {
|
|
|
1417
1428
|
`Likely OOM or native addon failure${inFlightSuffix}.`, excludes);
|
|
1418
1429
|
}
|
|
1419
1430
|
};
|
|
1420
|
-
// `messageerror` fires when V8 fails to
|
|
1421
|
-
// payload (
|
|
1422
|
-
//
|
|
1423
|
-
//
|
|
1424
|
-
//
|
|
1425
|
-
//
|
|
1431
|
+
// `messageerror` fires when V8 fails to DESERIALIZE a postMessage
|
|
1432
|
+
// payload on THIS (receiver) side — a value that serialized on the
|
|
1433
|
+
// worker but can't be reconstructed here. (A non-cloneable value on
|
|
1434
|
+
// the SENDER side instead throws a synchronous DataCloneError on the
|
|
1435
|
+
// worker's own postMessage; that path is caught and sanitized
|
|
1436
|
+
// worker-side (#2112) and never arrives here.) The worker stays ALIVE
|
|
1437
|
+
// but the message is lost — without this handler the pool would sit on
|
|
1438
|
+
// the dropped message until the idle timeout expires. Treat it as
|
|
1439
|
+
// worker death so the resilience layers fire:
|
|
1426
1440
|
// requeue the remainder via `recoverAndResume`, attribute the
|
|
1427
1441
|
// in-flight file from the `starting-file` signal (if observed),
|
|
1428
1442
|
// and let the per-slot respawn budget and circuit breaker decide
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON-safe projection of `AnalyzeResult` for the analyze-worker → parent IPC
|
|
3
|
+
* boundary (#2112 boundary audit; #2135).
|
|
4
|
+
*
|
|
5
|
+
* The forked analyze worker (`analyze-worker.ts`) reports completion to the
|
|
6
|
+
* parent over `child_process` IPC, which uses Node's DEFAULT `'json'`
|
|
7
|
+
* serialization — `api.ts` forks the worker with no `serialization:` option, so
|
|
8
|
+
* the channel runs `JSON.stringify`/`JSON.parse`, NOT V8 structured clone.
|
|
9
|
+
*
|
|
10
|
+
* `AnalyzeResult.pipelineResult` is populated on every successful analysis
|
|
11
|
+
* (`run-analyze.ts`) and carries `pipelineResult.graph` — the live
|
|
12
|
+
* `KnowledgeGraph` closure object. Sending the raw result across this channel is
|
|
13
|
+
* wrong three ways:
|
|
14
|
+
* 1. Waste — the graph's `nodes`/`relationships` getters force-materialize the
|
|
15
|
+
* ENTIRE graph into two arrays, then JSON-stringify them, on every analyze.
|
|
16
|
+
* On a large repo (the #2112 scenario) that is a multi-hundred-MB
|
|
17
|
+
* stringify+parse whose result is immediately discarded.
|
|
18
|
+
* 2. Silent corruption — the graph's methods are own function properties;
|
|
19
|
+
* `JSON.stringify` drops them with no error, so a `pipelineResult.graph`
|
|
20
|
+
* that survived the wire is a data-only husk whose `forEachNode(...)` throws
|
|
21
|
+
* "is not a function" far from the cause.
|
|
22
|
+
* 3. Conditional crash — a BigInt or circular reference anywhere in the
|
|
23
|
+
* payload makes `process.send` throw `TypeError` synchronously; the throw is
|
|
24
|
+
* caught in the worker and re-sent as `{type:'error'}`, turning a
|
|
25
|
+
* SUCCESSFUL analysis (DB already written) into a reported FAILURE. This is
|
|
26
|
+
* the #2112 failure family on the server path, and — unlike the parse-worker
|
|
27
|
+
* result boundary — it has no clone-safety net.
|
|
28
|
+
*
|
|
29
|
+
* The parent (`api.ts`) reads only `result.repoName`; `pipelineResult`'s real
|
|
30
|
+
* consumers (CLI skill generation) call `runFullAnalysis` in-process and never
|
|
31
|
+
* cross this fork. So the worker sends an explicit allowlist of the scalar
|
|
32
|
+
* fields, JSON-safe by construction.
|
|
33
|
+
*/
|
|
34
|
+
import type { AnalyzeResult } from '../core/run-analyze.js';
|
|
35
|
+
/**
|
|
36
|
+
* The JSON-safe subset of `AnalyzeResult` that crosses the analyze-worker IPC
|
|
37
|
+
* boundary. A `Pick` allowlist — NOT `Omit<…, 'pipelineResult'>`. With `Pick`
|
|
38
|
+
* the allowlist IS the type, so the projection is exhaustive by construction:
|
|
39
|
+
* `projectAnalyzeResultForIpc`'s return literal must name exactly these keys
|
|
40
|
+
* (omitting one is a compile error), and a new field added to `AnalyzeResult`
|
|
41
|
+
* is simply absent from the wire until it is *deliberately* added here. `Omit`
|
|
42
|
+
* couldn't give that guarantee — it kept every other field, including OPTIONAL
|
|
43
|
+
* ones (e.g. `isPrimaryBranch?`), so an optional non-serializable field could be
|
|
44
|
+
* advertised by the type yet silently dropped by the runtime allowlist.
|
|
45
|
+
*
|
|
46
|
+
* `isPrimaryBranch` is intentionally excluded: the parent (`api.ts`) reads only
|
|
47
|
+
* `repoName`, and nothing consumes `isPrimaryBranch` across this fork (its CLI
|
|
48
|
+
* consumer calls `runFullAnalysis` in-process). Add a field here only when a
|
|
49
|
+
* server-side IPC consumer actually needs it — and only if it is JSON-safe.
|
|
50
|
+
*/
|
|
51
|
+
export type AnalyzeResultIpc = Pick<AnalyzeResult, 'repoName' | 'repoPath' | 'stats' | 'alreadyUpToDate' | 'ftsRepairedOnly' | 'ftsSkipped'>;
|
|
52
|
+
/**
|
|
53
|
+
* Project an `AnalyzeResult` down to the JSON-safe fields the parent consumes,
|
|
54
|
+
* dropping `pipelineResult` (the live `KnowledgeGraph`) and any other field not
|
|
55
|
+
* in the `AnalyzeResultIpc` allowlist. The return literal is exhaustive over
|
|
56
|
+
* `AnalyzeResultIpc` (a missing key is a compile error).
|
|
57
|
+
*/
|
|
58
|
+
export declare function projectAnalyzeResultForIpc(result: AnalyzeResult): AnalyzeResultIpc;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Project an `AnalyzeResult` down to the JSON-safe fields the parent consumes,
|
|
3
|
+
* dropping `pipelineResult` (the live `KnowledgeGraph`) and any other field not
|
|
4
|
+
* in the `AnalyzeResultIpc` allowlist. The return literal is exhaustive over
|
|
5
|
+
* `AnalyzeResultIpc` (a missing required key is a compile error).
|
|
6
|
+
*/
|
|
7
|
+
export function projectAnalyzeResultForIpc(result) {
|
|
8
|
+
return {
|
|
9
|
+
repoName: result.repoName,
|
|
10
|
+
repoPath: result.repoPath,
|
|
11
|
+
stats: result.stats,
|
|
12
|
+
alreadyUpToDate: result.alreadyUpToDate,
|
|
13
|
+
ftsRepairedOnly: result.ftsRepairedOnly,
|
|
14
|
+
ftsSkipped: result.ftsSkipped,
|
|
15
|
+
};
|
|
16
|
+
}
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
* Child -> Parent: { type: 'error', message: string }
|
|
12
12
|
*/
|
|
13
13
|
import { runFullAnalysis } from '../core/run-analyze.js';
|
|
14
|
+
import { projectAnalyzeResultForIpc } from './analyze-worker-ipc.js';
|
|
14
15
|
import { closeLbug } from '../core/lbug/lbug-adapter.js';
|
|
15
16
|
function send(msg) {
|
|
16
17
|
process.send?.(msg);
|
|
@@ -48,7 +49,12 @@ process.on('message', async (msg) => {
|
|
|
48
49
|
send({ type: 'progress', phase: 'log', percent: -1, message });
|
|
49
50
|
},
|
|
50
51
|
});
|
|
51
|
-
|
|
52
|
+
// Send a JSON-safe projection, NOT the raw result: the IPC channel uses
|
|
53
|
+
// default-JSON serialization and `result.pipelineResult` carries the live
|
|
54
|
+
// KnowledgeGraph (wasteful to materialize, silently corrupted by JSON, and
|
|
55
|
+
// a BigInt/circular value would throw and mis-report this success as a
|
|
56
|
+
// failure). See analyze-worker-ipc.ts.
|
|
57
|
+
send({ type: 'complete', result: projectAnalyzeResultForIpc(result) });
|
|
52
58
|
}
|
|
53
59
|
catch (err) {
|
|
54
60
|
send({ type: 'error', message: err?.message || 'Analysis failed' });
|
|
@@ -149,6 +149,9 @@ export const slimParseWorkerResultsForCache = (chunkResults) => {
|
|
|
149
149
|
assignments: [],
|
|
150
150
|
constructorBindings: [],
|
|
151
151
|
parsedFiles: [],
|
|
152
|
+
// #2112: a clone-safety skip list is per-run telemetry, not graph data —
|
|
153
|
+
// replay ignores it. Drop it so it doesn't bloat the cached shard.
|
|
154
|
+
skippedPaths: [],
|
|
152
155
|
});
|
|
153
156
|
}
|
|
154
157
|
return slim;
|
package/package.json
CHANGED