haechi 0.9.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +19 -12
- package/README.md +19 -12
- package/SECURITY.md +1 -1
- package/docs/README.md +1 -1
- package/docs/current/api-stability.ko.md +87 -41
- package/docs/current/api-stability.md +87 -41
- package/docs/current/configuration.ko.md +20 -1
- package/docs/current/configuration.md +20 -1
- package/docs/current/release-1.0-implementation-scope.ko.md +170 -0
- package/docs/current/release-1.0-implementation-scope.md +164 -0
- package/docs/current/release-1.1-implementation-scope.ko.md +128 -0
- package/docs/current/release-1.1-implementation-scope.md +128 -0
- package/docs/current/risk-register-release-gate.ko.md +26 -6
- package/docs/current/risk-register-release-gate.md +26 -6
- package/docs/current/threat-model.ko.md +22 -3
- package/docs/current/threat-model.md +22 -3
- package/package.json +7 -5
- package/packages/audit/index.mjs +13 -1
- package/packages/auth/index.mjs +173 -0
- package/packages/cli/bin/haechi.mjs +1 -1
- package/packages/cli/runtime.mjs +230 -5
- package/packages/core/index.mjs +19 -4
- package/packages/plugin/index.mjs +93 -17
- package/packages/plugin/process-sandbox.mjs +629 -0
- package/packages/plugin/sandbox-common.mjs +243 -0
- package/packages/plugin/sandbox.mjs +415 -0
- package/packages/plugin/signing.mjs +393 -0
- package/packages/ssrf/index.mjs +189 -0
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
// The process-isolated authProvider sandbox (Haechi 1.1 §2.1/§2.2/§2.6).
|
|
2
|
+
//
|
|
3
|
+
// CAPABILITY ENFORCEMENT (what this adds over the 1.0 worker, read
|
|
4
|
+
// docs/current/release-1.1-implementation-scope.md §1): the signed plugin runs in
|
|
5
|
+
// a CHILD node process under the Node permission model (`--permission`) with
|
|
6
|
+
// ZERO grants — no fs, no child-process, no worker, no addons, no wasi, and (since
|
|
7
|
+
// no `--allow-net` is passed) no network. On a Node that enforces `--allow-net`,
|
|
8
|
+
// the kernel denies `net`/`fetch`/`dns` AND the `process.binding('tcp_wrap')`
|
|
9
|
+
// bypass, so a malicious *signed* plugin CANNOT exfiltrate the credential it
|
|
10
|
+
// receives. This is real capability enforcement, not the worker's trust-only model.
|
|
11
|
+
//
|
|
12
|
+
// Three load-bearing controls the empirical (Node-26) review made mandatory:
|
|
13
|
+
// 1. NETWORK = the kernel `--allow-net` denial, never a "delete node:net"
|
|
14
|
+
// harness — a harness is trivially bypassed (tcp_wrap / a fresh import).
|
|
15
|
+
// The child is always spawned WITHOUT --allow-net (zero grants); construction
|
|
16
|
+
// fails closed when `netEnforcementSupported()` reports this Node cannot enforce it.
|
|
17
|
+
// 2. STDIO fully closed — `stdio:['ignore','ignore','ignore','ipc']`: no stdout,
|
|
18
|
+
// no stderr, no inheritable fd. A plugin writing the credential to stderr
|
|
19
|
+
// reaches NO host-visible sink. The only channel is the dedicated IPC.
|
|
20
|
+
// 3. NO fs grant at all — the plugin is loaded from a `data:` URL the host hands
|
|
21
|
+
// to the child over IPC, so there is no temp-dir / realpath / symlink / TOCTOU
|
|
22
|
+
// surface and no `--allow-fs-read`. A runtime import of a host file fails
|
|
23
|
+
// closed (the permission model denies fs).
|
|
24
|
+
//
|
|
25
|
+
// The trust boundary (load gate, claims sanitizer, bearer extraction, host
|
|
26
|
+
// keyed-HMAC identity) is SHARED with the worker via ./sandbox-common.mjs so the
|
|
27
|
+
// two runtimes cannot diverge. Only the transport (child_process spawn + IPC) and
|
|
28
|
+
// its async spawn/load handshake live here.
|
|
29
|
+
//
|
|
30
|
+
// Zero runtime dependency: node:child_process + node:crypto + the in-repo
|
|
31
|
+
// haechi/plugin (load gate) and haechi/auth (identity + conformance).
|
|
32
|
+
|
|
33
|
+
import { spawn, spawnSync } from "node:child_process";
|
|
34
|
+
import { randomUUID } from "node:crypto";
|
|
35
|
+
import { assertAuthProviderConformance, buildExternalIdentity } from "../auth/index.mjs";
|
|
36
|
+
import { createGuardedKeyFetcher } from "../ssrf/index.mjs";
|
|
37
|
+
import {
|
|
38
|
+
bearerCredentialFromRequest,
|
|
39
|
+
loadAndVerifyPlugin,
|
|
40
|
+
makeFireAndForgetAudit,
|
|
41
|
+
sanitizeClaims
|
|
42
|
+
} from "./sandbox-common.mjs";
|
|
43
|
+
|
|
44
|
+
// The child flags. `--permission` enables the deny-by-default Node permission
|
|
45
|
+
// model; we pass NO --allow-* grant, so fs/child-process/worker/addons/wasi/net
|
|
46
|
+
// are all kernel-denied. `--disable-proto=delete` removes Object.prototype.__proto__.
|
|
47
|
+
const CHILD_FLAGS = Object.freeze(["--permission", "--disable-proto=delete"]);
|
|
48
|
+
|
|
49
|
+
// A CONSTANT bootstrap harness, passed via `node -e`. It is identical for every
|
|
50
|
+
// plugin (the plugin bytes arrive over IPC, NOT on the command line — so there is
|
|
51
|
+
// no ARG_MAX limit and the harness never varies). It runs as CommonJS under -e and
|
|
52
|
+
// uses a dynamic import() of a data: URL to load the verified plugin source.
|
|
53
|
+
//
|
|
54
|
+
// Wire (JSON strings both directions over the IPC, serialization:'json'):
|
|
55
|
+
// host → child: {t:'load', source:<base64>} | {t:'auth', cid, credential}
|
|
56
|
+
// child → host: {t:'ready'} | {t:'loaded'} | {t:'load-error'} | {t:'auth', cid, claims|deny}
|
|
57
|
+
const PROCESS_HARNESS = [
|
|
58
|
+
"'use strict';",
|
|
59
|
+
"let __plugin = null;",
|
|
60
|
+
"function __pick(mod){",
|
|
61
|
+
" return (typeof mod.default === 'function') ? mod.default",
|
|
62
|
+
" : (typeof mod.authenticate === 'function') ? mod.authenticate",
|
|
63
|
+
" : (mod.default && typeof mod.default.authenticate === 'function') ? mod.default.authenticate",
|
|
64
|
+
" : null;",
|
|
65
|
+
"}",
|
|
66
|
+
"process.on('message', async (raw) => {",
|
|
67
|
+
" let msg;",
|
|
68
|
+
" try { msg = JSON.parse(typeof raw === 'string' ? raw : String(raw)); } catch { return; }",
|
|
69
|
+
" if (!msg || typeof msg !== 'object') return;",
|
|
70
|
+
" if (msg.t === 'load') {",
|
|
71
|
+
" try {",
|
|
72
|
+
" const mod = await import('data:text/javascript;base64,' + msg.source);",
|
|
73
|
+
" const fn = __pick(mod);",
|
|
74
|
+
" if (typeof fn !== 'function') throw new Error('plugin entry must export an authenticate function');",
|
|
75
|
+
" __plugin = fn;",
|
|
76
|
+
" process.send(JSON.stringify({ t: 'loaded' }));",
|
|
77
|
+
" } catch (err) {",
|
|
78
|
+
" process.send(JSON.stringify({ t: 'load-error' }));",
|
|
79
|
+
" }",
|
|
80
|
+
" return;",
|
|
81
|
+
" }",
|
|
82
|
+
" if (msg.t === 'auth') {",
|
|
83
|
+
" const cid = msg.cid;",
|
|
84
|
+
" try {",
|
|
85
|
+
" if (typeof __plugin !== 'function') { process.send(JSON.stringify({ t: 'auth', cid, deny: true })); return; }",
|
|
86
|
+
// The host injects operator-declared key material (the plugin NEVER names a URL;
|
|
87
|
+
// net is denied in the child, so it cannot fetch keys itself). Plugins that do
|
|
88
|
+
// not need it simply ignore the second argument.
|
|
89
|
+
" const out = await __plugin(msg.credential, { keyMaterial: (msg.keyMaterial !== undefined ? msg.keyMaterial : null) });",
|
|
90
|
+
" if (!out || out.deny === true || typeof out !== 'object') { process.send(JSON.stringify({ t: 'auth', cid, deny: true })); return; }",
|
|
91
|
+
" process.send(JSON.stringify({ t: 'auth', cid, claims: out }));",
|
|
92
|
+
" } catch (err) {",
|
|
93
|
+
// A plugin throw NEVER propagates: it surfaces to the host as a deny.
|
|
94
|
+
" process.send(JSON.stringify({ t: 'auth', cid, deny: true }));",
|
|
95
|
+
" }",
|
|
96
|
+
" return;",
|
|
97
|
+
" }",
|
|
98
|
+
"});",
|
|
99
|
+
"process.send(JSON.stringify({ t: 'ready' }));"
|
|
100
|
+
].join("\n");
|
|
101
|
+
|
|
102
|
+
// Detect whether THIS Node enforces network containment under --permission. The
|
|
103
|
+
// permission model only gates net if the `--allow-net` flag exists (Node >= 24 /
|
|
104
|
+
// experimental in some 22.x lines do not have it). Without it, --permission denies
|
|
105
|
+
// fs/exec/worker but NOT net — so a malicious plugin could still exfiltrate the
|
|
106
|
+
// credential over the network. We therefore fail closed (refuse to construct) on
|
|
107
|
+
// a Node that cannot enforce it, rather than pretend to contain.
|
|
108
|
+
//
|
|
109
|
+
// Detection (memoized; NO version parsing — we probe BEHAVIOR):
|
|
110
|
+
// 1. Fast path: if --allow-net isn't even a recognized flag, net is not gated.
|
|
111
|
+
// 2. Authoritative: spawn a `--permission` child with NO --allow-net and confirm
|
|
112
|
+
// net.connect is actually DENIED (ERR_ACCESS_DENIED). This is immune to a Node
|
|
113
|
+
// that lists the flag but does not enforce it — we verify the denial, not the
|
|
114
|
+
// flag. Exit 0 = net is enforced/denied (supported); anything else = not.
|
|
115
|
+
let _netSupportMemo;
|
|
116
|
+
export function netEnforcementSupported() {
|
|
117
|
+
if (_netSupportMemo !== undefined) {
|
|
118
|
+
return _netSupportMemo;
|
|
119
|
+
}
|
|
120
|
+
try {
|
|
121
|
+
if (!process.allowedNodeEnvironmentFlags?.has?.("--allow-net")) {
|
|
122
|
+
_netSupportMemo = false;
|
|
123
|
+
return false;
|
|
124
|
+
}
|
|
125
|
+
const probeCode =
|
|
126
|
+
"const n=require('net');const s=n.connect({host:'127.0.0.1',port:1});"
|
|
127
|
+
+ "s.on('error',e=>process.exit(e&&e.code==='ERR_ACCESS_DENIED'?0:3));"
|
|
128
|
+
+ "s.on('connect',()=>{try{s.destroy();}catch{}process.exit(3);});"
|
|
129
|
+
+ "setTimeout(()=>process.exit(3),500);";
|
|
130
|
+
const probe = spawnSync(process.execPath, ["--permission", "-e", probeCode], { stdio: "ignore" });
|
|
131
|
+
_netSupportMemo = probe.status === 0;
|
|
132
|
+
} catch {
|
|
133
|
+
_netSupportMemo = false;
|
|
134
|
+
}
|
|
135
|
+
return _netSupportMemo;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// Env scrubbing: the permission model does NOT protect inherited env, so a child
|
|
139
|
+
// that could be made to read process.env would see host secrets. We pass a fresh,
|
|
140
|
+
// EMPTY env — no inherited vars, and critically no NODE_OPTIONS (which could inject
|
|
141
|
+
// flags). node --permission -e boots fine with an empty env (verified).
|
|
142
|
+
function scrubbedEnv() {
|
|
143
|
+
return {};
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
function createProcessIsolatedAuthProviderHandle({
|
|
147
|
+
manifestPath,
|
|
148
|
+
trustAnchors,
|
|
149
|
+
allowCapabilities = [],
|
|
150
|
+
pin = null,
|
|
151
|
+
revoked = {},
|
|
152
|
+
versionFloor = {},
|
|
153
|
+
cryptoProvider,
|
|
154
|
+
auditSink,
|
|
155
|
+
timeoutMs,
|
|
156
|
+
maxPendingCalls = 8,
|
|
157
|
+
maxMessageBytes = 16384,
|
|
158
|
+
coreVersion = null,
|
|
159
|
+
now = Date.now,
|
|
160
|
+
allowedLabelKeys,
|
|
161
|
+
execPath = process.execPath,
|
|
162
|
+
// Network containment policy. "require-permission" (the only PR1 mode, and the
|
|
163
|
+
// default) means: this Node MUST enforce --allow-net, else construction throws
|
|
164
|
+
// — the credential-containment guarantee is not honest without it. The
|
|
165
|
+
// best-effort "allow-harness" fallback is deferred to a later minor (see the
|
|
166
|
+
// 1.1 scope doc §2.2). `detectNetSupport` is an injectable seam for tests.
|
|
167
|
+
netEnforcement = "require-permission",
|
|
168
|
+
detectNetSupport = netEnforcementSupported,
|
|
169
|
+
// Optional host-mediated key material (1.1 §2.3): for a CUSTOM-credential plugin
|
|
170
|
+
// that needs a key document (e.g. a JWKS-like doc) to validate its credential.
|
|
171
|
+
// The HOST fetches it from this OPERATOR-declared URL through the SSRF-hardened
|
|
172
|
+
// core guarded fetch and injects it over the IPC — the plugin never names a URL
|
|
173
|
+
// (no plugin-driven SSRF), and net is denied in the child so it cannot fetch
|
|
174
|
+
// keys itself. The fetch is TTL-cached + cooldown-bounded (no outbound pump).
|
|
175
|
+
// Shape: { url, ttlMs?, cooldownMs?, timeoutMs?, maxBytes?, fetchImpl?, lookupImpl? }.
|
|
176
|
+
keyMaterial = null,
|
|
177
|
+
// Spawn-storm circuit breaker (anti-DoS): if the child is killed (timeout/crash)
|
|
178
|
+
// respawnMaxKills times within respawnWindowMs, trip to a PERMANENT fail-closed
|
|
179
|
+
// deny (operator reset = recreate the provider). respawnBackoffMs is the base for
|
|
180
|
+
// an exponential backoff between respawns so a flapping plugin cannot become a
|
|
181
|
+
// spawn storm.
|
|
182
|
+
respawnMaxKills = 5,
|
|
183
|
+
respawnWindowMs = 10_000,
|
|
184
|
+
respawnBackoffMs = 100
|
|
185
|
+
} = {}) {
|
|
186
|
+
if (!manifestPath || typeof manifestPath !== "string") {
|
|
187
|
+
throw new Error("createProcessIsolatedAuthProvider requires a manifestPath string");
|
|
188
|
+
}
|
|
189
|
+
if (typeof cryptoProvider?.hmac !== "function") {
|
|
190
|
+
throw new Error("createProcessIsolatedAuthProvider requires a cryptoProvider with hmac()");
|
|
191
|
+
}
|
|
192
|
+
if (!auditSink || typeof auditSink.record !== "function") {
|
|
193
|
+
throw new Error("createProcessIsolatedAuthProvider requires an auditSink with record()");
|
|
194
|
+
}
|
|
195
|
+
if (!Number.isInteger(timeoutMs) || timeoutMs <= 0) {
|
|
196
|
+
throw new Error("createProcessIsolatedAuthProvider requires a positive integer timeoutMs");
|
|
197
|
+
}
|
|
198
|
+
if (!Number.isInteger(maxPendingCalls) || maxPendingCalls < 1) {
|
|
199
|
+
throw new Error("maxPendingCalls must be a positive integer");
|
|
200
|
+
}
|
|
201
|
+
if (!Number.isInteger(maxMessageBytes) || maxMessageBytes < 1) {
|
|
202
|
+
throw new Error("maxMessageBytes must be a positive integer");
|
|
203
|
+
}
|
|
204
|
+
// Fail-closed network containment. PR1 supports only the "require-permission"
|
|
205
|
+
// mode; if this Node cannot enforce --allow-net, refuse to construct rather than
|
|
206
|
+
// run a plugin whose network egress is uncontained.
|
|
207
|
+
if (netEnforcement !== "require-permission") {
|
|
208
|
+
throw new Error(`unsupported netEnforcement: ${JSON.stringify(netEnforcement)} (1.1 supports only "require-permission")`);
|
|
209
|
+
}
|
|
210
|
+
if (!detectNetSupport()) {
|
|
211
|
+
throw new Error(
|
|
212
|
+
"process-isolated requires a Node that enforces the --allow-net permission "
|
|
213
|
+
+ "(netEnforcement: require-permission); this Node cannot contain plugin network "
|
|
214
|
+
+ "egress, so refusing to construct — use worker-isolated, or run on a Node with --allow-net"
|
|
215
|
+
);
|
|
216
|
+
}
|
|
217
|
+
const nowFn = typeof now === "function" ? now : () => now;
|
|
218
|
+
// Every process lifecycle event carries isolation:"process" (a host-computed,
|
|
219
|
+
// fixed-enum discriminator — never child-supplied) so an audit consumer can tell
|
|
220
|
+
// a process-isolated decision from a worker-isolated one. All audit fields here
|
|
221
|
+
// are host-computed/enum-only; no child free-text ever enters an event.
|
|
222
|
+
const auditBase = makeFireAndForgetAudit(auditSink);
|
|
223
|
+
const audit = (event) => auditBase({ isolation: "process", ...event });
|
|
224
|
+
|
|
225
|
+
// Optional host-mediated key-material fetcher (operator-declared URL only). The
|
|
226
|
+
// core guarded fetcher validates the https URL at construction and SSRF-guards
|
|
227
|
+
// every fetch; TTL cache + cooldown bound the outbound rate.
|
|
228
|
+
let keyFetcher = null;
|
|
229
|
+
if (keyMaterial !== null && keyMaterial !== undefined) {
|
|
230
|
+
if (typeof keyMaterial !== "object" || Array.isArray(keyMaterial) || typeof keyMaterial.url !== "string") {
|
|
231
|
+
throw new Error("keyMaterial must be an object with an operator-declared https url");
|
|
232
|
+
}
|
|
233
|
+
keyFetcher = createGuardedKeyFetcher({ ...keyMaterial, now: nowFn });
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// Read+validate the manifest + run the FULL PR2 gate (shared with the worker
|
|
237
|
+
// runtime). Re-run on every (re)spawn — the gate is not a one-time check.
|
|
238
|
+
function loadAndVerify() {
|
|
239
|
+
return loadAndVerifyPlugin({
|
|
240
|
+
manifestPath,
|
|
241
|
+
expectedRuntime: "process-isolated",
|
|
242
|
+
trustAnchors,
|
|
243
|
+
allowCapabilities,
|
|
244
|
+
pin,
|
|
245
|
+
revoked,
|
|
246
|
+
versionFloor,
|
|
247
|
+
coreVersion,
|
|
248
|
+
now: nowFn(),
|
|
249
|
+
audit
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
// ---- child lifecycle -----------------------------------------------------
|
|
254
|
+
|
|
255
|
+
let child = null;
|
|
256
|
+
let pluginId = null;
|
|
257
|
+
let closed = false;
|
|
258
|
+
// The construction load is reused for the FIRST spawn; respawns re-load (re-verify).
|
|
259
|
+
let preloaded = null;
|
|
260
|
+
// cid -> settle(reply). Drops late/duplicate/unmatched replies by cid. Only one
|
|
261
|
+
// entry is ever live at a time (single-occupancy via the serialization chain).
|
|
262
|
+
const pending = new Map();
|
|
263
|
+
let respawning = null; // single-flight respawn guard
|
|
264
|
+
let chain = Promise.resolve();
|
|
265
|
+
let queueDepth = 0;
|
|
266
|
+
// Spawn-storm circuit breaker: timestamps of recent kills (pruned to the window)
|
|
267
|
+
// and a permanent trip flag.
|
|
268
|
+
let killTimes = [];
|
|
269
|
+
let breakerTripped = false;
|
|
270
|
+
|
|
271
|
+
// Spawn the child, await the {t:'ready'} handshake, hand it the verified plugin
|
|
272
|
+
// bytes as a data: URL over IPC, and await {t:'loaded'}. Bounded by timeoutMs;
|
|
273
|
+
// any failure kills the child and throws → fail closed. NOTE the plugin source
|
|
274
|
+
// crosses over IPC (not the command line) so there is no ARG_MAX limit.
|
|
275
|
+
async function spawnAndLoad({ entrySource, pluginId: pid }) {
|
|
276
|
+
const c = spawn(execPath, [...CHILD_FLAGS, "-e", PROCESS_HARNESS], {
|
|
277
|
+
stdio: ["ignore", "ignore", "ignore", "ipc"],
|
|
278
|
+
serialization: "json",
|
|
279
|
+
env: scrubbedEnv(),
|
|
280
|
+
windowsHide: true
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
let onReady;
|
|
284
|
+
let onLoaded;
|
|
285
|
+
let onFail;
|
|
286
|
+
let handshakeDone = false;
|
|
287
|
+
const ready = new Promise((resolve) => { onReady = resolve; });
|
|
288
|
+
const loaded = new Promise((resolve) => { onLoaded = resolve; });
|
|
289
|
+
// Rejects if the child dies DURING the handshake — so a startup crash fails
|
|
290
|
+
// fast (deny) instead of waiting out the full timeoutMs.
|
|
291
|
+
const failed = new Promise((_, reject) => { onFail = reject; });
|
|
292
|
+
|
|
293
|
+
c.on("message", (raw) => {
|
|
294
|
+
let parsed;
|
|
295
|
+
try {
|
|
296
|
+
parsed = JSON.parse(typeof raw === "string" ? raw : String(raw));
|
|
297
|
+
} catch {
|
|
298
|
+
return; // unparseable → drop
|
|
299
|
+
}
|
|
300
|
+
const t = parsed?.t;
|
|
301
|
+
if (t === "ready") { onReady(); return; }
|
|
302
|
+
if (t === "loaded") { onLoaded(true); return; }
|
|
303
|
+
if (t === "load-error") { onLoaded(false); return; }
|
|
304
|
+
if (t === "auth") {
|
|
305
|
+
const settle = pending.get(parsed.cid);
|
|
306
|
+
if (!settle) {
|
|
307
|
+
return; // unmatched / duplicate / late → drop
|
|
308
|
+
}
|
|
309
|
+
pending.delete(parsed.cid);
|
|
310
|
+
settle(parsed);
|
|
311
|
+
}
|
|
312
|
+
});
|
|
313
|
+
// Before the handshake completes, child !== c (child is only set on success),
|
|
314
|
+
// so terminateChild's `child === c` guard would ignore a startup crash. Route
|
|
315
|
+
// an early error/exit to the handshake rejection instead; afterwards the
|
|
316
|
+
// `child === c` guard handles a live-child crash (and ignores a stale one).
|
|
317
|
+
c.on("error", () => {
|
|
318
|
+
if (!handshakeDone) { onFail(new Error("child errored during spawn")); return; }
|
|
319
|
+
if (child === c) terminateChild("crash");
|
|
320
|
+
});
|
|
321
|
+
c.on("exit", (code) => {
|
|
322
|
+
if (!handshakeDone) { onFail(new Error(`child exited during spawn (code ${code})`)); return; }
|
|
323
|
+
if (code !== 0 && child === c) terminateChild("crash");
|
|
324
|
+
});
|
|
325
|
+
|
|
326
|
+
let handshakeTimer;
|
|
327
|
+
const handshakeTimeout = new Promise((_, reject) => {
|
|
328
|
+
handshakeTimer = setTimeout(() => reject(new Error("child spawn/load handshake timed out")), timeoutMs);
|
|
329
|
+
});
|
|
330
|
+
try {
|
|
331
|
+
await Promise.race([ready, failed, handshakeTimeout]);
|
|
332
|
+
c.send(JSON.stringify({ t: "load", source: Buffer.from(entrySource, "utf8").toString("base64") }));
|
|
333
|
+
const ok = await Promise.race([loaded, failed, handshakeTimeout]);
|
|
334
|
+
if (!ok) {
|
|
335
|
+
throw new Error("plugin failed to load in the process sandbox");
|
|
336
|
+
}
|
|
337
|
+
} catch (error) {
|
|
338
|
+
handshakeDone = true; // stop the error/exit handlers from acting on this child
|
|
339
|
+
clearTimeout(handshakeTimer);
|
|
340
|
+
try { c.kill("SIGKILL"); } catch { /* already gone */ }
|
|
341
|
+
throw error;
|
|
342
|
+
}
|
|
343
|
+
handshakeDone = true;
|
|
344
|
+
clearTimeout(handshakeTimer);
|
|
345
|
+
// close() may have run while we awaited the handshake; do NOT resurrect a child
|
|
346
|
+
// after close (kill-switch / process-leak). This check + the assignment are
|
|
347
|
+
// synchronous, so close() cannot interleave between them.
|
|
348
|
+
if (closed) {
|
|
349
|
+
try { c.kill("SIGKILL"); } catch { /* already gone */ }
|
|
350
|
+
throw new Error("provider closed during spawn");
|
|
351
|
+
}
|
|
352
|
+
child = c;
|
|
353
|
+
pluginId = pid;
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// Drop the live child (audit the cause), failing any matched in-flight call
|
|
357
|
+
// closed. Respawn happens lazily on the next call (re-running the full gate).
|
|
358
|
+
// A kill feeds the spawn-storm circuit breaker: too many within the window trips
|
|
359
|
+
// to a permanent fail-closed deny (operator reset = recreate the provider).
|
|
360
|
+
function terminateChild(cause) {
|
|
361
|
+
const terminated = child;
|
|
362
|
+
child = null;
|
|
363
|
+
if (terminated) {
|
|
364
|
+
audit({ type: "plugin.worker.terminated", decision: "plugin.worker.terminated", pluginId, cause });
|
|
365
|
+
try { terminated.kill("SIGKILL"); } catch { /* already gone */ }
|
|
366
|
+
const t = nowFn();
|
|
367
|
+
killTimes.push(t);
|
|
368
|
+
killTimes = killTimes.filter((ts) => (t - ts) < respawnWindowMs);
|
|
369
|
+
if (!breakerTripped && killTimes.length >= respawnMaxKills) {
|
|
370
|
+
breakerTripped = true;
|
|
371
|
+
audit({ type: "plugin.worker.terminated", decision: "plugin.worker.terminated", pluginId, cause: "respawn-storm" });
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
for (const [, settle] of pending) {
|
|
375
|
+
settle(null);
|
|
376
|
+
}
|
|
377
|
+
pending.clear();
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
// LAZY (re)spawn behind a single-flight guard that RE-RUNS THE FULL PR2 GATE.
|
|
381
|
+
// A tripped circuit breaker fails closed permanently (the operator must recreate
|
|
382
|
+
// the provider). Respawns are exponentially backed off so a flapping plugin
|
|
383
|
+
// cannot become a spawn storm.
|
|
384
|
+
async function ensureChild() {
|
|
385
|
+
if (child || closed) {
|
|
386
|
+
return;
|
|
387
|
+
}
|
|
388
|
+
if (breakerTripped) {
|
|
389
|
+
throw new Error("process plugin respawn-storm circuit breaker is tripped (fail-closed; recreate the provider to reset)");
|
|
390
|
+
}
|
|
391
|
+
if (respawning) {
|
|
392
|
+
return respawning;
|
|
393
|
+
}
|
|
394
|
+
respawning = (async () => {
|
|
395
|
+
const recentKills = killTimes.length;
|
|
396
|
+
if (recentKills > 0 && respawnBackoffMs > 0) {
|
|
397
|
+
const backoff = respawnBackoffMs * (2 ** Math.min(recentKills - 1, 6));
|
|
398
|
+
await new Promise((resolve) => setTimeout(resolve, backoff));
|
|
399
|
+
if (closed || breakerTripped) {
|
|
400
|
+
throw new Error("provider closed or breaker tripped during backoff");
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
const loaded = preloaded ?? loadAndVerify();
|
|
404
|
+
preloaded = null;
|
|
405
|
+
await spawnAndLoad(loaded);
|
|
406
|
+
})();
|
|
407
|
+
try {
|
|
408
|
+
await respawning;
|
|
409
|
+
} finally {
|
|
410
|
+
respawning = null;
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
// One serialized child round-trip. Resolves to the parsed reply, null (crash /
|
|
415
|
+
// spawn failure), { __timeout: true }, or { __oversized: true }. Runs alone.
|
|
416
|
+
async function roundTrip(credential) {
|
|
417
|
+
try {
|
|
418
|
+
await ensureChild();
|
|
419
|
+
} catch {
|
|
420
|
+
return null; // spawn/load failed → fail closed
|
|
421
|
+
}
|
|
422
|
+
if (!child) {
|
|
423
|
+
return null;
|
|
424
|
+
}
|
|
425
|
+
const cid = randomUUID();
|
|
426
|
+
const baseMessage = JSON.stringify({ t: "auth", cid, credential });
|
|
427
|
+
if (Buffer.byteLength(baseMessage, "utf8") > maxMessageBytes) {
|
|
428
|
+
return { __oversized: true };
|
|
429
|
+
}
|
|
430
|
+
// Host-mediated key material (if configured). The credential is bounded by
|
|
431
|
+
// maxMessageBytes above; the key document is separately bounded by the
|
|
432
|
+
// fetcher's maxBytes, so it is added AFTER the credential bound check.
|
|
433
|
+
let message = baseMessage;
|
|
434
|
+
if (keyFetcher) {
|
|
435
|
+
let doc;
|
|
436
|
+
try {
|
|
437
|
+
doc = await keyFetcher.get();
|
|
438
|
+
} catch {
|
|
439
|
+
return { __keyfetch: true }; // host key fetch failed (SSRF refusal / cooldown) → deny
|
|
440
|
+
}
|
|
441
|
+
message = JSON.stringify({ t: "auth", cid, credential, keyMaterial: doc });
|
|
442
|
+
}
|
|
443
|
+
return new Promise((resolve) => {
|
|
444
|
+
let done = false;
|
|
445
|
+
const settle = (value) => {
|
|
446
|
+
if (done) return;
|
|
447
|
+
done = true;
|
|
448
|
+
clearTimeout(timer);
|
|
449
|
+
resolve(value);
|
|
450
|
+
};
|
|
451
|
+
const timer = setTimeout(() => {
|
|
452
|
+
pending.delete(cid);
|
|
453
|
+
// Timeout → terminate the child (audited), deny. Respawn lazily.
|
|
454
|
+
terminateChild("timeout");
|
|
455
|
+
settle({ __timeout: true });
|
|
456
|
+
}, timeoutMs);
|
|
457
|
+
pending.set(cid, settle);
|
|
458
|
+
try {
|
|
459
|
+
child.send(message);
|
|
460
|
+
} catch {
|
|
461
|
+
pending.delete(cid);
|
|
462
|
+
settle(null); // child already dead → fail closed
|
|
463
|
+
}
|
|
464
|
+
});
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
// The sandboxed provider. Proxies authenticate() into the child, then the HOST
|
|
468
|
+
// sanitizes + builds the keyed-HMAC identity. NEVER throws into the caller.
|
|
469
|
+
async function authenticate(request) {
|
|
470
|
+
try {
|
|
471
|
+
const credential = bearerCredentialFromRequest(request);
|
|
472
|
+
if (credential === null) {
|
|
473
|
+
return null; // missing credential → deny (no round-trip needed)
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
if (queueDepth >= maxPendingCalls) {
|
|
477
|
+
audit({ type: "plugin.authenticate.deny", decision: "plugin.authenticate.deny", pluginId, reason: "over-capacity" });
|
|
478
|
+
return null;
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
// Serialize: single-occupancy child. Each call waits its turn; distinct cids
|
|
482
|
+
// guarantee replies never cross even though calls are queued.
|
|
483
|
+
queueDepth += 1;
|
|
484
|
+
const myTurn = chain;
|
|
485
|
+
let release;
|
|
486
|
+
chain = new Promise((r) => { release = r; });
|
|
487
|
+
let reply;
|
|
488
|
+
try {
|
|
489
|
+
await myTurn;
|
|
490
|
+
reply = await roundTrip(credential);
|
|
491
|
+
} finally {
|
|
492
|
+
queueDepth -= 1;
|
|
493
|
+
release();
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
if (reply && reply.__oversized) {
|
|
497
|
+
audit({ type: "plugin.authenticate.deny", decision: "plugin.authenticate.deny", pluginId, reason: "oversized" });
|
|
498
|
+
return null;
|
|
499
|
+
}
|
|
500
|
+
if (reply && reply.__keyfetch) {
|
|
501
|
+
audit({ type: "plugin.authenticate.deny", decision: "plugin.authenticate.deny", pluginId, reason: "key-material-unavailable" });
|
|
502
|
+
return null;
|
|
503
|
+
}
|
|
504
|
+
if (!reply || reply.__timeout) {
|
|
505
|
+
if (reply && reply.__timeout) {
|
|
506
|
+
audit({ type: "plugin.authenticate.deny", decision: "plugin.authenticate.deny", pluginId, reason: "timeout" });
|
|
507
|
+
}
|
|
508
|
+
return null;
|
|
509
|
+
}
|
|
510
|
+
if (reply.deny === true || reply.claims === undefined) {
|
|
511
|
+
audit({ type: "plugin.authenticate.deny", decision: "plugin.authenticate.deny", pluginId, reason: "deny" });
|
|
512
|
+
return null;
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
let claims;
|
|
516
|
+
try {
|
|
517
|
+
claims = sanitizeClaims(reply.claims);
|
|
518
|
+
} catch {
|
|
519
|
+
audit({ type: "plugin.authenticate.deny", decision: "plugin.authenticate.deny", pluginId, reason: "invalid-claims" });
|
|
520
|
+
return null;
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
// The HOST builds the keyed-HMAC identity. The key NEVER crossed to the
|
|
524
|
+
// child; PII-safety is (re-)enforced here on every call.
|
|
525
|
+
try {
|
|
526
|
+
return await buildExternalIdentity({
|
|
527
|
+
provider: `plugin:${pluginId}`,
|
|
528
|
+
subject: claims.subject,
|
|
529
|
+
issuer: claims.issuer,
|
|
530
|
+
type: claims.type ?? "user",
|
|
531
|
+
scopes: claims.scopes ?? [],
|
|
532
|
+
labels: claims.labels ?? {},
|
|
533
|
+
...(allowedLabelKeys ? { allowedLabelKeys } : {})
|
|
534
|
+
}, cryptoProvider);
|
|
535
|
+
} catch {
|
|
536
|
+
audit({ type: "plugin.authenticate.deny", decision: "plugin.authenticate.deny", pluginId, reason: "invalid-claims" });
|
|
537
|
+
return null;
|
|
538
|
+
}
|
|
539
|
+
} catch {
|
|
540
|
+
// Catch-all: authenticate NEVER throws into the caller.
|
|
541
|
+
return null;
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
async function close() {
|
|
546
|
+
closed = true;
|
|
547
|
+
const terminated = child;
|
|
548
|
+
child = null;
|
|
549
|
+
pending.clear();
|
|
550
|
+
if (terminated) {
|
|
551
|
+
try { terminated.kill("SIGKILL"); } catch { /* already gone */ }
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
// ---- construct: synchronous load+verify (PR2 gate throws here), then a
|
|
556
|
+
// one-time async conformance gate. The eager sync gate makes a refused load throw
|
|
557
|
+
// at construction; the child spawns lazily on the first authenticate (which
|
|
558
|
+
// conformance drives), reusing the construction load via `preloaded`.
|
|
559
|
+
|
|
560
|
+
const initial = loadAndVerify();
|
|
561
|
+
preloaded = initial;
|
|
562
|
+
|
|
563
|
+
const provider = { id: `plugin:${initial.pluginId}`, authenticate, close };
|
|
564
|
+
|
|
565
|
+
const conformance = assertAuthProviderConformance(provider, { now: nowFn() })
|
|
566
|
+
.then((result) => {
|
|
567
|
+
if (!result.ok) {
|
|
568
|
+
audit({
|
|
569
|
+
type: "plugin.load.refused",
|
|
570
|
+
decision: "plugin.load.refused",
|
|
571
|
+
reason: "conformance-failed",
|
|
572
|
+
pluginId: initial.pluginId,
|
|
573
|
+
signerKeyId: initial.signerKeyId
|
|
574
|
+
});
|
|
575
|
+
return close().then(() => {
|
|
576
|
+
throw new Error(`plugin conformance failed: ${result.failures.join("; ")}`);
|
|
577
|
+
});
|
|
578
|
+
}
|
|
579
|
+
audit({
|
|
580
|
+
type: "plugin.load.accepted",
|
|
581
|
+
decision: "plugin.load.accepted",
|
|
582
|
+
pluginId: initial.pluginId,
|
|
583
|
+
version: initial.verified.version,
|
|
584
|
+
entrySha256: initial.entrySha256,
|
|
585
|
+
signerKeyId: initial.signerKeyId,
|
|
586
|
+
capabilitiesGranted: Object.entries(initial.verified.capabilities)
|
|
587
|
+
.filter(([, v]) => v === true)
|
|
588
|
+
.map(([k]) => k),
|
|
589
|
+
// Host-computed, enum-only capability-enforcement facts (never child input):
|
|
590
|
+
// the child is spawned with ZERO OS permission grants, and net is contained
|
|
591
|
+
// by the require-permission --allow-net denial.
|
|
592
|
+
netEnforcement,
|
|
593
|
+
grants: []
|
|
594
|
+
});
|
|
595
|
+
return provider;
|
|
596
|
+
});
|
|
597
|
+
|
|
598
|
+
provider.ready = conformance;
|
|
599
|
+
return { provider, conformance, pluginId: initial.pluginId, entrySha256: initial.entrySha256, signerKeyId: initial.signerKeyId };
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
// Async factory: resolves to the live provider AFTER conformance passes, rejects
|
|
603
|
+
// on ANY load failure (PR2 gate or conformance). Direct (test) callers await this.
|
|
604
|
+
export async function createProcessIsolatedAuthProvider(options) {
|
|
605
|
+
const { conformance } = createProcessIsolatedAuthProviderHandle(options);
|
|
606
|
+
return conformance;
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
// Synchronous factory for the runtime composition root: the PR2 gate runs eagerly
|
|
610
|
+
// (so a refused load throws at createRuntime time), and conformance is gated lazily
|
|
611
|
+
// behind provider.ready — authenticate() awaits readiness and fails closed (null)
|
|
612
|
+
// if conformance rejected. Returns the host-side authProvider immediately.
|
|
613
|
+
export function createProcessIsolatedAuthProviderSync(options) {
|
|
614
|
+
const { provider, conformance } = createProcessIsolatedAuthProviderHandle(options);
|
|
615
|
+
const ready = conformance.then(() => true, () => false);
|
|
616
|
+
return {
|
|
617
|
+
id: provider.id,
|
|
618
|
+
async authenticate(request) {
|
|
619
|
+
if (!(await ready)) {
|
|
620
|
+
return null; // conformance failed → permanently fail closed
|
|
621
|
+
}
|
|
622
|
+
return provider.authenticate(request);
|
|
623
|
+
},
|
|
624
|
+
close() {
|
|
625
|
+
return provider.close();
|
|
626
|
+
},
|
|
627
|
+
ready
|
|
628
|
+
};
|
|
629
|
+
}
|