@openwop/openwop-conformance 1.13.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +2 -2
- package/api/openapi.yaml +60 -0
- package/coverage.md +15 -4
- package/fixtures/wasm-sandbox/isolation-global.wasm +0 -0
- package/fixtures/wasm-sandbox/isolation-global.wat +6 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wat +5 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wat +4 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wat +2 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wat +3 -0
- package/package.json +1 -1
- package/src/lib/discovery-capabilities.ts +18 -19
- package/src/lib/egressPolicy.ts +76 -0
- package/src/lib/otel-collector.ts +72 -0
- package/src/lib/profiles.ts +15 -0
- package/src/lib/sandbox-timeout-worker.mjs +31 -0
- package/src/lib/toolCatalog.ts +81 -0
- package/src/lib/wasm-sandbox-probe.ts +168 -0
- package/src/scenarios/core-standard-profile.test.ts +75 -0
- package/src/scenarios/egress-audience-binding.test.ts +81 -0
- package/src/scenarios/egress-decision-content-free.test.ts +57 -0
- package/src/scenarios/memory-degraded-projection.test.ts +121 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +12 -7
- package/src/scenarios/otel-collector-canary-inspection.test.ts +211 -0
- package/src/scenarios/prompt-resolution-chain-event.test.ts +113 -0
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +192 -75
- package/src/scenarios/sandbox-wasm-isolation.test.ts +98 -0
- package/src/scenarios/sandbox-wasm-timeout.test.ts +40 -0
- package/src/scenarios/secret-leakage-otel-attribute.test.ts +52 -0
- package/src/scenarios/tool-catalog-projection.test.ts +120 -0
- package/src/scenarios/tool-session-lifecycle.test.ts +105 -0
- package/src/scenarios/workspace-cross-tenant-isolation-blackbox.test.ts +89 -0
|
@@ -8,87 +8,204 @@
|
|
|
8
8
|
* Asserts (behavioral, when a host advertises `version: 4` + the contract):
|
|
9
9
|
*
|
|
10
10
|
* 1. A `mode: replay` fork from event-log index `fromSeq` produces an
|
|
11
|
-
* event-log prefix `[0, fromSeq]` that is byte-equivalent
|
|
12
|
-
* original run's prefix (modulo per-
|
|
13
|
-
*
|
|
11
|
+
* observable event-log prefix `[0, fromSeq]` that is byte-equivalent
|
|
12
|
+
* to the original run's prefix (modulo volatile per-event fields:
|
|
13
|
+
* eventId/ULID entropy, per-region `observedAt` clocks per RFC 0036
|
|
14
|
+
* §E, and the run id itself).
|
|
14
15
|
*
|
|
15
|
-
* 2. The replay
|
|
16
|
-
*
|
|
17
|
-
*
|
|
16
|
+
* 2. (Crucially per §C.) The replay reproduces the OBSERVABLE RESULT of
|
|
17
|
+
* a nondeterministic tool node EVEN WHEN a fresh call would produce
|
|
18
|
+
* different bytes. The `conformance-phase4-nondet-tool` fixture's
|
|
19
|
+
* first node declares `config.nondeterministic: true`; a `version: 4`
|
|
20
|
+
* host MUST replay the original event-log entries for that node
|
|
21
|
+
* (cache the observable result) rather than re-executing it, so the
|
|
22
|
+
* node's terminal payload is identical across original + replay.
|
|
18
23
|
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
24
|
+
* The `conformance-phase4-nondet-tool` fixture ships in the suite (added
|
|
25
|
+
* via the RFC 0041 Phase 4 fixtures commit). These assertions are now
|
|
26
|
+
* runnable capability-gated `it()` bodies — consistent with the sibling
|
|
27
|
+
* `replay-divergence-at-refusal.test.ts`, which is likewise active and
|
|
28
|
+
* soft-skips on the same gate. They light up the moment a host advertises
|
|
29
|
+
* the `version: 4` replay-determinism contract; against hosts that don't
|
|
30
|
+
* (incl. the reference workflow-engine, which has not yet wired the
|
|
31
|
+
* pure-replay observable-cache path), they soft-skip honestly.
|
|
25
32
|
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* `
|
|
30
|
-
* nondeterministic-tool fixture (e.g., `conformance-phase4-nondet-tool`).
|
|
31
|
-
* Until that lands, the cross-boundary assertion is surfaced as `it.todo`
|
|
32
|
-
* so test reporters track the gap.
|
|
33
|
+
* RFC 0042 §B note: RFC 0041 §C is `Active` (not yet `Accepted`), so its
|
|
34
|
+
* wire shape MAY shift compatibly within v1.x — a host wiring this before
|
|
35
|
+
* RFC 0041 graduates SHOULD advertise `multiAgent.executionModel.tier:
|
|
36
|
+
* 'experimental'` + `experimentalUntil` per RFC 0042 §A.
|
|
33
37
|
*
|
|
34
38
|
* @see RFCS/0041-multi-agent-replay-under-nondeterminism.md §C
|
|
35
39
|
* @see spec/v1/replay.md §"Observable-output-sequence determinism vs bit-equivalent execution (MAE-9 closure)"
|
|
36
40
|
* @see spec/v1/multi-agent-execution.md §"Replay determinism under nondeterminism (RFC 0041)"
|
|
37
41
|
*/
|
|
38
42
|
|
|
39
|
-
import { describe, it } from 'vitest';
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
})
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
43
|
+
import { describe, it, expect } from 'vitest';
|
|
44
|
+
import { driver } from '../lib/driver.js';
|
|
45
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
46
|
+
|
|
47
|
+
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
48
|
+
const FIXTURE = 'conformance-phase4-nondet-tool';
|
|
49
|
+
const NONDET_NODE_ID = 'nondet-tool';
|
|
50
|
+
|
|
51
|
+
/**
 * The slice of a host's `multiAgent.executionModel` capability advertisement
 * that this scenario inspects. Both fields are `unknown` and are narrowed at
 * the point of use rather than trusted.
 */
interface ExecutionModelCaps {
  /** Advertised execution-model version. */
  version?: unknown;
  /** Replay-determinism advertisement block. */
  replayDeterminism?: {
    supported?: unknown;
  };
}
|
|
55
|
+
/** Minimal shape of the discovery document, limited to the fields read by this scenario. */
interface DiscoveryDoc {
  capabilities?: {
    multiAgent?: {
      executionModel?: ExecutionModelCaps;
    };
  };
}
|
|
60
|
+
|
|
61
|
+
/** Run snapshot body; only the lifecycle `status` field is consumed here. */
interface RunSnapshot {
  status?: string;
}
|
|
64
|
+
/** A single event-log entry, limited to the fields this scenario reads. */
interface RunEventDoc {
  /** Event type discriminator. */
  type: string;
  /** Id of the node the event is scoped to, when it is node-scoped. */
  nodeId?: string;
  /** Position of the event in the run's log. */
  sequence?: number;
  /** Event-specific payload. */
  payload?: Record<string, unknown>;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Fetch the host's discovery document from `/.well-known/openwop`.
 *
 * Best-effort: a non-200 response or any thrown error yields `null`, so the
 * caller can decide to skip instead of failing.
 *
 * @returns The response body cast to {@link DiscoveryDoc}, or `null` when unavailable.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    const available = response.status === 200;
    return available ? (response.json as DiscoveryDoc) : null;
  } catch {
    return null;
  }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Capability gate for the RFC 0041 §C version-4 replay-determinism contract.
 *
 * Calls `ctx.skip()` and returns `false` unless the host's `multiAgent`
 * capability family advertises `executionModel.replayDeterminism.supported === true`
 * with a numeric `executionModel.version` that is not below 4. A non-numeric
 * version is treated as 0.
 *
 * @param ctx - Test context exposing `skip()`.
 * @returns `true` when the contract is advertised and the test should proceed.
 */
async function gateOnPhase4(ctx: { skip: () => void }): Promise<boolean> {
  const discovery = await readDiscovery();
  const multiAgent = capabilityFamily<{ executionModel?: ExecutionModelCaps }>(discovery, 'multiAgent');
  const executionModel = multiAgent?.executionModel;

  const advertisedVersion = typeof executionModel?.version === 'number' ? executionModel.version : 0;
  const replaySupported = executionModel?.replayDeterminism?.supported === true;

  const mustSkip = !replaySupported || advertisedVersion < 4;
  if (!mustSkip) {
    return true;
  }
  ctx.skip();
  return false;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Poll `GET /v1/runs/{runId}` until the run reports a terminal status
 * (`completed`, `failed` or `cancelled`).
 *
 * A response whose body is missing or is not an object with a string
 * `status` is treated as "not terminal yet" and polling continues, so a
 * transient non-JSON response ends in the timeout error below rather than an
 * opaque `TypeError` on property access.
 *
 * @param runId - Id of the run to poll; URL-encoded before use.
 * @returns The first snapshot observed with a terminal status.
 * @throws Error when no terminal status is observed within the polling budget.
 */
async function pollUntilTerminal(runId: string): Promise<RunSnapshot> {
  const maxAttempts = 50;
  const intervalMs = 100;
  const terminalStatuses: ReadonlySet<string> = new Set(['completed', 'failed', 'cancelled']);
  const path = `/v1/runs/${encodeURIComponent(runId)}`;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const response = await driver.get(path);
    const snapshot = response.json as RunSnapshot | null | undefined;
    const status = snapshot?.status;
    if (snapshot != null && typeof status === 'string' && terminalStatuses.has(status)) {
      return snapshot;
    }
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
  // 50 attempts x 100 ms = the 5 s budget named in the message.
  throw new Error(`run ${runId} did not reach terminal within ${(maxAttempts * intervalMs) / 1000}s`);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Read the event log of a run from `GET /v1/runs/{runId}/events`.
 *
 * Tolerates a missing or null body and a non-array `events` field by
 * returning an empty list, so callers can always iterate the result.
 *
 * @param runId - Id of the run; URL-encoded before use.
 * @returns The `events` array from the response body, or `[]` when absent.
 */
async function readEvents(runId: string): Promise<RunEventDoc[]> {
  const response = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
  const body = response.json as { events?: unknown } | null | undefined;
  const events = body?.events;
  return Array.isArray(events) ? (events as RunEventDoc[]) : [];
}
|
|
110
|
+
|
|
111
|
+
/**
 * Normalise an event so that two runs of the same workflow can be compared.
 *
 * Works on a JSON round-trip copy of the event and drops the volatile
 * top-level fields: the run id, freshly minted event ids/ULIDs, and the
 * per-event clock fields (including the per-region observed-at clock carved
 * out by RFC 0036 §E). Nested occurrences inside `payload` are left as they are.
 *
 * @param ev - The event to normalise; it is not mutated.
 * @returns A deep copy of `ev` without the volatile top-level keys.
 */
function stripVolatile(ev: RunEventDoc): Record<string, unknown> {
  const volatileKeys = ['eventId', 'runId', 'observedAt', 'timestamp', 'occurredAt', 'emittedAt', 'id'];
  const normalised = JSON.parse(JSON.stringify(ev)) as Record<string, unknown>;
  volatileKeys.forEach((key) => {
    delete normalised[key];
  });
  return normalised;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Start a run of the `FIXTURE` workflow via `POST /v1/runs`.
 *
 * A 404 or 422 response is taken to mean the host does not advertise the
 * fixture: the test is skipped and `null` is returned. Any other status must
 * be 201.
 *
 * @param ctx - Test context exposing `skip()`.
 * @returns The new run's id, or `null` after skipping.
 */
async function startFixtureRun(ctx: { skip: () => void }): Promise<string | null> {
  const response = await driver.post('/v1/runs', { workflowId: FIXTURE });

  switch (response.status) {
    case 404:
    case 422:
      ctx.skip(); // fixture not advertised by this host
      return null;
    default:
      break;
  }

  expect(response.status).toBe(201);
  const created = response.json as { runId: string };
  return created.runId;
}
|
|
135
|
+
|
|
136
|
+
describe.skipIf(HTTP_SKIP)(
  'replay-observable-sequence-determinism: prefix byte-equivalence (RFC 0041 §C)',
  () => {
    it('original and replay event-log prefixes MUST be byte-equivalent (modulo per-event clock + ULID entropy)', async (ctx) => {
      const gateOpen = await gateOnPhase4(ctx);
      if (!gateOpen) return;

      // Drive the original run to completion and capture its event log.
      const originalRunId = await startFixtureRun(ctx);
      if (originalRunId === null) return;
      const originalSnapshot = await pollUntilTerminal(originalRunId);
      expect(originalSnapshot.status).toBe('completed');
      const originalEvents = await readEvents(originalRunId);

      // Fork the original in replay mode from the start of its log.
      const forkPath = `/v1/runs/${encodeURIComponent(originalRunId)}:fork`;
      const fork = await driver.post(forkPath, { fromSeq: 0, mode: 'replay' });
      expect(fork.status).toBe(201);
      const { runId: replayRunId } = fork.json as { runId: string };
      await pollUntilTerminal(replayRunId);
      const replayEvents = await readEvents(replayRunId);

      // Compare the two logs with volatile per-event fields removed.
      const expected = originalEvents.map(stripVolatile);
      const actual = replayEvents.map(stripVolatile);
      const requirement = driver.describe(
        'RFCS/0041-multi-agent-replay-under-nondeterminism.md §C',
        'a mode:replay fork MUST reproduce the original observable event-log sequence byte-for-byte modulo volatile per-event fields (eventId/ULID entropy, per-region observedAt clock)',
      );
      expect(actual, requirement).toEqual(expected);
    });
  },
);
|
|
169
|
+
|
|
170
|
+
describe.skipIf(HTTP_SKIP)(
  'replay-observable-sequence-determinism: observable-result caching (RFC 0041 §C)',
  () => {
    it('replay of a nondeterministic tool node reproduces the ORIGINAL observable result, NOT a fresh call', async (ctx) => {
      const gateOpen = await gateOnPhase4(ctx);
      if (!gateOpen) return;

      const originalRunId = await startFixtureRun(ctx);
      if (originalRunId === null) return;
      const originalSnapshot = await pollUntilTerminal(originalRunId);
      expect(originalSnapshot.status).toBe('completed');
      const originalEvents = await readEvents(originalRunId);

      // Every event scoped to the nondeterministic node is part of its
      // observable result, so all of them are captured for comparison.
      const isNondetNodeEvent = (event: RunEventDoc): boolean => event.nodeId === NONDET_NODE_ID;
      const originalNodeEvents = originalEvents.filter(isNondetNodeEvent).map(stripVolatile);
      expect(
        originalNodeEvents.length,
        driver.describe(
          'RFCS/0041-multi-agent-replay-under-nondeterminism.md §C',
          `the fixture's nondeterministic node \`${NONDET_NODE_ID}\` MUST emit at least one observable event`,
        ),
      ).toBeGreaterThan(0);

      // Replay-fork the original from the start of its log.
      const forkPath = `/v1/runs/${encodeURIComponent(originalRunId)}:fork`;
      const fork = await driver.post(forkPath, { fromSeq: 0, mode: 'replay' });
      expect(fork.status).toBe(201);
      const { runId: replayRunId } = fork.json as { runId: string };
      await pollUntilTerminal(replayRunId);
      const replayEvents = await readEvents(replayRunId);
      const replayNodeEvents = replayEvents.filter(isNondetNodeEvent).map(stripVolatile);

      const requirement = driver.describe(
        'RFCS/0041-multi-agent-replay-under-nondeterminism.md §C',
        'the nondeterministic tool node MUST replay its ORIGINAL observable result (cached event-log entry) rather than re-executing — bit-equivalent re-execution would require unbounded caching, rejected per RFC 0041 §"Alternatives considered" #2',
      );
      expect(replayNodeEvents, requirement).toEqual(originalNodeEvents);
    });
  },
);
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RFC 0035 §B sandbox isolation — portable, server-free behavioral conformance.
|
|
3
|
+
*
|
|
4
|
+
* Drives the committed `fixtures/wasm-sandbox/*.wasm` modules through the
|
|
5
|
+
* suite-local `probeSandboxed` reference (see `../lib/wasm-sandbox-probe.ts`).
|
|
6
|
+
* Every assertion exercises real WebAssembly isolation — there are NO `it.todo`
|
|
7
|
+
* placeholders and NO mocks. These are the behavioral probes that graduate the
|
|
8
|
+
* cross-runtime `node-pack-sandbox-*` invariants from reference-impl to protocol
|
|
9
|
+
* tier (`SECURITY/invariants.yaml`).
|
|
10
|
+
*
|
|
11
|
+
* Coverage (six invariants, proven by construction, server-free):
|
|
12
|
+
* - node-pack-sandbox-fs-gated / -no-env / -network-gated / -no-process:
|
|
13
|
+
* a forbidden operation can only be a DECLARED IMPORT; the probe statically
|
|
14
|
+
* refuses any un-granted import → `sandbox_escape_attempt` + `escapeKind`.
|
|
15
|
+
* - capability gate: an un-granted `openwop.*` import → `sandbox_capability_denied`.
|
|
16
|
+
* - node-pack-sandbox-memory-cap: an access past the host memory bound traps →
|
|
17
|
+
* `sandbox_memory_exceeded`.
|
|
18
|
+
* - node-pack-sandbox-isolated-context: a fresh instance per invocation carries
|
|
19
|
+
* no state across calls.
|
|
20
|
+
*
|
|
21
|
+
* `node-pack-sandbox-timeout` requires thread preemption (a worker kill-timer), so it
|
|
22
|
+
* cannot be proven in-process here; it is covered by the worker-driven
|
|
23
|
+
* `sandbox-wasm-timeout.test.ts` probe. `node-pack-sandbox-no-eval` is JS-runtime-specific (WASM has
|
|
24
|
+
* no `eval`) and is exempt per RFC 0035.
|
|
25
|
+
*
|
|
26
|
+
* Spec reference:
|
|
27
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0035-sandbox-execution-contract.md
|
|
28
|
+
*/
|
|
29
|
+
import { describe, it, expect } from 'vitest';
|
|
30
|
+
import { readFileSync } from 'node:fs';
|
|
31
|
+
import { join } from 'node:path';
|
|
32
|
+
import { FIXTURES_DIR } from '../lib/paths.js';
|
|
33
|
+
import { probeSandboxed } from '../lib/wasm-sandbox-probe.js';
|
|
34
|
+
|
|
35
|
+
/**
 * Build an assertion message of the form `<specRef> — <requirement>`.
 *
 * @param specRef - Spec section the assertion enforces.
 * @param requirement - The requirement being checked.
 */
const why = (specRef: string, requirement: string): string => {
  return [specRef, requirement].join(' — ');
};
|
|
36
|
+
/** Directory that holds the committed `wasm-sandbox` fixtures. */
const dir = join(FIXTURES_DIR, 'wasm-sandbox');

/**
 * Load the fixture module `<name>.wasm` from `dir`.
 *
 * @param name - Fixture base name, without the `.wasm` extension.
 * @returns The module bytes.
 */
const fix = (name: string): Uint8Array => {
  const wasmPath = join(dir, `${name}.wasm`);
  const bytes = readFileSync(wasmPath);
  return new Uint8Array(bytes);
};
|
|
38
|
+
/** Baseline probe options: no host calls granted and a 2 MiB memory limit. */
const BASE = {
  allowedHostCalls: [] as string[],
  memoryLimitBytes: 2 * 1024 * 1024,
};
|
|
39
|
+
|
|
40
|
+
describe('sandbox-wasm-isolation: positive controls (RFC 0035 §B, server-free)', () => {
  // A module that needs no capabilities must run and echo its argument.
  it('a well-behaved pure module runs and returns its input', () => {
    const outcome = probeSandboxed(fix('well-behaved-echo'), BASE, 'invoke', 42);

    expect(outcome.ok, why('RFC 0035 §B', 'a pure-compute module runs')).toBe(true);
    expect(outcome.result).toBe(42);
  });

  // The same gate that denies un-granted imports must admit a granted one.
  it('a granted host capability is callable when in allowedHostCalls', () => {
    const withFetchGrant = { ...BASE, allowedHostCalls: ['fetch'] };
    const outcome = probeSandboxed(fix('well-behaved-host-fetch'), withFetchGrant, 'invoke', 7);

    expect(outcome.ok, why('RFC 0035 §B invariant 7', 'a granted openwop.* capability is callable')).toBe(true);
    expect(outcome.result).toBe(7);
  });
});
|
|
53
|
+
|
|
54
|
+
describe('sandbox-wasm-isolation: escape attempts fail closed (RFC 0035 §B 1–4, server-free)', () => {
  // Each row: [fixture base name, expected escapeKind, invariant id].
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ['misbehaving-fs', 'host-fs-escape', 'node-pack-sandbox-fs-gated'],
    ['misbehaving-env', 'host-env-leak', 'node-pack-sandbox-no-env'],
    ['misbehaving-network', 'network-escape', 'node-pack-sandbox-network-gated'],
    ['misbehaving-process', 'host-process-escape', 'node-pack-sandbox-no-process'],
  ];

  cases.forEach(([fixture, escapeKind, invariant]) => {
    it(`${invariant}: ${fixture} → sandbox_escape_attempt (${escapeKind})`, () => {
      const outcome = probeSandboxed(fix(fixture), BASE);

      expect(outcome.code, why('RFC 0035 §B', `${invariant} fails closed before instantiation`)).toBe('sandbox_escape_attempt');
      expect(outcome.escapeKind).toBe(escapeKind);
    });
  });
});
|
|
69
|
+
|
|
70
|
+
describe('sandbox-wasm-isolation: capability gate (RFC 0035 §B 7, server-free)', () => {
  it('an un-granted openwop capability is denied with its name', () => {
    const outcome = probeSandboxed(fix('misbehaving-capability-gate'), BASE);

    expect(outcome.code, why('RFC 0035 §B invariant 7', 'undeclared host capability fails closed')).toBe('sandbox_capability_denied');
    expect(outcome.requestedCapability).toBe('privileged');
  });

  // Same module as the positive control, but probed without the `fetch` grant.
  it('host-fetch WITHOUT the grant is denied (the gate works both directions)', () => {
    const outcome = probeSandboxed(fix('well-behaved-host-fetch'), BASE);

    expect(outcome.code).toBe('sandbox_capability_denied');
    expect(outcome.requestedCapability).toBe('fetch');
  });
});
|
|
83
|
+
|
|
84
|
+
describe('sandbox-wasm-isolation: memory cap (RFC 0035 §B 5, server-free)', () => {
  it('node-pack-sandbox-memory-cap: access beyond the host memory bound is sandbox_memory_exceeded', () => {
    const outcome = probeSandboxed(fix('misbehaving-memory'), BASE);

    // The probe must report failure, and with the memory-specific code.
    expect(outcome.ok, why('RFC 0035 §B invariant 5', 'memory bound is engine-enforced')).toBe(false);
    expect(outcome.code).toBe('sandbox_memory_exceeded');
  });
});
|
|
91
|
+
|
|
92
|
+
describe('sandbox-wasm-isolation: isolated context (RFC 0035 §B 8, server-free)', () => {
  it('node-pack-sandbox-isolated-context: each invocation gets a fresh instance (no cross-pack state)', () => {
    const moduleBytes = fix('isolation-global');

    // First probe: `bump` is expected to yield 1.
    const bumped = probeSandboxed(moduleBytes, BASE, 'bump');
    expect(bumped.result, why('RFC 0035 §B invariant 8', 'a fresh instance starts at 0')).toBe(1);

    // Second probe of the same bytes: `read` must not observe the earlier bump.
    const reread = probeSandboxed(moduleBytes, BASE, 'read');
    expect(reread.result, why('RFC 0035 §B invariant 8', 'no state leaks across invocations')).toBe(0);
  });
});
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RFC 0035 §B invariant 6 — sandbox wall-clock timeout, worker-driven + server-free.
|
|
3
|
+
*
|
|
4
|
+
* The worker-thread counterpart to `sandbox-wasm-isolation.test.ts` (which proves
|
|
5
|
+
* the other six cross-runtime invariants in-process but deliberately cannot run a
|
|
6
|
+
* non-terminating module). A wall-clock cap can only be enforced by THREAD
|
|
7
|
+
* PREEMPTION — a same-thread timer cannot interrupt a synchronous WASM loop — so
|
|
8
|
+
* `probeTimeout` (see `../lib/wasm-sandbox-probe.ts`) spawns a worker running the
|
|
9
|
+
* committed `misbehaving-timeout.wasm` fixture and races a main-thread kill-timer.
|
|
10
|
+
*
|
|
11
|
+
* This is the worker-driven conformance probe that graduates
|
|
12
|
+
* `node-pack-sandbox-timeout` from reference-impl to protocol tier (the prior gap:
|
|
13
|
+
* the cap was proven only host-internally by the WASM host's `test/sandbox.test.ts`).
|
|
14
|
+
*
|
|
15
|
+
* @see RFCS/0035-sandbox-execution-contract.md §B invariant 6
|
|
16
|
+
* @see SECURITY/invariants.yaml node-pack-sandbox-timeout
|
|
17
|
+
*/
|
|
18
|
+
import { describe, it, expect } from 'vitest';
|
|
19
|
+
import { readFileSync } from 'node:fs';
|
|
20
|
+
import { join } from 'node:path';
|
|
21
|
+
import { FIXTURES_DIR } from '../lib/paths.js';
|
|
22
|
+
import { probeTimeout } from '../lib/wasm-sandbox-probe.js';
|
|
23
|
+
|
|
24
|
+
/**
 * Build an assertion message of the form `<specRef> — <requirement>`.
 *
 * @param specRef - Spec section the assertion enforces.
 * @param requirement - The requirement being checked.
 */
const why = (specRef: string, requirement: string): string => {
  return [specRef, requirement].join(' — ');
};
|
|
25
|
+
/** Directory that holds the committed `wasm-sandbox` fixtures. */
const dir = join(FIXTURES_DIR, 'wasm-sandbox');

/**
 * Load the fixture module `<name>.wasm` from `dir`.
 *
 * @param name - Fixture base name, without the `.wasm` extension.
 * @returns The module bytes.
 */
const fix = (name: string): Uint8Array => {
  const wasmPath = join(dir, `${name}.wasm`);
  const bytes = readFileSync(wasmPath);
  return new Uint8Array(bytes);
};
|
|
27
|
+
|
|
28
|
+
describe('sandbox-wasm-timeout: wall-clock cap is engine/worker-enforced (RFC 0035 §B 6, server-free)', () => {
  it('node-pack-sandbox-timeout: a non-terminating module is killed with sandbox_timeout', async () => {
    const moduleBytes = fix('misbehaving-timeout');
    const outcome = await probeTimeout(moduleBytes, { memoryLimitBytes: 2 * 1024 * 1024, wallClockLimitMs: 300 });

    expect(outcome.ok, why('RFC 0035 §B invariant 6', 'an over-budget invocation MUST fail')).toBe(false);
    expect(outcome.code, why('RFC 0035 §C', 'the failure code MUST be sandbox_timeout')).toBe('sandbox_timeout');
  });

  // Guards against the kill-timer firing on a module that finishes in time.
  it('positive control: a well-behaved module completes within the budget (the kill-timer does not false-positive)', async () => {
    const moduleBytes = fix('well-behaved-echo');
    const outcome = await probeTimeout(moduleBytes, { memoryLimitBytes: 2 * 1024 * 1024, wallClockLimitMs: 1000 }, 'invoke', 7);

    expect(outcome.ok, why('RFC 0035 §B', 'a within-budget invocation completes before the kill-timer')).toBe(true);
    expect(outcome.result).toBe(7);
  });
});
|
|
@@ -56,6 +56,7 @@ import { driver } from '../lib/driver.js';
|
|
|
56
56
|
import { pollUntilTerminal } from '../lib/polling.js';
|
|
57
57
|
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
58
58
|
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
59
|
+
import { getCollector, waitForRunSpans } from '../lib/otel-collector.js';
|
|
59
60
|
|
|
60
61
|
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
61
62
|
const BYOK_WORKFLOW_ID = 'openwop-smoke-byok-roundtrip';
|
|
@@ -205,6 +206,57 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
|
|
|
205
206
|
},
|
|
206
207
|
);
|
|
207
208
|
|
|
209
|
+
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-otel-attribute: real OTLP export scrape (collector-side)',
  () => {
    // Unlike the scrape-seam probe, this checks what the host's OTLP exporter
    // ACTUALLY sent over the wire to the conformance collector, rather than
    // what the host self-reports through its
    // `/v1/host/sample/test/otel/spans` seam. A host could redact in the seam
    // and still leak on the real export; only this probe catches that. It
    // closes the `docs/KNOWN-LIMITS.md` "collector seam doesn't inspect span
    // attributes" gap. Gated on the in-process collector being active
    // (`OPENWOP_OTEL_COLLECTOR=true` + the host configured to export to it).
    it('NO real-exported OTel span/metric attribute MUST contain the BYOK canary plaintext', async (ctx) => {
      // Precondition 1: an active collector and a canary value to look for.
      const collector = getCollector();
      if (!collector || !CANARY_VALUE) {
        ctx.skip();
        return;
      }

      // Precondition 2: the host advertises both secrets and observability.
      const discovery = await readDiscovery();
      const secretsFamily = capabilityFamily<{ supported?: unknown }>(discovery, 'secrets');
      const secretsOk = secretsFamily?.supported === true;
      const obsOk = capabilityFamily<unknown>(discovery, 'observability') !== undefined;
      if (!secretsOk || !obsOk) {
        ctx.skip();
        return;
      }

      // Reset the collector before starting the BYOK run.
      collector.reset();
      const runId = await startByokRun();
      if (runId === null) {
        ctx.skip();
        return;
      }
      const terminal = await pollUntilTerminal(runId);
      expect(terminal.status).toBe('completed');

      // Hosts export spans asynchronously after terminal; wait until the
      // run's spans land (or the timeout elapses — an absent export is a
      // separate coverage concern, not a leak).
      await waitForRunSpans(runId, { timeoutMs: 8_000 });

      const leaks = collector.findCanaryLeakage(CANARY_VALUE);
      const requirement = driver.describe(
        'SECURITY/invariants.yaml secret-leakage-otel-attribute',
        `no real-exported OTel span/metric attribute may contain the BYOK canary plaintext. Leaking surfaces: ${JSON.stringify(leaks)}`,
      );
      expect(leaks, requirement).toEqual([]);
    });
  },
);
|
|
259
|
+
|
|
208
260
|
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
|
|
209
261
|
'secret-leakage-otel-attribute: advertisement-shape probe (RFC 0034 §A)',
|
|
210
262
|
() => {
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Portable tool catalog — the `GET /v1/tools` projection (RFC 0078 §B/§F) —
|
|
3
|
+
* behavioral.
|
|
4
|
+
*
|
|
5
|
+
* Capability-gated on `toolCatalog.supported` (root-first per RFC 0073).
|
|
6
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
7
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`. The always-on wire-shape coverage lives in
|
|
8
|
+
* `tool-descriptor-shape.test.ts`; this asserts host BEHAVIOR black-box on the
|
|
9
|
+
* NORMATIVE reads:
|
|
10
|
+
*
|
|
11
|
+
* 1. LIST (§B) — `GET /v1/tools` returns a `ToolDescriptor[]`, each
|
|
12
|
+
* schema-valid, `source` ∈ the closed vocab, `safetyTier` ∈ the closed
|
|
13
|
+
* vocab, and content-free (no credential material, SR-1).
|
|
14
|
+
* 2. BY-ID (§B) — `GET /v1/tools/{toolId}` returns that descriptor; an unknown
|
|
15
|
+
* id 404s.
|
|
16
|
+
* 3. AUTH-GATED — an unauthenticated `GET /v1/tools` is `401` (not public).
|
|
17
|
+
* 4. §F-2 NON-DISCLOSURE — a tool id known to belong to a DIFFERENT principal
|
|
18
|
+
* (`OPENWOP_CROSS_PRINCIPAL_TOOL_ID`) 404s for this caller, identically to
|
|
19
|
+
* "not found" — the authorization-scoped projection never discloses another
|
|
20
|
+
* principal's tools. Soft-skips when the env var is unset.
|
|
21
|
+
*
|
|
22
|
+
* Spec references:
|
|
23
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/tool-catalog.md (§B/§F)
|
|
24
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { describe, it, expect } from 'vitest';
|
|
28
|
+
import { readFileSync } from 'node:fs';
|
|
29
|
+
import { join } from 'node:path';
|
|
30
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
31
|
+
import addFormats from 'ajv-formats';
|
|
32
|
+
import { driver } from '../lib/driver.js';
|
|
33
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
34
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
35
|
+
import {
|
|
36
|
+
readToolCatalogCap,
|
|
37
|
+
listTools,
|
|
38
|
+
getTool,
|
|
39
|
+
TOOL_SOURCES,
|
|
40
|
+
SAFETY_TIERS,
|
|
41
|
+
TOOL_CONTENT_FORBIDDEN,
|
|
42
|
+
} from '../lib/toolCatalog.js';
|
|
43
|
+
|
|
44
|
+
/**
 * Read and parse a JSON schema file from `SCHEMAS_DIR`.
 *
 * @param name - Schema file name, including extension.
 * @returns The parsed schema object.
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const raw = readFileSync(schemaPath, 'utf8');
  return JSON.parse(raw) as Record<string, unknown>;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Assert that a descriptor carries none of the `TOOL_CONTENT_FORBIDDEN` keys.
 *
 * One expectation is issued per forbidden key, so a failure names the exact
 * field that was present.
 *
 * @param d - The descriptor object to inspect (top-level keys only).
 * @param where - Label used in the failure message to identify the object.
 */
function expectContentFree(d: Record<string, unknown>, where: string): void {
  for (const forbiddenField of TOOL_CONTENT_FORBIDDEN) {
    const absent = !(forbiddenField in d);
    const requirement = driver.describe('RFC 0078 §F (SR-1)', `${where} MUST be content-free (no ${forbiddenField})`);
    expect(absent, requirement).toBe(true);
  }
}
|
|
56
|
+
|
|
57
|
+
describe('tool-catalog-projection (RFC 0078 §B/§F)', () => {
  it('lists schema-valid ToolDescriptors, serves by-id + 404s, is auth-gated, and never discloses another principal', async () => {
    const capability = await readToolCatalogCap();
    const advertised = capability?.supported === true;
    if (!behaviorGate('openwop-tool-catalog', advertised)) return;

    const ajv = new Ajv2020({ strict: false, allErrors: true });
    addFormats(ajv);
    const validateDescriptor = ajv.compile(loadSchema('tool-descriptor.schema.json'));

    // ---- Leg 3: auth-gated (unauthenticated list MUST be 401) -------------
    const anonymous = await driver.get('/v1/tools', { authenticated: false });
    expect(
      anonymous.status === 401,
      driver.describe('tool-catalog.md §B', 'GET /v1/tools MUST require authentication (401 unauthenticated)'),
    ).toBe(true);

    // ---- Leg 1: the list (§B) -------------------------------------------
    const catalog = await listTools();
    // The host advertises the cap but doesn't serve the read — soft-skip the rest.
    if (catalog === null) return;

    for (const descriptor of catalog) {
      const schemaValid = validateDescriptor(descriptor);
      expect(
        schemaValid,
        driver.describe('tool-descriptor.schema.json', `each ToolDescriptor MUST validate (${ajv.errorsText(validateDescriptor.errors)})`),
      ).toBe(true);

      const sourceInVocab = typeof descriptor.source === 'string' && TOOL_SOURCES.includes(descriptor.source as string);
      expect(
        sourceInVocab,
        driver.describe('tool-catalog.md §C', 'ToolDescriptor.source MUST be in the closed vocabulary'),
      ).toBe(true);

      const tierInVocab = typeof descriptor.safetyTier === 'string' && SAFETY_TIERS.includes(descriptor.safetyTier as string);
      expect(
        tierInVocab,
        driver.describe('tool-catalog.md §C', 'ToolDescriptor.safetyTier MUST be pure|read|write|exec'),
      ).toBe(true);

      expectContentFree(descriptor, 'ToolDescriptor');
    }

    // ---- Leg 2: by-id round-trip + unknown 404 (§B) ---------------------
    if (catalog.length > 0 && typeof catalog[0]!.toolId === 'string') {
      const firstId = catalog[0]!.toolId as string;
      const byId = await getTool(firstId);
      // The descriptor identity is only asserted when the by-id read succeeds.
      if (byId.status === 200) {
        expect(
          byId.descriptor?.toolId === firstId,
          driver.describe('tool-catalog.md §B', 'GET /v1/tools/{toolId} MUST return the requested descriptor'),
        ).toBe(true);
      }
    }
    const missing = await getTool('__conformance_nonexistent_tool__');
    expect(
      missing.status === 404,
      driver.describe('tool-catalog.md §B', 'GET /v1/tools/{unknown} MUST 404'),
    ).toBe(true);

    // ---- Leg 4: §F-2 cross-principal non-disclosure (env-gated) ---------
    const crossPrincipalToolId = process.env.OPENWOP_CROSS_PRINCIPAL_TOOL_ID;
    if (crossPrincipalToolId) {
      const foreign = await getTool(crossPrincipalToolId);
      expect(
        foreign.status === 404,
        driver.describe('tool-catalog.md §F-2', 'a tool owned by a different principal MUST 404 (non-disclosure)'),
      ).toBe(true);
    }
  });
});
|