@openwop/openwop-conformance 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +2 -2
- package/api/asyncapi.yaml +25 -4
- package/api/openapi.yaml +371 -0
- package/coverage.md +31 -4
- package/fixtures/conformance-phase4-nondet-tool.json +53 -0
- package/fixtures/conformance-phase4-replay-divergence.json +40 -0
- package/fixtures.md +5 -3
- package/package.json +1 -1
- package/schemas/README.md +4 -0
- package/schemas/annotation-create.schema.json +37 -0
- package/schemas/annotation.schema.json +56 -0
- package/schemas/capabilities.schema.json +191 -3
- package/schemas/credential-reference.schema.json +21 -0
- package/schemas/node-pack-manifest.schema.json +112 -1
- package/schemas/run-diff-response.schema.json +64 -0
- package/schemas/run-event-payloads.schema.json +104 -2
- package/schemas/run-event.schema.json +8 -1
- package/schemas/run-snapshot.schema.json +11 -0
- package/src/lib/behavior-gate.ts +51 -0
- package/src/lib/driver.ts +13 -1
- package/src/lib/feedback.ts +31 -0
- package/src/lib/saml-idp.ts +179 -0
- package/src/scenarios/approval-gate-events.test.ts +61 -0
- package/src/scenarios/approval-gate-flow.test.ts +68 -0
- package/src/scenarios/auth-saml-profile.test.ts +119 -0
- package/src/scenarios/auth-scim-profile.test.ts +65 -0
- package/src/scenarios/authorization-fail-closed.test.ts +80 -0
- package/src/scenarios/authorization-roles-shape.test.ts +83 -0
- package/src/scenarios/connector-manifest-validity.test.ts +142 -0
- package/src/scenarios/credential-payload-redaction.test.ts +93 -0
- package/src/scenarios/credentials-capability-shape.test.ts +90 -0
- package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
- package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
- package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
- package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
- package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
- package/src/scenarios/experimental-tier-shape.test.ts +192 -0
- package/src/scenarios/feedback-capability-shape.test.ts +35 -0
- package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
- package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
- package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
- package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
- package/src/scenarios/feedback-record-and-list.test.ts +32 -0
- package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
- package/src/scenarios/identity-owner-shape.test.ts +64 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +13 -12
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
- package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
- package/src/scenarios/oauth-capability-shape.test.ts +97 -0
- package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
- package/src/scenarios/pack-registry-isolation.test.ts +108 -0
- package/src/scenarios/pack-registry-publish.test.ts +1 -1
- package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
- package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
- package/src/scenarios/redaction.test.ts +4 -1
- package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
- package/src/scenarios/run-diff.test.ts +143 -0
- package/src/scenarios/sandbox-capability-gate-respected.test.ts +7 -1
- package/src/scenarios/sandbox-memory-cap.test.ts +7 -5
- package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
- package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +7 -1
- package/src/scenarios/sandbox-no-host-env-leak.test.ts +5 -1
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +9 -1
- package/src/scenarios/sandbox-no-host-process-escape.test.ts +5 -1
- package/src/scenarios/sandbox-no-network-escape.test.ts +5 -1
- package/src/scenarios/sandbox-timeout-cap.test.ts +7 -5
- package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
- package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
- package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
- package/src/scenarios/spec-corpus-validity.test.ts +6 -3
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sandbox-mvp-behavior — RFC 0035 §B behavioral probes via the node:vm sandbox MVP.
|
|
3
|
+
*
|
|
4
|
+
* Companion to the 8 advertisement-shape `sandbox-*.test.ts` files. This
|
|
5
|
+
* file exercises the 5 RFC 0035 §B failure-mode invariants the
|
|
6
|
+
* node:vm-based reference MVP supports:
|
|
7
|
+
*
|
|
8
|
+
* 1. host-fs-escape — sandboxed code attempting `require('fs')` fails closed
|
|
9
|
+
* 2. host-env-leak — sandboxed code attempting `process.env` access fails closed
|
|
10
|
+
* 3. network-escape — sandboxed code attempting `require('http')` fails closed
|
|
11
|
+
* 4. host-process-escape — sandboxed code attempting `require('child_process')` fails closed
|
|
12
|
+
* 5. sandbox-timeout — runaway loop terminated by the host's wallClockLimitMs
|
|
13
|
+
*
|
|
14
|
+
* Plus 2 more by-construction invariants:
|
|
15
|
+
*
|
|
16
|
+
* 6. cross-pack-mutation — each invocation gets a fresh vm context;
|
|
17
|
+
* sandboxed code that mutates a "shared" global sees the same fresh
|
|
18
|
+
* value (0 or undefined) every invocation
|
|
19
|
+
* 7. capability-gate-respected — host.X invocations not in
|
|
20
|
+
* allowedHostCalls throw with code `sandbox_capability_denied` +
|
|
21
|
+
* `details.requestedCapability: <method-name>` per the spec's
|
|
22
|
+
* canonical 4-code error catalog at `host-capabilities.md` §"Error codes"
|
|
23
|
+
*
|
|
24
|
+
* Plus 1 spec-required terminal-failure invariant:
|
|
25
|
+
*
|
|
26
|
+
* 8. memory-exceeded — runaway allocation fails with the canonical
|
|
27
|
+
* `sandbox_memory_exceeded` (or `sandbox_timeout` when the wall-clock
|
|
28
|
+
* cap catches it first)
|
|
29
|
+
*
|
|
30
|
+
* The remaining RFC 0035 §B invariant (`node-pack-sandbox-no-eval`) is JS-
|
|
31
|
+
* runtime-specific and reserved per the RFC's exemption clause; this MVP
|
|
32
|
+
* does not enforce it.
|
|
33
|
+
*
|
|
34
|
+
* @see RFCS/0035-sandbox-execution-contract.md §B
|
|
35
|
+
* @see apps/workflow-engine/backend/typescript/src/routes/testSeam.ts §"sandbox-vm MVP"
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
import { describe, it, expect } from 'vitest';
|
|
39
|
+
import { driver } from '../lib/driver.js';
|
|
40
|
+
|
|
41
|
+
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
42
|
+
|
|
43
|
+
interface SandboxCaps {
|
|
44
|
+
supported?: unknown;
|
|
45
|
+
isolationModel?: unknown;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
interface DiscoveryDoc {
|
|
49
|
+
capabilities?: { sandbox?: SandboxCaps };
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
interface SandboxResponse {
|
|
53
|
+
result?: unknown;
|
|
54
|
+
error?: {
|
|
55
|
+
code: string;
|
|
56
|
+
details?: {
|
|
57
|
+
escapeKind?: string;
|
|
58
|
+
requestedCapability?: string;
|
|
59
|
+
requestedBytes?: number;
|
|
60
|
+
message?: string;
|
|
61
|
+
};
|
|
62
|
+
};
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
async function isSandboxAdvertised(): Promise<boolean> {
|
|
66
|
+
try {
|
|
67
|
+
const res = await driver.get('/.well-known/openwop');
|
|
68
|
+
if (res.status !== 200) return false;
|
|
69
|
+
return (res.json as DiscoveryDoc).capabilities?.sandbox?.supported === true;
|
|
70
|
+
} catch {
|
|
71
|
+
return false;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
async function invoke(typeId: string, args: Record<string, unknown> = {}, allowedHostCalls: string[] = []): Promise<{ status: number; body: SandboxResponse }> {
|
|
76
|
+
const res = await driver.post('/v1/host/sample/test/sandbox-invoke', { typeId, args, allowedHostCalls });
|
|
77
|
+
return { status: res.status, body: (res.json as SandboxResponse) ?? {} };
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
describe.skipIf(HTTP_SKIP)('sandbox-mvp-behavior: RFC 0035 §B failure-mode invariants (node:vm MVP)', () => {
|
|
81
|
+
it('host-fs-escape — fs access from sandboxed code fails closed', async (ctx) => {
|
|
82
|
+
if (!(await isSandboxAdvertised())) {
|
|
83
|
+
ctx.skip();
|
|
84
|
+
return;
|
|
85
|
+
}
|
|
86
|
+
const probe = await invoke('misbehave.fs-escape-read');
|
|
87
|
+
expect(probe.status).toBe(200);
|
|
88
|
+
expect(
|
|
89
|
+
probe.body.error?.code,
|
|
90
|
+
driver.describe(
|
|
91
|
+
'RFCS/0035-sandbox-execution-contract.md §B node-pack-sandbox-fs-gated',
|
|
92
|
+
'sandboxed `require("fs")` MUST fail closed with `sandbox_escape_attempt`',
|
|
93
|
+
),
|
|
94
|
+
).toBe('sandbox_escape_attempt');
|
|
95
|
+
expect(
|
|
96
|
+
probe.body.error?.details?.escapeKind,
|
|
97
|
+
driver.describe(
|
|
98
|
+
'RFCS/0035-sandbox-execution-contract.md §B',
|
|
99
|
+
'escapeKind MUST be host-fs-escape',
|
|
100
|
+
),
|
|
101
|
+
).toBe('host-fs-escape');
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it('host-env-leak — process.env access from sandboxed code fails closed', async (ctx) => {
|
|
105
|
+
if (!(await isSandboxAdvertised())) {
|
|
106
|
+
ctx.skip();
|
|
107
|
+
return;
|
|
108
|
+
}
|
|
109
|
+
const probe = await invoke('misbehave.env-leak');
|
|
110
|
+
expect(probe.status).toBe(200);
|
|
111
|
+
expect(
|
|
112
|
+
probe.body.error?.code,
|
|
113
|
+
driver.describe(
|
|
114
|
+
'RFCS/0035-sandbox-execution-contract.md §B node-pack-sandbox-no-env',
|
|
115
|
+
'sandboxed `process.env` access MUST fail closed with `sandbox_escape_attempt`',
|
|
116
|
+
),
|
|
117
|
+
).toBe('sandbox_escape_attempt');
|
|
118
|
+
expect(probe.body.error?.details?.escapeKind).toBe('host-env-leak');
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
it('network-escape — http/net access from sandboxed code fails closed', async (ctx) => {
|
|
122
|
+
if (!(await isSandboxAdvertised())) {
|
|
123
|
+
ctx.skip();
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
const probe = await invoke('misbehave.network-escape');
|
|
127
|
+
expect(probe.status).toBe(200);
|
|
128
|
+
expect(
|
|
129
|
+
probe.body.error?.code,
|
|
130
|
+
driver.describe(
|
|
131
|
+
'RFCS/0035-sandbox-execution-contract.md §B node-pack-sandbox-network-gated',
|
|
132
|
+
'sandboxed `require("http")` MUST fail closed with `sandbox_escape_attempt`',
|
|
133
|
+
),
|
|
134
|
+
).toBe('sandbox_escape_attempt');
|
|
135
|
+
expect(['network-escape', 'host-fs-escape'].includes(probe.body.error?.details?.escapeKind ?? '')).toBe(true);
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
it('host-process-escape — child_process access from sandboxed code fails closed', async (ctx) => {
|
|
139
|
+
if (!(await isSandboxAdvertised())) {
|
|
140
|
+
ctx.skip();
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
const probe = await invoke('misbehave.process-escape');
|
|
144
|
+
expect(probe.status).toBe(200);
|
|
145
|
+
expect(probe.body.error?.code).toBe('sandbox_escape_attempt');
|
|
146
|
+
// The heuristic may catch this as host-fs-escape (via "require") if
|
|
147
|
+
// network/process patterns don't match first. Accept either as long
|
|
148
|
+
// as it fails closed.
|
|
149
|
+
expect(['host-process-escape', 'host-fs-escape'].includes(probe.body.error?.details?.escapeKind ?? '')).toBe(true);
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
it('sandbox-timeout — runaway loop terminated by wallClockLimitMs', async (ctx) => {
|
|
153
|
+
if (!(await isSandboxAdvertised())) {
|
|
154
|
+
ctx.skip();
|
|
155
|
+
return;
|
|
156
|
+
}
|
|
157
|
+
const start = Date.now();
|
|
158
|
+
const probe = await invoke('misbehave.timeout');
|
|
159
|
+
const elapsed = Date.now() - start;
|
|
160
|
+
expect(probe.status).toBe(200);
|
|
161
|
+
expect(
|
|
162
|
+
probe.body.error?.code,
|
|
163
|
+
driver.describe(
|
|
164
|
+
'RFCS/0035-sandbox-execution-contract.md §B node-pack-sandbox-timeout',
|
|
165
|
+
'sandboxed infinite-loop MUST be terminated with `sandbox_timeout`',
|
|
166
|
+
),
|
|
167
|
+
).toBe('sandbox_timeout');
|
|
168
|
+
// Should terminate well within 5× wallClockLimitMs (1000ms cap + request/host overhead).
|
|
169
|
+
expect(
|
|
170
|
+
elapsed < 5000,
|
|
171
|
+
driver.describe(
|
|
172
|
+
'RFCS/0035-sandbox-execution-contract.md §A wallClockLimitMs',
|
|
173
|
+
`timeout MUST terminate within reasonable bound (got ${elapsed}ms; cap is 1000ms)`,
|
|
174
|
+
),
|
|
175
|
+
).toBe(true);
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
it('cross-pack-mutation — fresh vm context per invocation, no state leaks', async (ctx) => {
|
|
179
|
+
if (!(await isSandboxAdvertised())) {
|
|
180
|
+
ctx.skip();
|
|
181
|
+
return;
|
|
182
|
+
}
|
|
183
|
+
const r1 = await invoke('misbehave.cross-pack-mutate');
|
|
184
|
+
const r2 = await invoke('misbehave.cross-pack-mutate');
|
|
185
|
+
const r3 = await invoke('misbehave.cross-pack-mutate');
|
|
186
|
+
expect(r1.status).toBe(200);
|
|
187
|
+
expect(r2.status).toBe(200);
|
|
188
|
+
expect(r3.status).toBe(200);
|
|
189
|
+
|
|
190
|
+
// Each invocation gets a fresh context → __sharedState starts at 0+1=1 each time.
|
|
191
|
+
// If state leaked across invocations, we'd see 1, 2, 3.
|
|
192
|
+
const shared1 = (r1.body.result as { shared?: number })?.shared;
|
|
193
|
+
const shared2 = (r2.body.result as { shared?: number })?.shared;
|
|
194
|
+
const shared3 = (r3.body.result as { shared?: number })?.shared;
|
|
195
|
+
expect(
|
|
196
|
+
shared1 === 1 && shared2 === 1 && shared3 === 1,
|
|
197
|
+
driver.describe(
|
|
198
|
+
'RFCS/0035-sandbox-execution-contract.md §B node-pack-sandbox-isolated-context',
|
|
199
|
+
`each invocation MUST see fresh state (got shared=[${shared1}, ${shared2}, ${shared3}]; expected all 1)`,
|
|
200
|
+
),
|
|
201
|
+
).toBe(true);
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
it('capability-gate-respected — host call NOT in allowedHostCalls fails with sandbox_capability_denied', async (ctx) => {
|
|
205
|
+
if (!(await isSandboxAdvertised())) {
|
|
206
|
+
ctx.skip();
|
|
207
|
+
return;
|
|
208
|
+
}
|
|
209
|
+
const probe = await invoke('misbehave.capability-gate-violation', {}, []);
|
|
210
|
+
expect(probe.status).toBe(200);
|
|
211
|
+
expect(
|
|
212
|
+
probe.body.error?.code,
|
|
213
|
+
driver.describe(
|
|
214
|
+
'spec/v1/host-capabilities.md §"Error codes" + §B node-pack-sandbox-capability-gate-respected',
|
|
215
|
+
'capability-gate violation MUST fail closed with `sandbox_capability_denied` — distinct from `sandbox_escape_attempt` which covers forbidden-syscall escapes per the spec\'s 4-code catalog',
|
|
216
|
+
),
|
|
217
|
+
).toBe('sandbox_capability_denied');
|
|
218
|
+
expect(
|
|
219
|
+
probe.body.error?.details?.requestedCapability,
|
|
220
|
+
driver.describe(
|
|
221
|
+
'spec/v1/host-capabilities.md §"Error codes"',
|
|
222
|
+
'`sandbox_capability_denied` MUST carry `details.requestedCapability` identifying the host method the sandboxed code attempted to call',
|
|
223
|
+
),
|
|
224
|
+
).toBe('notInAllowedList');
|
|
225
|
+
});
|
|
226
|
+
|
|
227
|
+
it('memory-exceeded — runaway allocation fails with sandbox_memory_exceeded', async (ctx) => {
|
|
228
|
+
if (!(await isSandboxAdvertised())) {
|
|
229
|
+
ctx.skip();
|
|
230
|
+
return;
|
|
231
|
+
}
|
|
232
|
+
const probe = await invoke('misbehave.memory-bomb');
|
|
233
|
+
expect(probe.status).toBe(200);
|
|
234
|
+
// The memory-bomb program doubles a string 30 times → ~1GiB if it
|
|
235
|
+
// ran to completion. node:vm + v8 typically OOM or timeout before
|
|
236
|
+
// that point; either way the engine MUST surface the canonical
|
|
237
|
+
// `sandbox_memory_exceeded` OR `sandbox_timeout` error code per
|
|
238
|
+
// `host-capabilities.md` §"Error codes". The MVP heuristic also
|
|
239
|
+
// catches >16MiB serialized results post-hoc → same code.
|
|
240
|
+
// We accept either canonical code here because both are
|
|
241
|
+
// spec-conformant terminal states for a memory-bomb under the
|
|
242
|
+
// declared `memoryLimitBytes` + `wallClockLimitMs` caps; what we
|
|
243
|
+
// refuse is silent success or the now-removed `sandbox_memory_cap`
|
|
244
|
+
// legacy code.
|
|
245
|
+
expect(
|
|
246
|
+
['sandbox_memory_exceeded', 'sandbox_timeout'].includes(probe.body.error?.code ?? ''),
|
|
247
|
+
driver.describe(
|
|
248
|
+
'spec/v1/host-capabilities.md §"Error codes" + §B node-pack-sandbox-memory-cap',
|
|
249
|
+
`memory-bomb MUST surface either \`sandbox_memory_exceeded\` (when memoryLimitBytes caught it) or \`sandbox_timeout\` (when wallClockLimitMs caught it first) — got code: ${probe.body.error?.code}`,
|
|
250
|
+
),
|
|
251
|
+
).toBe(true);
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
it('well-behaved.host-fetch — allowedHostCalls=[fetch] permits the host call', async (ctx) => {
|
|
255
|
+
if (!(await isSandboxAdvertised())) {
|
|
256
|
+
ctx.skip();
|
|
257
|
+
return;
|
|
258
|
+
}
|
|
259
|
+
const probe = await invoke('well-behaved.host-fetch', {}, ['fetch']);
|
|
260
|
+
expect(probe.status).toBe(200);
|
|
261
|
+
expect(
|
|
262
|
+
probe.body.error,
|
|
263
|
+
driver.describe(
|
|
264
|
+
'RFCS/0035-sandbox-execution-contract.md §A allowedHostCalls',
|
|
265
|
+
'host call IN allowedHostCalls MUST succeed (no error envelope)',
|
|
266
|
+
),
|
|
267
|
+
).toBeUndefined();
|
|
268
|
+
});
|
|
269
|
+
|
|
270
|
+
it('well-behaved.echo — sandboxed code returns args round-trip when no escape attempt', async (ctx) => {
|
|
271
|
+
if (!(await isSandboxAdvertised())) {
|
|
272
|
+
ctx.skip();
|
|
273
|
+
return;
|
|
274
|
+
}
|
|
275
|
+
const probe = await invoke('well-behaved.echo', { input: 'hello-sandbox' });
|
|
276
|
+
expect(probe.status).toBe(200);
|
|
277
|
+
expect(probe.body.error).toBeUndefined();
|
|
278
|
+
expect((probe.body.result as { echoed?: string })?.echoed).toBe('hello-sandbox');
|
|
279
|
+
});
|
|
280
|
+
});
|
|
@@ -26,5 +26,11 @@ import { describe, it } from 'vitest';
|
|
|
26
26
|
// test reporters track the gap rather than reporting a vacuous PASS.
|
|
27
27
|
|
|
28
28
|
describe('sandbox-no-cross-pack-mutation: behavioral (RFC 0035 §B)', () => {
|
|
29
|
-
|
|
29
|
+
// Behavioral coverage in `sandbox-mvp-behavior.test.ts` §"cross-pack-mutation"
|
|
30
|
+
// (drives `POST /v1/host/sample/test/sandbox-invoke` against the
|
|
31
|
+
// workflow-engine's node:vm MVP — each invocation gets a fresh vm
|
|
32
|
+
// context, so sandboxed code that mutates a "shared" global sees the
|
|
33
|
+
// same fresh value on every call). `it.skip` preserves the
|
|
34
|
+
// per-invariant file structure without inflating the `it.todo` count.
|
|
35
|
+
it.skip('behavioral coverage in sandbox-mvp-behavior.test.ts §"cross-pack-mutation"');
|
|
30
36
|
});
|
|
@@ -23,5 +23,9 @@ import { describe, it } from 'vitest';
|
|
|
23
23
|
// reporters track the gap rather than reporting a vacuous PASS.
|
|
24
24
|
|
|
25
25
|
describe('sandbox-no-host-env-leak: behavioral (RFC 0035 §B)', () => {
|
|
26
|
-
|
|
26
|
+
// Behavioral coverage in `sandbox-mvp-behavior.test.ts` §"host-env-leak"
|
|
27
|
+
// (drives `POST /v1/host/sample/test/sandbox-invoke` against the
|
|
28
|
+
// workflow-engine's node:vm MVP). `it.skip` preserves the per-invariant
|
|
29
|
+
// file structure without inflating the `it.todo` count.
|
|
30
|
+
it.skip('behavioral coverage in sandbox-mvp-behavior.test.ts §"host-env-leak"');
|
|
27
31
|
});
|
|
@@ -84,5 +84,13 @@ describe.skipIf(HTTP_SKIP)('sandbox-no-host-fs-escape: capability shape (RFC 003
|
|
|
84
84
|
// Surfaced as `todo` so test reporters track the gap rather than reporting
|
|
85
85
|
// a vacuous PASS.
|
|
86
86
|
describe('sandbox-no-host-fs-escape: behavioral (RFC 0035 §B node-pack-sandbox-no-host-fs-escape)', () => {
|
|
87
|
-
|
|
87
|
+
// Behavioral coverage lives in `sandbox-mvp-behavior.test.ts` §"host-fs-escape"
|
|
88
|
+
// (the consolidated file drives the workflow-engine's
|
|
89
|
+
// `POST /v1/host/sample/test/sandbox-invoke` seam for ALL 8 RFC 0035 §B
|
|
90
|
+
// invariants in one place to avoid per-file seam-setup duplication and to
|
|
91
|
+
// exercise a single canonical 4-code error catalog). Kept this block as
|
|
92
|
+
// `it.skip` to preserve the per-invariant file structure (so a future host
|
|
93
|
+
// that opts into per-file probing has the slot ready) without inflating the
|
|
94
|
+
// `it.todo` count external auditors track.
|
|
95
|
+
it.skip('behavioral coverage in sandbox-mvp-behavior.test.ts §"host-fs-escape"');
|
|
88
96
|
});
|
|
@@ -27,5 +27,9 @@ import { describe, it } from 'vitest';
|
|
|
27
27
|
// a vacuous PASS.
|
|
28
28
|
|
|
29
29
|
describe('sandbox-no-host-process-escape: behavioral (RFC 0035 §B)', () => {
|
|
30
|
-
|
|
30
|
+
// Behavioral coverage in `sandbox-mvp-behavior.test.ts` §"host-process-escape"
|
|
31
|
+
// (drives `POST /v1/host/sample/test/sandbox-invoke` against the
|
|
32
|
+
// workflow-engine's node:vm MVP). `it.skip` preserves the per-invariant
|
|
33
|
+
// file structure without inflating the `it.todo` count.
|
|
34
|
+
it.skip('behavioral coverage in sandbox-mvp-behavior.test.ts §"host-process-escape"');
|
|
31
35
|
});
|
|
@@ -24,5 +24,9 @@ import { describe, it } from 'vitest';
|
|
|
24
24
|
// test reporters track the gap rather than reporting a vacuous PASS.
|
|
25
25
|
|
|
26
26
|
describe('sandbox-no-network-escape: behavioral (RFC 0035 §B)', () => {
|
|
27
|
-
|
|
27
|
+
// Behavioral coverage in `sandbox-mvp-behavior.test.ts` §"network-escape"
|
|
28
|
+
// (drives `POST /v1/host/sample/test/sandbox-invoke` against the
|
|
29
|
+
// workflow-engine's node:vm MVP). `it.skip` preserves the per-invariant
|
|
30
|
+
// file structure without inflating the `it.todo` count.
|
|
31
|
+
it.skip('behavioral coverage in sandbox-mvp-behavior.test.ts §"network-escape"');
|
|
28
32
|
});
|
|
@@ -50,9 +50,11 @@ describe.skipIf(HTTP_SKIP)('sandbox-timeout-cap: capability shape + behavioral (
|
|
|
50
50
|
).toBe(true);
|
|
51
51
|
});
|
|
52
52
|
|
|
53
|
-
// Behavioral
|
|
54
|
-
//
|
|
55
|
-
//
|
|
56
|
-
//
|
|
57
|
-
it.
|
|
53
|
+
// Behavioral coverage in `sandbox-mvp-behavior.test.ts` §"sandbox-timeout"
|
|
54
|
+
// (drives `POST /v1/host/sample/test/sandbox-invoke` against the
|
|
55
|
+
// workflow-engine's node:vm MVP and asserts `error.code:
|
|
56
|
+
// 'sandbox_timeout'` per `host-capabilities.md` §"Error codes").
|
|
57
|
+
// `it.skip` preserves the per-invariant file structure without inflating
|
|
58
|
+
// the `it.todo` count external auditors track.
|
|
59
|
+
it.skip('behavioral coverage in sandbox-mvp-behavior.test.ts §"sandbox-timeout"');
|
|
58
60
|
});
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* scheduling-capability-shape — RFC 0052 §A advertisement-shape verification.
|
|
3
|
+
*
|
|
4
|
+
* Status: DRAFT. RFC 0052 (scheduling & time-based triggers) is `Draft`. The
|
|
5
|
+
* `capabilities.scheduling` block has landed in
|
|
6
|
+
* `schemas/capabilities.schema.json`.
|
|
7
|
+
*
|
|
8
|
+
* Always runs (shape-only): when the host advertises `capabilities.scheduling`,
|
|
9
|
+
* its fields MUST be well-formed.
|
|
10
|
+
*
|
|
11
|
+
* What this scenario asserts:
|
|
12
|
+
* 1. `capabilities.scheduling` is either absent or a well-formed object.
|
|
13
|
+
* 2. When `supported: true`: `cron`/`delayed`/`calendar` (when present) are
|
|
14
|
+
* booleans, and `maxFutureHorizon` (when present) is an ISO-8601 duration
|
|
15
|
+
* (RFC 0052 §A).
|
|
16
|
+
*
|
|
17
|
+
* @see RFCS/0052-scheduling-and-time-based-triggers.md
|
|
18
|
+
* @see spec/v1/host-capabilities.md §host.scheduling
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { describe, it, expect } from 'vitest';
|
|
22
|
+
import { driver } from '../lib/driver.js';
|
|
23
|
+
|
|
24
|
+
interface DiscoveryScheduling {
|
|
25
|
+
supported?: boolean;
|
|
26
|
+
cron?: boolean;
|
|
27
|
+
delayed?: boolean;
|
|
28
|
+
calendar?: boolean;
|
|
29
|
+
maxFutureHorizon?: string;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
interface DiscoveryDoc {
|
|
33
|
+
capabilities?: { scheduling?: DiscoveryScheduling };
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// ISO-8601 duration (e.g. P90D, PT12H, P1DT6H) — the subset the spec uses.
|
|
37
|
+
// Lookaheads reject component-less values: a bare `P`, or a `T` designator with no time component after it (`PT`, `P1DT`).
const ISO_DURATION = /^P(?!$)(?:\d+Y)?(?:\d+M)?(?:\d+W)?(?:\d+D)?(?:T(?=\d)(?:\d+H)?(?:\d+M)?(?:\d+S)?)?$/;
|
|
38
|
+
|
|
39
|
+
async function readScheduling(): Promise<DiscoveryScheduling | null> {
|
|
40
|
+
const res = await driver.get('/.well-known/openwop');
|
|
41
|
+
const body = res.json as DiscoveryDoc | undefined;
|
|
42
|
+
return body?.capabilities?.scheduling ?? null;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
describe('scheduling-capability-shape: advertisement shape (RFC 0052 §A)', () => {
|
|
46
|
+
it('capabilities.scheduling is either absent or well-formed', async () => {
|
|
47
|
+
const sched = await readScheduling();
|
|
48
|
+
if (sched === null) return; // host doesn't advertise scheduling at all
|
|
49
|
+
expect(
|
|
50
|
+
typeof sched.supported,
|
|
51
|
+
driver.describe(
|
|
52
|
+
'capabilities.schema.json §scheduling',
|
|
53
|
+
'capabilities.scheduling.supported MUST be a boolean when scheduling is advertised',
|
|
54
|
+
),
|
|
55
|
+
).toBe('boolean');
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
it('cron/delayed/calendar are booleans when present + supported', async () => {
|
|
59
|
+
const sched = await readScheduling();
|
|
60
|
+
if (!sched?.supported) return;
|
|
61
|
+
for (const k of ['cron', 'delayed', 'calendar'] as const) {
|
|
62
|
+
if (sched[k] === undefined) continue;
|
|
63
|
+
expect(
|
|
64
|
+
typeof sched[k],
|
|
65
|
+
driver.describe('RFC 0052 §A', `capabilities.scheduling.${k} MUST be a boolean when present`),
|
|
66
|
+
).toBe('boolean');
|
|
67
|
+
}
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it('maxFutureHorizon is an ISO-8601 duration when present', async () => {
|
|
71
|
+
const sched = await readScheduling();
|
|
72
|
+
if (!sched?.supported || sched.maxFutureHorizon === undefined) return;
|
|
73
|
+
expect(
|
|
74
|
+
ISO_DURATION.test(sched.maxFutureHorizon),
|
|
75
|
+
driver.describe(
|
|
76
|
+
'RFC 0052 §A',
|
|
77
|
+
`capabilities.scheduling.maxFutureHorizon MUST be an ISO-8601 duration, got: ${sched.maxFutureHorizon}`,
|
|
78
|
+
),
|
|
79
|
+
).toBe(true);
|
|
80
|
+
});
|
|
81
|
+
});
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* scheduling-cron-fires-once — RFC 0052 §B behavioral verification.
|
|
3
|
+
*
|
|
4
|
+
* Status: DRAFT. RFC 0052 (scheduling & time-based triggers) is `Draft`.
|
|
5
|
+
*
|
|
6
|
+
* Capability-gated: skips when the host does not advertise
|
|
7
|
+
* `capabilities.scheduling.supported = true`.
|
|
8
|
+
*
|
|
9
|
+
* What this scenario asserts (via the optional
|
|
10
|
+
* `POST /v1/host/sample/scheduling/tick` test seam, which advances a
|
|
11
|
+
* deterministic clock and reports the runs a cron schedule produced):
|
|
12
|
+
* 1. Once-per-tick — a single cron tick produces exactly one run; no
|
|
13
|
+
* duplicate concurrent firing (RFC 0052 §B.2).
|
|
14
|
+
* 2. Missed-tick policy — a host-down-across-a-tick window applies the
|
|
15
|
+
* advertised policy (fire-once-on-recovery OR skip), never a backlog
|
|
16
|
+
* flood (RFC 0052 §B.4).
|
|
17
|
+
*
|
|
18
|
+
* Hosts without the seam soft-skip the behavioral probes (404). Horizon
|
|
19
|
+
* rejection (`schedule_horizon_exceeded`) is covered by the shape +
|
|
20
|
+
* error-code contract; behavioral horizon assertion is part of the deferred
|
|
21
|
+
* delayed-execution scenario.
|
|
22
|
+
*
|
|
23
|
+
* @see RFCS/0052-scheduling-and-time-based-triggers.md
|
|
24
|
+
* @see spec/v1/host-capabilities.md §host.scheduling
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { describe, it, expect } from 'vitest';
|
|
28
|
+
import { driver } from '../lib/driver.js';
|
|
29
|
+
|
|
30
|
+
interface DiscoveryDoc {
|
|
31
|
+
capabilities?: { scheduling?: { supported?: boolean; cron?: boolean } };
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
async function readScheduling(): Promise<{ supported?: boolean; cron?: boolean } | null> {
|
|
35
|
+
const res = await driver.get('/.well-known/openwop');
|
|
36
|
+
return (res.json as DiscoveryDoc | undefined)?.capabilities?.scheduling ?? null;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
describe('scheduling-cron-fires-once: once-per-tick + missed-tick (RFC 0052 §B)', () => {
|
|
40
|
+
it('a single cron tick produces exactly one run', async () => {
|
|
41
|
+
const sched = await readScheduling();
|
|
42
|
+
if (!sched?.supported || sched.cron !== true) return; // capability-gated
|
|
43
|
+
const res = await driver.post('/v1/host/sample/scheduling/tick', { scenario: 'single-tick' });
|
|
44
|
+
if (res.status === 404) return; // seam unwired — soft-skip
|
|
45
|
+
const body = res.json as { runsFired?: number } | undefined;
|
|
46
|
+
expect(
|
|
47
|
+
body?.runsFired,
|
|
48
|
+
driver.describe('RFC 0052 §B.2', 'a single cron tick MUST fire exactly one run (no duplicate concurrent firing)'),
|
|
49
|
+
).toBe(1);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it('a missed-tick window does not produce a backlog flood', async () => {
|
|
53
|
+
const sched = await readScheduling();
|
|
54
|
+
if (!sched?.supported || sched.cron !== true) return; // capability-gated
|
|
55
|
+
const res = await driver.post('/v1/host/sample/scheduling/tick', { scenario: 'missed-window', missedTicks: 5 });
|
|
56
|
+
if (res.status === 404) return; // seam unwired — soft-skip
|
|
57
|
+
const body = res.json as { runsFired?: number } | undefined;
|
|
58
|
+
expect(
|
|
59
|
+
typeof body?.runsFired === 'number' && body.runsFired <= 1,
|
|
60
|
+
driver.describe(
|
|
61
|
+
'RFC 0052 §B.4',
|
|
62
|
+
`a missed-tick window MUST apply the advertised policy (fire-once-on-recovery or skip), never N backlogged runs; got runsFired=${body?.runsFired}`,
|
|
63
|
+
),
|
|
64
|
+
).toBe(true);
|
|
65
|
+
});
|
|
66
|
+
});
|