@openwop/openwop-conformance 1.13.0 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/README.md +2 -2
- package/api/openapi.yaml +60 -0
- package/coverage.md +11 -1
- package/fixtures/wasm-sandbox/isolation-global.wasm +0 -0
- package/fixtures/wasm-sandbox/isolation-global.wat +6 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wat +5 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wat +4 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wat +2 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wat +3 -0
- package/package.json +1 -1
- package/src/lib/discovery-capabilities.ts +18 -19
- package/src/lib/egressPolicy.ts +76 -0
- package/src/lib/profiles.ts +15 -0
- package/src/lib/sandbox-timeout-worker.mjs +31 -0
- package/src/lib/toolCatalog.ts +81 -0
- package/src/lib/wasm-sandbox-probe.ts +168 -0
- package/src/scenarios/core-standard-profile.test.ts +75 -0
- package/src/scenarios/egress-audience-binding.test.ts +81 -0
- package/src/scenarios/egress-decision-content-free.test.ts +57 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +12 -7
- package/src/scenarios/prompt-resolution-chain-event.test.ts +113 -0
- package/src/scenarios/sandbox-wasm-isolation.test.ts +98 -0
- package/src/scenarios/sandbox-wasm-timeout.test.ts +40 -0
- package/src/scenarios/tool-catalog-projection.test.ts +120 -0
- package/src/scenarios/tool-session-lifecycle.test.ts +105 -0
- package/src/scenarios/workspace-cross-tenant-isolation-blackbox.test.ts +89 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helpers for the RFC 0079 `httpClient.egressPolicy` conformance
|
|
3
|
+
* scenarios. Lives in lib/ (not a `*.test.ts`) so scenarios import it via
|
|
4
|
+
* `../lib/egressPolicy.js`.
|
|
5
|
+
*
|
|
6
|
+
* Egress policy is a BEHAVIOR layered over the RFC 0076 `safeFetch` — there is
|
|
7
|
+
* no new normative read endpoint. The behavior is driven through the
|
|
8
|
+
* host-sample egress-decision seam (`POST /v1/host/sample/egress/decide`): a
|
|
9
|
+
* host-issued credential carries `audiences[]` (RFC 0079 §A provenance), and an
|
|
10
|
+
* egress whose destination is OUTSIDE those audiences MUST emit
|
|
11
|
+
* `egress.decided { decision: "denied"|"downgraded", reason: "out-of-audience" }`
|
|
12
|
+
* and MUST NOT attach the credential (the §C confused-deputy MUST, backing the
|
|
13
|
+
* `egress-credential-audience-bound` invariant). A provenance-unevaluable egress
|
|
14
|
+
* MUST be `denied { reason: "provenance-unevaluable" }` — fail-closed. The seam
|
|
15
|
+
* is OPTIONAL — scenarios soft-skip on 404/405.
|
|
16
|
+
*
|
|
17
|
+
* Gating uses the `httpClient.egressPolicy.supported` capability flag from the
|
|
18
|
+
* live discovery doc (root-first per RFC 0073).
|
|
19
|
+
*
|
|
20
|
+
* @see RFCS/0079-credential-provenance-and-egress-policy.md
|
|
21
|
+
* @see spec/v1/host-capabilities.md (§"Credential provenance + egress policy")
|
|
22
|
+
*/
|
|
23
|
+
import { driver } from './driver.js';
|
|
24
|
+
import { readCapabilityFamily } from './discovery-capabilities.js';
|
|
25
|
+
|
|
26
|
+
/**
 * Look up the `httpClient.egressPolicy` capability block in the discovery
 * document (root-first per RFC 0073).
 *
 * @returns the advertised block when it is a non-null object; `null` when the
 *   `httpClient` family or its `egressPolicy` member is missing or not an object.
 */
export async function readEgressPolicyCap(): Promise<Record<string, unknown> | null> {
  const httpClientFamily = await readCapabilityFamily<{ egressPolicy?: unknown }>('httpClient');
  const advertised = httpClientFamily?.egressPolicy;
  // Anything falsy or non-object counts as "unadvertised".
  if (!advertised || typeof advertised !== 'object') return null;
  return advertised as Record<string, unknown>;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Loosely-typed view of the JSON body returned by the host-sample
 * egress-decision seam. Every modelled field is optional and unknown extra
 * keys are tolerated, so scenarios can assert on whatever the host returned.
 */
export interface EgressDecision {
  /** The decision verdict — presumably one of `EGRESS_DECISIONS`; scenarios assert this. */
  decision?: string;
  /** The decision reason — presumably one of `EGRESS_REASONS`; scenarios assert this. */
  reason?: string;
  destination?: string;
  /**
   * Whether the host-issued credential was attached to the egress. Per §C this
   * MUST be false for an out-of-audience / unevaluable decision.
   */
  credentialAttached?: boolean;
  /**
   * Set when the seam ran a canary credential and the canary leaked into any
   * observable surface (the SR-1 negative — MUST stay false/absent).
   */
  canaryLeaked?: boolean;
  runId?: string;
  /** Any further, unmodelled keys present in the response body. */
  [k: string]: unknown;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Post a single scenario to the host-sample egress-decision seam
 * (`POST /v1/host/sample/egress/decide`, RFC 0079 §C) and hand back the body.
 *
 * Supported `scenario` values:
 *   - `out-of-audience`        — credential bound to audience A, egress to B;
 *                                MUST deny/downgrade + NOT attach the credential.
 *   - `provenance-unevaluable` — egress whose provenance can't be evaluated;
 *                                MUST deny fail-closed.
 *   - `in-audience`            — control: egress within audience; MAY allow.
 *   - `canary`                 — seed a credential whose value is a known canary
 *                                and assert it never appears on the wire (SR-1).
 *
 * @param body the request body, forwarded to the seam unchanged.
 * @returns `null` when the seam is unwired (HTTP 404/405); otherwise the parsed
 *   response body, or an empty object when the response carried no JSON.
 */
export async function driveEgress(
  body: { scenario: 'out-of-audience' | 'provenance-unevaluable' | 'in-audience' | 'canary' },
): Promise<EgressDecision | null> {
  const response = await driver.post('/v1/host/sample/egress/decide', body);
  const seamUnwired = response.status === 404 || response.status === 405;
  if (seamUnwired) return null;
  const decision = response.json as EgressDecision | undefined;
  return decision ?? {};
}
|
|
67
|
+
|
|
68
|
+
/** Closed vocabulary of egress decisions (RFC 0079 §B). */
export const EGRESS_DECISIONS: string[] = [
  'allowed',
  'denied',
  'downgraded',
  'approval-required',
];

/**
 * Closed vocabulary of egress reasons (RFC 0079 §B). It is a CLOSED enum so a
 * host cannot spill a blocked URL/host/header into a free-form reason.
 */
export const EGRESS_REASONS: string[] = [
  'ok',
  'out-of-audience',
  'expired',
  'ssrf-blocked',
  'provenance-unevaluable',
  'scope-denied',
  'policy-denied',
];

/**
 * Content keys that an `egress.decided` payload / provenance descriptor MUST
 * NEVER carry (SR-1 / `egress-decision-no-secret-leak`): no secret value, no
 * blocked URL/header spill.
 */
export const EGRESS_CONTENT_FORBIDDEN: string[] = [
  'secret',
  'credential',
  'credentials',
  'token',
  'apiKey',
  'password',
  'url',
  'header',
  'headers',
  'body',
];
|
package/src/lib/profiles.ts
CHANGED
|
@@ -362,6 +362,21 @@ export function agentPlatformSatisfiedTerms(c: DiscoveryPayload): readonly strin
|
|
|
362
362
|
return checks.filter(([, ok]) => ok).map(([id]) => id);
|
|
363
363
|
}
|
|
364
364
|
|
|
365
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
366
|
+
// `openwop-core-standard` operational-annex predicate (RFC 0088). Like the
|
|
367
|
+
// agent-platform annex above, this is NOT a closed-catalog profile (so it is
|
|
368
|
+
// absent from deriveProfiles) — it is an operational ANNEX whose claim is backed
|
|
369
|
+
// by the §C floor scenarios passing black-box. This helper computes only the §B
|
|
370
|
+
// discovery predicate (the floor of MUSTs with black-box production-path proof).
|
|
371
|
+
//
|
|
372
|
+
// @see spec/v1/core-standard-profile.md
|
|
373
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
374
|
+
|
|
375
|
+
/**
 * Discovery predicate for the `openwop-core-standard` floor (RFC 0088 §B).
 *
 * The floor is a pure composition of existing predicates: the payload must
 * satisfy `isCore` AND `isInterrupts` AND at least one of the two stream
 * predicates (`isStreamSse` / `isStreamPoll`).
 *
 * @param c the discovery payload to evaluate.
 * @returns `true` only when all three terms hold.
 */
export function isCoreStandard(c: DiscoveryPayload): boolean {
  if (!isCore(c)) return false;
  if (!isInterrupts(c)) return false;
  return isStreamSse(c) || isStreamPoll(c);
}
|
|
379
|
+
|
|
365
380
|
/**
|
|
366
381
|
* Derive the full profile set from a discovery payload.
|
|
367
382
|
*
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// Suite-local worker for the RFC 0035 §B wall-clock-timeout conformance probe.
|
|
2
|
+
//
|
|
3
|
+
// Instantiates ONE WASM module on a dedicated worker thread and runs its entry.
|
|
4
|
+
// The main thread (see `probeTimeout` in wasm-sandbox-probe.ts) races a
|
|
5
|
+
// kill-timer against this worker: a non-terminating module (the
|
|
6
|
+
// `misbehaving-timeout` fixture) never posts and is terminated at the wall-clock
|
|
7
|
+
// cap → `sandbox_timeout`; a well-behaved module posts its result first. Mirrors
|
|
8
|
+
// the reference host's `examples/hosts/wasm-sandbox/src/sandbox-worker.mjs`; the
|
|
9
|
+
// suite carries its own copy so the published conformance package is
|
|
10
|
+
// self-contained (no dependency on the reference host).
|
|
11
|
+
import { workerData, parentPort } from 'node:worker_threads';
|
|
12
|
+
|
|
13
|
+
const { wasmBytes, entry, arg, memoryMaxPages } = workerData;

/**
 * Map a thrown value onto the sandbox error code reported to the parent:
 * an out-of-bounds memory trap becomes `sandbox_memory_exceeded`, anything
 * else is a generic `sandbox_invocation_error`.
 */
const classifyFailure = (thrown) => {
  const message = thrown instanceof Error ? thrown.message : String(thrown);
  return /out of bounds memory access|memory access out of bounds/i.test(message)
    ? 'sandbox_memory_exceeded'
    : 'sandbox_invocation_error';
};

try {
  // Host memory capped at `memoryMaxPages`; the module receives it as `env.memory`.
  const memory = new WebAssembly.Memory({ initial: 1, maximum: memoryMaxPages });
  const compiled = new WebAssembly.Module(wasmBytes);
  const instance = new WebAssembly.Instance(compiled, { env: { memory } });
  const exported = instance.exports[entry];
  if (typeof exported === 'function') {
    // A non-terminating module never returns from this call — the parent's
    // kill-timer terminates the worker instead.
    const result = exported(arg);
    parentPort.postMessage({ ok: true, result: Number(result) });
  } else {
    parentPort.postMessage({ ok: false, code: 'sandbox_invocation_error' });
  }
} catch (err) {
  parentPort.postMessage({ ok: false, code: classifyFailure(err) });
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helpers for the RFC 0078 `toolCatalog` conformance scenarios.
|
|
3
|
+
* Lives in lib/ (not a `*.test.ts`) so scenarios import it via
|
|
4
|
+
* `../lib/toolCatalog.js`.
|
|
5
|
+
*
|
|
6
|
+
* Two surfaces:
|
|
7
|
+
* - the NORMATIVE reads (`GET /v1/tools` + `GET /v1/tools/{toolId}`, RFC 0078
|
|
8
|
+
* §B), exercised black-box; and
|
|
9
|
+
* - the host-sample tool-session seam (`POST /v1/host/sample/tools/session-run`),
|
|
10
|
+
* used to drive the §D `tool.session.{opened,closed}` bracket over the RFC
|
|
11
|
+
* 0064 call events so the ordering + content-free guarantees can be asserted
|
|
12
|
+
* against the test event-log seam. The seam is OPTIONAL — scenarios soft-skip
|
|
13
|
+
* on 404/405 (the reference session lifecycle is deferred per RFC 0078
|
|
14
|
+
* §Conformance).
|
|
15
|
+
*
|
|
16
|
+
* Gating uses the `toolCatalog.supported` (and `toolCatalog.sessionLifecycle`)
|
|
17
|
+
* capability flags from the live discovery doc (root-first per RFC 0073).
|
|
18
|
+
*
|
|
19
|
+
* @see RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
|
|
20
|
+
* @see spec/v1/tool-catalog.md
|
|
21
|
+
*/
|
|
22
|
+
import { driver } from './driver.js';
|
|
23
|
+
import { readCapabilityFamily } from './discovery-capabilities.js';
|
|
24
|
+
|
|
25
|
+
/**
 * Look up the `toolCatalog` capability family in the discovery document
 * (root-first per RFC 0073).
 *
 * @returns the advertised family when it is a non-null object, otherwise `null`.
 */
export async function readToolCatalogCap(): Promise<Record<string, unknown> | null> {
  const family = await readCapabilityFamily<Record<string, unknown>>('toolCatalog');
  if (!family || typeof family !== 'object') return null;
  return family;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Loosely-typed view of one tool-catalog entry as returned by the `/v1/tools`
 * reads. Every modelled field is optional; unknown extra keys are tolerated.
 */
export interface ToolDescriptor {
  toolId?: string;
  /** Presumably one of `TOOL_SOURCES` — scenarios are expected to assert this. */
  source?: string;
  /** Presumably one of `SAFETY_TIERS` — scenarios are expected to assert this. */
  safetyTier?: string;
  /** Any further, unmodelled keys present on the descriptor. */
  [k: string]: unknown;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Fetch the NORMATIVE tool catalog (RFC 0078 §B `GET /v1/tools`).
 *
 * @returns `null` when the host does not serve the endpoint (HTTP 404, 405 or
 *   501); otherwise the response body cast to a descriptor list, or an empty
 *   list when the response carried no JSON. The body is NOT validated here.
 */
export async function listTools(): Promise<ToolDescriptor[] | null> {
  const response = await driver.get('/v1/tools');
  switch (response.status) {
    case 404:
    case 405:
    case 501:
      return null;
    default: {
      const catalog = response.json as ToolDescriptor[] | undefined;
      return catalog ?? [];
    }
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Fetch a single tool by id (RFC 0078 §B `GET /v1/tools/{toolId}`).
 *
 * The HTTP status is returned alongside the body so a caller can distinguish a
 * 404 (absent / unauthorized / unadvertised) from a served descriptor.
 *
 * @param toolId the tool identifier; it is URI-component-encoded into the path.
 * @returns the response status plus the (unvalidated) response body.
 */
export async function getTool(
  toolId: string,
): Promise<{ status: number; descriptor: ToolDescriptor | undefined }> {
  const path = `/v1/tools/${encodeURIComponent(toolId)}`;
  const { status, json } = await driver.get(path);
  return { status, descriptor: json as ToolDescriptor | undefined };
}
|
|
56
|
+
|
|
57
|
+
/**
 * Shape of the body returned by the host-sample tool-session seam. All fields
 * are optional identifiers echoed back by the host.
 */
export interface ToolSessionResult {
  runId?: string;
  sessionId?: string;
  toolId?: string;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Run one tool-session interaction through the host-sample seam
 * (`POST /v1/host/sample/tools/session-run`, RFC 0078 §D).
 *
 * Per the seam's contract the host persists `tool.session.opened` → RFC 0064
 * call events → `tool.session.closed` to the durable run-event log, which is
 * read back via the run event-log read seam.
 *
 * @param body optional request body (defaults to `{}`), forwarded unchanged.
 * @returns `null` when the seam is unwired (HTTP 404/405); otherwise the parsed
 *   response body, or an empty object when the response carried no JSON.
 */
export async function driveToolSession(
  body: { toolId?: string } = {},
): Promise<ToolSessionResult | null> {
  const response = await driver.post('/v1/host/sample/tools/session-run', body);
  const seamUnwired = response.status === 404 || response.status === 405;
  if (seamUnwired) return null;
  const session = response.json as ToolSessionResult | undefined;
  return session ?? {};
}
|
|
74
|
+
|
|
75
|
+
/** Closed vocabulary of tool sources (RFC 0078 §C). */
export const TOOL_SOURCES: string[] = [
  'node-pack',
  'workflow',
  'mcp',
  'connector',
  'host-extension',
];

/** Closed vocabulary of safety tiers (RFC 0078 §C). */
export const SAFETY_TIERS: string[] = ['pure', 'read', 'write', 'exec'];

/**
 * Content keys that a `ToolDescriptor` / `tool.session.*` MUST NEVER carry
 * (SR-1): no credential/secret material.
 */
export const TOOL_CONTENT_FORBIDDEN: string[] = [
  'secret',
  'credential',
  'credentials',
  'token',
  'apiKey',
  'password',
];
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Portable WASM-sandbox probe — the suite-local reference for the RFC 0035 §B
|
|
3
|
+
* isolation invariants.
|
|
4
|
+
*
|
|
5
|
+
* The conformance suite is a standalone package, so it carries its own compact,
|
|
6
|
+
* server-free probe (no host; only `probeTimeout` spawns a worker) rather than importing a reference
|
|
7
|
+
* host's executor. It proves the invariants that hold *by construction* in any
|
|
8
|
+
* WebAssembly sandbox:
|
|
9
|
+
*
|
|
10
|
+
* - escape attempts (fs / env / network / process) and the capability gate are
|
|
11
|
+
* proven by STATIC inspection of `WebAssembly.Module.imports()` — a WASM
|
|
12
|
+
* module has no ambient host access, so a forbidden operation can only be a
|
|
13
|
+
* declared import; a sandbox refuses any import it did not grant, failing
|
|
14
|
+
* closed BEFORE instantiation.
|
|
15
|
+
* - the memory bound is proven by instantiating with a capped host memory and
|
|
16
|
+
* observing the engine trap on an access past the bound.
|
|
17
|
+
* - isolated-context is proven by instantiating the same module twice and
|
|
18
|
+
* observing no shared mutable state.
|
|
19
|
+
*
|
|
20
|
+
* The `timeout` invariant requires thread preemption (a worker kill-timer). It is
|
|
21
|
+
* proven at reference-impl tier by the WASM host's `test/sandbox.test.ts` and, in
|
|
22
|
+
* this suite, by the worker-based `probeTimeout` below; `probeSandboxed` does NOT
|
|
23
|
+
* exercise it (an in-process infinite loop cannot be interrupted).
|
|
24
|
+
*
|
|
25
|
+
* @see RFCS/0035-sandbox-execution-contract.md §B
|
|
26
|
+
* @see examples/hosts/wasm-sandbox/ (the reference host this mirrors)
|
|
27
|
+
*/
|
|
28
|
+
import { Worker } from 'node:worker_threads';
|
|
29
|
+
import { fileURLToPath } from 'node:url';
|
|
30
|
+
|
|
31
|
+
/** Failure codes a sandbox probe can report. */
export type SandboxErrorCode =
  | 'sandbox_memory_exceeded'
  | 'sandbox_timeout'
  | 'sandbox_capability_denied'
  | 'sandbox_escape_attempt'
  | 'sandbox_invocation_error';

/** Classification attached to a `sandbox_escape_attempt`, derived from the offending import's name. */
export type EscapeKind =
  | 'host-fs-escape'
  | 'host-env-leak'
  | 'network-escape'
  | 'host-process-escape';

/** Outcome of a single sandbox probe. */
export interface ProbeResult {
  /** `true` when the module's entry ran to completion; `false` on any failure. */
  readonly ok: boolean;
  /** The entry's return value coerced to a number (set on success). */
  readonly result?: number;
  /** The failure code (set when `ok` is `false`). */
  readonly code?: SandboxErrorCode;
  /** Set alongside `sandbox_escape_attempt` by the static import gate. */
  readonly escapeKind?: EscapeKind;
  /** Name of the un-granted `openwop` import; set alongside `sandbox_capability_denied`. */
  readonly requestedCapability?: string;
}
|
|
47
|
+
|
|
48
|
+
/** Size of one WebAssembly linear-memory page in bytes (64 KiB). */
const WASM_PAGE_BYTES = 64 * 1024;

// Hand-written, minimal typings for the Node-global `WebAssembly` value. The
// complete `WebAssembly.*` namespace types ship with lib.dom rather than
// @types/node; widening the suite's global lib would drag in conflicting DOM
// `fetch`/`BodyInit` types, so only the members this probe touches are declared
// and the value itself is read off `globalThis`.

/** Opaque handle for a compiled module. */
interface WAModule {
  readonly __wasmModule?: never;
}

/** An instantiated module; only its export table is used. */
interface WAInstance {
  readonly exports: Record<string, unknown>;
}

/** One entry of `WebAssembly.Module.imports()`. */
interface WAImportDescriptor {
  readonly module: string;
  readonly name: string;
}

interface WAModuleConstructor {
  new (bytes: Uint8Array): WAModule;
  imports(m: WAModule): readonly WAImportDescriptor[];
}

interface WAInstanceConstructor {
  new (m: WAModule, imports: Record<string, Record<string, unknown>>): WAInstance;
}

interface WAMemoryConstructor {
  new (descriptor: { initial: number; maximum: number }): unknown;
}

interface WANamespace {
  Module: WAModuleConstructor;
  Instance: WAInstanceConstructor;
  Memory: WAMemoryConstructor;
}

/** The runtime `WebAssembly` global, viewed through the local typings above. */
const WA = (globalThis as unknown as { WebAssembly: WANamespace }).WebAssembly;
|
|
71
|
+
|
|
72
|
+
/**
 * Classify a forbidden import by its name prefix.
 *
 * `fd_*` / `path_*` → filesystem, `environ_*` → environment, `sock_*` →
 * network; every other name falls back to `host-process-escape`.
 *
 * @param name the import's field name (the module namespace is not considered).
 */
function escapeKindFor(name: string): EscapeKind {
  const prefixRules: ReadonlyArray<readonly [RegExp, EscapeKind]> = [
    [/^fd_|^path_/, 'host-fs-escape'],
    [/^environ_/, 'host-env-leak'],
    [/^sock_/, 'network-escape'],
  ];
  for (const [pattern, kind] of prefixRules) {
    if (pattern.test(name)) return kind;
  }
  return 'host-process-escape';
}
|
|
78
|
+
|
|
79
|
+
/**
 * Static capability gate: inspect the module's declared imports WITHOUT
 * instantiating it and report the first one the sandbox would refuse.
 *
 * - `env.memory` is always host-provided and is skipped.
 * - an `openwop.*` import is accepted only when listed in `allowedHostCalls`;
 *   otherwise the result is `sandbox_capability_denied` naming the import.
 * - an import from any other namespace is a `sandbox_escape_attempt`,
 *   classified by `escapeKindFor`.
 *
 * @returns the refusal for the first un-granted import, or `null` when every
 *   import is host-provided.
 */
function gateImports(module: WAModule, allowedHostCalls: readonly string[]): ProbeResult | null {
  const granted = new Set(allowedHostCalls);
  const offending = WA.Module.imports(module).find(({ module: namespace, name }) => {
    if (namespace === 'env' && name === 'memory') return false;
    return !(namespace === 'openwop' && granted.has(name));
  });
  if (offending === undefined) return null;
  if (offending.module === 'openwop') {
    return { ok: false, code: 'sandbox_capability_denied', requestedCapability: offending.name };
  }
  return { ok: false, code: 'sandbox_escape_attempt', escapeKind: escapeKindFor(offending.name) };
}
|
|
92
|
+
|
|
93
|
+
/**
 * Probe one WASM module under the RFC 0035 sandbox contract, in-process.
 *
 * Steps: compile the bytes, statically gate the imports (`gateImports`), then —
 * only for a fully-granted module — instantiate it with a host memory capped at
 * `memoryLimitBytes` (rounded up to whole pages, minimum one page) and call the
 * entry export. Each allowed host call is stubbed with an identity function.
 *
 * This function does NOT spawn a worker, so callers MUST NOT pass a
 * non-terminating module: the call would never return.
 *
 * @param wasmBytes the module binary.
 * @param config the granted `openwop` host calls and the memory cap in bytes.
 * @param entry name of the export to invoke (default `'invoke'`).
 * @param arg numeric argument passed to the entry (default `0`).
 * @returns the entry's result on success; otherwise the gate's refusal,
 *   `sandbox_memory_exceeded` for an out-of-bounds memory trap, or
 *   `sandbox_invocation_error` for any other failure (including a compile
 *   error or a missing / non-function entry).
 */
export function probeSandboxed(
  wasmBytes: Uint8Array,
  config: { readonly allowedHostCalls: readonly string[]; readonly memoryLimitBytes: number },
  entry = 'invoke',
  arg = 0,
): ProbeResult {
  const fail = (code: SandboxErrorCode): ProbeResult => ({ ok: false, code });

  let compiled: WAModule;
  try {
    compiled = new WA.Module(wasmBytes);
  } catch {
    return fail('sandbox_invocation_error');
  }

  // Fail closed BEFORE instantiation when any import was not granted.
  const refusal = gateImports(compiled, config.allowedHostCalls);
  if (refusal) return refusal;

  const requestedPages = Math.ceil(config.memoryLimitBytes / WASM_PAGE_BYTES);
  const memoryMaxPages = Math.max(1, requestedPages);
  try {
    const memory = new WA.Memory({ initial: 1, maximum: memoryMaxPages });
    const openwop: Record<string, (x: number) => number> = {};
    config.allowedHostCalls.forEach((hostCall) => {
      openwop[hostCall] = (x: number): number => x;
    });
    const { exports } = new WA.Instance(compiled, { env: { memory }, openwop });
    const target = exports[entry];
    if (typeof target !== 'function') return fail('sandbox_invocation_error');
    const returned = (target as (a: number) => number)(arg);
    return { ok: true, result: Number(returned) };
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    const outOfBounds = /out of bounds memory access|memory access out of bounds/i.test(message);
    return fail(outOfBounds ? 'sandbox_memory_exceeded' : 'sandbox_invocation_error');
  }
}
|
|
131
|
+
|
|
132
|
+
/** Filesystem path of the sibling worker script, resolved relative to this module. */
const timeoutWorkerPath = fileURLToPath(new URL('./sandbox-timeout-worker.mjs', import.meta.url));

/**
 * Worker-based timeout probe — RFC 0035 §B invariant 6 (`node-pack-sandbox-timeout`).
 *
 * A same-thread timer cannot interrupt a synchronous WASM loop, so the module
 * runs on a dedicated worker thread while the calling thread arms a kill-timer.
 * Whichever happens first settles the promise; the worker is terminated in
 * every case and later events are ignored.
 *
 * @param wasmBytes the module binary, handed to the worker via `workerData`.
 * @param config the memory cap in bytes (rounded up to whole pages, minimum one
 *   page) and the wall-clock budget in milliseconds.
 * @param entry name of the export to invoke (default `'invoke'`).
 * @param arg numeric argument passed to the entry (default `0`).
 * @returns a promise that never rejects. It resolves to the worker's posted
 *   outcome, to `sandbox_timeout` when the budget elapses first, or to
 *   `sandbox_invocation_error` when the worker emits `error`.
 */
export function probeTimeout(
  wasmBytes: Uint8Array,
  config: { readonly memoryLimitBytes: number; readonly wallClockLimitMs: number },
  entry = 'invoke',
  arg = 0,
): Promise<ProbeResult> {
  const requestedPages = Math.ceil(config.memoryLimitBytes / WASM_PAGE_BYTES);
  const memoryMaxPages = Math.max(1, requestedPages);

  return new Promise<ProbeResult>((resolve) => {
    const worker = new Worker(timeoutWorkerPath, { workerData: { wasmBytes, entry, arg, memoryMaxPages } });
    let done = false;

    // First caller wins; later calls are no-ops.
    const settle = (outcome: ProbeResult): void => {
      if (!done) {
        done = true;
        clearTimeout(killTimer);
        void worker.terminate();
        resolve(outcome);
      }
    };
    const fail = (code: SandboxErrorCode): void => settle({ ok: false, code });

    const killTimer = setTimeout(() => fail('sandbox_timeout'), config.wallClockLimitMs);

    worker.on('message', (posted: { ok: boolean; result?: number; code?: SandboxErrorCode }) => {
      if (!posted.ok) {
        fail(posted.code ?? 'sandbox_invocation_error');
        return;
      }
      settle(posted.result === undefined ? { ok: true } : { ok: true, result: posted.result });
    });
    worker.on('error', () => fail('sandbox_invocation_error'));
  });
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* openwop-core-standard — operational-annex predicate derivation (RFC 0088).
|
|
3
|
+
*
|
|
4
|
+
* Always-on, server-free derivation probe. Verifies that `isCoreStandard`
|
|
5
|
+
* derives the Core Standard Profile floor correctly from representative
|
|
6
|
+
* discovery payloads (RFC 0088 §B / core-standard-profile.md §B):
|
|
7
|
+
* - a host meeting openwop-core + openwop-interrupts + a transport is core-standard;
|
|
8
|
+
* - a bare openwop-core host (no interrupts) is NOT core-standard — the floor is
|
|
9
|
+
* deliberately stricter than the v1 minimum;
|
|
10
|
+
* - a host with no event transport (supportedTransports: []) fails the floor;
|
|
11
|
+
* - the floor is the AND of three existing closed-catalog predicates (it composes,
|
|
12
|
+
* it does not redefine — so it is absent from deriveProfiles()).
|
|
13
|
+
*
|
|
14
|
+
* The LIVE aggregate-evidence assertion (does every §C floor scenario actually
|
|
15
|
+
* pass against a host claiming the profile?) is the `Active → Accepted` step per
|
|
16
|
+
* RFC 0088 §C — already satisfied by MyndHyve + all reference hosts, asserted via
|
|
17
|
+
* each constituent scenario, and deferred here. This scenario asserts the
|
|
18
|
+
* discovery-predicate derivation only.
|
|
19
|
+
*
|
|
20
|
+
* Spec references:
|
|
21
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/core-standard-profile.md
|
|
22
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0088-core-standard-profile.md
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { describe, it, expect } from 'vitest';
|
|
26
|
+
import { isCoreStandard, isCore, deriveProfiles } from '../lib/profiles.js';
|
|
27
|
+
|
|
28
|
+
/** Build an assertion message of the form `<specRef> — <requirement>`. */
function why(specRef: string, requirement: string): string {
  return [specRef, requirement].join(' — ');
}

/** Baseline discovery payload that the scenarios below spread and override. */
const CORE = {
  protocolVersion: '1.0',
  supportedEnvelopes: ['clarification.request'],
  schemaVersions: {},
  limits: {
    clarificationRounds: 1,
    schemaRounds: 1,
    envelopesPerTurn: 1,
  },
};
|
|
36
|
+
|
|
37
|
+
// Server-free checks of the RFC 0088 §B floor predicate against hand-built
// discovery payloads derived from CORE.
describe('core-standard-profile: floor predicate (RFC 0088 §B, server-free)', function () {
  it('a host meeting core + interrupts + a default transport is core-standard', function () {
    // With supportedTransports omitted, both stream predicates default to true (profiles.md).
    const discovery = { ...CORE };
    const verdict = isCoreStandard(discovery);
    expect(verdict, why('core-standard-profile.md §B', 'core + interrupts + transport ⇒ core-standard')).toBe(true);
  });

  it('a bare openwop-core host without interrupts is NOT core-standard', function () {
    // Meets the openwop-core minimum but lacks clarification.request, so openwop-interrupts fails.
    const discovery = { ...CORE, supportedEnvelopes: ['schema.request'] };
    expect(isCore(discovery), why('profiles.md §openwop-core', 'still a valid openwop-core host')).toBe(true);
    expect(isCoreStandard(discovery), why('core-standard-profile.md §B', 'the floor is stricter than the v1 minimum')).toBe(false);
  });

  it('a host advertising no event transport fails the floor', function () {
    const discovery = { ...CORE, supportedTransports: [] as string[] };
    const verdict = isCoreStandard(discovery);
    expect(verdict, why('core-standard-profile.md §B', 'at least one event transport is required')).toBe(false);
  });

  it('a host advertising the rest transport satisfies the transport term', function () {
    const discovery = { ...CORE, supportedTransports: ['rest'] };
    const verdict = isCoreStandard(discovery);
    expect(verdict, why('core-standard-profile.md §B', 'rest transport ⇒ stream term satisfied')).toBe(true);
  });

  it('a non-1.x host is not core-standard', function () {
    const discovery = { ...CORE, protocolVersion: '2.0' };
    const verdict = isCoreStandard(discovery);
    expect(verdict, why('profiles.md §openwop-core', 'core-standard implies openwop-core (1.x)')).toBe(false);
  });
});
|
|
66
|
+
|
|
67
|
+
// The annex composes existing predicates; it must never show up as a
// closed-catalog profile in deriveProfiles().
describe('core-standard-profile: composes, does not redefine (RFC 0088 §A, server-free)', function () {
  it('openwop-core-standard is an annex, NOT a closed-catalog profile (absent from deriveProfiles)', function () {
    const discovery = { ...CORE };
    const derived = deriveProfiles(discovery) as readonly string[];
    const listed = derived.includes('openwop-core-standard');
    expect(listed, why('core-standard-profile.md §A', 'the annex is not a closed-catalog predicate')).toBe(false);
  });
});
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Credential-audience-bound egress (RFC 0079 §C) — behavioral KEYSTONE.
|
|
3
|
+
*
|
|
4
|
+
* Gated on `httpClient.egressPolicy.supported` (root-first per RFC 0073).
|
|
5
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
6
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`. The always-on wire-shape coverage lives in
|
|
7
|
+
* `egress-provenance-shape.test.ts`; this asserts host BEHAVIOR — the §C
|
|
8
|
+
* confused-deputy MUST that backs the `egress-credential-audience-bound`
|
|
9
|
+
* SECURITY invariant:
|
|
10
|
+
*
|
|
11
|
+
* 1. OUT-OF-AUDIENCE — a host-issued credential bound to audience A, used for
|
|
12
|
+
* an egress to destination B (B ∉ A), MUST be `denied` or `downgraded`
|
|
13
|
+
* with `reason: "out-of-audience"`, and the credential MUST NOT be attached
|
|
14
|
+
* to the egress (`credentialAttached !== true`).
|
|
15
|
+
* 2. PROVENANCE-UNEVALUABLE — an egress whose credential provenance cannot be
|
|
16
|
+
* evaluated MUST be `denied` with `reason: "provenance-unevaluable"`
|
|
17
|
+
* (fail-closed, not fail-open).
|
|
18
|
+
*
|
|
19
|
+
* The decision is driven through the OPTIONAL host-sample egress seam
|
|
20
|
+
* (`POST /v1/host/sample/egress/decide`) — soft-skip on 404/405. The decision
|
|
21
|
+
* reason is a CLOSED enum so a host cannot spill a blocked URL/host into a
|
|
22
|
+
* free-form string (SR-1, asserted in `egress-decision-content-free.test.ts`).
|
|
23
|
+
*
|
|
24
|
+
* Spec references:
|
|
25
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/host-capabilities.md (§"Credential provenance + egress policy")
|
|
26
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0079-credential-provenance-and-egress-policy.md
|
|
27
|
+
* - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (egress-credential-audience-bound)
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import { describe, it, expect } from 'vitest';
|
|
31
|
+
import { driver } from '../lib/driver.js';
|
|
32
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
33
|
+
import { readEgressPolicyCap, driveEgress, EGRESS_DECISIONS, EGRESS_REASONS } from '../lib/egressPolicy.js';
|
|
34
|
+
|
|
35
|
+
// Behavioral scenario for RFC 0079 §C: an out-of-audience egress is denied or
// downgraded without the credential attached, and unevaluable provenance fails
// closed. Skips silently when the capability is unadvertised or the seam is absent.
describe('egress-audience-binding (RFC 0079 §C)', () => {
  /** Assert `condition` is true, reporting the spec reference + requirement on failure. */
  const mustHold = (condition: boolean, specRef: string, requirement: string): void => {
    expect(condition, driver.describe(specRef, requirement)).toBe(true);
  };

  it('denies/downgrades an out-of-audience egress without attaching the credential, and fails closed on unevaluable provenance', async () => {
    const capability = await readEgressPolicyCap();
    const advertised = capability?.supported === true;
    if (!behaviorGate('openwop-egress-audience-binding', advertised)) return;

    // Leg 1 — out-of-audience: deny|downgrade, credential NOT attached.
    const outOfAudience = await driveEgress({ scenario: 'out-of-audience' });
    if (outOfAudience === null) return; // seam absent — soft-skip the whole behavior

    mustHold(
      outOfAudience.decision === 'denied' || outOfAudience.decision === 'downgraded',
      'host-capabilities.md §"Credential provenance + egress policy"',
      'an out-of-audience egress MUST be denied or downgraded',
    );
    mustHold(
      typeof outOfAudience.decision === 'string' && EGRESS_DECISIONS.includes(outOfAudience.decision),
      'run-event-payloads.schema.json#egressDecided',
      'decision MUST be in the closed enum',
    );
    mustHold(
      outOfAudience.reason === 'out-of-audience',
      'RFC 0079 §C',
      'an out-of-audience denial MUST carry reason "out-of-audience"',
    );
    mustHold(
      outOfAudience.credentialAttached !== true,
      'SECURITY/invariants.yaml egress-credential-audience-bound',
      'the host MUST NOT attach a credential whose audience excludes the destination (confused-deputy)',
    );

    // Leg 2 — provenance-unevaluable: fail closed (deny).
    const unevaluable = await driveEgress({ scenario: 'provenance-unevaluable' });
    if (unevaluable === null) return;

    mustHold(
      unevaluable.decision === 'denied',
      'RFC 0079 §C',
      'an egress with unevaluable provenance MUST fail closed (denied)',
    );
    mustHold(
      unevaluable.reason === 'provenance-unevaluable',
      'RFC 0079 §C',
      'a provenance-unevaluable denial MUST carry reason "provenance-unevaluable"',
    );
    mustHold(
      typeof unevaluable.reason === 'string' && EGRESS_REASONS.includes(unevaluable.reason),
      'run-event-payloads.schema.json#egressDecided',
      'reason MUST be in the closed enum',
    );
    mustHold(
      unevaluable.credentialAttached !== true,
      'SECURITY/invariants.yaml egress-credential-audience-bound',
      'a fail-closed egress MUST NOT attach the credential',
    );
  });
});
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Egress-decision secret non-leak (RFC 0079 §F / SR-1) — behavioral.
|
|
3
|
+
*
|
|
4
|
+
* Gated on `httpClient.egressPolicy.supported` (root-first per RFC 0073).
|
|
5
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
6
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`. Backs the `egress-decision-no-secret-leak`
|
|
7
|
+
* guarantee: an `egress.decided` payload is metadata-only — it MUST NOT carry
|
|
8
|
+
* the credential value, nor spill the blocked URL/host/header/body into a
|
|
9
|
+
* free-form field, and its `reason` MUST be drawn from the CLOSED vocabulary
|
|
10
|
+
* (so a host cannot smuggle a blocked destination into the reason string).
|
|
11
|
+
*
|
|
12
|
+
* Drives the host-sample seam with a `canary` credential whose value is a known
|
|
13
|
+
* sentinel and asserts the sentinel never surfaces in the decision
|
|
14
|
+
* (`canaryLeaked !== true`) and that the payload carries none of the forbidden
|
|
15
|
+
* content keys. Soft-skips on 404/405.
|
|
16
|
+
*
|
|
17
|
+
* Spec references:
|
|
18
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/host-capabilities.md (§"Credential provenance + egress policy")
|
|
19
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0079-credential-provenance-and-egress-policy.md
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { describe, it, expect } from 'vitest';
|
|
23
|
+
import { driver } from '../lib/driver.js';
|
|
24
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
25
|
+
import { readEgressPolicyCap, driveEgress, EGRESS_REASONS, EGRESS_CONTENT_FORBIDDEN } from '../lib/egressPolicy.js';
|
|
26
|
+
|
|
27
|
+
describe('egress-decision-content-free (RFC 0079 §F / SR-1)', () => {
  // Drives the `canary` scenario and checks three things on the returned
  // decision: the canary was not reported as leaked, none of the forbidden
  // content keys are present, and any `reason` is from the exported vocabulary.
  it('never leaks the credential value or the blocked destination into the egress.decided payload', async () => {
    // Gate on the advertised capability before touching the egress seam.
    const egressCap = await readEgressPolicyCap();
    const advertised = egressCap?.supported === true;
    if (!behaviorGate('openwop-egress-decision-content-free', advertised)) return;

    const decision = await driveEgress({ scenario: 'canary' });
    // A null result means the seam is absent: skip.
    if (decision === null) return;

    // 1) The canary sentinel must not be flagged as leaked. Only an explicit
    //    `true` counts as a leak.
    const canaryContained = decision.canaryLeaked !== true;
    expect(
      canaryContained,
      driver.describe('RFC 0079 §F (SR-1)', 'the credential value (canary) MUST NOT leak into any observable surface'),
    ).toBe(true);

    // 2) None of the forbidden content keys may exist on the decision payload.
    for (const contentKey of EGRESS_CONTENT_FORBIDDEN) {
      const keyAbsent = !(contentKey in decision);
      expect(
        keyAbsent,
        driver.describe('RFC 0079 §F (SR-1)', `egress.decided MUST be content-free (no ${contentKey})`),
      ).toBe(true);
    }

    // 3) When a reason is present it must come from the closed vocabulary, so
    //    it cannot be used as a free-form channel. An absent reason is not
    //    checked.
    if (decision.reason === undefined) return;
    const reasonInVocabulary =
      typeof decision.reason === 'string' && EGRESS_REASONS.includes(decision.reason);
    expect(
      reasonInVocabulary,
      driver.describe('run-event-payloads.schema.json#egressDecided', 'reason MUST be in the closed enum (no free-form spill)'),
    ).toBe(true);
  });
});
|
|
@@ -49,7 +49,7 @@
|
|
|
49
49
|
import { describe, it, expect } from 'vitest';
|
|
50
50
|
import { driver } from '../lib/driver.js';
|
|
51
51
|
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
52
|
-
import {
|
|
52
|
+
import { pollUntil } from '../lib/polling.js';
|
|
53
53
|
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
54
54
|
|
|
55
55
|
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
@@ -111,12 +111,17 @@ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral
|
|
|
111
111
|
expect(create.status).toBe(201);
|
|
112
112
|
const runId = (create.json as { runId: string }).runId;
|
|
113
113
|
|
|
114
|
-
|
|
115
|
-
//
|
|
116
|
-
//
|
|
117
|
-
//
|
|
118
|
-
//
|
|
119
|
-
|
|
114
|
+
// RFC 0039 confidence escalation SUSPENDS the parent (a `waiting-*` status)
|
|
115
|
+
// — it is NOT a terminal `completed`/`failed`/`cancelled`. So poll until the
|
|
116
|
+
// run either suspends or settles; polling only for terminal statuses
|
|
117
|
+
// (`pollUntilTerminal`, whose set is {completed,failed,cancelled}) would time
|
|
118
|
+
// out before the suspension is ever observed — the cause of the prior flake.
|
|
119
|
+
const terminal = await pollUntil(runId, (s) => {
|
|
120
|
+
const st = s.status as string;
|
|
121
|
+
return st.startsWith('waiting-') || st === 'completed' || st === 'failed' || st === 'cancelled';
|
|
122
|
+
});
|
|
123
|
+
// RFC 0039 §A gives hosts a choice: clarify-kind escalation
|
|
124
|
+
// (→ waiting-clarification) OR escalate-kind approval (→ waiting-approval).
|
|
120
125
|
//
|
|
121
126
|
// RFC 0044 routing: when the host advertises
|
|
122
127
|
// `capabilities.multiAgent.executionModel.confidenceEscalationInterruptKind`
|