@openwop/openwop-conformance 1.13.0 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/README.md +2 -2
- package/api/openapi.yaml +60 -0
- package/coverage.md +11 -1
- package/fixtures/wasm-sandbox/isolation-global.wasm +0 -0
- package/fixtures/wasm-sandbox/isolation-global.wat +6 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wat +5 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wat +4 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wat +2 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wat +3 -0
- package/package.json +1 -1
- package/src/lib/discovery-capabilities.ts +18 -19
- package/src/lib/egressPolicy.ts +76 -0
- package/src/lib/profiles.ts +15 -0
- package/src/lib/sandbox-timeout-worker.mjs +31 -0
- package/src/lib/toolCatalog.ts +81 -0
- package/src/lib/wasm-sandbox-probe.ts +168 -0
- package/src/scenarios/core-standard-profile.test.ts +75 -0
- package/src/scenarios/egress-audience-binding.test.ts +81 -0
- package/src/scenarios/egress-decision-content-free.test.ts +57 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +12 -7
- package/src/scenarios/prompt-resolution-chain-event.test.ts +113 -0
- package/src/scenarios/sandbox-wasm-isolation.test.ts +98 -0
- package/src/scenarios/sandbox-wasm-timeout.test.ts +40 -0
- package/src/scenarios/tool-catalog-projection.test.ts +120 -0
- package/src/scenarios/tool-session-lifecycle.test.ts +105 -0
- package/src/scenarios/workspace-cross-tenant-isolation-blackbox.test.ts +89 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* prompt-resolution-chain-event — RFC 0029 layer precedence on the PRODUCTION wire.
|
|
3
|
+
*
|
|
4
|
+
* The black-box, production-path counterpart to the three seam-driven
|
|
5
|
+
* `prompt-resolution-chain-{node-wins,agent-intrinsic,fallback-cascade}.test.ts`
|
|
6
|
+
* scenarios. Instead of the synchronous `POST /v1/host/sample/prompt/resolve`
|
|
7
|
+
* seam, this creates a real run from a prompt-exercising fixture, reads the
|
|
8
|
+
* run's DURABLE event log via the NORMATIVE `GET /v1/runs/{runId}/events/poll`
|
|
9
|
+
* endpoint, and asserts the `agent.promptResolved` event carries the full
|
|
10
|
+
* layer-by-layer precedence record (`spec/v1/prompts.md` §"Resolution chain") —
|
|
11
|
+
* no `/v1/host/sample/*` seam.
|
|
12
|
+
*
|
|
13
|
+
* The `agentPromptResolved` payload (`schemas/run-event-payloads.schema.json`)
|
|
14
|
+
* already REQUIRES `chain[]` with one `applied: true` entry + the full-traversal
|
|
15
|
+
* MUST, so the wire is already capable of conveying precedence without the seam.
|
|
16
|
+
* This is the "replace seam-gated proofs with black-box production-path
|
|
17
|
+
* conformance" step (independent-audit acceptance-bar item 3) for RFC 0029: once
|
|
18
|
+
* a host emits `agent.promptResolved`, prompt-chain precedence is proven on the
|
|
19
|
+
* production wire and the surface graduates INTO the `openwop-core-standard`
|
|
20
|
+
* floor (RFC 0088 §D Lever-2 → floor).
|
|
21
|
+
*
|
|
22
|
+
* Gating: soft-skips unless `capabilities.prompts.supported` AND the host
|
|
23
|
+
* actually emits `agent.promptResolved` for the run (emission is staged per
|
|
24
|
+
* RFC 0029 / RFC 0021 — a host advertising prompts might not yet emit the event).
|
|
25
|
+
*
|
|
26
|
+
* @see RFCS/0029-prompt-override-hierarchy.md
|
|
27
|
+
* @see spec/v1/prompts.md §"Resolution chain (normative)"
|
|
28
|
+
*/
|
|
29
|
+
import { describe, it, expect } from 'vitest';
|
|
30
|
+
import { driver } from '../lib/driver.js';
|
|
31
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
32
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
33
|
+
|
|
34
|
+
/** Workflow id posted to `POST /v1/runs` to start the prompt-exercising run. */
const PROMPT_FIXTURE_ID = 'conformance-prompt-end-to-end';

/** Layer names a `chain[]` entry may carry; any other value fails the scenario. */
const VALID_LAYERS = new Set<string>([
  'run-configurable',
  'node',
  'agent-intrinsic',
  'agent-overrides',
  'agent-library-default',
  'workflow-defaults',
  'host-defaults',
]);
|
|
39
|
+
|
|
40
|
+
/** One attempted layer in a resolution chain; fields stay `unknown` until the test narrows them. */
interface ChainEntry {
  layer?: unknown;
  source?: unknown;
  applied?: unknown;
}

/** The parts of an `agent.promptResolved` payload this scenario inspects. */
interface PromptResolvedPayload {
  chain?: ChainEntry[];
  resolved?: unknown;
}

/** Minimal view of an event returned by the events poll. */
interface RawEvent {
  type?: string;
  payload?: PromptResolvedPayload;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Fetches `/.well-known/openwop` and reports whether the `prompts` capability
 * family is advertised with `supported === true`.
 *
 * @returns `true` only when the flag is exactly `true`; a missing document,
 *   a missing family, or any other value yields `false`.
 */
async function promptsSupported(): Promise<boolean> {
  const discovery = await driver.get('/.well-known/openwop');
  const doc = discovery.json as Record<string, unknown> | undefined;
  const family = capabilityFamily(doc, 'prompts');
  return family?.supported === true;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Black-box scenario: start a run from the prompt fixture, wait for it to reach
 * a terminal state, read its events through `GET /v1/runs/{runId}/events/poll`,
 * and check every `agent.promptResolved` event for a well-formed resolution chain.
 *
 * Soft-skips (plain `return`) when the capability is not advertised, the run is
 * not accepted with 201, no `runId` is returned, or the log holds no
 * `agent.promptResolved` event.
 */
describe('prompt-resolution-chain-event (black-box): agent.promptResolved carries the precedence record (RFC 0029)', () => {
  it('the production agent.promptResolved event records the full four-layer resolution chain', async () => {
    const advertised = await promptsSupported();
    if (!advertised) return; // capability not advertised — skip

    const created = await driver.post('/v1/runs', { workflowId: PROMPT_FIXTURE_ID });
    if (created.status !== 201) {
      // Fixture not seeded / run not accepted — not a prompt-chain failure.
      // eslint-disable-next-line no-console
      console.warn(`[prompt-resolution-chain-event] POST /v1/runs for ${PROMPT_FIXTURE_ID} returned ${created.status}; skipping the production-path assertion`);
      return;
    }

    const { runId } = created.json as { runId?: string };
    if (!runId) return;
    await pollUntilTerminal(runId);

    const eventsUrl = `/v1/runs/${encodeURIComponent(runId)}/events/poll`;
    const polled = await driver.get(eventsUrl);
    const body = polled.json as { events?: RawEvent[] } | undefined;
    const log = body?.events ?? [];
    const promptEvents = log.filter((candidate) => candidate.type === 'agent.promptResolved');
    if (promptEvents.length === 0) {
      // Host advertises prompts but does not yet emit agent.promptResolved
      // (RFC 0029 emission is staged) — soft-skip the behavioral assertion.
      // eslint-disable-next-line no-console
      console.warn('[prompt-resolution-chain-event] host emitted no agent.promptResolved event; skipping (RFC 0029 emission staged)');
      return;
    }

    for (const event of promptEvents) {
      const payload = event.payload;
      const chain = payload?.chain;
      expect(
        Array.isArray(chain) && chain.length > 0,
        driver.describe('prompts.md §Resolution chain', 'agent.promptResolved MUST carry a non-empty chain[] of attempted layers'),
      ).toBe(true);
      // The assertion above has already thrown if `chain` is not a non-empty array.
      const layers = chain as ChainEntry[];

      // Shape check: each entry names a known layer and carries a boolean `applied`.
      for (const entry of layers) {
        const { layer } = entry;
        expect(
          typeof layer === 'string' && VALID_LAYERS.has(layer),
          driver.describe('prompts.md §Resolution chain', `each chain entry MUST name a valid layer, got ${String(layer)}`),
        ).toBe(true);
        expect(typeof entry.applied, driver.describe('prompts.md §Resolution chain', 'each chain entry MUST carry a boolean `applied`')).toBe('boolean');
      }

      // Zero or one winning layer.
      const winners = layers.filter((entry) => entry.applied === true);
      expect(
        winners.length <= 1,
        driver.describe('prompts.md §Resolution chain', 'AT MOST one chain entry MAY be applied: true (the winning layer)'),
      ).toBe(true);

      if (winners.length === 1) {
        // `resolved` must be the winning entry's `source`.
        expect(
          payload?.resolved,
          driver.describe('run-event-payloads.schema.json agentPromptResolved', '`resolved` MUST mirror the applied chain entry\'s `source`'),
        ).toBe(winners[0]?.source);
      } else {
        // No winner: `resolved` must be absent or null.
        expect(
          payload?.resolved === null || payload?.resolved === undefined,
          driver.describe('run-event-payloads.schema.json agentPromptResolved', 'with no applied layer, `resolved` MUST be null'),
        ).toBe(true);
      }
    }
  });
});
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RFC 0035 §B sandbox isolation — portable, server-free behavioral conformance.
|
|
3
|
+
*
|
|
4
|
+
* Drives the committed `fixtures/wasm-sandbox/*.wasm` modules through the
|
|
5
|
+
* suite-local `probeSandboxed` reference (see `../lib/wasm-sandbox-probe.ts`).
|
|
6
|
+
* Every assertion exercises real WebAssembly isolation — there are NO `it.todo`
|
|
7
|
+
* placeholders and NO mocks. These are the behavioral probes that graduate the
|
|
8
|
+
* cross-runtime `node-pack-sandbox-*` invariants from reference-impl to protocol
|
|
9
|
+
* tier (`SECURITY/invariants.yaml`).
|
|
10
|
+
*
|
|
11
|
+
* Coverage (six invariants, proven by construction, server-free):
|
|
12
|
+
* - node-pack-sandbox-fs-gated / -no-env / -network-gated / -no-process:
|
|
13
|
+
* a forbidden operation can only be a DECLARED IMPORT; the probe statically
|
|
14
|
+
* refuses any un-granted import → `sandbox_escape_attempt` + `escapeKind`.
|
|
15
|
+
* - capability gate: an un-granted `openwop.*` import → `sandbox_capability_denied`.
|
|
16
|
+
* - node-pack-sandbox-memory-cap: an access past the host memory bound traps →
|
|
17
|
+
* `sandbox_memory_exceeded`.
|
|
18
|
+
* - node-pack-sandbox-isolated-context: a fresh instance per invocation carries
|
|
19
|
+
* no state across calls.
|
|
20
|
+
*
|
|
21
|
+
* `node-pack-sandbox-timeout` requires thread preemption (a worker kill-timer) and
|
|
22
|
+
* is therefore proven separately — by `sandbox-wasm-timeout.test.ts` (worker-driven) and by `examples/hosts/wasm-sandbox/test/sandbox.test.ts`
|
|
23
|
+
* (real worker kill). `node-pack-sandbox-no-eval` is JS-runtime-specific (WASM has
|
|
24
|
+
* no `eval`) and is exempt per RFC 0035.
|
|
25
|
+
*
|
|
26
|
+
* Spec reference:
|
|
27
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0035-sandbox-execution-contract.md
|
|
28
|
+
*/
|
|
29
|
+
import { describe, it, expect } from 'vitest';
|
|
30
|
+
import { readFileSync } from 'node:fs';
|
|
31
|
+
import { join } from 'node:path';
|
|
32
|
+
import { FIXTURES_DIR } from '../lib/paths.js';
|
|
33
|
+
import { probeSandboxed } from '../lib/wasm-sandbox-probe.js';
|
|
34
|
+
|
|
35
|
+
/** Builds an assertion message of the form `<specRef> — <requirement>`. */
const why = (specRef: string, requirement: string): string => {
  return specRef + ' — ' + requirement;
};
|
|
36
|
+
// Directory of the wasm-sandbox fixtures, resolved under FIXTURES_DIR.
const dir = join(FIXTURES_DIR, 'wasm-sandbox');
|
|
37
|
+
/**
 * Synchronously reads `<dir>/<name>.wasm` and returns its bytes as a new `Uint8Array`.
 *
 * @param name Fixture base name, without the `.wasm` extension.
 */
const fix = (name: string): Uint8Array => {
  const wasmPath = join(dir, `${name}.wasm`);
  const contents = readFileSync(wasmPath);
  return new Uint8Array(contents);
};
|
|
38
|
+
/** Default probe options: no host calls granted, 2 MiB memory limit. */
const BASE: { allowedHostCalls: string[]; memoryLimitBytes: number } = {
  allowedHostCalls: [],
  memoryLimitBytes: 2 * 1024 * 1024,
};
|
|
39
|
+
|
|
40
|
+
/**
 * Positive controls: a pure module and a module whose host call is granted must
 * both run and return the argument they were invoked with.
 */
describe('sandbox-wasm-isolation: positive controls (RFC 0035 §B, server-free)', () => {
  it('a well-behaved pure module runs and returns its input', () => {
    const echoModule = fix('well-behaved-echo');
    const outcome = probeSandboxed(echoModule, BASE, 'invoke', 42);
    expect(outcome.ok, why('RFC 0035 §B', 'a pure-compute module runs')).toBe(true);
    expect(outcome.result).toBe(42);
  });

  it('a granted host capability is callable when in allowedHostCalls', () => {
    const fetchModule = fix('well-behaved-host-fetch');
    // Same limits as BASE, but with the `fetch` host call granted.
    const withFetchGrant = { ...BASE, allowedHostCalls: ['fetch'] };
    const outcome = probeSandboxed(fetchModule, withFetchGrant, 'invoke', 7);
    expect(outcome.ok, why('RFC 0035 §B invariant 7', 'a granted openwop.* capability is callable')).toBe(true);
    expect(outcome.result).toBe(7);
  });
});
|
|
53
|
+
|
|
54
|
+
/**
 * Table-driven escape checks: each misbehaving fixture, probed with no grants,
 * must be rejected with `sandbox_escape_attempt` and the matching `escapeKind`.
 */
describe('sandbox-wasm-isolation: escape attempts fail closed (RFC 0035 §B 1–4, server-free)', () => {
  const table: ReadonlyArray<{ readonly fixture: string; readonly escapeKind: string; readonly invariant: string }> = [
    { fixture: 'misbehaving-fs', escapeKind: 'host-fs-escape', invariant: 'node-pack-sandbox-fs-gated' },
    { fixture: 'misbehaving-env', escapeKind: 'host-env-leak', invariant: 'node-pack-sandbox-no-env' },
    { fixture: 'misbehaving-network', escapeKind: 'network-escape', invariant: 'node-pack-sandbox-network-gated' },
    { fixture: 'misbehaving-process', escapeKind: 'host-process-escape', invariant: 'node-pack-sandbox-no-process' },
  ];

  // One test per row, registered in table order.
  table.forEach(({ fixture, escapeKind, invariant }) => {
    it(`${invariant}: ${fixture} → sandbox_escape_attempt (${escapeKind})`, () => {
      const verdict = probeSandboxed(fix(fixture), BASE);
      expect(verdict.code, why('RFC 0035 §B', `${invariant} fails closed before instantiation`)).toBe('sandbox_escape_attempt');
      expect(verdict.escapeKind).toBe(escapeKind);
    });
  });
});
|
|
69
|
+
|
|
70
|
+
/**
 * Capability gate: a module probed without the grant it needs must be refused
 * with `sandbox_capability_denied`, and the result must name the capability.
 */
describe('sandbox-wasm-isolation: capability gate (RFC 0035 §B 7, server-free)', () => {
  it('an un-granted openwop capability is denied with its name', () => {
    const gateModule = fix('misbehaving-capability-gate');
    const verdict = probeSandboxed(gateModule, BASE);
    expect(verdict.code, why('RFC 0035 §B invariant 7', 'undeclared host capability fails closed')).toBe('sandbox_capability_denied');
    expect(verdict.requestedCapability).toBe('privileged');
  });

  it('host-fetch WITHOUT the grant is denied (the gate works both directions)', () => {
    // Same fixture as the granted positive control, probed here with no grants.
    const fetchModule = fix('well-behaved-host-fetch');
    const verdict = probeSandboxed(fetchModule, BASE);
    expect(verdict.code).toBe('sandbox_capability_denied');
    expect(verdict.requestedCapability).toBe('fetch');
  });
});
|
|
83
|
+
|
|
84
|
+
/**
 * Memory cap: probing the `misbehaving-memory` fixture under BASE limits must
 * fail with `sandbox_memory_exceeded`.
 */
describe('sandbox-wasm-isolation: memory cap (RFC 0035 §B 5, server-free)', () => {
  it('node-pack-sandbox-memory-cap: access beyond the host memory bound is sandbox_memory_exceeded', () => {
    const memoryModule = fix('misbehaving-memory');
    const verdict = probeSandboxed(memoryModule, BASE);
    expect(verdict.ok, why('RFC 0035 §B invariant 5', 'memory bound is engine-enforced')).toBe(false);
    expect(verdict.code).toBe('sandbox_memory_exceeded');
  });
});
|
|
91
|
+
|
|
92
|
+
/**
 * Isolated context: `bump` on one probe must return 1, and a following `read`
 * on a separate probe of the same bytes must return 0.
 */
describe('sandbox-wasm-isolation: isolated context (RFC 0035 §B 8, server-free)', () => {
  it('node-pack-sandbox-isolated-context: each invocation gets a fresh instance (no cross-pack state)', () => {
    const isolationModule = fix('isolation-global');

    const bumped = probeSandboxed(isolationModule, BASE, 'bump');
    expect(bumped.result, why('RFC 0035 §B invariant 8', 'a fresh instance starts at 0')).toBe(1);

    // Second probe of the same module bytes: the earlier bump must not be visible.
    const reread = probeSandboxed(isolationModule, BASE, 'read');
    expect(reread.result, why('RFC 0035 §B invariant 8', 'no state leaks across invocations')).toBe(0);
  });
});
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RFC 0035 §B invariant 6 — sandbox wall-clock timeout, worker-driven + server-free.
|
|
3
|
+
*
|
|
4
|
+
* The worker-thread counterpart to `sandbox-wasm-isolation.test.ts` (which proves
|
|
5
|
+
* the other six cross-runtime invariants in-process but deliberately cannot run a
|
|
6
|
+
* non-terminating module). A wall-clock cap can only be enforced by THREAD
|
|
7
|
+
* PREEMPTION — a same-thread timer cannot interrupt a synchronous WASM loop — so
|
|
8
|
+
* `probeTimeout` (see `../lib/wasm-sandbox-probe.ts`) spawns a worker running the
|
|
9
|
+
* committed `misbehaving-timeout.wasm` fixture and races a main-thread kill-timer.
|
|
10
|
+
*
|
|
11
|
+
* This is the worker-driven conformance probe that graduates
|
|
12
|
+
* `node-pack-sandbox-timeout` from reference-impl to protocol tier (the prior gap:
|
|
13
|
+
* the cap was proven only host-internally by the WASM host's `test/sandbox.test.ts`).
|
|
14
|
+
*
|
|
15
|
+
* @see RFCS/0035-sandbox-execution-contract.md §B invariant 6
|
|
16
|
+
* @see SECURITY/invariants.yaml node-pack-sandbox-timeout
|
|
17
|
+
*/
|
|
18
|
+
import { describe, it, expect } from 'vitest';
|
|
19
|
+
import { readFileSync } from 'node:fs';
|
|
20
|
+
import { join } from 'node:path';
|
|
21
|
+
import { FIXTURES_DIR } from '../lib/paths.js';
|
|
22
|
+
import { probeTimeout } from '../lib/wasm-sandbox-probe.js';
|
|
23
|
+
|
|
24
|
+
/** Joins a spec reference and a requirement into one assertion message. */
const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
|
|
25
|
+
// Directory of the wasm-sandbox fixtures, resolved under FIXTURES_DIR.
const dir = join(FIXTURES_DIR, 'wasm-sandbox');
|
|
26
|
+
/**
 * Loads the fixture `<dir>/<name>.wasm` from disk and returns a fresh
 * `Uint8Array` holding its bytes.
 *
 * @param name Fixture base name, without the `.wasm` extension.
 */
const fix = (name: string): Uint8Array => {
  const fileName = `${name}.wasm`;
  const buffer = readFileSync(join(dir, fileName));
  return new Uint8Array(buffer);
};
|
|
27
|
+
|
|
28
|
+
/**
 * Wall-clock cap: `probeTimeout` must report `sandbox_timeout` for the
 * `misbehaving-timeout` fixture, and must let a well-behaved module finish
 * within its budget.
 */
describe('sandbox-wasm-timeout: wall-clock cap is engine/worker-enforced (RFC 0035 §B 6, server-free)', () => {
  it('node-pack-sandbox-timeout: a non-terminating module is killed with sandbox_timeout', async () => {
    const tightBudget = { memoryLimitBytes: 2 * 1024 * 1024, wallClockLimitMs: 300 };
    const verdict = await probeTimeout(fix('misbehaving-timeout'), tightBudget);
    expect(verdict.ok, why('RFC 0035 §B invariant 6', 'an over-budget invocation MUST fail')).toBe(false);
    expect(verdict.code, why('RFC 0035 §C', 'the failure code MUST be sandbox_timeout')).toBe('sandbox_timeout');
  });

  it('positive control: a well-behaved module completes within the budget (the kill-timer does not false-positive)', async () => {
    const roomyBudget = { memoryLimitBytes: 2 * 1024 * 1024, wallClockLimitMs: 1000 };
    const verdict = await probeTimeout(fix('well-behaved-echo'), roomyBudget, 'invoke', 7);
    expect(verdict.ok, why('RFC 0035 §B', 'a within-budget invocation completes before the kill-timer')).toBe(true);
    expect(verdict.result).toBe(7);
  });
});
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Portable tool catalog — the `GET /v1/tools` projection (RFC 0078 §B/§F) —
|
|
3
|
+
* behavioral.
|
|
4
|
+
*
|
|
5
|
+
* Capability-gated on `toolCatalog.supported` (root-first per RFC 0073).
|
|
6
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
7
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`. The always-on wire-shape coverage lives in
|
|
8
|
+
* `tool-descriptor-shape.test.ts`; this asserts host BEHAVIOR black-box on the
|
|
9
|
+
* NORMATIVE reads:
|
|
10
|
+
*
|
|
11
|
+
* 1. LIST (§B) — `GET /v1/tools` returns a `ToolDescriptor[]`, each
|
|
12
|
+
* schema-valid, `source` ∈ the closed vocab, `safetyTier` ∈ the closed
|
|
13
|
+
* vocab, and content-free (no credential material, SR-1).
|
|
14
|
+
* 2. BY-ID (§B) — `GET /v1/tools/{toolId}` returns that descriptor; an unknown
|
|
15
|
+
* id 404s.
|
|
16
|
+
* 3. AUTH-GATED — an unauthenticated `GET /v1/tools` is `401` (not public).
|
|
17
|
+
* 4. §F-2 NON-DISCLOSURE — a tool id known to belong to a DIFFERENT principal
|
|
18
|
+
* (`OPENWOP_CROSS_PRINCIPAL_TOOL_ID`) 404s for this caller, identically to
|
|
19
|
+
* "not found" — the authorization-scoped projection never discloses another
|
|
20
|
+
* principal's tools. Soft-skips when the env var is unset.
|
|
21
|
+
*
|
|
22
|
+
* Spec references:
|
|
23
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/tool-catalog.md (§B/§F)
|
|
24
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { describe, it, expect } from 'vitest';
|
|
28
|
+
import { readFileSync } from 'node:fs';
|
|
29
|
+
import { join } from 'node:path';
|
|
30
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
31
|
+
import addFormats from 'ajv-formats';
|
|
32
|
+
import { driver } from '../lib/driver.js';
|
|
33
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
34
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
35
|
+
import {
|
|
36
|
+
readToolCatalogCap,
|
|
37
|
+
listTools,
|
|
38
|
+
getTool,
|
|
39
|
+
TOOL_SOURCES,
|
|
40
|
+
SAFETY_TIERS,
|
|
41
|
+
TOOL_CONTENT_FORBIDDEN,
|
|
42
|
+
} from '../lib/toolCatalog.js';
|
|
43
|
+
|
|
44
|
+
/**
 * Reads `<SCHEMAS_DIR>/<name>` as UTF-8 and parses it as JSON.
 *
 * @param name Schema file name, e.g. `tool-descriptor.schema.json`.
 * @returns The parsed document, cast (not validated) to a plain record.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const text = readFileSync(schemaPath, 'utf8');
  return JSON.parse(text) as Record<string, unknown>;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Asserts that `d` has none of the keys listed in `TOOL_CONTENT_FORBIDDEN`.
 * The check uses the `in` operator on `d` itself; nested objects are not inspected.
 *
 * @param d Object to inspect.
 * @param where Label interpolated into the failure message.
 */
function expectContentFree(d: Record<string, unknown>, where: string): void {
  for (const forbiddenKey of TOOL_CONTENT_FORBIDDEN) {
    const absent = !(forbiddenKey in d);
    expect(
      absent,
      driver.describe('RFC 0078 §F (SR-1)', `${where} MUST be content-free (no ${forbiddenKey})`),
    ).toBe(true);
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Behavioral scenario for the tool catalog reads. In execution order it checks:
 * an unauthenticated list is 401; every listed descriptor validates against
 * `tool-descriptor.schema.json` and stays within the closed vocabularies; the
 * first listed tool round-trips by id; an unknown id 404s; and, when
 * `OPENWOP_CROSS_PRINCIPAL_TOOL_ID` is set, that id 404s as well.
 *
 * Returns early when `behaviorGate` declines or when `listTools()` yields `null`.
 */
describe('tool-catalog-projection (RFC 0078 §B/§F)', () => {
  it('lists schema-valid ToolDescriptors, serves by-id + 404s, is auth-gated, and never discloses another principal', async () => {
    const cap = await readToolCatalogCap();
    const advertised = cap?.supported === true;
    if (!behaviorGate('openwop-tool-catalog', advertised)) return;

    const ajv = new Ajv2020({ strict: false, allErrors: true });
    addFormats(ajv);
    const descriptorSchema = loadSchema('tool-descriptor.schema.json');
    const validate = ajv.compile(descriptorSchema);

    // ---- Leg 3: auth-gated (unauthenticated list MUST be 401) -------------
    const anonymous = await driver.get('/v1/tools', { authenticated: false });
    expect(
      anonymous.status === 401,
      driver.describe('tool-catalog.md §B', 'GET /v1/tools MUST require authentication (401 unauthenticated)'),
    ).toBe(true);

    // ---- Leg 1: the list (§B) -------------------------------------------
    const tools = await listTools();
    if (tools === null) return; // host advertises the cap but doesn't serve the read — soft-skip the rest

    for (const descriptor of tools) {
      // `validate(...)` is evaluated before the message, so `validate.errors` is current.
      expect(
        validate(descriptor),
        driver.describe('tool-descriptor.schema.json', `each ToolDescriptor MUST validate (${ajv.errorsText(validate.errors)})`),
      ).toBe(true);
      expect(
        typeof descriptor.source === 'string' && TOOL_SOURCES.includes(descriptor.source as string),
        driver.describe('tool-catalog.md §C', 'ToolDescriptor.source MUST be in the closed vocabulary'),
      ).toBe(true);
      expect(
        typeof descriptor.safetyTier === 'string' && SAFETY_TIERS.includes(descriptor.safetyTier as string),
        driver.describe('tool-catalog.md §C', 'ToolDescriptor.safetyTier MUST be pure|read|write|exec'),
      ).toBe(true);
      expectContentFree(descriptor, 'ToolDescriptor');
    }

    // ---- Leg 2: by-id round-trip + unknown 404 (§B) ---------------------
    if (tools.length > 0 && typeof tools[0]!.toolId === 'string') {
      const firstId = tools[0]!.toolId as string;
      const fetched = await getTool(firstId);
      // Only a 200 response is checked; any other status is left unasserted here.
      if (fetched.status === 200) {
        expect(
          fetched.descriptor?.toolId === firstId,
          driver.describe('tool-catalog.md §B', 'GET /v1/tools/{toolId} MUST return the requested descriptor'),
        ).toBe(true);
      }
    }
    const missing = await getTool('__conformance_nonexistent_tool__');
    expect(
      missing.status === 404,
      driver.describe('tool-catalog.md §B', 'GET /v1/tools/{unknown} MUST 404'),
    ).toBe(true);

    // ---- Leg 4: §F-2 cross-principal non-disclosure (env-gated) ---------
    const foreignId = process.env.OPENWOP_CROSS_PRINCIPAL_TOOL_ID;
    if (foreignId) {
      const foreign = await getTool(foreignId);
      expect(
        foreign.status === 404,
        driver.describe('tool-catalog.md §F-2', 'a tool owned by a different principal MUST 404 (non-disclosure)'),
      ).toBe(true);
    }
  });
});
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Portable tool-session bracket (RFC 0078 §D) — behavioral.
|
|
3
|
+
*
|
|
4
|
+
* Gated on `toolCatalog.sessionLifecycle` (root-first per RFC 0073). Soft-skips
|
|
5
|
+
* when unadvertised (default) / hard-fails under `OPENWOP_REQUIRE_BEHAVIOR=true`.
|
|
6
|
+
* The always-on wire-shape coverage lives in `tool-descriptor-shape.test.ts`
|
|
7
|
+
* (the `tool.session.*` payload `$defs`); this asserts host BEHAVIOR: a tool session brackets its RFC 0064 call events
|
|
8
|
+
* with `tool.session.opened` (BEFORE the first call event) and
|
|
9
|
+
* `tool.session.closed` (AFTER the last), sharing one `sessionId`, carrying a
|
|
10
|
+
* `toolId`, an `outcome` in the enum, and both events content-free.
|
|
11
|
+
*
|
|
12
|
+
* Drives the OPTIONAL `POST /v1/host/sample/tools/session-run` seam + reads the
|
|
13
|
+
* bracket back via the test event-log seam (both deferred per RFC 0078
|
|
14
|
+
* §Conformance — soft-skip on 404).
|
|
15
|
+
*
|
|
16
|
+
* Spec references:
|
|
17
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/tool-catalog.md (§D)
|
|
18
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { describe, it, expect } from 'vitest';
|
|
22
|
+
import { driver } from '../lib/driver.js';
|
|
23
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
24
|
+
import { readToolCatalogCap, driveToolSession, TOOL_CONTENT_FORBIDDEN } from '../lib/toolCatalog.js';
|
|
25
|
+
import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
26
|
+
|
|
27
|
+
/** Values accepted for `outcome` on a `tool.session.closed` payload. */
const SESSION_OUTCOMES: string[] = [
  'completed',
  'failed',
  'aborted',
  'expired',
];
|
|
28
|
+
/**
 * RFC 0064 tool-call event family bracketed by a tool session.
 *
 * @param t Event type string.
 * @returns `true` for `agent.toolCalled`, `agent.toolReturned`, or any type
 *   beginning with `tool.call`.
 */
const CALL_EVENT = (t: string): boolean => {
  if (t === 'agent.toolCalled' || t === 'agent.toolReturned') {
    return true;
  }
  return t.startsWith('tool.call');
};
|
|
31
|
+
|
|
32
|
+
/**
 * Behavioral scenario for the tool-session bracket. It drives a tool session
 * through `driveToolSession`, reads the run's events via `queryTestEvents`, and
 * asserts that `tool.session.opened` / `tool.session.closed` bracket the call
 * events, share one `sessionId`, carry a `toolId`, use an `outcome` from
 * `SESSION_OUTCOMES`, and contain none of the `TOOL_CONTENT_FORBIDDEN` keys.
 *
 * Returns early (soft-skip) when the behavior gate declines, the event-log seam
 * is unavailable, the session seam yields no run, or the event query is not ok.
 *
 * Once events have been read, `resetTestSeam()` runs in a `finally` so the seam
 * is reset even when an assertion throws; previously it ran only on the fully
 * passing path.
 */
describe('tool-session-lifecycle (RFC 0078 §D)', () => {
  it('brackets the call events with tool.session.opened-first / closed-last, one sessionId, content-free', async () => {
    const cap = await readToolCatalogCap();
    // The capability may be advertised either as `true` or as a non-null object.
    const lifecycle = cap?.sessionLifecycle === true || (typeof cap?.sessionLifecycle === 'object' && cap?.sessionLifecycle !== null);
    if (!behaviorGate('openwop-tool-session-lifecycle', lifecycle)) return;

    if (!(await isEventLogSeamAvailable())) return; // event-log seam absent — soft-skip
    const res = await driveToolSession({});
    if (res === null || !res.runId) return; // session seam absent — soft-skip

    const q = await queryTestEvents(res.runId);
    if (!q.ok) return;

    try {
      // Sort a copy by sequence so the ordering checks do not depend on query order.
      const events = q.events.slice().sort((a, b) => a.sequence - b.sequence);

      const opened = events.filter((e) => e.type === 'tool.session.opened');
      const closed = events.filter((e) => e.type === 'tool.session.closed');
      expect(
        opened.length >= 1 && closed.length >= 1,
        driver.describe('tool-catalog.md §D', 'a tool session MUST emit tool.session.opened + tool.session.closed'),
      ).toBe(true);
      if (opened.length === 0 || closed.length === 0) return;

      // Bracket = first opened event and last closed event.
      const open = opened[0]!;
      const close = closed[closed.length - 1]!;

      // §D ordering: opened precedes every call event; closed follows them all.
      const calls = events.filter((e) => CALL_EVENT(e.type));
      if (calls.length > 0) {
        expect(
          open.sequence < calls[0]!.sequence,
          driver.describe('RFC 0078 §D', 'tool.session.opened MUST precede the first call event'),
        ).toBe(true);
        expect(
          close.sequence > calls[calls.length - 1]!.sequence,
          driver.describe('RFC 0078 §D', 'tool.session.closed MUST follow the last call event'),
        ).toBe(true);
      } else {
        // No call events recorded: only the opened/closed order can be checked.
        expect(
          open.sequence < close.sequence,
          driver.describe('RFC 0078 §D', 'tool.session.opened MUST precede tool.session.closed'),
        ).toBe(true);
      }

      // One sessionId across the bracket, both carrying a toolId.
      const openSid = open.payload.sessionId;
      const closeSid = close.payload.sessionId;
      expect(
        typeof openSid === 'string' && openSid === closeSid,
        driver.describe('run-event-payloads.schema.json#toolSession*', 'the bracket MUST share one sessionId'),
      ).toBe(true);
      expect(
        typeof open.payload.toolId === 'string' && typeof close.payload.toolId === 'string',
        driver.describe('run-event-payloads.schema.json#toolSession*', 'tool.session.* MUST carry a toolId'),
      ).toBe(true);

      // Closed outcome enum discipline.
      expect(
        typeof close.payload.outcome === 'string' && SESSION_OUTCOMES.includes(close.payload.outcome as string),
        driver.describe('run-event-payloads.schema.json#toolSessionClosed', 'outcome MUST be in the closed enum'),
      ).toBe(true);

      // Content-free: identifiers + metadata only.
      for (const evt of [open, close]) {
        for (const forbidden of TOOL_CONTENT_FORBIDDEN) {
          expect(
            !(forbidden in evt.payload),
            driver.describe('RFC 0078 §F (SR-1)', `tool.session.* MUST be content-free (no ${forbidden})`),
          ).toBe(true);
        }
      }
    } finally {
      // Reset on every exit path, including a thrown assertion.
      await resetTestSeam();
    }
  });
});
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* workspace-cross-tenant-isolation-blackbox — RFC 0059 §E WCT-1 on the PRODUCTION wire.
|
|
3
|
+
*
|
|
4
|
+
* The black-box, production-path counterpart to the seam-driven
|
|
5
|
+
* `workspace-cross-tenant-isolation.test.ts`. Instead of the single-credential
|
|
6
|
+
* `POST /v1/host/sample/workspace/op` seam, this drives the NORMATIVE §C
|
|
7
|
+
* endpoints (`PUT`/`GET /v1/host/workspace/files/{path}`, `GET /v1/host/workspace
|
|
8
|
+
* /files`) with TWO distinct operator credentials that resolve to two different
|
|
9
|
+
* `{tenant, workspace}` owners (RFC 0048). It writes a secret as owner A and
|
|
10
|
+
* proves owner B cannot read or enumerate it — no `/v1/host/sample/*` seam, the
|
|
11
|
+
* exact contract a deployed multi-tenant host honors.
|
|
12
|
+
*
|
|
13
|
+
* This is the "replace seam-gated proofs with black-box production-path
|
|
14
|
+
* conformance" step (independent-audit acceptance-bar item 3) for RFC 0059. Once
|
|
15
|
+
* a host passes it non-vacuously, `workspace-cross-tenant-isolation` is proven on
|
|
16
|
+
* the production wire and the surface graduates INTO the `openwop-core-standard`
|
|
17
|
+
* floor (RFC 0088 §D Lever-2 → floor).
|
|
18
|
+
*
|
|
19
|
+
* Gating: soft-skips unless `capabilities.workspace.supported` AND
|
|
20
|
+
* `OPENWOP_TEST_TENANT_B_API_KEY` (a credential for a SECOND, distinct
|
|
21
|
+
* tenant·workspace) is supplied — the suite cannot mint a second tenant itself.
|
|
22
|
+
*
|
|
23
|
+
* @see RFCS/0059-agent-workspace.md §E WCT-1
|
|
24
|
+
* @see SECURITY/invariants.yaml workspace-cross-tenant-isolation
|
|
25
|
+
*/
|
|
26
|
+
import { describe, it, expect } from 'vitest';
|
|
27
|
+
import { randomUUID } from 'node:crypto';
|
|
28
|
+
import { driver } from '../lib/driver.js';
|
|
29
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
30
|
+
|
|
31
|
+
/** The slice of the discovery document this scenario reads. */
interface DiscoveryDoc {
  /** `workspace` capability family; `supported` must be `true` for the scenario to run. */
  workspace?: {
    supported?: boolean;
  };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Fetches `/.well-known/openwop` and reports whether the `workspace` capability
 * family is advertised with `supported === true`.
 *
 * @returns `true` only when the flag is exactly `true`; otherwise `false`.
 */
async function workspaceSupported(): Promise<boolean> {
  const discovery = await driver.get('/.well-known/openwop');
  const doc = discovery.json as DiscoveryDoc | undefined;
  const family = capabilityFamily(doc, 'workspace');
  return family?.supported === true;
}
|
|
39
|
+
|
|
40
|
+
/** Second-tenant credential read from the environment; `undefined` when not supplied. */
const tenantBKey = process.env['OPENWOP_TEST_TENANT_B_API_KEY'];
|
|
41
|
+
/**
 * Request options carrying the second-tenant bearer token. When `tenantBKey`
 * is undefined the header value is `Bearer ` followed by an empty string.
 */
const asTenantB = {
  headers: {
    Authorization: 'Bearer ' + (tenantBKey ?? ''),
  },
};
|
|
42
|
+
|
|
43
|
+
/**
 * Black-box cross-tenant scenario: the default credential PUTs a uniquely named
 * file with a unique secret, then the second-tenant credential must fail to
 * read it (404/403, secret absent from the body) and must not see its path in
 * the file list. The owner must still read the file back. The file is deleted
 * in a `finally`, ignoring delete errors.
 *
 * Soft-skips when the `workspace` capability is not advertised or when
 * `OPENWOP_TEST_TENANT_B_API_KEY` is not supplied.
 */
describe('workspace-cross-tenant-isolation (black-box): a §C file MUST NOT leak across owners (RFC 0059 §E WCT-1)', () => {
  it('a file written by owner A is unreadable + un-enumerable by a second-tenant credential', async () => {
    const advertised = await workspaceSupported();
    if (!advertised) return; // capability not advertised — skip
    if (!tenantBKey) {
      // eslint-disable-next-line no-console
      console.warn('[workspace-cross-tenant-isolation-blackbox] OPENWOP_TEST_TENANT_B_API_KEY not supplied; skipping the production-path cross-tenant assertion');
      return;
    }

    // Fresh path and secret per run.
    const path = `wct-blackbox-${randomUUID()}.md`;
    const secret = `WCT1-BLACKBOX-SECRET-${randomUUID()}`;
    const fileUrl = `/v1/host/workspace/files/${path}`;

    // Owner A (default credential) writes the file via the NORMATIVE PUT.
    const written = await driver.put(fileUrl, { content: secret });
    expect(written.status, driver.describe('agent-workspace.md §C PUT', 'the owning workspace MUST create its file (200)')).toBe(200);

    try {
      // Owner B (second-tenant credential) MUST NOT read it — fail closed, no existence leak.
      const foreignRead = await driver.get(fileUrl, asTenantB);
      const failedClosed = foreignRead.status === 404 || foreignRead.status === 403;
      expect(
        failedClosed,
        driver.describe('agent-workspace.md §E WCT-1', `a second-tenant read MUST fail closed (404/403), got ${foreignRead.status}`),
      ).toBe(true);
      const foreignBody = JSON.stringify(foreignRead.json ?? '');
      expect(
        !foreignBody.includes(secret),
        driver.describe('agent-workspace.md §E WCT-1', 'a second-tenant read MUST NOT surface the owner\'s content'),
      ).toBe(true);

      // Owner B MUST NOT enumerate it in the list projection.
      const foreignList = await driver.get('/v1/host/workspace/files', asTenantB);
      if (foreignList.status === 200) {
        const listing = JSON.stringify((foreignList.json as { files?: unknown })?.files ?? []);
        expect(
          !listing.includes(path),
          driver.describe('agent-workspace.md §E WCT-1', 'a second-tenant list MUST NOT enumerate the owner\'s path'),
        ).toBe(true);
      }

      // Isolation, not loss: owner A still reads its own file.
      const ownRead = await driver.get(fileUrl);
      expect(ownRead.status, driver.describe('agent-workspace.md §C GET', 'the owning workspace MUST still read its own file')).toBe(200);
      const ownBody = ownRead.json as { content?: string } | undefined;
      expect(ownBody?.content).toBe(secret);
    } finally {
      // Best-effort cleanup as owner A.
      await driver.delete(fileUrl).catch(() => undefined);
    }
  });
});
|