@openwop/openwop-conformance 1.13.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +2 -2
- package/api/openapi.yaml +60 -0
- package/coverage.md +15 -4
- package/fixtures/wasm-sandbox/isolation-global.wasm +0 -0
- package/fixtures/wasm-sandbox/isolation-global.wat +6 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wat +5 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wat +4 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wat +2 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wat +3 -0
- package/package.json +1 -1
- package/src/lib/discovery-capabilities.ts +18 -19
- package/src/lib/egressPolicy.ts +76 -0
- package/src/lib/otel-collector.ts +72 -0
- package/src/lib/profiles.ts +15 -0
- package/src/lib/sandbox-timeout-worker.mjs +31 -0
- package/src/lib/toolCatalog.ts +81 -0
- package/src/lib/wasm-sandbox-probe.ts +168 -0
- package/src/scenarios/core-standard-profile.test.ts +75 -0
- package/src/scenarios/egress-audience-binding.test.ts +81 -0
- package/src/scenarios/egress-decision-content-free.test.ts +57 -0
- package/src/scenarios/memory-degraded-projection.test.ts +121 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +12 -7
- package/src/scenarios/otel-collector-canary-inspection.test.ts +211 -0
- package/src/scenarios/prompt-resolution-chain-event.test.ts +113 -0
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +192 -75
- package/src/scenarios/sandbox-wasm-isolation.test.ts +98 -0
- package/src/scenarios/sandbox-wasm-timeout.test.ts +40 -0
- package/src/scenarios/secret-leakage-otel-attribute.test.ts +52 -0
- package/src/scenarios/tool-catalog-projection.test.ts +120 -0
- package/src/scenarios/tool-session-lifecycle.test.ts +105 -0
- package/src/scenarios/workspace-cross-tenant-isolation-blackbox.test.ts +89 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Portable tool-session bracket (RFC 0078 §D) — behavioral.
|
|
3
|
+
*
|
|
4
|
+
* Gated on `toolCatalog.sessionLifecycle` (root-first per RFC 0073). Soft-skips
|
|
5
|
+
* when unadvertised (default) / hard-fails under `OPENWOP_REQUIRE_BEHAVIOR=true`.
|
|
6
|
+
* The always-on wire-shape coverage lives in `tool-descriptor-shape.test.ts`
|
|
7
|
+
* (the `tool.session.*` payload `$defs`); this asserts host BEHAVIOR: a tool session brackets its RFC 0064 call events
|
|
8
|
+
* with `tool.session.opened` (BEFORE the first call event) and
|
|
9
|
+
* `tool.session.closed` (AFTER the last), sharing one `sessionId`, carrying a
|
|
10
|
+
* `toolId`, an `outcome` in the enum, and both events content-free.
|
|
11
|
+
*
|
|
12
|
+
* Drives the OPTIONAL `POST /v1/host/sample/tools/session-run` seam + reads the
|
|
13
|
+
* bracket back via the test event-log seam (both deferred per RFC 0078
|
|
14
|
+
* §Conformance — soft-skip on 404).
|
|
15
|
+
*
|
|
16
|
+
* Spec references:
|
|
17
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/tool-catalog.md (§D)
|
|
18
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { describe, it, expect } from 'vitest';
|
|
22
|
+
import { driver } from '../lib/driver.js';
|
|
23
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
24
|
+
import { readToolCatalogCap, driveToolSession, TOOL_CONTENT_FORBIDDEN } from '../lib/toolCatalog.js';
|
|
25
|
+
import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
26
|
+
|
|
27
|
+
/** Values accepted for `outcome` on a `tool.session.closed` payload by the enum check below. */
const SESSION_OUTCOMES: readonly string[] = [
  'completed',
  'failed',
  'aborted',
  'expired',
];
|
|
28
|
+
/**
 * RFC 0064 tool-call event family bracketed by a tool session.
 *
 * @param t - The `type` string of a run event.
 * @returns `true` when `t` is exactly `agent.toolCalled` or `agent.toolReturned`,
 *   or when it starts with the `tool.call` prefix; `false` otherwise
 *   (so `tool.session.*` events are never counted as call events).
 */
const CALL_EVENT = (t: string): boolean =>
  t === 'agent.toolCalled' || t === 'agent.toolReturned' || t.startsWith('tool.call');
|
|
31
|
+
|
|
32
|
+
describe('tool-session-lifecycle (RFC 0078 §D)', () => {
  /**
   * Behavioral check of the tool-session bracket.
   *
   * Flow: gate on the advertised `sessionLifecycle` capability, drive one tool
   * session through the sample seam, read the run's events back through the
   * test event-log seam, then assert ordering, shared `sessionId`, `toolId`
   * presence, the `outcome` enum and content-freedom of both bracket events.
   *
   * Every early `return` is a soft-skip: the test passes without asserting.
   */
  it('brackets the call events with tool.session.opened-first / closed-last, one sessionId, content-free', async () => {
    const cap = await readToolCatalogCap();
    // Advertised either as the literal `true` or as a (non-null) descriptor object.
    const lifecycle = cap?.sessionLifecycle === true || (typeof cap?.sessionLifecycle === 'object' && cap?.sessionLifecycle !== null);
    if (!behaviorGate('openwop-tool-session-lifecycle', lifecycle)) return;

    if (!(await isEventLogSeamAvailable())) return; // event-log seam absent — soft-skip
    const res = await driveToolSession({});
    if (res === null || !res.runId) return; // session seam absent — soft-skip

    // From here on a session has been driven, so the seam reset must run on
    // every exit path — including a failed assertion or the `!q.ok` skip.
    try {
      const q = await queryTestEvents(res.runId);
      if (!q.ok) return;
      // Sort a copy so the ordering assertions do not depend on delivery order.
      const events = q.events.slice().sort((a, b) => a.sequence - b.sequence);

      const opened = events.filter((e) => e.type === 'tool.session.opened');
      const closed = events.filter((e) => e.type === 'tool.session.closed');
      expect(
        opened.length >= 1 && closed.length >= 1,
        driver.describe('tool-catalog.md §D', 'a tool session MUST emit tool.session.opened + tool.session.closed'),
      ).toBe(true);

      // The bracket is the FIRST opened event and the LAST closed event.
      const open = opened[0];
      const close = closed[closed.length - 1];
      // Unreachable after the assertion above; kept to narrow away `undefined`.
      if (open === undefined || close === undefined) return;

      // §D ordering: opened precedes every call event; closed follows them all.
      const calls = events.filter((e) => CALL_EVENT(e.type));
      const firstCall = calls[0];
      const lastCall = calls[calls.length - 1];
      if (firstCall !== undefined && lastCall !== undefined) {
        expect(
          open.sequence < firstCall.sequence,
          driver.describe('RFC 0078 §D', 'tool.session.opened MUST precede the first call event'),
        ).toBe(true);
        expect(
          close.sequence > lastCall.sequence,
          driver.describe('RFC 0078 §D', 'tool.session.closed MUST follow the last call event'),
        ).toBe(true);
      } else {
        // No call events were recorded: only the bracket's own order can be checked.
        expect(
          open.sequence < close.sequence,
          driver.describe('RFC 0078 §D', 'tool.session.opened MUST precede tool.session.closed'),
        ).toBe(true);
      }

      // One sessionId across the bracket, both carrying a toolId.
      const openSid = open.payload.sessionId;
      const closeSid = close.payload.sessionId;
      expect(
        typeof openSid === 'string' && openSid === closeSid,
        driver.describe('run-event-payloads.schema.json#toolSession*', 'the bracket MUST share one sessionId'),
      ).toBe(true);
      expect(
        typeof open.payload.toolId === 'string' && typeof close.payload.toolId === 'string',
        driver.describe('run-event-payloads.schema.json#toolSession*', 'tool.session.* MUST carry a toolId'),
      ).toBe(true);

      // Closed outcome enum discipline.
      expect(
        typeof close.payload.outcome === 'string' && SESSION_OUTCOMES.includes(close.payload.outcome as string),
        driver.describe('run-event-payloads.schema.json#toolSessionClosed', 'outcome MUST be in the closed enum'),
      ).toBe(true);

      // Content-free: identifiers + metadata only — no forbidden key may be present at all.
      for (const evt of [open, close]) {
        for (const forbidden of TOOL_CONTENT_FORBIDDEN) {
          expect(
            !(forbidden in evt.payload),
            driver.describe('RFC 0078 §F (SR-1)', `tool.session.* MUST be content-free (no ${forbidden})`),
          ).toBe(true);
        }
      }
    } finally {
      await resetTestSeam();
    }
  });
});
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* workspace-cross-tenant-isolation-blackbox — RFC 0059 §E WCT-1 on the PRODUCTION wire.
|
|
3
|
+
*
|
|
4
|
+
* The black-box, production-path counterpart to the seam-driven
|
|
5
|
+
* `workspace-cross-tenant-isolation.test.ts`. Instead of the single-credential
|
|
6
|
+
* `POST /v1/host/sample/workspace/op` seam, this drives the NORMATIVE §C
|
|
7
|
+
* endpoints (`PUT`/`GET /v1/host/workspace/files/{path}`,
* `GET /v1/host/workspace/files`) with TWO distinct operator credentials that resolve to two different
|
|
9
|
+
* `{tenant, workspace}` owners (RFC 0048). It writes a secret as owner A and
|
|
10
|
+
* proves owner B cannot read or enumerate it — no `/v1/host/sample/*` seam, the
|
|
11
|
+
* exact contract a deployed multi-tenant host honors.
|
|
12
|
+
*
|
|
13
|
+
* This is the "replace seam-gated proofs with black-box production-path
|
|
14
|
+
* conformance" step (independent-audit acceptance-bar item 3) for RFC 0059. Once
|
|
15
|
+
* a host passes it non-vacuously, `workspace-cross-tenant-isolation` is proven on
|
|
16
|
+
* the production wire and the surface graduates INTO the `openwop-core-standard`
|
|
17
|
+
* floor (RFC 0088 §D Lever-2 → floor).
|
|
18
|
+
*
|
|
19
|
+
* Gating: soft-skips unless `capabilities.workspace.supported` AND
|
|
20
|
+
* `OPENWOP_TEST_TENANT_B_API_KEY` (a credential for a SECOND, distinct
|
|
21
|
+
* tenant·workspace) is supplied — the suite cannot mint a second tenant itself.
|
|
22
|
+
*
|
|
23
|
+
* @see RFCS/0059-agent-workspace.md §E WCT-1
|
|
24
|
+
* @see SECURITY/invariants.yaml workspace-cross-tenant-isolation
|
|
25
|
+
*/
|
|
26
|
+
import { describe, it, expect } from 'vitest';
|
|
27
|
+
import { randomUUID } from 'node:crypto';
|
|
28
|
+
import { driver } from '../lib/driver.js';
|
|
29
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
30
|
+
|
|
31
|
+
/**
 * The slice of the `/.well-known/openwop` discovery document this test reads.
 * Both levels are optional, so a host that omits the family is simply treated
 * as not supporting it.
 */
interface DiscoveryDoc {
  /** Workspace capability family; `supported === true` is what the gate checks for. */
  workspace?: { supported?: boolean };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Reports whether the host advertises the workspace capability.
 *
 * Fetches `/.well-known/openwop` and resolves the `workspace` family through
 * `capabilityFamily`.
 *
 * @returns `true` only when the resolved family has `supported === true`;
 *   a missing document, missing family, or any other value yields `false`.
 */
async function workspaceSupported(): Promise<boolean> {
  const res = await driver.get('/.well-known/openwop');
  return capabilityFamily(res.json as DiscoveryDoc | undefined, 'workspace')?.supported === true;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Credential for the SECOND tenant·workspace, read from
 * `OPENWOP_TEST_TENANT_B_API_KEY`.
 *
 * Surrounding whitespace is trimmed and an empty / whitespace-only value is
 * normalized to `undefined`, so a stray newline from a secret store never ends
 * up inside the `Authorization` header and a blank value counts as "not supplied".
 */
const tenantBKey = process.env.OPENWOP_TEST_TENANT_B_API_KEY?.trim() || undefined;

/**
 * Request options that authenticate as the second tenant. When no key is
 * supplied the header is `Bearer ` with an empty token; callers check
 * `tenantBKey` before using these options.
 */
const asTenantB = { headers: { Authorization: `Bearer ${tenantBKey ?? ''}` } };
|
|
42
|
+
|
|
43
|
+
describe('workspace-cross-tenant-isolation (black-box): a §C file MUST NOT leak across owners (RFC 0059 §E WCT-1)', () => {
  /**
   * Writes a uniquely named file holding a unique secret as owner A (the
   * driver's default credential), then checks that the second-tenant
   * credential can neither read nor enumerate it, and that owner A still can.
   *
   * Skips (passes without asserting) when the workspace capability is not
   * advertised or when no second-tenant key is supplied.
   */
  it('a file written by owner A is unreadable + un-enumerable by a second-tenant credential', async () => {
    if (!(await workspaceSupported())) return; // capability not advertised — skip
    if (!tenantBKey) {
      // eslint-disable-next-line no-console
      console.warn('[workspace-cross-tenant-isolation-blackbox] OPENWOP_TEST_TENANT_B_API_KEY not supplied; skipping the production-path cross-tenant assertion');
      return;
    }

    // Fresh UUIDs per run: the path and the secret cannot collide with earlier runs.
    const path = `wct-blackbox-${randomUUID()}.md`;
    const secret = `WCT1-BLACKBOX-SECRET-${randomUUID()}`;
    const fileUrl = `/v1/host/workspace/files/${path}`;

    // The PUT lives INSIDE the try: if the host stores the file but answers with
    // an unexpected status, the cleanup below still removes the secret-bearing file.
    try {
      // Owner A (default credential) writes the file via the NORMATIVE PUT.
      const put = await driver.put(fileUrl, { content: secret });
      expect(put.status, driver.describe('agent-workspace.md §C PUT', 'the owning workspace MUST create its file (200)')).toBe(200);

      // Owner B (second-tenant credential) MUST NOT read it — fail closed, no existence leak.
      const crossRead = await driver.get(fileUrl, asTenantB);
      expect(
        crossRead.status === 404 || crossRead.status === 403,
        driver.describe('agent-workspace.md §E WCT-1', `a second-tenant read MUST fail closed (404/403), got ${crossRead.status}`),
      ).toBe(true);
      // Even an error body must not echo the owner's content.
      expect(
        !JSON.stringify(crossRead.json ?? '').includes(secret),
        driver.describe('agent-workspace.md §E WCT-1', 'a second-tenant read MUST NOT surface the owner\'s content'),
      ).toBe(true);

      // Owner B MUST NOT enumerate it in the list projection.
      // Only a 200 list is inspected; any other status carries no listing to check.
      const crossList = await driver.get('/v1/host/workspace/files', asTenantB);
      if (crossList.status === 200) {
        expect(
          !JSON.stringify((crossList.json as { files?: unknown })?.files ?? []).includes(path),
          driver.describe('agent-workspace.md §E WCT-1', 'a second-tenant list MUST NOT enumerate the owner\'s path'),
        ).toBe(true);
      }

      // Isolation, not loss: owner A still reads its own file.
      const ownerRead = await driver.get(fileUrl);
      expect(ownerRead.status, driver.describe('agent-workspace.md §C GET', 'the owning workspace MUST still read its own file')).toBe(200);
      expect((ownerRead.json as { content?: string } | undefined)?.content).toBe(secret);
    } finally {
      // Best-effort cleanup as owner A; a failed delete must not mask the test result.
      await driver.delete(fileUrl).catch(() => undefined);
    }
  });
});
|