@openwop/openwop-conformance 1.13.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/README.md +2 -2
  3. package/api/openapi.yaml +60 -0
  4. package/coverage.md +15 -4
  5. package/fixtures/wasm-sandbox/isolation-global.wasm +0 -0
  6. package/fixtures/wasm-sandbox/isolation-global.wat +6 -0
  7. package/fixtures/wasm-sandbox/misbehaving-capability-gate.wasm +0 -0
  8. package/fixtures/wasm-sandbox/misbehaving-capability-gate.wat +4 -0
  9. package/fixtures/wasm-sandbox/misbehaving-env.wasm +0 -0
  10. package/fixtures/wasm-sandbox/misbehaving-env.wat +4 -0
  11. package/fixtures/wasm-sandbox/misbehaving-fs.wasm +0 -0
  12. package/fixtures/wasm-sandbox/misbehaving-fs.wat +4 -0
  13. package/fixtures/wasm-sandbox/misbehaving-memory.wasm +0 -0
  14. package/fixtures/wasm-sandbox/misbehaving-memory.wat +5 -0
  15. package/fixtures/wasm-sandbox/misbehaving-network.wasm +0 -0
  16. package/fixtures/wasm-sandbox/misbehaving-network.wat +4 -0
  17. package/fixtures/wasm-sandbox/misbehaving-process.wasm +0 -0
  18. package/fixtures/wasm-sandbox/misbehaving-process.wat +4 -0
  19. package/fixtures/wasm-sandbox/misbehaving-timeout.wasm +0 -0
  20. package/fixtures/wasm-sandbox/misbehaving-timeout.wat +4 -0
  21. package/fixtures/wasm-sandbox/well-behaved-echo.wasm +0 -0
  22. package/fixtures/wasm-sandbox/well-behaved-echo.wat +2 -0
  23. package/fixtures/wasm-sandbox/well-behaved-host-fetch.wasm +0 -0
  24. package/fixtures/wasm-sandbox/well-behaved-host-fetch.wat +3 -0
  25. package/package.json +1 -1
  26. package/src/lib/discovery-capabilities.ts +18 -19
  27. package/src/lib/egressPolicy.ts +76 -0
  28. package/src/lib/otel-collector.ts +72 -0
  29. package/src/lib/profiles.ts +15 -0
  30. package/src/lib/sandbox-timeout-worker.mjs +31 -0
  31. package/src/lib/toolCatalog.ts +81 -0
  32. package/src/lib/wasm-sandbox-probe.ts +168 -0
  33. package/src/scenarios/core-standard-profile.test.ts +75 -0
  34. package/src/scenarios/egress-audience-binding.test.ts +81 -0
  35. package/src/scenarios/egress-decision-content-free.test.ts +57 -0
  36. package/src/scenarios/memory-degraded-projection.test.ts +121 -0
  37. package/src/scenarios/multi-agent-confidence-escalation.test.ts +12 -7
  38. package/src/scenarios/otel-collector-canary-inspection.test.ts +211 -0
  39. package/src/scenarios/prompt-resolution-chain-event.test.ts +113 -0
  40. package/src/scenarios/replay-observable-sequence-determinism.test.ts +192 -75
  41. package/src/scenarios/sandbox-wasm-isolation.test.ts +98 -0
  42. package/src/scenarios/sandbox-wasm-timeout.test.ts +40 -0
  43. package/src/scenarios/secret-leakage-otel-attribute.test.ts +52 -0
  44. package/src/scenarios/tool-catalog-projection.test.ts +120 -0
  45. package/src/scenarios/tool-session-lifecycle.test.ts +105 -0
  46. package/src/scenarios/workspace-cross-tenant-isolation-blackbox.test.ts +89 -0
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Portable tool-session bracket (RFC 0078 §D) — behavioral.
3
+ *
4
+ * Gated on `toolCatalog.sessionLifecycle` (root-first per RFC 0073). Soft-skips
5
+ * when unadvertised (default) / hard-fails under `OPENWOP_REQUIRE_BEHAVIOR=true`.
6
+ * The always-on wire-shape coverage lives in `tool-descriptor-shape.test.ts`
7
+ * (the `tool.session.*` payload `$defs`); this asserts host BEHAVIOR: a tool session brackets its RFC 0064 call events
8
+ * with `tool.session.opened` (BEFORE the first call event) and
9
+ * `tool.session.closed` (AFTER the last), sharing one `sessionId`, carrying a
10
+ * `toolId`, an `outcome` in the enum, and both events content-free.
11
+ *
12
+ * Drives the OPTIONAL `POST /v1/host/sample/tools/session-run` seam + reads the
13
+ * bracket back via the test event-log seam (both deferred per RFC 0078
14
+ * §Conformance — soft-skip on 404).
15
+ *
16
+ * Spec references:
17
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/tool-catalog.md (§D)
18
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
19
+ */
20
+
21
+ import { describe, it, expect } from 'vitest';
22
+ import { driver } from '../lib/driver.js';
23
+ import { behaviorGate } from '../lib/behavior-gate.js';
24
+ import { readToolCatalogCap, driveToolSession, TOOL_CONTENT_FORBIDDEN } from '../lib/toolCatalog.js';
25
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
26
+
27
/** Values accepted for `outcome` on a `tool.session.closed` payload. */
const SESSION_OUTCOMES = ['completed', 'failed', 'aborted', 'expired'];

/**
 * Predicate over an event type name: true when the event belongs to the
 * RFC 0064 tool-call family that a tool session brackets.
 *
 * Matches the two exact `agent.*` names and anything prefixed `tool.call`.
 */
const CALL_EVENT = (t: string): boolean => {
  if (t.startsWith('tool.call')) return true;
  return ['agent.toolCalled', 'agent.toolReturned'].includes(t);
};
31
+
32
describe('tool-session-lifecycle (RFC 0078 §D)', () => {
  /**
   * Drives one tool session through the optional sample seam, reads the run's
   * events back through the test event-log seam, and asserts the bracket:
   * `tool.session.opened` first, `tool.session.closed` last, one shared
   * `sessionId`, a `toolId` on both, an enum `outcome`, and no forbidden
   * content keys on either payload.
   *
   * Returns early (soft-skip) when the capability gate declines, when either
   * seam is unavailable, or when the event query is not ok.
   */
  it('brackets the call events with tool.session.opened-first / closed-last, one sessionId, content-free', async () => {
    const cap = await readToolCatalogCap();
    // The capability counts as advertised when it is `true` or any non-null object.
    const lifecycle = cap?.sessionLifecycle === true || (typeof cap?.sessionLifecycle === 'object' && cap?.sessionLifecycle !== null);
    if (!behaviorGate('openwop-tool-session-lifecycle', lifecycle)) return;

    if (!(await isEventLogSeamAvailable())) return; // event-log seam absent — soft-skip
    const res = await driveToolSession({});
    if (res === null || !res.runId) return; // session seam absent — soft-skip

    const q = await queryTestEvents(res.runId);
    if (!q.ok) return;

    // A failing `expect` throws, so the seam reset lives in `finally`: it runs
    // whether the assertions pass or fail.
    try {
      // Sort a copy so the query result itself is left untouched.
      const events = q.events.slice().sort((a, b) => a.sequence - b.sequence);

      const opened = events.filter((e) => e.type === 'tool.session.opened');
      const closed = events.filter((e) => e.type === 'tool.session.closed');
      expect(
        opened.length >= 1 && closed.length >= 1,
        driver.describe('tool-catalog.md §D', 'a tool session MUST emit tool.session.opened + tool.session.closed'),
      ).toBe(true);

      // Both arrays are non-empty past this point (the expect above throws
      // otherwise), so the bracket is the first opened and the last closed.
      const open = opened[0]!;
      const close = closed[closed.length - 1]!;

      // §D ordering: opened precedes every call event; closed follows them all.
      const calls = events.filter((e) => CALL_EVENT(e.type));
      if (calls.length > 0) {
        expect(
          open.sequence < calls[0]!.sequence,
          driver.describe('RFC 0078 §D', 'tool.session.opened MUST precede the first call event'),
        ).toBe(true);
        expect(
          close.sequence > calls[calls.length - 1]!.sequence,
          driver.describe('RFC 0078 §D', 'tool.session.closed MUST follow the last call event'),
        ).toBe(true);
      } else {
        // No call events recorded: the bracket itself must still be ordered.
        expect(
          open.sequence < close.sequence,
          driver.describe('RFC 0078 §D', 'tool.session.opened MUST precede tool.session.closed'),
        ).toBe(true);
      }

      // One sessionId across the bracket, both carrying a toolId.
      const openSid = open.payload.sessionId;
      const closeSid = close.payload.sessionId;
      expect(
        typeof openSid === 'string' && openSid === closeSid,
        driver.describe('run-event-payloads.schema.json#toolSession*', 'the bracket MUST share one sessionId'),
      ).toBe(true);
      expect(
        typeof open.payload.toolId === 'string' && typeof close.payload.toolId === 'string',
        driver.describe('run-event-payloads.schema.json#toolSession*', 'tool.session.* MUST carry a toolId'),
      ).toBe(true);

      // Closed outcome enum discipline.
      expect(
        typeof close.payload.outcome === 'string' && SESSION_OUTCOMES.includes(close.payload.outcome as string),
        driver.describe('run-event-payloads.schema.json#toolSessionClosed', 'outcome MUST be in the closed enum'),
      ).toBe(true);

      // Content-free: identifiers + metadata only.
      for (const evt of [open, close]) {
        for (const forbidden of TOOL_CONTENT_FORBIDDEN) {
          expect(
            !(forbidden in evt.payload),
            driver.describe('RFC 0078 §F (SR-1)', `tool.session.* MUST be content-free (no ${forbidden})`),
          ).toBe(true);
        }
      }
    } finally {
      // NOTE(review): presumably clears seam state shared with other
      // scenarios — confirm against `event-log-query`.
      await resetTestSeam();
    }
  });
});
@@ -0,0 +1,89 @@
1
+ /**
2
+ * workspace-cross-tenant-isolation-blackbox — RFC 0059 §E WCT-1 on the PRODUCTION wire.
3
+ *
4
+ * The black-box, production-path counterpart to the seam-driven
5
+ * `workspace-cross-tenant-isolation.test.ts`. Instead of the single-credential
6
+ * `POST /v1/host/sample/workspace/op` seam, this drives the NORMATIVE §C
7
+ * endpoints (`PUT`/`GET /v1/host/workspace/files/{path}`,
8
+ * `GET /v1/host/workspace/files`) with TWO distinct operator credentials that resolve to two different
9
+ * `{tenant, workspace}` owners (RFC 0048). It writes a secret as owner A and
10
+ * proves owner B cannot read or enumerate it — no `/v1/host/sample/*` seam, the
11
+ * exact contract a deployed multi-tenant host honors.
12
+ *
13
+ * This is the "replace seam-gated proofs with black-box production-path
14
+ * conformance" step (independent-audit acceptance-bar item 3) for RFC 0059. Once
15
+ * a host passes it non-vacuously, `workspace-cross-tenant-isolation` is proven on
16
+ * the production wire and the surface graduates INTO the `openwop-core-standard`
17
+ * floor (RFC 0088 §D Lever-2 → floor).
18
+ *
19
+ * Gating: soft-skips unless `capabilities.workspace.supported` AND
20
+ * `OPENWOP_TEST_TENANT_B_API_KEY` (a credential for a SECOND, distinct
21
+ * tenant·workspace) is supplied — the suite cannot mint a second tenant itself.
22
+ *
23
+ * @see RFCS/0059-agent-workspace.md §E WCT-1
24
+ * @see SECURITY/invariants.yaml workspace-cross-tenant-isolation
25
+ */
26
+ import { describe, it, expect } from 'vitest';
27
+ import { randomUUID } from 'node:crypto';
28
+ import { driver } from '../lib/driver.js';
29
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
30
+
31
/** The slice of the discovery document this scenario reads. */
interface DiscoveryDoc {
  workspace?: { supported?: boolean };
}

/**
 * Fetches `/.well-known/openwop` and resolves the `workspace` capability
 * family through `capabilityFamily`.
 *
 * @returns `true` only when that family's `supported` flag is exactly `true`.
 */
async function workspaceSupported(): Promise<boolean> {
  const discovery = await driver.get('/.well-known/openwop');
  const doc = discovery.json as DiscoveryDoc | undefined;
  const family = capabilityFamily(doc, 'workspace');
  return family?.supported === true;
}
39
+
40
/** Second-tenant credential read from the environment; `undefined` when not supplied. */
const tenantBKey = process.env['OPENWOP_TEST_TENANT_B_API_KEY'];

/**
 * Request options carrying the second-tenant bearer token. The token part is
 * empty when the key is unset.
 */
const asTenantB = {
  headers: {
    Authorization: 'Bearer ' + (tenantBKey ?? ''),
  },
};
42
+
43
describe('workspace-cross-tenant-isolation (black-box): a §C file MUST NOT leak across owners (RFC 0059 §E WCT-1)', () => {
  /**
   * Writes a uniquely named file as owner A (the driver's default credential),
   * then asserts that the second-tenant credential can neither read nor list
   * it, and that owner A can still read back exactly what it wrote.
   *
   * Returns early (soft-skip) when the workspace capability is not advertised
   * or when no second-tenant key is supplied.
   */
  it('a file written by owner A is unreadable + un-enumerable by a second-tenant credential', async () => {
    if (!(await workspaceSupported())) return; // capability not advertised — skip
    if (!tenantBKey) {
      // eslint-disable-next-line no-console
      console.warn('[workspace-cross-tenant-isolation-blackbox] OPENWOP_TEST_TENANT_B_API_KEY not supplied; skipping the production-path cross-tenant assertion');
      return;
    }

    // Random path + secret so a match can only come from this write.
    const path = `wct-blackbox-${randomUUID()}.md`;
    const secret = `WCT1-BLACKBOX-SECRET-${randomUUID()}`;

    // Owner A (default credential) writes the file via the NORMATIVE PUT.
    const put = await driver.put(`/v1/host/workspace/files/${path}`, { content: secret });

    try {
      // Asserted inside the try so the cleanup below still runs when the host
      // created the file but answered with an unexpected status.
      expect(put.status, driver.describe('agent-workspace.md §C PUT', 'the owning workspace MUST create its file (200)')).toBe(200);

      // Owner B (second-tenant credential) MUST NOT read it — fail closed, no existence leak.
      const crossRead = await driver.get(`/v1/host/workspace/files/${path}`, asTenantB);
      expect(
        crossRead.status === 404 || crossRead.status === 403,
        driver.describe('agent-workspace.md §E WCT-1', `a second-tenant read MUST fail closed (404/403), got ${crossRead.status}`),
      ).toBe(true);
      expect(
        !JSON.stringify(crossRead.json ?? '').includes(secret),
        driver.describe('agent-workspace.md §E WCT-1', 'a second-tenant read MUST NOT surface the owner\'s content'),
      ).toBe(true);

      // Owner B MUST NOT enumerate it in the list projection.
      // Only a 200 list is inspected; any other status is not asserted on.
      const crossList = await driver.get('/v1/host/workspace/files', asTenantB);
      if (crossList.status === 200) {
        expect(
          !JSON.stringify((crossList.json as { files?: unknown })?.files ?? []).includes(path),
          driver.describe('agent-workspace.md §E WCT-1', 'a second-tenant list MUST NOT enumerate the owner\'s path'),
        ).toBe(true);
      }

      // Isolation, not loss: owner A still reads its own file.
      const ownerRead = await driver.get(`/v1/host/workspace/files/${path}`);
      expect(ownerRead.status, driver.describe('agent-workspace.md §C GET', 'the owning workspace MUST still read its own file')).toBe(200);
      expect(
        (ownerRead.json as { content?: string } | undefined)?.content,
        driver.describe('agent-workspace.md §C GET', 'the owning workspace MUST read back exactly the content it wrote'),
      ).toBe(secret);
    } finally {
      // Best-effort cleanup as owner A; a rejected delete is deliberately ignored.
      await driver.delete(`/v1/host/workspace/files/${path}`).catch(() => undefined);
    }
  });
});