@openwop/openwop-conformance 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +2 -2
- package/api/asyncapi.yaml +25 -4
- package/api/openapi.yaml +371 -0
- package/coverage.md +31 -4
- package/fixtures/conformance-phase4-nondet-tool.json +53 -0
- package/fixtures/conformance-phase4-replay-divergence.json +40 -0
- package/fixtures.md +5 -3
- package/package.json +1 -1
- package/schemas/README.md +4 -0
- package/schemas/annotation-create.schema.json +37 -0
- package/schemas/annotation.schema.json +56 -0
- package/schemas/capabilities.schema.json +191 -3
- package/schemas/credential-reference.schema.json +21 -0
- package/schemas/node-pack-manifest.schema.json +112 -1
- package/schemas/run-diff-response.schema.json +64 -0
- package/schemas/run-event-payloads.schema.json +104 -2
- package/schemas/run-event.schema.json +8 -1
- package/schemas/run-snapshot.schema.json +11 -0
- package/src/lib/behavior-gate.ts +51 -0
- package/src/lib/driver.ts +13 -1
- package/src/lib/feedback.ts +31 -0
- package/src/lib/saml-idp.ts +179 -0
- package/src/scenarios/approval-gate-events.test.ts +61 -0
- package/src/scenarios/approval-gate-flow.test.ts +68 -0
- package/src/scenarios/auth-saml-profile.test.ts +119 -0
- package/src/scenarios/auth-scim-profile.test.ts +65 -0
- package/src/scenarios/authorization-fail-closed.test.ts +80 -0
- package/src/scenarios/authorization-roles-shape.test.ts +83 -0
- package/src/scenarios/connector-manifest-validity.test.ts +142 -0
- package/src/scenarios/credential-payload-redaction.test.ts +93 -0
- package/src/scenarios/credentials-capability-shape.test.ts +90 -0
- package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
- package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
- package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
- package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
- package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
- package/src/scenarios/experimental-tier-shape.test.ts +192 -0
- package/src/scenarios/feedback-capability-shape.test.ts +35 -0
- package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
- package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
- package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
- package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
- package/src/scenarios/feedback-record-and-list.test.ts +32 -0
- package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
- package/src/scenarios/identity-owner-shape.test.ts +64 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +13 -12
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
- package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
- package/src/scenarios/oauth-capability-shape.test.ts +97 -0
- package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
- package/src/scenarios/pack-registry-isolation.test.ts +108 -0
- package/src/scenarios/pack-registry-publish.test.ts +1 -1
- package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
- package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
- package/src/scenarios/redaction.test.ts +4 -1
- package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
- package/src/scenarios/run-diff.test.ts +143 -0
- package/src/scenarios/sandbox-capability-gate-respected.test.ts +7 -1
- package/src/scenarios/sandbox-memory-cap.test.ts +7 -5
- package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
- package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +7 -1
- package/src/scenarios/sandbox-no-host-env-leak.test.ts +5 -1
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +9 -1
- package/src/scenarios/sandbox-no-host-process-escape.test.ts +5 -1
- package/src/scenarios/sandbox-no-network-escape.test.ts +5 -1
- package/src/scenarios/sandbox-timeout-cap.test.ts +7 -5
- package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
- package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
- package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
- package/src/scenarios/spec-corpus-validity.test.ts +6 -3
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cross-engine-append-behavior — RFC 0036 §B cross-engine append-ordering behavioral probe.
|
|
3
|
+
*
|
|
4
|
+
* Companion to `cross-engine-append-ordering.test.ts` which carries the
|
|
5
|
+
* advertisement-shape probes. This file exercises the canonical cross-engine
|
|
6
|
+
* append-ordering behavior specified by `spec/v1/channels-and-reducers.md`
|
|
7
|
+
* §"Cross-engine ordering" via the host-extension test seams:
|
|
8
|
+
*
|
|
9
|
+
* POST /v1/host/sample/test/cross-engine/append — single engine append
|
|
10
|
+
* GET /v1/host/sample/test/cross-engine/read — read ordered sequence
|
|
11
|
+
* POST /v1/host/sample/test/cross-engine/reset — clear log
|
|
12
|
+
*
|
|
13
|
+
* The seam is conformance-only (host-extension namespace), gated on the
|
|
14
|
+
* host's `OPENWOP_TEST_CROSS_ENGINE_HARNESS=true` env var. The seam itself
|
|
15
|
+
* is OPTIONAL — hosts that don't expose it soft-skip; hosts that DO expose
|
|
16
|
+
* it MUST honor the §B contract:
|
|
17
|
+
*
|
|
18
|
+
* 1. Multiple engines appending concurrently to the same channelId
|
|
19
|
+
* converge to a single globally-ordered linearization on read.
|
|
20
|
+
* 2. Per-engine order is preserved within each engine's local sequence
|
|
21
|
+
* (writes from engine A appear in A's submission order, ditto B).
|
|
22
|
+
* 3. The host's advertised `orderingModel` (lamport / vector-clock /
|
|
23
|
+
* global-sequencer) determines the cross-engine merge semantics.
|
|
24
|
+
* 4. Read after partition heal converges to the same total order
|
|
25
|
+
* regardless of which engine's view we read from.
|
|
26
|
+
*
|
|
27
|
+
* @see RFCS/0036-multi-region-and-cross-engine-guarantees.md §B
|
|
28
|
+
* @see spec/v1/channels-and-reducers.md §"Cross-engine ordering"
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { describe, it, expect } from 'vitest';
|
|
32
|
+
import { driver } from '../lib/driver.js';
|
|
33
|
+
|
|
34
|
+
/** True when no host base URL is configured in the environment. */
const HTTP_SKIP = !process.env['OPENWOP_BASE_URL'];

/** Shape of one log entry as returned by the cross-engine harness seam. */
interface AppendEntry {
  /** Engine identifier echoed back for the append. */
  engineId: string;
  /** Lamport clock value the host assigned to the entry. */
  lamport: number;
  /** Sequence number reported by the host (presumably the log position — confirm against the seam). */
  seq: number;
  /** Opaque payload that was appended. */
  value: unknown;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Submits one append to the cross-engine harness seam.
 *
 * @param engineId  engine on whose behalf the append is made
 * @param channelId channel the value is appended to
 * @param value     payload to append
 * @param lamport   optional Lamport hint; only sent when provided
 * @returns the HTTP status, plus the response body as `entry` when the host answered 200
 */
async function appendEntry(
  engineId: string,
  channelId: string,
  value: unknown,
  lamport?: number,
): Promise<{ status: number; entry?: AppendEntry }> {
  // The hint key is left out entirely (not sent as undefined) when no hint is given.
  const payload: Record<string, unknown> = {
    engineId,
    channelId,
    value,
    ...(lamport === undefined ? {} : { lamport }),
  };
  const response = await driver.post('/v1/host/sample/test/cross-engine/append', payload);
  const status = response.status;
  return status === 200 ? { status, entry: response.json as AppendEntry } : { status };
}
|
|
57
|
+
|
|
58
|
+
/**
 * Reads the ordered entry sequence for a channel from the harness seam.
 *
 * @param channelId channel to read; URL-encoded into the query string
 * @returns the HTTP status and the `entries` field of the body on 200, otherwise an empty list
 */
async function readEntries(channelId: string): Promise<{ status: number; entries: AppendEntry[] }> {
  const query = `channelId=${encodeURIComponent(channelId)}`;
  const response = await driver.get(`/v1/host/sample/test/cross-engine/read?${query}`);
  const status = response.status;
  if (status !== 200) {
    return { status, entries: [] };
  }
  const payload = response.json as { entries: AppendEntry[] };
  return { status, entries: payload.entries };
}
|
|
65
|
+
|
|
66
|
+
/**
 * Asks the harness seam to reset its log.
 *
 * @returns the HTTP status of the reset call (callers treat 404 as "seam not exposed")
 */
async function resetLog(): Promise<number> {
  const { status } = await driver.post('/v1/host/sample/test/cross-engine/reset', {});
  return status;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Behavioral probes for cross-engine append ordering (RFC 0036 §B), driven
 * through the host-extension harness seam. Each probe resets the log first and
 * skips when the reset endpoint answers 404 (host does not expose the seam).
 */
describe.skipIf(HTTP_SKIP)('cross-engine-append-behavior: §B cross-engine ordering (RFC 0036)', () => {
  it('interleaved appends from two engines converge to a single globally-ordered sequence', async (ctx) => {
    const resetStatus = await resetLog();
    if (resetStatus === 404) {
      ctx.skip(); // host doesn't expose the cross-engine harness seam
      return;
    }
    expect(resetStatus).toBe(200);

    const ch = 'channel-A';

    // Engine A: 3 appends. Engine B: 2 appends. Interleaved.
    const a1 = await appendEntry('engine-A', ch, 'a-1');
    const b1 = await appendEntry('engine-B', ch, 'b-1');
    const a2 = await appendEntry('engine-A', ch, 'a-2');
    const a3 = await appendEntry('engine-A', ch, 'a-3');
    const b2 = await appendEntry('engine-B', ch, 'b-2');

    for (const r of [a1, b1, a2, a3, b2]) {
      expect(r.status).toBe(200);
      expect(r.entry).toBeDefined();
    }

    const read = await readEntries(ch);
    expect(read.status).toBe(200);

    expect(
      read.entries.length,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering"',
        'all appends across all engines MUST appear in the linearized read',
      ),
    ).toBe(5);

    // Per-engine order MUST be preserved within each engine's submissions.
    const engineAEntries = read.entries.filter((e) => e.engineId === 'engine-A').map((e) => e.value);
    const engineBEntries = read.entries.filter((e) => e.engineId === 'engine-B').map((e) => e.value);
    expect(
      engineAEntries,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering"',
        'engine-A submissions MUST appear in submission order within the linearization',
      ),
    ).toEqual(['a-1', 'a-2', 'a-3']);
    expect(
      engineBEntries,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering"',
        'engine-B submissions MUST appear in submission order within the linearization',
      ),
    ).toEqual(['b-1', 'b-2']);
  });

  it('lamport clocks monotonically advance across engines', async (ctx) => {
    const resetStatus = await resetLog();
    if (resetStatus === 404) {
      ctx.skip();
      return;
    }
    expect(resetStatus).toBe(200);

    const ch = 'channel-B';
    const a1 = await appendEntry('engine-A', ch, 'a-1');
    const b1 = await appendEntry('engine-B', ch, 'b-1');
    const a2 = await appendEntry('engine-A', ch, 'a-2');

    // `entry` is only populated on a 200, so these also catch failed appends.
    expect(a1.entry?.lamport).toBeDefined();
    expect(b1.entry?.lamport).toBeDefined();
    expect(a2.entry?.lamport).toBeDefined();

    // Lamport invariant: each subsequent append on the same shared
    // channel MUST have strictly-higher clock than the previous.
    expect(
      a2.entry!.lamport > b1.entry!.lamport && b1.entry!.lamport > a1.entry!.lamport,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering" — Lamport',
        'lamport clocks MUST be strictly monotonic on the shared channel',
      ),
    ).toBe(true);
  });

  it('lamport hint from engine A advances engine B past it', async (ctx) => {
    const resetStatus = await resetLog();
    if (resetStatus === 404) {
      ctx.skip();
      return;
    }
    expect(resetStatus).toBe(200);

    const ch = 'channel-C';
    // Engine A appends, gets lamport L. Engine B then appends with
    // lamport hint == L (proxy for "B saw A's clock at L"); B's
    // resulting clock MUST be > L per the lamport receive rule
    // max(local, incoming) + 1.
    const a1 = await appendEntry('engine-A', ch, 'a-1');
    expect(a1.status).toBe(200);
    const seen = a1.entry!.lamport;
    const b1 = await appendEntry('engine-B', ch, 'b-1', seen);
    expect(b1.status).toBe(200);
    expect(
      b1.entry!.lamport > seen,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering" — Lamport receive rule',
        'when engine B sees engine A\'s clock at L, B\'s next append MUST have clock > L',
      ),
    ).toBe(true);
  });

  it('linearization is deterministic — same appends → same total order', async (ctx) => {
    const resetStatus = await resetLog();
    if (resetStatus === 404) {
      ctx.skip();
      return;
    }
    expect(resetStatus).toBe(200);

    const ch = 'channel-D';
    const a1 = await appendEntry('engine-A', ch, 'a-1');
    const b1 = await appendEntry('engine-B', ch, 'b-1');
    const a2 = await appendEntry('engine-A', ch, 'a-2');

    // Without these checks a host that rejects every append would produce two
    // empty reads, and the equality below would pass vacuously.
    for (const r of [a1, b1, a2]) {
      expect(r.status).toBe(200);
    }

    const r1 = await readEntries(ch);
    const r2 = await readEntries(ch);
    expect(r1.status).toBe(200);
    expect(r2.status).toBe(200);
    expect(
      r1.entries.length,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering"',
        'all appends across all engines MUST appear in the linearized read',
      ),
    ).toBe(3);
    expect(
      r1.entries.map((e) => `${e.engineId}:${String(e.value)}`),
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering" — determinism',
        'two reads MUST produce the same linearization (deterministic merge)',
      ),
    ).toEqual(r2.entries.map((e) => `${e.engineId}:${String(e.value)}`));
  });
});
|
|
@@ -50,11 +50,18 @@ describe('cross-host-traceparent-propagation: behavioral (RFC 0040 §B)', () =>
|
|
|
50
50
|
// the format `00-{traceId}-{spanId}-{flags}` per W3C tracecontext.
|
|
51
51
|
// Until the peer harness lands, the assertion is surfaced as `todo` so
|
|
52
52
|
// test reporters track the gap rather than reporting a vacuous PASS.
|
|
53
|
-
|
|
53
|
+
// Marked out of stable profile via RFC 0042 §B (experimental tier):
|
|
54
|
+
// RFC 0040 remains Active. Hosts that wire Phase 3 cross-host causation
|
|
55
|
+
// before RFC 0040 graduates SHOULD advertise
|
|
56
|
+
// `multiAgent.executionModel.tier: 'experimental'` per RFC 0042 §A
|
|
57
|
+
// until cross-host evidence drives the promotion. Path-to-runnable
|
|
58
|
+
// requires the MCP peer harness (OPENWOP_MCP_REAL_SERVER_URL) +
|
|
59
|
+
// inbound-header recorder; flips to a real `it()` on first non-steward
|
|
60
|
+
// Phase 3 host advertising matching capabilities.
|
|
61
|
+
it.skip('Phase 3 host MUST inject parent run\'s traceparent into outbound MCP requests — out of stable profile via RFC 0042');
|
|
54
62
|
|
|
55
|
-
//
|
|
56
|
-
//
|
|
57
|
-
//
|
|
58
|
-
|
|
59
|
-
it.todo('Phase 3 host MUST inject parent run\'s traceparent into outbound A2A messages');
|
|
63
|
+
// Same routing — out of stable profile via RFC 0042 §B until RFC 0040
|
|
64
|
+
// graduates to Accepted; behavioral A2A test seam contract still to be
|
|
65
|
+
// designed alongside the corresponding peer harness.
|
|
66
|
+
it.skip('Phase 3 host MUST inject parent run\'s traceparent into outbound A2A messages — out of stable profile via RFC 0042');
|
|
60
67
|
});
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cross-workspace-isolation — RFC 0048 §D verification.
|
|
3
|
+
*
|
|
4
|
+
* Status: DRAFT. RFC 0048 (tenant·workspace·principal identity model) is
|
|
5
|
+
* `Draft`.
|
|
6
|
+
*
|
|
7
|
+
* What this scenario asserts:
|
|
8
|
+
* 1. Run-ownership echo shape — when a readable run snapshot carries
|
|
9
|
+
* `owner`, it MUST include a non-empty `tenant` (RFC 0048 §C).
|
|
10
|
+
* 2. Cross-workspace isolation MUST-NOT (§D) — when the host exposes the
|
|
11
|
+
* optional `POST /v1/host/sample/identity/cross-workspace-read` seam
|
|
12
|
+
* (a principal scoped to workspace A attempts to read a run owned by
|
|
13
|
+
* workspace B), the read MUST fail closed with `run_forbidden` (or a
|
|
14
|
+
* `404`/`403` that does not leak the other workspace's run contents).
|
|
15
|
+
*
|
|
16
|
+
* Hosts without the seam soft-skip the isolation probe (404). The
|
|
17
|
+
* advertisement/ownership-shape assertion still runs.
|
|
18
|
+
*
|
|
19
|
+
* @see RFCS/0048-tenant-workspace-principal-identity-model.md
|
|
20
|
+
* @see spec/v1/auth.md §"Identity claims — tenant · workspace · principal"
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { describe, it, expect } from 'vitest';
|
|
24
|
+
import { driver } from '../lib/driver.js';
|
|
25
|
+
|
|
26
|
+
/** Error codes accepted as a fail-closed answer to a cross-workspace read. */
const ISOLATION_CODES: ReadonlySet<string> = new Set<string>([
  'run_forbidden',
  'not_found',
]);

/** Ownership triple echoed on a run snapshot; every member is optional on the wire. */
interface OwnerTriple {
  principal?: string;
  workspace?: string;
  tenant?: string;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Shape probe: when the sample-run seam returns an `owner`, its `tenant`
 * must be a non-empty string. A 404 from the seam or an absent `owner`
 * ends the probe without asserting anything.
 */
describe('cross-workspace-isolation: run-ownership echo shape (RFC 0048 §C)', () => {
  it('owner, when present on a run snapshot, carries a non-empty tenant', async () => {
    // Best-effort: probe a sample run if the host exposes one; otherwise skip.
    const response = await driver.get('/v1/host/sample/identity/owned-run');
    if (response.status !== 404) {
      const snapshot = response.json as { owner?: OwnerTriple } | undefined;
      const owner = snapshot?.owner;
      // An omitted owner is valid (single-tenant host), so only check when present.
      if (owner !== undefined) {
        const tenantOk = typeof owner.tenant === 'string' && owner.tenant.length > 0;
        expect(
          tenantOk,
          driver.describe('RFC 0048 §C', 'RunSnapshot.owner MUST carry a non-empty tenant when present'),
        ).toBe(true);
      }
    }
  });
});
|
|
47
|
+
|
|
48
|
+
/**
 * Isolation probe: posts to the optional cross-workspace-read seam and
 * requires a fail-closed client error whose `error` code is one of
 * ISOLATION_CODES. A 404 is treated as "seam unwired" and ends the probe.
 */
describe('cross-workspace-isolation: a principal MUST NOT read another workspace\'s run (RFC 0048 §D)', () => {
  it('cross-workspace read fails closed with run_forbidden', async () => {
    // Seam contract: a principal scoped to workspace A requests a run owned
    // by workspace B. The host MUST refuse rather than return B's run.
    const res = await driver.post('/v1/host/sample/identity/cross-workspace-read', {});
    if (res.status === 404) return; // seam unwired — soft-skip

    const statusRule = driver.describe(
      'spec/v1/auth.md §Identity claims',
      'a cross-workspace read MUST fail closed (4xx), never return the other workspace\'s run',
    );
    expect(res.status, statusRule).toBeGreaterThanOrEqual(400);
    // The stated contract is a 4xx refusal; a 5xx is a server fault, not a
    // deliberate fail-closed answer, so bound the range from above as well.
    expect(res.status, statusRule).toBeLessThan(500);

    const code = (res.json as { error?: string } | undefined)?.error;
    expect(
      code !== undefined && ISOLATION_CODES.has(code),
      driver.describe(
        'spec/v1/rest-endpoints.md run_forbidden',
        `error MUST be one of {${[...ISOLATION_CODES].join(', ')}} (fail-closed, no existence leak), got: ${code ?? '(absent)'}`,
      ),
    ).toBe(true);
  });
});
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* deadletter-capability-shape — RFC 0053 §A advertisement-shape verification.
|
|
3
|
+
*
|
|
4
|
+
* Status: DRAFT. RFC 0053 (dead-letter routing & failure sinks) is `Draft`.
|
|
5
|
+
* The `capabilities.deadLetter` block has landed in
|
|
6
|
+
* `schemas/capabilities.schema.json`.
|
|
7
|
+
*
|
|
8
|
+
* Always runs (shape-only): when the host advertises
|
|
9
|
+
* `capabilities.deadLetter`, its fields MUST be well-formed.
|
|
10
|
+
*
|
|
11
|
+
* What this scenario asserts:
|
|
12
|
+
* 1. `capabilities.deadLetter` is either absent or a well-formed object.
|
|
13
|
+
* 2. When `supported: true`, `retentionDays` (when present) is an integer ≥ 1
|
|
14
|
+
* (RFC 0053 §A).
|
|
15
|
+
*
|
|
16
|
+
* @see RFCS/0053-dead-letter-routing-and-failure-sinks.md
|
|
17
|
+
* @see spec/v1/host-capabilities.md §host.deadLetter
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { describe, it, expect } from 'vitest';
|
|
21
|
+
import { driver } from '../lib/driver.js';
|
|
22
|
+
|
|
23
|
+
/** The `capabilities.deadLetter` advertisement block, as far as this scenario reads it. */
interface DiscoveryDeadLetter {
  retentionDays?: number;
  supported?: boolean;
}

/** Minimal view of the discovery document: only the dead-letter capability path. */
interface DiscoveryDoc {
  capabilities?: { deadLetter?: DiscoveryDeadLetter };
}

/**
 * Fetches the discovery document and extracts the dead-letter advertisement.
 *
 * @returns the advertised block, or `null` when the document or the block is absent
 */
async function readDeadLetter(): Promise<DiscoveryDeadLetter | null> {
  const { json } = await driver.get('/.well-known/openwop');
  const discovery = json as DiscoveryDoc | undefined;
  const advertised = discovery?.capabilities?.deadLetter;
  return advertised ?? null;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Shape-only probes of `capabilities.deadLetter`. Both end without asserting
 * when there is nothing advertised to check.
 */
describe('deadletter-capability-shape: advertisement shape (RFC 0053 §A)', () => {
  it('capabilities.deadLetter is either absent or well-formed', async () => {
    const advert = await readDeadLetter();
    // Not advertising deadLetter at all is valid; only check the block when present.
    if (advert !== null) {
      const rule = driver.describe(
        'capabilities.schema.json §deadLetter',
        'capabilities.deadLetter.supported MUST be a boolean when deadLetter is advertised',
      );
      expect(typeof advert.supported, rule).toBe('boolean');
    }
  });

  it('retentionDays is an integer >= 1 when present + supported', async () => {
    const advert = await readDeadLetter();
    // Only a supported advertisement that actually carries retentionDays is checked.
    const days = advert?.supported ? advert.retentionDays : undefined;
    if (days === undefined) return;
    const wellFormed = Number.isInteger(days) && days >= 1;
    expect(
      wellFormed,
      driver.describe('RFC 0053 §A', `capabilities.deadLetter.retentionDays MUST be an integer >= 1, got: ${days}`),
    ).toBe(true);
  });
});
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* deadletter-retry-exhaustion — RFC 0053 §C behavioral verification.
|
|
3
|
+
*
|
|
4
|
+
* Status: DRAFT. RFC 0053 (dead-letter routing & failure sinks) is `Draft`.
|
|
5
|
+
*
|
|
6
|
+
* Capability-gated: skips when the host does not advertise
|
|
7
|
+
* `capabilities.deadLetter.supported = true`.
|
|
8
|
+
*
|
|
9
|
+
* What this scenario asserts (via the optional
|
|
10
|
+
* `POST /v1/host/sample/deadletter/exhaust` test seam, which drives a node
|
|
11
|
+
* that deterministically exhausts a short retry policy):
|
|
12
|
+
* 1. Retry exhaustion → `run.dead_lettered` — the host emits the event
|
|
13
|
+
* carrying `{ runId, reason, attempts }` (RFC 0053 §C.1).
|
|
14
|
+
* 2. Fork-eligibility — the dead-lettered run remains forkable per RFC 0011
|
|
15
|
+
* within the retention window (RFC 0053 §C.2).
|
|
16
|
+
*
|
|
17
|
+
* Hosts without the seam soft-skip the behavioral probes (404). Retention
|
|
18
|
+
* purge is part of the deferred retention scenario (needs a clock seam).
|
|
19
|
+
*
|
|
20
|
+
* @see RFCS/0053-dead-letter-routing-and-failure-sinks.md
|
|
21
|
+
* @see spec/v1/host-capabilities.md §host.deadLetter
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { describe, it, expect } from 'vitest';
|
|
25
|
+
import { driver } from '../lib/driver.js';
|
|
26
|
+
|
|
27
|
+
/** Minimal view of the discovery document: only the dead-letter `supported` flag. */
interface DiscoveryDoc {
  capabilities?: {
    deadLetter?: { supported?: boolean };
  };
}

/**
 * Reports whether the host advertises dead-letter support.
 *
 * @returns `true` only when `capabilities.deadLetter.supported` is exactly `true`
 */
async function deadLetterSupported(): Promise<boolean> {
  const response = await driver.get('/.well-known/openwop');
  const discovery = response.json as DiscoveryDoc | undefined;
  const flag = discovery?.capabilities?.deadLetter?.supported;
  return flag === true;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Behavioral probes driven through the optional dead-letter exhaust seam.
 * Each probe ends without asserting when the capability is not advertised
 * or when the seam answers 404.
 */
describe('deadletter-retry-exhaustion: retry exhaustion → dead-lettered + fork-eligible (RFC 0053 §C)', () => {
  it('a retry-exhausted run emits run.dead_lettered with attempts', async () => {
    const advertised = await deadLetterSupported();
    if (!advertised) return; // capability-gated

    const response = await driver.post('/v1/host/sample/deadletter/exhaust', { scenario: 'exhaust-retries' });
    if (response.status === 404) return; // seam unwired — soft-skip

    const payload = response.json as
      | { event?: { type?: string; payload?: { attempts?: number; runId?: string } } }
      | undefined;
    const event = payload?.event;
    expect(
      event?.type,
      driver.describe('RFC 0053 §C.1', 'retry exhaustion MUST emit a run.dead_lettered event'),
    ).toBe('run.dead_lettered');

    const attempts = event?.payload?.attempts;
    const attemptsOk = typeof attempts === 'number' && attempts >= 1;
    expect(
      attemptsOk,
      driver.describe('RFC 0053 §C.1', 'run.dead_lettered MUST carry the total attempts (>= 1)'),
    ).toBe(true);
  });

  it('the dead-lettered run is fork-eligible (RFC 0011)', async () => {
    const advertised = await deadLetterSupported();
    if (!advertised) return; // capability-gated

    const response = await driver.post('/v1/host/sample/deadletter/exhaust', { scenario: 'fork-after-dead-letter' });
    if (response.status === 404) return; // seam unwired — soft-skip

    const eligibility = (response.json as { forkEligible?: boolean } | undefined)?.forkEligible;
    expect(
      eligibility,
      driver.describe('RFC 0053 §C.2', 'a dead-lettered run MUST remain fork-eligible within the retention window'),
    ).toBe(true);
  });
});
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* experimental-tier-shape — RFC 0042 §A + §B + §D advertisement-shape probes.
|
|
3
|
+
*
|
|
4
|
+
* RFC 0042 lands the audit's "Active RFC → experimental carve-out" pattern as
|
|
5
|
+
* an optional `tier ∈ {"stable", "experimental"}` field on capability
|
|
6
|
+
* advertisements, paired with a required `experimentalUntil` ISO-8601 sunset
|
|
7
|
+
* date when `tier === "experimental"`. This scenario asserts:
|
|
8
|
+
*
|
|
9
|
+
* 1. Schema discipline: when `multiAgent.executionModel` advertises `tier:
|
|
10
|
+
* "experimental"`, `experimentalUntil` MUST be present + match
|
|
11
|
+
* `YYYY-MM-DD` + be ≤ 365 days in the future.
|
|
12
|
+
* 2. Default-mode soft-skip routing: scenarios consuming
|
|
13
|
+
* `experimentalGate()` honor the tier — the helper returns `false`
|
|
14
|
+
* under default mode for `tier: "experimental"` capabilities so the
|
|
15
|
+
* scenario soft-skips with a dedicated log line.
|
|
16
|
+
* 3. Sunset detection: a host advertising `experimentalUntil` in the
|
|
17
|
+
* past MUST fail discovery validation (host responsibility — the
|
|
18
|
+
* conformance probe simply asserts that the date format and bound
|
|
19
|
+
* hold for hosts that DO advertise correctly).
|
|
20
|
+
*
|
|
21
|
+
 * The scenario is split into two describe blocks (§A schema discipline, which also carries the §B sunset checks, and §D gate routing) per the RFC 0042 §D
|
|
22
|
+
* "Conformance suite changes" contract.
|
|
23
|
+
*
|
|
24
|
+
* @see RFCS/0042-experimental-capability-tier.md
|
|
25
|
+
* @see schemas/capabilities.schema.json §multiAgent.executionModel.tier
|
|
26
|
+
* @see conformance/src/lib/behavior-gate.ts experimentalGate()
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { describe, it, expect } from 'vitest';
|
|
30
|
+
import { driver } from '../lib/driver.js';
|
|
31
|
+
import { experimentalGate } from '../lib/behavior-gate.js';
|
|
32
|
+
|
|
33
|
+
/** True when no host base URL is configured in the environment. */
const HTTP_SKIP = !process.env['OPENWOP_BASE_URL'];

/**
 * Minimal view of the discovery document: only the multi-agent execution-model
 * advertisement. Fields are `unknown` because the probes validate their types.
 */
interface DiscoveryDoc {
  capabilities?: {
    multiAgent?: {
      executionModel?: {
        experimentalUntil?: unknown;
        tier?: unknown;
        supported?: unknown;
      };
    };
  };
}

/**
 * Fetches the discovery document.
 *
 * @returns the parsed body on HTTP 200; `null` on any other status or when the request throws
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
  } catch {
    // Deliberately best-effort: callers treat an unreachable host like a missing document.
    return null;
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Advertisement-shape probes for `multiAgent.executionModel.tier` and
 * `experimentalUntil`. Every probe skips when the execution model is not
 * advertised (or is not advertised as experimental, where that is required).
 */
describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC 0042 §A)', () => {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  /**
   * Interprets `value` as a `YYYY-MM-DD` date at midnight UTC.
   *
   * @returns epoch milliseconds, or `null` when `value` is not a string in that
   *   format or does not name a real calendar date
   */
  const parseSunsetDate = (value: unknown): number | null => {
    if (typeof value !== 'string' || !/^\d{4}-\d{2}-\d{2}$/.test(value)) return null;
    const parsed = new Date(`${value}T00:00:00Z`);
    if (Number.isNaN(parsed.getTime())) return null;
    // The runtime may roll an impossible day (e.g. 02-30) over into the next
    // month instead of rejecting it; the round-trip catches that.
    return parsed.toISOString().slice(0, 10) === value ? parsed.getTime() : null;
  };

  it('multiAgent.executionModel.tier (when present) MUST be one of {stable, experimental}', async (ctx) => {
    const d = await readDiscovery();
    const em = d?.capabilities?.multiAgent?.executionModel;
    if (em === undefined) {
      ctx.skip();
      return;
    }
    if (em.tier === undefined) {
      ctx.skip(); // tier is optional with default 'stable'
      return;
    }
    expect(
      em.tier === 'stable' || em.tier === 'experimental',
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §A',
        'multiAgent.executionModel.tier MUST be one of the canonical enum values',
      ),
    ).toBe(true);
  });

  it('when tier === "experimental", experimentalUntil MUST be present + valid date', async (ctx) => {
    const d = await readDiscovery();
    const em = d?.capabilities?.multiAgent?.executionModel;
    if (em === undefined || em.tier !== 'experimental') {
      ctx.skip();
      return;
    }

    expect(
      typeof em.experimentalUntil,
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        'when tier is "experimental", experimentalUntil MUST be present (the §B sunset-rule contract)',
      ),
    ).toBe('string');

    const dateStr = em.experimentalUntil as string;
    expect(
      /^\d{4}-\d{2}-\d{2}$/.test(dateStr),
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        'experimentalUntil MUST match YYYY-MM-DD',
      ),
    ).toBe(true);

    const parsed = new Date(`${dateStr}T00:00:00Z`);
    expect(
      !Number.isNaN(parsed.getTime()),
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        'experimentalUntil MUST parse as a valid ISO-8601 date',
      ),
    ).toBe(true);

    // A non-NaN parse is not sufficient: an impossible day such as 2025-02-30
    // can be accepted and rolled over. Require the parsed date to round-trip.
    expect(
      parsed.toISOString().slice(0, 10),
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        'experimentalUntil MUST name a real calendar date (it must round-trip through date parsing unchanged)',
      ),
    ).toBe(dateStr);
  });

  it('experimentalUntil MUST be ≤ 365 days in the future (sunset bound)', async (ctx) => {
    const d = await readDiscovery();
    const em = d?.capabilities?.multiAgent?.executionModel;
    if (em === undefined || em.tier !== 'experimental') {
      ctx.skip();
      return;
    }
    const target = parseSunsetDate(em.experimentalUntil);
    if (target === null) {
      ctx.skip(); // shape probe above will fail; don't double-fail
      return;
    }
    const daysAhead = (target - Date.now()) / MS_PER_DAY;
    expect(
      daysAhead <= 365,
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        `experimentalUntil MUST be ≤ 365 days from now (got ${Math.floor(daysAhead)} days; advertised ${em.experimentalUntil})`,
      ),
    ).toBe(true);
  });

  it('sunset detection: experimentalUntil in the past is non-conformant', async (ctx) => {
    const d = await readDiscovery();
    const em = d?.capabilities?.multiAgent?.executionModel;
    if (em === undefined || em.tier !== 'experimental') {
      ctx.skip();
      return;
    }
    const target = parseSunsetDate(em.experimentalUntil);
    if (target === null) {
      ctx.skip(); // shape probe above will fail; don't double-fail
      return;
    }
    // NOTE(review): the comparison uses midnight UTC of the advertised day, so
    // the sunset day itself already counts as "past" — confirm against RFC 0042 §B.
    expect(
      target >= Date.now(),
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        `experimentalUntil MUST NOT be in the past (advertised ${em.experimentalUntil}; host MUST either flip tier to stable, retract the advertisement, or re-advertise with a future date + open deprecation RFC)`,
      ),
    ).toBe(true);
  });
});
|
|
158
|
+
|
|
159
|
+
/**
 * Routing probes against the imported `experimentalGate` helper. Each probe
 * removes one environment variable for the duration of the call and puts the
 * previous value back afterwards.
 */
describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §D experimentalGate helper routing (RFC 0042 §D)', () => {
  /** Runs `body` with `name` removed from the environment, restoring any prior value afterwards. */
  const withEnvUnset = (name: string, body: () => void): void => {
    const saved = process.env[name];
    delete process.env[name];
    try {
      body();
    } finally {
      if (saved !== undefined) process.env[name] = saved;
    }
  };

  it('experimentalGate returns false for tier="experimental" without OPENWOP_REQUIRE_EXPERIMENTAL', () => {
    // Helper-level behavioral probe — no host needed, this is a pure
    // function-routing assertion against the imported helper.
    withEnvUnset('OPENWOP_REQUIRE_EXPERIMENTAL', () => {
      // NOTE(review): the fixed sunset date will eventually lie in the past;
      // confirm experimentalGate does not act on an expired date.
      const gateOpen = experimentalGate('test-profile', true, 'experimental', '2027-05-22');
      const rule = driver.describe(
        'RFCS/0042-experimental-capability-tier.md §D',
        'default mode + tier="experimental" MUST soft-skip — helper returns false',
      );
      expect(gateOpen, rule).toBe(false);
    });
  });

  it('experimentalGate routes through behaviorGate when tier === undefined or "stable"', () => {
    withEnvUnset('OPENWOP_REQUIRE_BEHAVIOR', () => {
      // Stable + advertised → proceed.
      const stableAdvertised = experimentalGate('test-stable', true, 'stable');
      expect(stableAdvertised).toBe(true);
      const defaultTierAdvertised = experimentalGate('test-stable-undef', true, undefined);
      expect(defaultTierAdvertised).toBe(true);
      // Stable + NOT advertised, default mode → skip (returns false, no throw).
      const stableUnadvertised = experimentalGate('test-not-adv', false, 'stable');
      expect(stableUnadvertised).toBe(false);
    });
  });
});
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-capability-shape — RFC 0056 §A. The `capabilities.feedback`
|
|
3
|
+
* advertisement block is either absent or a well-formed object.
|
|
4
|
+
*
|
|
5
|
+
* Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage
|
|
6
|
+
* lives in the sibling `feedback-*.test.ts` scenarios, gated on
|
|
7
|
+
* `capabilities.feedback.supported`.
|
|
8
|
+
*
|
|
9
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §A
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { describe, it, expect } from 'vitest';
|
|
13
|
+
import { driver } from '../lib/driver.js';
|
|
14
|
+
import { readFeedbackCap } from '../lib/feedback.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Shape-only probe of `capabilities.feedback`: `supported` must be a boolean,
 * and `targets` / `signals`, when they are arrays, may only hold known values.
 */
describe('feedback-capability-shape: advertisement (RFC 0056 §A)', () => {
  it('capabilities.feedback is absent or a well-formed object', async () => {
    const advert = await readFeedbackCap();
    if (advert === null) return; // not advertised — valid

    const supportedRule = driver.describe(
      'capabilities.schema.json §feedback',
      'capabilities.feedback.supported MUST be a boolean when present',
    );
    expect(typeof advert.supported, supportedRule).toBe('boolean');

    // Non-array values are not inspected by this probe.
    const targets = Array.isArray(advert.targets) ? advert.targets : [];
    targets.forEach((target) => {
      expect(['run', 'event', 'node']).toContain(target);
    });

    const signals = Array.isArray(advert.signals) ? advert.signals : [];
    signals.forEach((signal) => {
      expect(['rating', 'correction', 'label', 'flag']).toContain(signal);
    });
  });
});
|