@openwop/openwop-conformance 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +25 -4
  4. package/api/openapi.yaml +371 -0
  5. package/coverage.md +31 -4
  6. package/fixtures/conformance-phase4-nondet-tool.json +53 -0
  7. package/fixtures/conformance-phase4-replay-divergence.json +40 -0
  8. package/fixtures.md +5 -3
  9. package/package.json +1 -1
  10. package/schemas/README.md +4 -0
  11. package/schemas/annotation-create.schema.json +37 -0
  12. package/schemas/annotation.schema.json +56 -0
  13. package/schemas/capabilities.schema.json +191 -3
  14. package/schemas/credential-reference.schema.json +21 -0
  15. package/schemas/node-pack-manifest.schema.json +112 -1
  16. package/schemas/run-diff-response.schema.json +64 -0
  17. package/schemas/run-event-payloads.schema.json +104 -2
  18. package/schemas/run-event.schema.json +8 -1
  19. package/schemas/run-snapshot.schema.json +11 -0
  20. package/src/lib/behavior-gate.ts +51 -0
  21. package/src/lib/driver.ts +13 -1
  22. package/src/lib/feedback.ts +31 -0
  23. package/src/lib/saml-idp.ts +179 -0
  24. package/src/scenarios/approval-gate-events.test.ts +61 -0
  25. package/src/scenarios/approval-gate-flow.test.ts +68 -0
  26. package/src/scenarios/auth-saml-profile.test.ts +119 -0
  27. package/src/scenarios/auth-scim-profile.test.ts +65 -0
  28. package/src/scenarios/authorization-fail-closed.test.ts +80 -0
  29. package/src/scenarios/authorization-roles-shape.test.ts +83 -0
  30. package/src/scenarios/connector-manifest-validity.test.ts +142 -0
  31. package/src/scenarios/credential-payload-redaction.test.ts +93 -0
  32. package/src/scenarios/credentials-capability-shape.test.ts +90 -0
  33. package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
  34. package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
  35. package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
  36. package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
  37. package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
  38. package/src/scenarios/experimental-tier-shape.test.ts +192 -0
  39. package/src/scenarios/feedback-capability-shape.test.ts +35 -0
  40. package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
  41. package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
  42. package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
  43. package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
  44. package/src/scenarios/feedback-record-and-list.test.ts +32 -0
  45. package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
  46. package/src/scenarios/identity-owner-shape.test.ts +64 -0
  47. package/src/scenarios/multi-agent-confidence-escalation.test.ts +13 -12
  48. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
  49. package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
  50. package/src/scenarios/oauth-capability-shape.test.ts +97 -0
  51. package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
  52. package/src/scenarios/pack-registry-isolation.test.ts +108 -0
  53. package/src/scenarios/pack-registry-publish.test.ts +1 -1
  54. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
  55. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
  56. package/src/scenarios/redaction.test.ts +4 -1
  57. package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
  58. package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
  59. package/src/scenarios/run-diff.test.ts +143 -0
  60. package/src/scenarios/sandbox-capability-gate-respected.test.ts +7 -1
  61. package/src/scenarios/sandbox-memory-cap.test.ts +7 -5
  62. package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
  63. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +7 -1
  64. package/src/scenarios/sandbox-no-host-env-leak.test.ts +5 -1
  65. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +9 -1
  66. package/src/scenarios/sandbox-no-host-process-escape.test.ts +5 -1
  67. package/src/scenarios/sandbox-no-network-escape.test.ts +5 -1
  68. package/src/scenarios/sandbox-timeout-cap.test.ts +7 -5
  69. package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
  70. package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
  71. package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
  72. package/src/scenarios/spec-corpus-validity.test.ts +6 -3
@@ -0,0 +1,204 @@
1
+ /**
2
+ * cross-engine-append-behavior — RFC 0036 §B cross-engine append-ordering behavioral probe.
3
+ *
4
+ * Companion to `cross-engine-append-ordering.test.ts` which carries the
5
+ * advertisement-shape probes. This file exercises the canonical cross-engine
6
+ * append-ordering behavior specified by `spec/v1/channels-and-reducers.md`
7
+ * §"Cross-engine ordering" via the host-extension test seams:
8
+ *
9
+ * POST /v1/host/sample/test/cross-engine/append — single engine append
10
+ * GET /v1/host/sample/test/cross-engine/read — read ordered sequence
11
+ * POST /v1/host/sample/test/cross-engine/reset — clear log
12
+ *
13
+ * The seam is conformance-only (host-extension namespace), gated on the
14
+ * host's `OPENWOP_TEST_CROSS_ENGINE_HARNESS=true` env var. The seam itself
15
+ * is OPTIONAL — hosts that don't expose it soft-skip; hosts that DO expose
16
+ * it MUST honor the §B contract:
17
+ *
18
+ * 1. Multiple engines appending concurrently to the same channelId
19
+ * converge to a single globally-ordered linearization on read.
20
+ * 2. Per-engine order is preserved within each engine's local sequence
21
+ * (writes from engine A appear in A's submission order, ditto B).
22
+ * 3. The host's advertised `orderingModel` (lamport / vector-clock /
23
+ * global-sequencer) determines the cross-engine merge semantics.
24
+ * 4. Read after partition heal converges to the same total order
25
+ * regardless of which engine's view we read from.
26
+ *
27
+ * @see RFCS/0036-multi-region-and-cross-engine-guarantees.md §B
28
+ * @see spec/v1/channels-and-reducers.md §"Cross-engine ordering"
29
+ */
30
+
31
+ import { describe, it, expect } from 'vitest';
32
+ import { driver } from '../lib/driver.js';
33
+
34
/** True when `OPENWOP_BASE_URL` is unset or empty; used to skip the HTTP-backed suite in this file. */
const HTTP_SKIP = Boolean(process.env.OPENWOP_BASE_URL) === false;
35
+
36
/**
 * One entry of the cross-engine append log, in the shape the probes in this
 * file read back from the host's conformance-only append/read seams.
 */
interface AppendEntry {
  /** Identifier of the engine that submitted the append. */
  engineId: string;
  /** NOTE(review): presumably the entry's position in the log; no probe in this file reads it — confirm. */
  seq: number;
  /** Lamport clock value the host reports for this entry. */
  lamport: number;
  /** The appended payload; the probes only compare it for equality. */
  value: unknown;
}
42
+
43
/**
 * Submits a single append on behalf of `engineId` through the cross-engine
 * test seam.
 *
 * @param engineId  Engine the append is attributed to.
 * @param channelId Channel the value is appended to.
 * @param value     Payload to append.
 * @param lamport   Optional clock hint; the `lamport` key is only sent when a value is supplied.
 * @returns The HTTP status, plus the response body as `entry` when the status is 200.
 */
async function appendEntry(
  engineId: string,
  channelId: string,
  value: unknown,
  lamport?: number,
): Promise<{ status: number; entry?: AppendEntry }> {
  const payload: Record<string, unknown> =
    lamport === undefined ? { engineId, channelId, value } : { engineId, channelId, value, lamport };
  const response = await driver.post('/v1/host/sample/test/cross-engine/append', payload);
  const { status } = response;
  // The body is only interpreted as an entry on success.
  return status === 200 ? { status, entry: response.json as AppendEntry } : { status };
}
57
+
58
/**
 * Reads the linearized entry sequence for `channelId` through the
 * cross-engine test seam.
 *
 * @param channelId Channel to read; URL-encoded into the query string.
 * @returns The HTTP status and the entries array. `entries` is always an
 *   array: it is empty on any non-200 status, and also when a 200 body has no
 *   `entries` array, so callers fail on a described assertion rather than a
 *   TypeError on `undefined`.
 */
async function readEntries(channelId: string): Promise<{ status: number; entries: AppendEntry[] }> {
  const res = await driver.get(`/v1/host/sample/test/cross-engine/read?channelId=${encodeURIComponent(channelId)}`);
  if (res.status !== 200) {
    return { status: res.status, entries: [] };
  }
  const entries = (res.json as { entries?: unknown } | null | undefined)?.entries;
  return {
    status: res.status,
    entries: Array.isArray(entries) ? (entries as AppendEntry[]) : [],
  };
}
65
+
66
/**
 * Clears the host's cross-engine append log through the reset seam.
 *
 * @returns The HTTP status of the reset call; the probes treat 404 as "seam not exposed".
 */
async function resetLog(): Promise<number> {
  const { status } = await driver.post('/v1/host/sample/test/cross-engine/reset', {});
  return status;
}
70
+
71
// Behavioral probes for cross-engine append ordering. Each probe first resets
// the host's log; a 404 from the reset seam means the optional harness is not
// exposed, so the probe is skipped rather than failed.
describe.skipIf(HTTP_SKIP)('cross-engine-append-behavior: §B cross-engine ordering (RFC 0036)', () => {
  it('interleaved appends from two engines converge to a single globally-ordered sequence', async (ctx) => {
    const resetStatus = await resetLog();
    if (resetStatus === 404) {
      ctx.skip(); // host doesn't expose the cross-engine harness seam
      return;
    }
    expect(resetStatus).toBe(200);

    const ch = 'channel-A';

    // Engine A: 3 appends. Engine B: 2 appends. Interleaved.
    const a1 = await appendEntry('engine-A', ch, 'a-1');
    const b1 = await appendEntry('engine-B', ch, 'b-1');
    const a2 = await appendEntry('engine-A', ch, 'a-2');
    const a3 = await appendEntry('engine-A', ch, 'a-3');
    const b2 = await appendEntry('engine-B', ch, 'b-2');

    for (const r of [a1, b1, a2, a3, b2]) {
      expect(r.status).toBe(200);
      expect(r.entry).toBeDefined();
    }

    const read = await readEntries(ch);
    expect(read.status).toBe(200);

    expect(
      read.entries.length,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering"',
        'all appends across all engines MUST appear in the linearized read',
      ),
    ).toBe(5);

    // Per-engine order MUST be preserved within each engine's submissions.
    const engineAEntries = read.entries.filter((e) => e.engineId === 'engine-A').map((e) => e.value);
    const engineBEntries = read.entries.filter((e) => e.engineId === 'engine-B').map((e) => e.value);
    expect(
      engineAEntries,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering"',
        'engine-A submissions MUST appear in submission order within the linearization',
      ),
    ).toEqual(['a-1', 'a-2', 'a-3']);
    expect(
      engineBEntries,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering"',
        'engine-B submissions MUST appear in submission order within the linearization',
      ),
    ).toEqual(['b-1', 'b-2']);
  });

  it('lamport clocks monotonically advance across engines', async (ctx) => {
    const resetStatus = await resetLog();
    if (resetStatus === 404) {
      ctx.skip();
      return;
    }
    expect(resetStatus).toBe(200);

    const ch = 'channel-B';
    const a1 = await appendEntry('engine-A', ch, 'a-1');
    const b1 = await appendEntry('engine-B', ch, 'b-1');
    const a2 = await appendEntry('engine-A', ch, 'a-2');

    expect(a1.entry?.lamport).toBeDefined();
    expect(b1.entry?.lamport).toBeDefined();
    expect(a2.entry?.lamport).toBeDefined();

    // Lamport invariant: each subsequent append on the same shared
    // channel MUST have strictly-higher clock than the previous.
    expect(
      a2.entry!.lamport > b1.entry!.lamport && b1.entry!.lamport > a1.entry!.lamport,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering" — Lamport',
        'lamport clocks MUST be strictly monotonic on the shared channel',
      ),
    ).toBe(true);
  });

  it('lamport hint from engine A advances engine B past it', async (ctx) => {
    const resetStatus = await resetLog();
    if (resetStatus === 404) {
      ctx.skip();
      return;
    }
    expect(resetStatus).toBe(200);

    const ch = 'channel-C';
    // Engine A appends, gets lamport L. Engine B then appends with
    // lamport hint == L (proxy for "B saw A's clock at L"); B's
    // resulting clock MUST be > L per the lamport receive rule
    // max(local, incoming) + 1.
    const a1 = await appendEntry('engine-A', ch, 'a-1');
    expect(a1.status).toBe(200);
    const seen = a1.entry!.lamport;
    const b1 = await appendEntry('engine-B', ch, 'b-1', seen);
    expect(b1.status).toBe(200);
    expect(
      b1.entry!.lamport > seen,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering" — Lamport receive rule',
        'when engine B sees engine A\'s clock at L, B\'s next append MUST have clock > L',
      ),
    ).toBe(true);
  });

  it('linearization is deterministic — same appends → same total order', async (ctx) => {
    const resetStatus = await resetLog();
    if (resetStatus === 404) {
      ctx.skip();
      return;
    }
    expect(resetStatus).toBe(200);

    const ch = 'channel-D';
    // Array elements are evaluated left to right, so the appends stay sequential.
    const appends = [
      await appendEntry('engine-A', ch, 'a-1'),
      await appendEntry('engine-B', ch, 'b-1'),
      await appendEntry('engine-A', ch, 'a-2'),
    ];
    // Guard against a vacuous pass: if the appends failed, both reads would
    // return an empty list and trivially compare equal.
    for (const r of appends) {
      expect(r.status).toBe(200);
    }

    const r1 = await readEntries(ch);
    const r2 = await readEntries(ch);
    expect(r1.status).toBe(200);
    expect(r2.status).toBe(200);
    expect(
      r1.entries.length,
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering" — determinism',
        'all appends MUST be visible before the two linearizations are compared',
      ),
    ).toBe(3);
    expect(
      r1.entries.map((e) => `${e.engineId}:${String(e.value)}`),
      driver.describe(
        'channels-and-reducers.md §"Cross-engine ordering" — determinism',
        'two reads MUST produce the same linearization (deterministic merge)',
      ),
    ).toEqual(r2.entries.map((e) => `${e.engineId}:${String(e.value)}`));
  });
});
@@ -50,11 +50,18 @@ describe('cross-host-traceparent-propagation: behavioral (RFC 0040 §B)', () =>
50
50
  // the format `00-{traceId}-{spanId}-{flags}` per W3C tracecontext.
51
51
  // Until the peer harness lands, the assertion is surfaced as `skip` so
52
52
  // test reporters track the gap rather than reporting a vacuous PASS.
53
- it.todo('Phase 3 host MUST inject parent run\'s traceparent into outbound MCP requests');
53
+ // Marked out of stable profile via RFC 0042 §B (experimental tier):
54
+ // RFC 0040 remains Active. Hosts that wire Phase 3 cross-host causation
55
+ // before RFC 0040 graduates SHOULD advertise
56
+ // `multiAgent.executionModel.tier: 'experimental'` per RFC 0042 §A
57
+ // until cross-host evidence drives the promotion. Path-to-runnable
58
+ // requires the MCP peer harness (OPENWOP_MCP_REAL_SERVER_URL) +
59
+ // inbound-header recorder; flips to a real `it()` on first non-steward
60
+ // Phase 3 host advertising matching capabilities.
61
+ it.skip('Phase 3 host MUST inject parent run\'s traceparent into outbound MCP requests — out of stable profile via RFC 0042');
54
62
 
55
- // Behavioral assertion drives a workflow that dispatches an A2A message
56
- // via the host's `core.a2a.send` (or equivalent) node. The A2A peer
57
- // (configured via OPENWOP_A2A_REAL_PEER_URL) records inbound headers;
58
- // the test asserts `traceparent` is present + well-formed.
59
- it.todo('Phase 3 host MUST inject parent run\'s traceparent into outbound A2A messages');
63
+ // Same routing out of stable profile via RFC 0042 §B until RFC 0040
64
+ // graduates to Accepted; behavioral A2A test seam contract still to be
65
+ // designed alongside the corresponding peer harness.
66
+ it.skip('Phase 3 host MUST inject parent run\'s traceparent into outbound A2A messages — out of stable profile via RFC 0042');
60
67
  });
@@ -0,0 +1,72 @@
1
+ /**
2
+ * cross-workspace-isolation — RFC 0048 §D verification.
3
+ *
4
+ * Status: DRAFT. RFC 0048 (tenant·workspace·principal identity model) is
5
+ * `Draft`.
6
+ *
7
+ * What this scenario asserts:
8
+ * 1. Run-ownership echo shape — when a readable run snapshot carries
9
+ * `owner`, it MUST include a non-empty `tenant` (RFC 0048 §C).
10
+ * 2. Cross-workspace isolation MUST-NOT (§D) — when the host exposes the
11
+ * optional `POST /v1/host/sample/identity/cross-workspace-read` seam
12
+ * (a principal scoped to workspace A attempts to read a run owned by
13
+ * workspace B), the read MUST fail closed with `run_forbidden` (or a
14
+ * `404`/`403` that does not leak the other workspace's run contents).
15
+ *
16
+ * Hosts without the seam soft-skip the isolation probe (404). The
17
+ * ownership-shape assertion does not depend on that seam; it soft-skips only when its own `owned-run` seam returns 404 or the run carries no `owner`.
18
+ *
19
+ * @see RFCS/0048-tenant-workspace-principal-identity-model.md
20
+ * @see spec/v1/auth.md §"Identity claims — tenant · workspace · principal"
21
+ */
22
+
23
+ import { describe, it, expect } from 'vitest';
24
+ import { driver } from '../lib/driver.js';
25
+
26
/** Error codes the isolation probe accepts as a fail-closed refusal. */
const ISOLATION_CODES: ReadonlySet<string> = new Set<string>(['run_forbidden', 'not_found']);
27
+
28
/**
 * The `owner` object a run snapshot may carry. Every member is optional at
 * the type level; the §C probe checks `tenant` at runtime.
 */
interface OwnerTriple {
  /** Tenant identifier; the probe requires a non-empty string whenever `owner` is present. */
  tenant?: string;
  /** Workspace identifier (not inspected by the probes in this file). */
  workspace?: string;
  /** Principal identifier (not inspected by the probes in this file). */
  principal?: string;
}
33
+
34
// Shape-only probe: whenever the sample run exposes `owner`, its `tenant`
// must be a non-empty string.
describe('cross-workspace-isolation: run-ownership echo shape (RFC 0048 §C)', () => {
  it('owner, when present on a run snapshot, carries a non-empty tenant', async () => {
    // Best-effort: only hosts exposing the sample-run seam are probed.
    const response = await driver.get('/v1/host/sample/identity/owned-run');
    if (response.status === 404) {
      return; // no sample-run seam — soft-skip
    }

    const snapshot = response.json as { owner?: OwnerTriple } | undefined;
    const owner = snapshot?.owner;
    if (owner === undefined) {
      return; // single-tenant host — owner omitted
    }

    const tenantIsNonEmpty = typeof owner.tenant === 'string' && owner.tenant.length > 0;
    expect(
      tenantIsNonEmpty,
      driver.describe('RFC 0048 §C', 'RunSnapshot.owner MUST carry a non-empty tenant when present'),
    ).toBe(true);
  });
});
47
+
48
// Isolation probe: a principal scoped to one workspace asks for a run owned
// by another workspace; the host must refuse with a 4xx and an accepted
// fail-closed error code.
describe('cross-workspace-isolation: a principal MUST NOT read another workspace\'s run (RFC 0048 §D)', () => {
  it('cross-workspace read fails closed with run_forbidden', async () => {
    // Seam contract: a principal scoped to workspace A requests a run owned
    // by workspace B. The host MUST refuse rather than return B's run.
    const res = await driver.post('/v1/host/sample/identity/cross-workspace-read', {});
    // NOTE(review): a 404 is treated as "seam unwired", so a wired seam that
    // answers 404 + `not_found` is indistinguishable from a missing seam here.
    if (res.status === 404) return; // seam unwired — soft-skip

    const failClosedMessage = driver.describe(
      'spec/v1/auth.md §Identity claims',
      'a cross-workspace read MUST fail closed (4xx), never return the other workspace\'s run',
    );
    expect(res.status, failClosedMessage).toBeGreaterThanOrEqual(400);
    // The contract is a 4xx refusal; a 5xx is a server fault, not a fail-closed answer.
    expect(res.status, failClosedMessage).toBeLessThan(500);

    const code = (res.json as { error?: string } | undefined)?.error;
    expect(
      code !== undefined && ISOLATION_CODES.has(code),
      driver.describe(
        'spec/v1/rest-endpoints.md run_forbidden',
        `error MUST be one of {${[...ISOLATION_CODES].join(', ')}} (fail-closed, no existence leak), got: ${code ?? '(absent)'}`,
      ),
    ).toBe(true);
  });
});
@@ -0,0 +1,59 @@
1
+ /**
2
+ * deadletter-capability-shape — RFC 0053 §A advertisement-shape verification.
3
+ *
4
+ * Status: DRAFT. RFC 0053 (dead-letter routing & failure sinks) is `Draft`.
5
+ * The `capabilities.deadLetter` block has landed in
6
+ * `schemas/capabilities.schema.json`.
7
+ *
8
+ * Always runs (shape-only): when the host advertises
9
+ * `capabilities.deadLetter`, its fields MUST be well-formed.
10
+ *
11
+ * What this scenario asserts:
12
+ * 1. `capabilities.deadLetter` is either absent or a well-formed object.
13
+ * 2. When `supported: true`, `retentionDays` (when present) is an integer ≥ 1
14
+ * (RFC 0053 §A).
15
+ *
16
+ * @see RFCS/0053-dead-letter-routing-and-failure-sinks.md
17
+ * @see spec/v1/host-capabilities.md §host.deadLetter
18
+ */
19
+
20
+ import { describe, it, expect } from 'vitest';
21
+ import { driver } from '../lib/driver.js';
22
+
23
/** The `capabilities.deadLetter` advertisement, limited to the fields this scenario inspects. */
interface DiscoveryDeadLetter {
  /** Retention window in days; checked to be an integer >= 1 when present and `supported` is truthy. */
  retentionDays?: number;
  /** Whether the host advertises dead-letter support; checked to be a boolean when the block is advertised. */
  supported?: boolean;
}
27
+
28
/** Minimal view of the discovery document: only the path down to `capabilities.deadLetter`. */
interface DiscoveryDoc {
  capabilities?: {
    /** Dead-letter advertisement; absent when the host does not advertise it. */
    deadLetter?: DiscoveryDeadLetter;
  };
}
31
+
32
/**
 * Fetches `/.well-known/openwop` and extracts the `capabilities.deadLetter` block.
 *
 * @returns The advertised block, or `null` when any step of the path
 *   (body, `capabilities`, `deadLetter`) is null or undefined. The HTTP
 *   status is not inspected.
 */
async function readDeadLetter(): Promise<DiscoveryDeadLetter | null> {
  const discovery = (await driver.get('/.well-known/openwop')).json as DiscoveryDoc | undefined;
  const advertised = discovery?.capabilities?.deadLetter;
  return advertised === undefined || advertised === null ? null : advertised;
}
37
+
38
// Shape-only probes for the `capabilities.deadLetter` advertisement. Both
// probes return early (pass) when there is nothing to check.
describe('deadletter-capability-shape: advertisement shape (RFC 0053 §A)', () => {
  it('capabilities.deadLetter is either absent or well-formed', async () => {
    const advertised = await readDeadLetter();
    if (advertised === null) {
      return; // host doesn't advertise deadLetter at all
    }
    const supportedType = typeof advertised.supported;
    expect(
      supportedType,
      driver.describe(
        'capabilities.schema.json §deadLetter',
        'capabilities.deadLetter.supported MUST be a boolean when deadLetter is advertised',
      ),
    ).toBe('boolean');
  });

  it('retentionDays is an integer >= 1 when present + supported', async () => {
    const advertised = await readDeadLetter();
    // Only a supported advertisement that actually carries retentionDays is checked.
    if (advertised === null || !advertised.supported) return;
    const { retentionDays } = advertised;
    if (retentionDays === undefined) return;

    const wellFormed = Number.isInteger(retentionDays) && retentionDays >= 1;
    expect(
      wellFormed,
      driver.describe('RFC 0053 §A', `capabilities.deadLetter.retentionDays MUST be an integer >= 1, got: ${retentionDays}`),
    ).toBe(true);
  });
});
@@ -0,0 +1,62 @@
1
+ /**
2
+ * deadletter-retry-exhaustion — RFC 0053 §C behavioral verification.
3
+ *
4
+ * Status: DRAFT. RFC 0053 (dead-letter routing & failure sinks) is `Draft`.
5
+ *
6
+ * Capability-gated: skips when the host does not advertise
7
+ * `capabilities.deadLetter.supported = true`.
8
+ *
9
+ * What this scenario asserts (via the optional
10
+ * `POST /v1/host/sample/deadletter/exhaust` test seam, which drives a node
11
+ * that deterministically exhausts a short retry policy):
12
+ * 1. Retry exhaustion → `run.dead_lettered` — the host emits the event
13
+ * carrying `{ runId, reason, attempts }` (RFC 0053 §C.1).
14
+ * 2. Fork-eligibility — the dead-lettered run remains forkable per RFC 0011
15
+ * within the retention window (RFC 0053 §C.2).
16
+ *
17
+ * Hosts without the seam soft-skip the behavioral probes (404). Retention
18
+ * purge is part of the deferred retention scenario (needs a clock seam).
19
+ *
20
+ * @see RFCS/0053-dead-letter-routing-and-failure-sinks.md
21
+ * @see spec/v1/host-capabilities.md §host.deadLetter
22
+ */
23
+
24
+ import { describe, it, expect } from 'vitest';
25
+ import { driver } from '../lib/driver.js';
26
+
27
/** Minimal view of the discovery document: only the `capabilities.deadLetter.supported` flag. */
interface DiscoveryDoc {
  capabilities?: {
    deadLetter?: {
      /** Capability gate read by the probes in this scenario. */
      supported?: boolean;
    };
  };
}
30
+
31
/**
 * Capability gate for this scenario.
 *
 * @returns `true` only when the discovery document carries
 *   `capabilities.deadLetter.supported` strictly equal to `true`; any missing
 *   step of that path yields `false`. The HTTP status is not inspected.
 */
async function deadLetterSupported(): Promise<boolean> {
  const { json } = await driver.get('/.well-known/openwop');
  const discovery = json as DiscoveryDoc | undefined;
  return discovery?.capabilities?.deadLetter?.supported === true;
}
35
+
36
// Behavioral probes driven through the optional exhaust seam. Each probe
// returns early (pass) when the capability is not advertised or the seam
// answers 404.
describe('deadletter-retry-exhaustion: retry exhaustion → dead-lettered + fork-eligible (RFC 0053 §C)', () => {
  it('a retry-exhausted run emits run.dead_lettered with attempts', async () => {
    const gated = await deadLetterSupported();
    if (!gated) return; // capability-gated

    const response = await driver.post('/v1/host/sample/deadletter/exhaust', { scenario: 'exhaust-retries' });
    if (response.status === 404) return; // seam unwired — soft-skip

    const event = (
      response.json as { event?: { type?: string; payload?: { attempts?: number; runId?: string } } } | undefined
    )?.event;
    expect(
      event?.type,
      driver.describe('RFC 0053 §C.1', 'retry exhaustion MUST emit a run.dead_lettered event'),
    ).toBe('run.dead_lettered');

    const attempts = event?.payload?.attempts;
    expect(
      typeof attempts === 'number' && attempts >= 1,
      driver.describe('RFC 0053 §C.1', 'run.dead_lettered MUST carry the total attempts (>= 1)'),
    ).toBe(true);
  });

  it('the dead-lettered run is fork-eligible (RFC 0011)', async () => {
    const gated = await deadLetterSupported();
    if (!gated) return; // capability-gated

    const response = await driver.post('/v1/host/sample/deadletter/exhaust', { scenario: 'fork-after-dead-letter' });
    if (response.status === 404) return; // seam unwired — soft-skip

    const forkEligible = (response.json as { forkEligible?: boolean } | undefined)?.forkEligible;
    expect(
      forkEligible,
      driver.describe('RFC 0053 §C.2', 'a dead-lettered run MUST remain fork-eligible within the retention window'),
    ).toBe(true);
  });
});
@@ -0,0 +1,192 @@
1
+ /**
2
+ * experimental-tier-shape — RFC 0042 §A + §B + §D advertisement-shape probes.
3
+ *
4
+ * RFC 0042 lands the audit's "Active RFC → experimental carve-out" pattern as
5
+ * an optional `tier ∈ {"stable", "experimental"}` field on capability
6
+ * advertisements, paired with a required `experimentalUntil` ISO-8601 sunset
7
+ * date when `tier === "experimental"`. This scenario asserts:
8
+ *
9
+ * 1. Schema discipline: when `multiAgent.executionModel` advertises `tier:
10
+ * "experimental"`, `experimentalUntil` MUST be present + match
11
+ * `YYYY-MM-DD` + be ≤ 365 days in the future.
12
+ * 2. Default-mode soft-skip routing: scenarios consuming
13
+ * `experimentalGate()` honor the tier — the helper returns `false`
14
+ * under default mode for `tier: "experimental"` capabilities so the
15
+ * scenario soft-skips with a dedicated log line.
16
+ * 3. Sunset detection: a host advertising `experimentalUntil` in the
17
+ * past MUST fail discovery validation (host responsibility — the
18
+ * conformance probe simply asserts that the date format and bound
19
+ * hold for hosts that DO advertise correctly).
20
+ *
21
+ * The scenario is organized as two describe blocks — §A schema discipline
22
+ * (including the sunset probes) and §D helper routing — per the RFC 0042 §D "Conformance suite changes" contract.
23
+ *
24
+ * @see RFCS/0042-experimental-capability-tier.md
25
+ * @see schemas/capabilities.schema.json §multiAgent.executionModel.tier
26
+ * @see conformance/src/lib/behavior-gate.ts experimentalGate()
27
+ */
28
+
29
+ import { describe, it, expect } from 'vitest';
30
+ import { driver } from '../lib/driver.js';
31
+ import { experimentalGate } from '../lib/behavior-gate.js';
32
+
33
/** True when `OPENWOP_BASE_URL` is unset or empty; used to gate the suites in this file. */
const HTTP_SKIP = Boolean(process.env.OPENWOP_BASE_URL) === false;
34
+
35
/**
 * Minimal view of the discovery document: only the path down to
 * `capabilities.multiAgent.executionModel`. The leaf fields are `unknown`
 * because the probes validate their runtime types themselves.
 */
interface DiscoveryDoc {
  capabilities?: {
    multiAgent?: {
      executionModel?: {
        /** Checked to be 'stable' or 'experimental' when present. */
        tier?: unknown;
        /** Checked to be a `YYYY-MM-DD` string when `tier` is 'experimental'. */
        experimentalUntil?: unknown;
        /** Not inspected by the probes in this file. */
        supported?: unknown;
      };
    };
  };
}
46
+
47
/**
 * Fetches the discovery document from `/.well-known/openwop`.
 *
 * @returns The response body when the status is 200; `null` for any other
 *   status or when the request throws (deliberate best-effort — callers
 *   treat `null` as "nothing to probe").
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
  } catch {
    return null;
  }
}
56
+
57
// Advertisement-shape probes for `multiAgent.executionModel.tier` and
// `experimentalUntil`. Each probe skips when the host does not advertise the
// field it inspects.
describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC 0042 §A)', () => {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  /** Epoch milliseconds of UTC midnight for a `YYYY-MM-DD` string (NaN when unparseable). */
  function utcMidnight(dateStr: string): number {
    return new Date(dateStr + 'T00:00:00Z').getTime();
  }

  /**
   * True when `dateStr` names a real calendar day. The round-trip through
   * `toISOString()` is needed because some engines roll an impossible day
   * such as `2025-02-30` forward to a later date instead of rejecting it.
   */
  function isRealCalendarDate(dateStr: string): boolean {
    const parsed = new Date(dateStr + 'T00:00:00Z');
    return !Number.isNaN(parsed.getTime()) && parsed.toISOString().slice(0, 10) === dateStr;
  }

  it('multiAgent.executionModel.tier (when present) MUST be one of {stable, experimental}', async (ctx) => {
    const d = await readDiscovery();
    const em = d?.capabilities?.multiAgent?.executionModel;
    if (em === undefined) {
      ctx.skip();
      return;
    }
    if (em.tier === undefined) {
      ctx.skip(); // tier is optional with default 'stable'
      return;
    }
    expect(
      em.tier === 'stable' || em.tier === 'experimental',
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §A',
        'multiAgent.executionModel.tier MUST be one of the canonical enum values',
      ),
    ).toBe(true);
  });

  it('when tier === "experimental", experimentalUntil MUST be present + valid date', async (ctx) => {
    const d = await readDiscovery();
    const em = d?.capabilities?.multiAgent?.executionModel;
    if (em === undefined || em.tier !== 'experimental') {
      ctx.skip();
      return;
    }

    expect(
      typeof em.experimentalUntil,
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        'when tier is "experimental", experimentalUntil MUST be present (the §B sunset-rule contract)',
      ),
    ).toBe('string');

    // The assertion above guarantees a string at runtime; the cast only informs the compiler.
    const dateStr = em.experimentalUntil as string;
    expect(
      /^\d{4}-\d{2}-\d{2}$/.test(dateStr),
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        'experimentalUntil MUST match YYYY-MM-DD',
      ),
    ).toBe(true);

    expect(
      isRealCalendarDate(dateStr),
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        'experimentalUntil MUST parse as a valid ISO-8601 date',
      ),
    ).toBe(true);
  });

  it('experimentalUntil MUST be ≤ 365 days in the future (sunset bound)', async (ctx) => {
    const d = await readDiscovery();
    const em = d?.capabilities?.multiAgent?.executionModel;
    if (em === undefined || em.tier !== 'experimental') {
      ctx.skip();
      return;
    }
    if (typeof em.experimentalUntil !== 'string') {
      ctx.skip(); // shape probe above will fail; don't double-fail
      return;
    }
    const target = utcMidnight(em.experimentalUntil);
    const now = Date.now();
    // An unparseable date makes this NaN, and `NaN <= 365` is false, so the probe fails.
    const daysAhead = (target - now) / MS_PER_DAY;
    expect(
      daysAhead <= 365,
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        `experimentalUntil MUST be ≤ 365 days from now (got ${Math.floor(daysAhead)} days; advertised ${em.experimentalUntil})`,
      ),
    ).toBe(true);
  });

  it('sunset detection: experimentalUntil in the past is non-conformant', async (ctx) => {
    const d = await readDiscovery();
    const em = d?.capabilities?.multiAgent?.executionModel;
    if (em === undefined || em.tier !== 'experimental') {
      ctx.skip();
      return;
    }
    if (typeof em.experimentalUntil !== 'string') {
      ctx.skip();
      return;
    }
    const target = utcMidnight(em.experimentalUntil);
    const now = Date.now();
    // NOTE(review): the comparison is against UTC midnight of the advertised
    // day, so a sunset date equal to "today" already counts as past once the
    // UTC day has started — confirm whether the RFC intends the date to be inclusive.
    expect(
      target >= now,
      driver.describe(
        'RFCS/0042-experimental-capability-tier.md §B',
        `experimentalUntil MUST NOT be in the past (advertised ${em.experimentalUntil}; host MUST either flip tier to stable, retract the advertisement, or re-advertise with a future date + open deprecation RFC)`,
      ),
    ).toBe(true);
  });
});
158
+
159
// Helper-level probes for `experimentalGate`. They call the imported helper
// directly and never contact a host, so the suite is not gated on
// OPENWOP_BASE_URL.
describe('experimental-tier-shape: §D experimentalGate helper routing (RFC 0042 §D)', () => {
  /**
   * Runs `probe` with the environment variable `name` removed, then restores
   * the previous value (if there was one) even when `probe` throws.
   */
  function withEnvUnset(name: string, probe: () => void): void {
    const previous = process.env[name];
    delete process.env[name];
    try {
      probe();
    } finally {
      if (previous !== undefined) process.env[name] = previous;
    }
  }

  it('experimentalGate returns false for tier="experimental" without OPENWOP_REQUIRE_EXPERIMENTAL', () => {
    // Helper-level behavioral probe — no host needed, this is a pure
    // function-routing assertion against the imported helper.
    withEnvUnset('OPENWOP_REQUIRE_EXPERIMENTAL', () => {
      // A sunset date computed ~30 days ahead keeps the probe from going stale,
      // which a hard-coded calendar date eventually would.
      // NOTE(review): assumes experimentalGate takes the sunset as a YYYY-MM-DD string — confirm.
      const sunset = new Date(Date.now() + 30 * 24 * 60 * 60 * 1000).toISOString().slice(0, 10);
      const result = experimentalGate('test-profile', true, 'experimental', sunset);
      expect(
        result,
        driver.describe(
          'RFCS/0042-experimental-capability-tier.md §D',
          'default mode + tier="experimental" MUST soft-skip — helper returns false',
        ),
      ).toBe(false);
    });
  });

  it('experimentalGate routes through behaviorGate when tier === undefined or "stable"', () => {
    withEnvUnset('OPENWOP_REQUIRE_BEHAVIOR', () => {
      // Stable + advertised → proceed.
      expect(experimentalGate('test-stable', true, 'stable')).toBe(true);
      expect(experimentalGate('test-stable-undef', true, undefined)).toBe(true);
      // Stable + NOT advertised, default mode → skip (returns false, no throw).
      expect(experimentalGate('test-not-adv', false, 'stable')).toBe(false);
    });
  });
});
@@ -0,0 +1,35 @@
1
+ /**
2
+ * feedback-capability-shape — RFC 0056 §A. The `capabilities.feedback`
3
+ * advertisement block is either absent or a well-formed object.
4
+ *
5
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage
6
+ * lives in the sibling `feedback-*.test.ts` scenarios, gated on
7
+ * `capabilities.feedback.supported`.
8
+ *
9
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §A
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+ import { readFeedbackCap } from '../lib/feedback.js';
15
+
16
// Shape-only probe for the `capabilities.feedback` advertisement: `supported`
// must be a boolean, and any listed targets/signals must come from the fixed
// vocabularies below.
describe('feedback-capability-shape: advertisement (RFC 0056 §A)', () => {
  it('capabilities.feedback is absent or a well-formed object', async () => {
    const allowedTargets = ['run', 'event', 'node'];
    const allowedSignals = ['rating', 'correction', 'label', 'flag'];

    const cap = await readFeedbackCap();
    if (cap === null) {
      return; // not advertised — valid
    }

    const supportedType = typeof cap.supported;
    expect(
      supportedType,
      driver.describe('capabilities.schema.json §feedback', 'capabilities.feedback.supported MUST be a boolean when present'),
    ).toBe('boolean');

    // The lists are only checked when the host actually sends arrays.
    if (Array.isArray(cap.targets)) {
      for (const target of cap.targets) {
        expect(allowedTargets).toContain(target);
      }
    }
    if (Array.isArray(cap.signals)) {
      for (const signal of cap.signals) {
        expect(allowedSignals).toContain(signal);
      }
    }
  });
});