@openwop/openwop-conformance 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +8 -3
  4. package/api/openapi.yaml +305 -0
  5. package/coverage.md +29 -4
  6. package/fixtures/conformance-phase4-nondet-tool.json +53 -0
  7. package/fixtures/conformance-phase4-replay-divergence.json +40 -0
  8. package/fixtures.md +5 -3
  9. package/package.json +1 -1
  10. package/schemas/README.md +2 -0
  11. package/schemas/capabilities.schema.json +167 -3
  12. package/schemas/credential-reference.schema.json +21 -0
  13. package/schemas/node-pack-manifest.schema.json +112 -1
  14. package/schemas/run-diff-response.schema.json +64 -0
  15. package/schemas/run-event-payloads.schema.json +104 -2
  16. package/schemas/run-event.schema.json +8 -1
  17. package/schemas/run-snapshot.schema.json +11 -0
  18. package/src/lib/behavior-gate.ts +51 -0
  19. package/src/lib/driver.ts +13 -1
  20. package/src/lib/saml-idp.ts +179 -0
  21. package/src/scenarios/approval-gate-events.test.ts +61 -0
  22. package/src/scenarios/approval-gate-flow.test.ts +68 -0
  23. package/src/scenarios/auth-saml-profile.test.ts +119 -0
  24. package/src/scenarios/auth-scim-profile.test.ts +65 -0
  25. package/src/scenarios/authorization-fail-closed.test.ts +80 -0
  26. package/src/scenarios/authorization-roles-shape.test.ts +83 -0
  27. package/src/scenarios/connector-manifest-validity.test.ts +142 -0
  28. package/src/scenarios/credential-payload-redaction.test.ts +93 -0
  29. package/src/scenarios/credentials-capability-shape.test.ts +90 -0
  30. package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
  31. package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
  32. package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
  33. package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
  34. package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
  35. package/src/scenarios/experimental-tier-shape.test.ts +192 -0
  36. package/src/scenarios/identity-owner-shape.test.ts +64 -0
  37. package/src/scenarios/multi-agent-confidence-escalation.test.ts +13 -12
  38. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
  39. package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
  40. package/src/scenarios/oauth-capability-shape.test.ts +97 -0
  41. package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
  42. package/src/scenarios/pack-registry-isolation.test.ts +108 -0
  43. package/src/scenarios/pack-registry-publish.test.ts +1 -1
  44. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
  45. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
  46. package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
  47. package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
  48. package/src/scenarios/run-diff.test.ts +143 -0
  49. package/src/scenarios/sandbox-capability-gate-respected.test.ts +7 -1
  50. package/src/scenarios/sandbox-memory-cap.test.ts +7 -5
  51. package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
  52. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +7 -1
  53. package/src/scenarios/sandbox-no-host-env-leak.test.ts +5 -1
  54. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +9 -1
  55. package/src/scenarios/sandbox-no-host-process-escape.test.ts +5 -1
  56. package/src/scenarios/sandbox-no-network-escape.test.ts +5 -1
  57. package/src/scenarios/sandbox-timeout-cap.test.ts +7 -5
  58. package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
  59. package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
  60. package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
  61. package/src/scenarios/spec-corpus-validity.test.ts +2 -2
@@ -1,15 +1,16 @@
1
1
  /**
2
2
  * multi-agent-confidence-escalation — RFC 0039 §A behavioral.
3
3
  *
4
- * Status: ACTIVE (advertisement-shape + behavioral). RFC 0039 Phase 2
5
- * filed Draft graduated Active 2026-05-22 in the same commit chain as
6
- * this scenario. Capability-gated on
4
+ * Status: ACTIVE (advertisement-shape + behavioral). RFC 0039
5
+ * (multi-agent execution model `version: 2`) was filed as Draft and graduated to
6
+ * Active 2026-05-22 in the same commit chain as this scenario.
7
+ * Capability-gated on
7
8
  * `capabilities.multiAgent.executionModel.supported: true` AND
8
9
  * `capabilities.multiAgent.executionModel.version >= 2` AND fixture
9
- * availability. Hosts that advertise only Phase 1 (version: 1) soft-skip
10
- * cleanly — the confidence-floor MUST applies only at version >= 2.
10
+ * availability. Hosts that advertise only `version: 1` soft-skip
11
+ * cleanly — the confidence-floor MUST applies only at `version >= 2`.
11
12
  *
12
- * Asserts (behavioral when host advertises Phase 2):
13
+ * Asserts (behavioral when host advertises `version >= 2`):
13
14
  *
14
15
  * 1. Advertisement shape: confidenceEscalationFloor (when present) MUST be
15
16
  * a number in [0.5, 1.0]; floor < 0.5 is non-conformant per RFC 0039 §A.
@@ -37,11 +38,11 @@
37
38
  * interrupt fires AND BEFORE any `core.workflowChain.event` with
38
39
  * `phase: 'dispatch.began'` for the escalated decision's intended
39
40
  * next-worker"). This is the load-bearing test that distinguishes
40
- * Phase 2 from Phase 1: Phase 1 hosts dispatch unconditionally; Phase 2
41
- * hosts gate on confidence.
41
+ * `version: 2` from `version: 1`: `version: 1` hosts dispatch
42
+ * unconditionally; `version: 2` hosts gate on confidence.
42
43
  *
43
44
  * @see RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A
44
- * @see spec/v1/multi-agent-execution.md §"Confidence escalation (RFC 0039 Phase 2)"
45
+ * @see spec/v1/multi-agent-execution.md §"Confidence escalation (RFC 0039)"
45
46
  * @see schemas/run-event-payloads.schema.json §coreWorkflowChainConfidenceEscalated
46
47
  */
47
48
 
@@ -103,14 +104,14 @@ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral
103
104
  const supported = d?.capabilities?.multiAgent?.executionModel?.supported === true;
104
105
  const versionRaw = d?.capabilities?.multiAgent?.executionModel?.version;
105
106
  const version = typeof versionRaw === 'number' ? versionRaw : 0;
106
- if (!supported || version < 2) return; // soft-skip — Phase 1 hosts pass via this absence
107
+ if (!supported || version < 2) return; // soft-skip — `version: 1` hosts pass via this absence
107
108
 
108
109
  const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
109
110
  expect(create.status).toBe(201);
110
111
  const runId = (create.json as { runId: string }).runId;
111
112
 
112
113
  const terminal = await pollUntilTerminal(runId);
113
- // Phase 2 escalation suspends the parent — NOT a terminal `completed`.
114
+ // RFC 0039 escalation suspends the parent — NOT a terminal `completed`.
114
115
  // The conformance pollUntilTerminal returns when the run reaches any
115
116
  // settled status. RFC 0039 §A gives hosts a choice: clarify-kind
116
117
  // escalation (→ waiting-clarification) OR escalate-kind approval
@@ -188,7 +189,7 @@ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral
188
189
  'confidence-escalated causationId MUST point at the runOrchestrator.decided that surfaced the low-confidence decision',
189
190
  ).toBe('runOrchestrator.decided');
190
191
 
191
- // Load-bearing: NO dispatch event fired. Phase 2 gates BEFORE the loop.
192
+ // Load-bearing: NO dispatch event fired. RFC 0039 gates BEFORE the loop.
192
193
  const chainEvents = events.filter((e) => e.type === 'core.workflowChain.event');
193
194
  expect(
194
195
  chainEvents.length,
@@ -108,17 +108,92 @@ describe.skipIf(HTTP_SKIP)('multi-agent-memory-lifecycle: behavioral (RFC 0039
108
108
  // Until a memory-advertising Phase 2 host wires the seam, the contract
109
109
  // is documentation-only — surfaced as `todo` so test reporters track
110
110
  // the gap rather than reporting a vacuous PASS.
111
- it.todo('MAE-2 cross-run TTL: child write expiresAt MUST be anchored at child write time, not parent start');
111
+ // MAE-2 is still out of stable profile via RFC 0042 §B (experimental
112
+ // tier): RFC 0039 §B Half B (MAE-2 + MAE-3) landed on MyndHyve
113
+ // 2026-05-23 via commit `a51f7bbd` (`snapshotAtSeq()` +
114
+ // `crossChildMemoryConcurrency: 'strict'`). The MAE-2 cross-run-ttl-
115
+ // roundtrip seam (POST /v1/host/sample/test/memory/cross-run-ttl-
116
+ // roundtrip) is still open per host-sample-test-seams.md §"Open seams"
117
+ // — no host has wired the seam endpoint yet, so the behavioral
118
+ // assertion stays `it.skip`. Hosts that implement Half B SHOULD
119
+ // advertise `multiAgent.executionModel.tier: 'experimental'` per
120
+ // RFC 0042 §A until the seam contract is wired.
121
+ it.skip('MAE-2 cross-run TTL: child write expiresAt MUST be anchored at child write time, not parent start — out of stable profile via RFC 0042');
112
122
 
113
- // Behavioral assertion lands when the host implements the snapshot
114
- // mechanism per RFC 0039 §B. The assertion drives:
115
- // 1. Run a workflow that writes MemoryEntry { key: 'k', value: 'v1' } at index 10.
116
- // 2. Write MemoryEntry { key: 'k', value: 'v2' } at index 20.
117
- // 3. POST /v1/runs/{runId}:fork { fromSeq: 15 }.
118
- // 4. Forked run reads MemoryEntry { key: 'k' }; MUST return 'v1' (not 'v2').
119
- // 5. Alternative compliance: fork refused with
120
- // error.code: 'replay_memory_snapshot_unavailable' AND
121
- // details.fromSeq === 15.
122
- // Silent substitution of v2 (current state) is non-conformant.
123
- it.todo('MAE-3 replay snapshot: fork from past index MUST return memory-as-of-index OR refuse with replay_memory_snapshot_unavailable');
123
+ // MAE-3 flipped to behavioral 2026-05-25 — MyndHyve workflow-runtime
124
+ // revision `00206-tdh` advertises Phase 2 + memory and honors the
125
+ // POST /v1/runs/{runId}:fork mode:replay contract per
126
+ // host-sample-test-seams.md §"Canonical-endpoint conformance hooks"
127
+ // §9. The seam reuses the canonical fork endpoint plus the
128
+ // OPENWOP_TEST_EXPIRED_REPLAY_RUN_ID env-var convention (parallel
129
+ // naming to OPENWOP_TEST_EXPIRED_RUN_ID used by
130
+ // production-retention-expiry). Soft-skips on Phase 1 hosts, Phase 2
131
+ // hosts without memory, and hosts that have not seeded the env var.
132
+ it('MAE-3 replay snapshot refusal: fork mode:replay against a past-retention runId MUST return 422 replay_memory_snapshot_unavailable with documented envelope; silent substitution is non-conformant', async (ctx) => {
133
+ const d = await readDiscovery();
134
+ if (d === null) {
135
+ ctx.skip();
136
+ return;
137
+ }
138
+ const v = d.capabilities?.multiAgent?.executionModel?.version;
139
+ const memorySupported = d.capabilities?.memory?.supported;
140
+ const phase2OrLater = typeof v === 'number' && v >= 2;
141
+ const expiredRunId = process.env.OPENWOP_TEST_EXPIRED_REPLAY_RUN_ID;
142
+ if (!phase2OrLater || memorySupported !== true || !expiredRunId) {
143
+ ctx.skip();
144
+ return;
145
+ }
146
+
147
+ const fromSeq = 0;
148
+ const res = await driver.post(`/v1/runs/${encodeURIComponent(expiredRunId)}:fork`, {
149
+ mode: 'replay',
150
+ fromSeq,
151
+ });
152
+
153
+ expect(
154
+ res.status,
155
+ driver.describe(
156
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
157
+ 'fork mode:replay against a past-retention runId MUST refuse with 422; silent substitution of current memory is non-conformant',
158
+ ),
159
+ ).toBe(422);
160
+
161
+ const body = res.json as {
162
+ error?: unknown;
163
+ details?: { fromSeq?: unknown; sourceRunId?: unknown; reason?: unknown };
164
+ } | null;
165
+
166
+ expect(
167
+ body?.error,
168
+ driver.describe(
169
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
170
+ 'refusal envelope error code MUST be "replay_memory_snapshot_unavailable" (distinct from the pre-flight invalid_from_seq gate)',
171
+ ),
172
+ ).toBe('replay_memory_snapshot_unavailable');
173
+
174
+ expect(
175
+ body?.details?.fromSeq,
176
+ driver.describe(
177
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
178
+ 'refusal envelope details.fromSeq MUST echo the requested fromSeq',
179
+ ),
180
+ ).toBe(fromSeq);
181
+
182
+ expect(
183
+ body?.details?.sourceRunId,
184
+ driver.describe(
185
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
186
+ 'refusal envelope details.sourceRunId MUST echo the runId from the URL',
187
+ ),
188
+ ).toBe(expiredRunId);
189
+
190
+ const reason = body?.details?.reason;
191
+ expect(
192
+ reason === 'retention_expired' || reason === 'event_log_unavailable',
193
+ driver.describe(
194
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
195
+ 'refusal envelope details.reason MUST be one of {"retention_expired", "event_log_unavailable"}',
196
+ ),
197
+ ).toBe(true);
198
+ });
124
199
  });
@@ -0,0 +1,203 @@
1
+ /**
2
+ * multi-region-idempotency-behavior — RFC 0036 §C convergence-rule behavioral probe.
3
+ *
4
+ * Companion to `multi-region-idempotency.test.ts` which carries the
5
+ * advertisement-shape probes. This file exercises the canonical convergence
6
+ * algorithm specified by `spec/v1/idempotency.md` §"Multi-region idempotency
7
+ * annex" via the host-extension test seam at:
8
+ *
9
+ * POST /v1/host/sample/test/multi-region/simulate-partition
10
+ *
11
+ * The seam is conformance-only (host-extension namespace), gated on the
12
+ * host's `OPENWOP_TEST_MULTI_REGION_SIMULATOR=true` env var. The seam itself
13
+ * is OPTIONAL — hosts that don't expose it soft-skip; hosts that DO expose
14
+ * it MUST honor the annex's convergence rule:
15
+ *
16
+ * 1. Given ≥2 conflicting `ConflictClaim` records sharing
17
+ * `(tenantId, endpoint, key)`, the host's resolver MUST return the
18
+ * lex-min `runId` as the winner.
19
+ * 2. Every region (including the winner's) gets a cache redirect entry
20
+ * pointing at the winner's runId.
21
+ * 3. The loser's cancel reason MUST be the canonical string
22
+ * `cross_region_dedup_loss`.
23
+ * 4. The resolver MUST be order-invariant — shuffling the input claims
24
+ * MUST produce the same winner.
25
+ * 5. Cross-region partition simulation: same idempotency-key submitted
26
+ * to 2+ regions simultaneously converges to ONE survivor per the
27
+ * lex-min rule, with no coordination required.
28
+ *
29
+ * @see RFCS/0036-multi-region-and-cross-engine-guarantees.md §C
30
+ * @see spec/v1/idempotency.md §"Multi-region idempotency annex"
31
+ */
32
+
33
+ import { describe, it, expect } from 'vitest';
34
+ import { driver } from '../lib/driver.js';
35
+
36
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
37
+
38
+ interface ConflictClaim {
39
+ runId: string;
40
+ tenantId: string;
41
+ endpoint: string;
42
+ key: string;
43
+ region: string;
44
+ }
45
+
46
+ interface ConvergenceResult {
47
+ winner?: ConflictClaim;
48
+ losers?: ConflictClaim[];
49
+ cacheRedirects?: Array<{ region: string; cacheKey: string; redirectToRunId: string }>;
50
+ loserCancelReason?: string;
51
+ }
52
+
53
+ async function simulatePartition(claims: ConflictClaim[]): Promise<{ status: number; body: ConvergenceResult }> {
54
+ const res = await driver.post('/v1/host/sample/test/multi-region/simulate-partition', { claims });
55
+ return { status: res.status, body: (res.json as ConvergenceResult) ?? {} };
56
+ }
57
+
58
+ describe.skipIf(HTTP_SKIP)('multi-region-idempotency-behavior: convergence rule (RFC 0036 §C)', () => {
59
+ it('two-region conflict resolves to the lex-min runId per annex §"Convergence rule"', async (ctx) => {
60
+ const probe = await simulatePartition([
61
+ { runId: 'run-b-east', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-1', region: 'us-east-1' },
62
+ { runId: 'run-a-west', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-1', region: 'eu-west-1' },
63
+ ]);
64
+ if (probe.status === 404) {
65
+ ctx.skip(); // host doesn't expose the simulator seam
66
+ return;
67
+ }
68
+ expect(
69
+ probe.status,
70
+ driver.describe(
71
+ 'idempotency.md §"Multi-region idempotency annex"',
72
+ 'simulate-partition seam MUST return 200 when ≥2 conflicting claims are submitted',
73
+ ),
74
+ ).toBe(200);
75
+ expect(
76
+ probe.body.winner?.runId,
77
+ driver.describe(
78
+ 'idempotency.md §"Convergence rule"',
79
+ 'winner MUST be the lex-min runId (run-a-west < run-b-east)',
80
+ ),
81
+ ).toBe('run-a-west');
82
+ });
83
+
84
+ it('three-region partition resolves to a single winner', async (ctx) => {
85
+ const probe = await simulatePartition([
86
+ { runId: 'zzz-3', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-2', region: 'r1' },
87
+ { runId: 'aaa-1', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-2', region: 'r2' },
88
+ { runId: 'mmm-2', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-2', region: 'r3' },
89
+ ]);
90
+ if (probe.status === 404) {
91
+ ctx.skip();
92
+ return;
93
+ }
94
+ expect(probe.status).toBe(200);
95
+ expect(
96
+ probe.body.winner?.runId,
97
+ driver.describe(
98
+ 'idempotency.md §"Convergence rule"',
99
+ 'winner MUST be the lex-min runId across all conflicting claims',
100
+ ),
101
+ ).toBe('aaa-1');
102
+ expect(
103
+ probe.body.losers?.length,
104
+ driver.describe(
105
+ 'idempotency.md §"Convergence rule"',
106
+ 'losers array MUST contain N-1 entries when N claims conflict',
107
+ ),
108
+ ).toBe(2);
109
+ });
110
+
111
+ it('every region gets a cache redirect entry pointing at the winner', async (ctx) => {
112
+ const probe = await simulatePartition([
113
+ { runId: 'run-x', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-3', region: 'r1' },
114
+ { runId: 'run-a', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-3', region: 'r2' },
115
+ ]);
116
+ if (probe.status === 404) {
117
+ ctx.skip();
118
+ return;
119
+ }
120
+ expect(probe.status).toBe(200);
121
+ const redirects = probe.body.cacheRedirects ?? [];
122
+ expect(
123
+ redirects.length,
124
+ driver.describe(
125
+ 'idempotency.md §"Convergence rule"',
126
+ 'cacheRedirects MUST contain one entry per claim (including the winner)',
127
+ ),
128
+ ).toBe(2);
129
+ for (const redirect of redirects) {
130
+ expect(
131
+ redirect.redirectToRunId,
132
+ driver.describe(
133
+ 'idempotency.md §"Convergence rule"',
134
+ 'every cache redirect MUST point at the winner runId',
135
+ ),
136
+ ).toBe('run-a');
137
+ }
138
+ });
139
+
140
+ it('loser cancel reason MUST be the canonical `cross_region_dedup_loss` string', async (ctx) => {
141
+ const probe = await simulatePartition([
142
+ { runId: 'run-b', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-4', region: 'r1' },
143
+ { runId: 'run-a', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-4', region: 'r2' },
144
+ ]);
145
+ if (probe.status === 404) {
146
+ ctx.skip();
147
+ return;
148
+ }
149
+ expect(probe.status).toBe(200);
150
+ expect(
151
+ probe.body.loserCancelReason,
152
+ driver.describe(
153
+ 'idempotency.md §"Convergence rule"',
154
+ 'loserCancelReason MUST be the canonical `cross_region_dedup_loss` string',
155
+ ),
156
+ ).toBe('cross_region_dedup_loss');
157
+ });
158
+
159
+ it('resolver is order-invariant — shuffled inputs produce the same winner', async (ctx) => {
160
+ const claims: ConflictClaim[] = [
161
+ { runId: 'c', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-5', region: 'r1' },
162
+ { runId: 'a', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-5', region: 'r2' },
163
+ { runId: 'b', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-5', region: 'r3' },
164
+ ];
165
+ const p1 = await simulatePartition(claims);
166
+ if (p1.status === 404) {
167
+ ctx.skip();
168
+ return;
169
+ }
170
+ expect(p1.status).toBe(200);
171
+ const p2 = await simulatePartition([claims[2]!, claims[0]!, claims[1]!]);
172
+ expect(p2.status).toBe(200);
173
+ const p3 = await simulatePartition([...claims].reverse());
174
+ expect(p3.status).toBe(200);
175
+ expect(
176
+ p1.body.winner?.runId,
177
+ driver.describe(
178
+ 'idempotency.md §"Convergence rule" — determinism',
179
+ 'resolver MUST be order-invariant; all permutations MUST produce the same lex-min winner',
180
+ ),
181
+ ).toBe('a');
182
+ expect(p2.body.winner?.runId).toBe('a');
183
+ expect(p3.body.winner?.runId).toBe('a');
184
+ });
185
+
186
+ it('mismatched tuple rejects with 400 validation_error', async (ctx) => {
187
+ const probe = await simulatePartition([
188
+ { runId: 'r1', tenantId: 't1', endpoint: 'POST /v1/runs', key: 'idem-6', region: 'r1' },
189
+ { runId: 'r2', tenantId: 't2', endpoint: 'POST /v1/runs', key: 'idem-6', region: 'r2' },
190
+ ]);
191
+ if (probe.status === 404) {
192
+ ctx.skip();
193
+ return;
194
+ }
195
+ expect(
196
+ probe.status,
197
+ driver.describe(
198
+ 'idempotency.md §"Convergence rule"',
199
+ 'claims with non-matching (tenantId, endpoint, key) MUST be rejected — it would be a programming error in the caller',
200
+ ),
201
+ ).toBe(400);
202
+ });
203
+ });
@@ -0,0 +1,97 @@
1
+ /**
2
+ * oauth-capability-shape — RFC 0047 §A advertisement-shape verification.
3
+ *
4
+ * Status: DRAFT. RFC 0047 (`host.oauth`) is `Draft`. The
5
+ * `capabilities.oauth` block has landed in `schemas/capabilities.schema.json`.
6
+ *
7
+ * Always runs (shape-only): when the host advertises `capabilities.oauth`,
8
+ * its fields MUST be well-formed; when it doesn't, the block is absent.
9
+ *
10
+ * What this scenario asserts:
11
+ * 1. `capabilities.oauth` is either absent or a well-formed object.
12
+ * 2. When `supported: true`, `grants` (when present) is a subset of
13
+ * {authorization_code, client_credentials, refresh_token}, and every
14
+ * `providers[]` entry has a non-empty `id` (RFC 0047 §A).
15
+ *
16
+ * @see RFCS/0047-host-oauth-connector-flows.md
17
+ */
18
+
19
+ import { describe, it, expect } from 'vitest';
20
+ import { driver } from '../lib/driver.js';
21
+
22
+ interface DiscoveryOAuthProvider {
23
+ id?: string;
24
+ authUrl?: string;
25
+ tokenUrl?: string;
26
+ scopesSupported?: string[];
27
+ }
28
+
29
+ interface DiscoveryOAuth {
30
+ supported?: boolean;
31
+ grants?: string[];
32
+ providers?: DiscoveryOAuthProvider[];
33
+ }
34
+
35
+ interface DiscoveryDoc {
36
+ capabilities?: {
37
+ oauth?: DiscoveryOAuth;
38
+ };
39
+ }
40
+
41
+ const VALID_GRANTS: ReadonlySet<string> = new Set([
42
+ 'authorization_code',
43
+ 'client_credentials',
44
+ 'refresh_token',
45
+ ]);
46
+
47
+ async function readOAuth(): Promise<DiscoveryOAuth | null> {
48
+ const res = await driver.get('/.well-known/openwop');
49
+ const body = res.json as DiscoveryDoc | undefined;
50
+ return body?.capabilities?.oauth ?? null;
51
+ }
52
+
53
+ describe('oauth-capability-shape: advertisement shape (RFC 0047 §A)', () => {
54
+ it('capabilities.oauth is either absent or well-formed', async () => {
55
+ const oauth = await readOAuth();
56
+ if (oauth === null) return; // host doesn't advertise host.oauth at all
57
+ expect(
58
+ typeof oauth.supported,
59
+ driver.describe(
60
+ 'capabilities.schema.json §oauth',
61
+ 'capabilities.oauth.supported MUST be a boolean when oauth is advertised',
62
+ ),
63
+ ).toBe('boolean');
64
+ });
65
+
66
+ it('grants is a subset of the canonical grant set when supported', async () => {
67
+ const oauth = await readOAuth();
68
+ if (!oauth?.supported || oauth.grants === undefined) return;
69
+ expect(
70
+ Array.isArray(oauth.grants),
71
+ driver.describe('RFC 0047 §A', 'capabilities.oauth.grants MUST be an array'),
72
+ ).toBe(true);
73
+ for (const grant of oauth.grants) {
74
+ expect(
75
+ VALID_GRANTS.has(grant),
76
+ driver.describe(
77
+ 'RFC 0047 §A',
78
+ `capabilities.oauth.grants entries MUST be one of {${[...VALID_GRANTS].join(', ')}}, got: ${grant}`,
79
+ ),
80
+ ).toBe(true);
81
+ }
82
+ });
83
+
84
+ it('every advertised provider has a non-empty id when supported', async () => {
85
+ const oauth = await readOAuth();
86
+ if (!oauth?.supported || oauth.providers === undefined) return;
87
+ for (const provider of oauth.providers) {
88
+ expect(
89
+ typeof provider.id === 'string' && provider.id.length > 0,
90
+ driver.describe(
91
+ 'RFC 0047 §A',
92
+ 'each capabilities.oauth.providers[] entry MUST declare a non-empty id',
93
+ ),
94
+ ).toBe(true);
95
+ }
96
+ });
97
+ });
@@ -0,0 +1,91 @@
1
+ /**
2
+ * oauth-connector-redaction — RFC 0047 §C / §D + `credential-payload-redaction`.
3
+ *
4
+ * Status: DRAFT. RFC 0047 (`host.oauth`) is `Draft`. Reuses the RFC 0046
5
+ * SECURITY invariant `credential-payload-redaction` — OAuth tokens acquired
6
+ * via host.oauth are stored as host.credentials entries and are subject to
7
+ * the same no-plaintext-on-the-wire rule.
8
+ *
9
+ * Capability-gated: skips when the host does not advertise
10
+ * `capabilities.oauth.supported = true`.
11
+ *
12
+ * What this scenario asserts:
13
+ * 1. Advertisement shape — `capabilities.oauth.supported` is a boolean.
14
+ * 2. Token-material redaction MUST-NOT — when the host exposes the optional
15
+ * `POST /v1/host/sample/oauth/connector-echo` test seam (a synthetic
16
+ * provider acquires a token whose value is a known canary, then a
17
+ * connector node runs), the canary MUST NOT appear in ANY observable
18
+ * run surface, and `connector.authorized` MUST carry the credential
19
+ * reference rather than the token.
20
+ *
21
+ * Hosts without the seam soft-skip the redaction probe (404).
22
+ *
23
+ * @see RFCS/0047-host-oauth-connector-flows.md
24
+ * @see SECURITY/invariants.yaml id: credential-payload-redaction
25
+ */
26
+
27
+ import { describe, it, expect } from 'vitest';
28
+ import { driver } from '../lib/driver.js';
29
+
30
+ interface DiscoveryOAuth {
31
+ supported?: boolean;
32
+ }
33
+
34
+ interface DiscoveryDoc {
35
+ capabilities?: {
36
+ oauth?: DiscoveryOAuth;
37
+ };
38
+ }
39
+
40
+ const TOKEN_CANARY = 'OPENWOP_OAUTH_CANARY_b7d3e1a9c2';
41
+
42
+ async function readOAuth(): Promise<DiscoveryOAuth | null> {
43
+ const res = await driver.get('/.well-known/openwop');
44
+ const body = res.json as DiscoveryDoc | undefined;
45
+ return body?.capabilities?.oauth ?? null;
46
+ }
47
+
48
+ describe('oauth-connector-redaction: advertisement shape (RFC 0047 §A)', () => {
49
+ it('capabilities.oauth.supported is a boolean when advertised', async () => {
50
+ const oauth = await readOAuth();
51
+ if (oauth === null) return;
52
+ expect(
53
+ typeof oauth.supported,
54
+ driver.describe(
55
+ 'capabilities.schema.json §oauth',
56
+ 'capabilities.oauth.supported MUST be a boolean when oauth is advertised',
57
+ ),
58
+ ).toBe('boolean');
59
+ });
60
+ });
61
+
62
+ describe('oauth-connector-redaction: token material MUST NOT cross the wire (RFC 0047 §C.2)', () => {
63
+ it('canary token is absent from every observable run surface', async () => {
64
+ const oauth = await readOAuth();
65
+ if (!oauth?.supported) return; // capability-gated
66
+
67
+ // Seam contract: a synthetic provider issues a token whose value is
68
+ // TOKEN_CANARY, a connector node runs, and the run's observable surfaces
69
+ // (events incl. connector.authorized + snapshot + debug bundle) are returned.
70
+ const res = await driver.post('/v1/host/sample/oauth/connector-echo', { canary: TOKEN_CANARY });
71
+ // 404 from a host that hasn't wired the test seam is a soft-skip.
72
+ if (res.status === 404) return;
73
+
74
+ expect(
75
+ res.status,
76
+ driver.describe(
77
+ 'RFC 0047 §C',
78
+ 'the oauth connector-echo seam MUST acquire the token and return the run observable surfaces',
79
+ ),
80
+ ).toBeLessThan(400);
81
+
82
+ const serialized = JSON.stringify(res.json ?? {});
83
+ expect(
84
+ serialized.includes(TOKEN_CANARY),
85
+ driver.describe(
86
+ 'SECURITY/invariants.yaml credential-payload-redaction',
87
+ 'acquired OAuth token material MUST NOT appear in inputs, variables, channels, events, snapshot, or debug bundle — only the credential reference may cross the wire',
88
+ ),
89
+ ).toBe(false);
90
+ });
91
+ });
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Pack-registry test-mode isolation — RFC 0025 §C point 1.
3
+ *
4
+ * Status: BEHAVIORAL (soft-skip). A pack PUT'd to `/v1/packs-test/*` MUST
5
+ * NOT appear in `/v1/packs/*` listings. This anchors the test-mode
6
+ * mirror's load-bearing safety invariant: the conformance suite is
7
+ * trusted to drive publish-error-catalog traffic against the test
8
+ * namespace precisely because the test catalog is guaranteed distinct
9
+ * from the production catalog.
10
+ *
11
+ * Soft-skips when the host doesn't advertise
12
+ * `capabilities.packs.testMode.supported: true` (or advertises
13
+ * `isolated: false` — in which case the host is honestly disclaiming
14
+ * the invariant and the conformance suite's other publish-error tests
15
+ * are not applicable either).
16
+ *
17
+ * @see RFCS/0025-test-mode-registry-namespace.md §C "Isolation guarantees"
18
+ * @see schemas/capabilities.schema.json §packs.testMode
19
+ * @see pack-registry-publish.test.ts (the 25 sibling scenarios this invariant unblocks)
20
+ */
21
+
22
+ import { describe, it, expect } from 'vitest';
23
+ import { driver } from '../lib/driver.js';
24
+
25
+ interface DiscoveryDoc {
26
+ capabilities?: Record<string, unknown>;
27
+ }
28
+
29
+ interface TestModeAdvertisement {
30
+ readonly supported: boolean;
31
+ readonly isolated: boolean;
32
+ }
33
+
34
+ async function getTestModeAdvertisement(): Promise<TestModeAdvertisement | null> {
35
+ const res = await driver.get('/.well-known/openwop');
36
+ const body = res.json as DiscoveryDoc | undefined;
37
+ const top = body?.capabilities as Record<string, unknown> | undefined;
38
+ const packs = top && typeof top === 'object' ? (top['packs'] as Record<string, unknown> | undefined) : undefined;
39
+ const testMode = packs && typeof packs === 'object' ? (packs['testMode'] as Record<string, unknown> | undefined) : undefined;
40
+ if (!testMode || typeof testMode !== 'object') return null;
41
+ return {
42
+ supported: testMode['supported'] === true,
43
+ isolated: testMode['isolated'] === true,
44
+ };
45
+ }
46
+
47
+ function freshPackName(): string {
48
+ return `core.openwop.test-isolation-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
49
+ }
50
+
51
+ describe('pack-registry-isolation: test catalog MUST NOT bleed into production (RFC 0025 §C.1)', () => {
52
+ it('a pack PUT to /v1/packs-test/{name} MUST NOT appear in GET /v1/packs/{name}', async () => {
53
+ const adv = await getTestModeAdvertisement();
54
+ if (!adv || !adv.supported) return; // host doesn't advertise the seam
55
+ if (!adv.isolated) return; // host explicitly disclaims the invariant — no contract to assert
56
+
57
+ const name = freshPackName();
58
+ const version = '1.0.0';
59
+
60
+ // PUT to the test namespace. The body is intentionally minimal — the
61
+ // isolation invariant is independent of whether validation accepts
62
+ // or rejects the publish. Either outcome is fine; what's tested is
63
+ // that NEITHER outcome causes the pack to surface in the production
64
+ // catalog.
65
+ const putRes = await driver.put(
66
+ `/v1/packs-test/${encodeURIComponent(name)}/-/${encodeURIComponent(version)}.tgz`,
67
+ Buffer.from([0x1f, 0x8b, 0]),
68
+ { headers: { 'Content-Type': 'application/octet-stream' } },
69
+ );
70
+
71
+ // If the seam returns 404, the test-mode endpoint isn't actually
72
+ // wired up despite the advertisement — pack-registry-publish.test.ts
73
+ // catches that drift in 24 other scenarios; soft-skip here.
74
+ if (putRes.status === 404) return;
75
+
76
+ // Probe the production namespace. The invariant: a pack written
77
+ // via /v1/packs-test/* MUST NOT be retrievable via /v1/packs/*.
78
+ const prodRes = await driver.get(`/v1/packs/${encodeURIComponent(name)}`);
79
+
80
+ // 404 is the canonical "not found" — exactly what isolation requires.
81
+ // 200 with a payload that does NOT name our pack would mean the host
82
+ // returned a listing of unrelated packs (some hosts serve search-shaped
83
+ // results on /v1/packs/{nonexistent}); we check the negative explicitly.
84
+ if (prodRes.status === 200) {
85
+ const body = prodRes.json as Record<string, unknown> | undefined;
86
+ const stringified = body ? JSON.stringify(body) : '';
87
+ expect(
88
+ stringified.includes(name),
89
+ driver.describe(
90
+ 'RFCS/0025-test-mode-registry-namespace.md §C point 1',
91
+ `pack name '${name}' was written via /v1/packs-test/${name}@${version} but appeared in /v1/packs/${name} response body — test-catalog isolation MUST hold`,
92
+ ),
93
+ ).toBe(false);
94
+ return;
95
+ }
96
+
97
+ // Acceptable: 4xx range (404 pack_not_found is the spec-canonical
98
+ // shape; 410/422 also fine — any "not present in production catalog"
99
+ // signal satisfies the invariant).
100
+ expect(
101
+ prodRes.status >= 400 && prodRes.status < 500,
102
+ driver.describe(
103
+ 'RFCS/0025-test-mode-registry-namespace.md §C point 1',
104
+ `expected production-namespace GET to return 4xx for a test-namespace-only pack '${name}', got ${prodRes.status}`,
105
+ ),
106
+ ).toBe(true);
107
+ });
108
+ });