@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/CHANGELOG.md +156 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/api/redocly.yaml +15 -0
  6. package/coverage.md +26 -5
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  9. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  10. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  11. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  12. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  13. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  14. package/fixtures/conformance-envelope-refusal.json +38 -0
  15. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  16. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  17. package/fixtures/conformance-envelope-truncated.json +39 -0
  18. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  19. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  20. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  21. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  22. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  23. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  24. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  25. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  26. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  27. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  28. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  29. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  30. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  31. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  32. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  33. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  34. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  35. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  36. package/fixtures.md +45 -0
  37. package/package.json +1 -1
  38. package/schemas/README.md +5 -0
  39. package/schemas/agent-manifest.schema.json +16 -0
  40. package/schemas/capabilities.schema.json +390 -0
  41. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  42. package/schemas/envelopes/clarification.request.schema.json +9 -0
  43. package/schemas/envelopes/error.schema.json +4 -0
  44. package/schemas/envelopes/schema.request.schema.json +4 -0
  45. package/schemas/envelopes/schema.response.schema.json +1 -1
  46. package/schemas/node-pack-manifest.schema.json +28 -0
  47. package/schemas/orchestrator-decision.schema.json +12 -0
  48. package/schemas/prompt-kind.schema.json +8 -0
  49. package/schemas/prompt-pack-manifest.schema.json +80 -0
  50. package/schemas/prompt-ref.schema.json +40 -0
  51. package/schemas/prompt-template.schema.json +149 -0
  52. package/schemas/registry-version-manifest.schema.json +5 -0
  53. package/schemas/run-ancestry-response.schema.json +54 -0
  54. package/schemas/run-event-payloads.schema.json +513 -11
  55. package/schemas/run-event.schema.json +17 -1
  56. package/schemas/run-snapshot.schema.json +3 -2
  57. package/schemas/workflow-definition.schema.json +19 -1
  58. package/src/lib/driver.ts +15 -0
  59. package/src/lib/env.ts +51 -0
  60. package/src/lib/event-log-query.ts +62 -0
  61. package/src/lib/fixtures.ts +38 -1
  62. package/src/lib/host-toggle.ts +54 -0
  63. package/src/lib/llm-cache-key-recipe.ts +68 -0
  64. package/src/lib/multi-agent-capabilities.ts +10 -0
  65. package/src/lib/otel-scrape.ts +59 -0
  66. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  67. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  68. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
  69. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
  70. package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
  71. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
  72. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
  73. package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
  74. package/src/scenarios/blob-presign-expiry.test.ts +42 -9
  75. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  76. package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
  77. package/src/scenarios/cost-attribution.test.ts +124 -11
  78. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  79. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  80. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  81. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  82. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  83. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  84. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  85. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  86. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  87. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  88. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  89. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  90. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  91. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  92. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  93. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  94. package/src/scenarios/envelope-truncated.test.ts +136 -0
  95. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  96. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  97. package/src/scenarios/fixtures-gating.test.ts +139 -1
  98. package/src/scenarios/fixtures-valid.test.ts +123 -15
  99. package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
  100. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  101. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  102. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  103. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  104. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  105. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  106. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  107. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  108. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  109. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  110. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  111. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  112. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  113. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  114. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  115. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  116. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  117. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  118. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  119. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  120. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  121. package/src/scenarios/provider-usage.test.ts +185 -0
  122. package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
  123. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
  124. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  125. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  126. package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
  127. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  128. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  129. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  130. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  131. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  132. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  133. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  134. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  135. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  136. package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
  137. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  138. package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
  139. package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
  140. package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
  141. package/src/scenarios/table-cursor-pagination.test.ts +47 -9
  142. package/src/scenarios/table-schema-enforcement.test.ts +46 -9
  143. package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
  144. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -0,0 +1,164 @@
1
+ /**
2
+ * multi-agent-confidence-escalation — RFC 0039 §A behavioral.
3
+ *
4
+ * Status: ACTIVE (advertisement-shape + behavioral). RFC 0039 Phase 2
5
+ * filed Draft → graduated Active 2026-05-22 in the same commit chain as
6
+ * this scenario. Capability-gated on
7
+ * `capabilities.multiAgent.executionModel.supported: true` AND
8
+ * `capabilities.multiAgent.executionModel.version >= 2` AND fixture
9
+ * availability. Hosts that advertise only Phase 1 (version: 1) soft-skip
10
+ * cleanly — the confidence-floor MUST applies only at version >= 2.
11
+ *
12
+ * Asserts (behavioral when host advertises Phase 2):
13
+ *
14
+ * 1. Advertisement shape: confidenceEscalationFloor (when present) MUST be
15
+ * a number in [0.5, 1.0]; floor < 0.5 is non-conformant per RFC 0039 §A.
16
+ *
17
+ * 2. A run driven by the fixture's low-confidence (0.3) mockDispatchPlan
18
+ * reaches a `waiting-clarification` terminal-suspension status — NOT
19
+ * `completed`. The clarification interrupt MUST surface so the operator
20
+ * can confirm-or-adjust the supervisor's marginal decision.
21
+ *
22
+ * 3. The parent run's event log contains exactly ONE
23
+ * `core.workflowChain.confidence-escalated` event, with:
24
+ * - payload.confidence === 0.3
25
+ * - payload.floor in [0.5, 1.0] (whatever floor the host advertised
26
+ * — spec default 0.5, operator stricter is permitted)
27
+ * - payload.escalationKind === 'clarify' (the reference host emits
28
+ * clarify; hosts choosing 'escalate' would also be conformant)
29
+ * - payload.workerId === the dispatch's first nextWorkerIds entry (not yet asserted below)
30
+ * - payload.originalDecision carries the verbatim OrchestratorDecision (not yet asserted below)
31
+ * AND causationId chains back to the `runOrchestrator.decided` event
32
+ * that emitted the low-confidence decision.
33
+ *
34
+ * 4. The event log contains ZERO `core.workflowChain.event` records — the
35
+ * escalation fired BEFORE any dispatch.began event per RFC 0039 §A
36
+ * ("the escalation event MUST appear in the run event log BEFORE the
37
+ * interrupt fires AND BEFORE any `core.workflowChain.event` with
38
+ * `phase: 'dispatch.began'` for the escalated decision's intended
39
+ * next-worker"). This is the load-bearing test that distinguishes
40
+ * Phase 2 from Phase 1: Phase 1 hosts dispatch unconditionally; Phase 2
41
+ * hosts gate on confidence.
42
+ *
43
+ * @see RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A
44
+ * @see spec/v1/multi-agent-execution.md §"Confidence escalation (RFC 0039 Phase 2)"
45
+ * @see schemas/run-event-payloads.schema.json §coreWorkflowChainConfidenceEscalated
46
+ */
47
+
48
+ import { describe, it, expect } from 'vitest';
49
+ import { driver } from '../lib/driver.js';
50
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
51
+ import { pollUntilTerminal } from '../lib/polling.js';
52
+
53
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
54
+ const FIXTURE = 'conformance-multi-agent-confidence-escalation';
55
+ const BEHAVIORAL_SKIP = HTTP_SKIP || !isFixtureAdvertised(FIXTURE);
56
+
57
+ interface RunEvent { type: string; eventId?: string; causationId?: string; payload?: Record<string, unknown>; }
58
+
59
+ interface DiscoveryDoc {
60
+ capabilities?: {
61
+ multiAgent?: {
62
+ executionModel?: {
63
+ supported?: unknown;
64
+ version?: unknown;
65
+ confidenceEscalationFloor?: unknown;
66
+ };
67
+ };
68
+ };
69
+ }
70
+
71
+ async function readDiscovery(): Promise<DiscoveryDoc | null> {
72
+ try {
73
+ const res = await driver.get('/.well-known/openwop');
74
+ if (res.status !== 200) return null;
75
+ return res.json as DiscoveryDoc;
76
+ } catch {
77
+ return null;
78
+ }
79
+ }
80
+
81
+ describe.skipIf(HTTP_SKIP)('multi-agent-confidence-escalation: capability shape (RFC 0039 §A)', () => {
82
+ it('confidenceEscalationFloor (when advertised) MUST be in [0.5, 1.0]', async () => {
83
+ const d = await readDiscovery();
84
+ if (d === null) return;
85
+ const em = d.capabilities?.multiAgent?.executionModel;
86
+ if (em === undefined) return;
87
+ const floor = em.confidenceEscalationFloor;
88
+ if (floor === undefined) return;
89
+ expect(
90
+ typeof floor === 'number' && Number.isFinite(floor) && floor >= 0.5 && floor <= 1.0,
91
+ driver.describe(
92
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A',
93
+ 'confidenceEscalationFloor MUST be number in [0.5, 1.0]; values below the spec floor are non-conformant',
94
+ ),
95
+ ).toBe(true);
96
+ });
97
+ });
98
+
99
+ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral (RFC 0039 §A)', () => {
100
+ it('happy-path: low-confidence decision → confidence-escalated event + clarification interrupt + zero dispatch events', async () => {
101
+ const d = await readDiscovery();
102
+ const supported = d?.capabilities?.multiAgent?.executionModel?.supported === true;
103
+ const versionRaw = d?.capabilities?.multiAgent?.executionModel?.version;
104
+ const version = typeof versionRaw === 'number' ? versionRaw : 0;
105
+ if (!supported || version < 2) return; // soft-skip — Phase 1 hosts pass via this absence
106
+
107
+ const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
108
+ expect(create.status).toBe(201);
109
+ const runId = (create.json as { runId: string }).runId;
110
+
111
+ const terminal = await pollUntilTerminal(runId);
112
+ // Phase 2 escalation suspends the parent — NOT a terminal `completed`.
113
+ // The conformance pollUntilTerminal returns when the run reaches any
114
+ // settled status; the assertion below requires exactly
115
+ // `waiting-clarification` — no other non-completed status is accepted.
116
+ expect(
117
+ terminal.status,
118
+ driver.describe(
119
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A + spec/v1/interrupt.md',
120
+ 'a host emitting `interrupt.kind: "clarification"` MUST surface the run as `waiting-clarification` per spec/v1/interrupt.md §"Interrupt kinds"; low-confidence decision MUST NOT reach `completed` because no dispatch fired',
121
+ ),
122
+ ).toBe('waiting-clarification');
123
+
124
+ const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
125
+ expect(eventsRes.status).toBe(200);
126
+ const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []);
127
+
128
+ const escalated = events.filter((e) => e.type === 'core.workflowChain.confidence-escalated');
129
+ expect(escalated.length, driver.describe(
130
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A',
131
+ 'low-confidence decision MUST emit exactly one core.workflowChain.confidence-escalated event',
132
+ )).toBe(1);
133
+
134
+ const ev = escalated[0]!;
135
+ const payload = (ev.payload ?? {}) as { confidence?: number; floor?: number; escalationKind?: string; workerId?: string };
136
+ expect(payload.confidence, 'payload.confidence echoes the decision').toBe(0.3);
137
+ expect(
138
+ typeof payload.floor === 'number' && payload.floor >= 0.5 && payload.floor <= 1.0,
139
+ 'payload.floor is the host-advertised floor (in [0.5, 1.0])',
140
+ ).toBe(true);
141
+ expect(
142
+ payload.escalationKind === 'clarify' || payload.escalationKind === 'escalate',
143
+ 'payload.escalationKind ∈ {clarify, escalate}',
144
+ ).toBe(true);
145
+
146
+ // Causation chain: escalation event causes back to the runOrchestrator.decided
147
+ // that named the worker.
148
+ const decidedEvent = events.find((e) => e.eventId === ev.causationId);
149
+ expect(
150
+ decidedEvent?.type,
151
+ 'confidence-escalated causationId MUST point at the runOrchestrator.decided that surfaced the low-confidence decision',
152
+ ).toBe('runOrchestrator.decided');
153
+
154
+ // Load-bearing: NO dispatch event fired. Phase 2 gates BEFORE the loop.
155
+ const chainEvents = events.filter((e) => e.type === 'core.workflowChain.event');
156
+ expect(
157
+ chainEvents.length,
158
+ driver.describe(
159
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A',
160
+ 'low-confidence decision MUST NOT produce any core.workflowChain.event records — the escalation fires before any dispatch.began per the spec ordering',
161
+ ),
162
+ ).toBe(0);
163
+ });
164
+ });
@@ -0,0 +1,167 @@
1
+ /**
2
+ * multi-agent-handoff-state-machine — RFC 0037 Phase 1 advertisement-shape + behavioral.
3
+ *
4
+ * Status: ACTIVE (advertisement-shape + behavioral). RFC 0037 filed
5
+ * 2026-05-21 as Draft; this scenario lands the matching conformance gate.
6
+ * Capability-gated on `capabilities.multiAgent.executionModel.supported: true`
7
+ * AND fixture-gated on the `conformance-multi-agent-handoff` parent + child
8
+ * fixtures; the behavioral suite is skipped unless the host advertises both.
9
+ *
10
+ * Asserts (Phase 1 — execution-loop + handoff state machine per spec/v1/multi-agent-execution.md):
11
+ *
12
+ * 1. Advertisement shape: when capabilities.multiAgent.executionModel
13
+ * is present, version MUST be integer in [1, 4]; supported MUST be boolean.
14
+ *
15
+ * 2. Behavioral (gated on supported: true + fixture availability): a
16
+ * supervisor → next-worker → child-completed run emits the 4 expected
17
+ * `core.workflowChain.event` records in causation order:
18
+ * - dispatch.began (causationId → runOrchestrator.decided eventId)
19
+ * - dispatch.succeeded (causationId → dispatch.began eventId)
20
+ * - child.completed (causationId → dispatch.succeeded eventId)
21
+ * - output.harvested (causationId → child.completed eventId; harvestedKeys present
22
+ * when the dispatch config carried outputMapping)
23
+ *
24
+ * 3. Behavioral negative (spec contract; not yet asserted in this file): failed-child path emits dispatch.began → dispatch.succeeded
25
+ * → child.failed (NO output.harvested — per spec/v1/multi-agent-execution.md
26
+ * §"Handoff state machine" + RFC 0022 §B).
27
+ *
28
+ * @see RFCS/0037-multi-agent-execution-model.md
29
+ * @see spec/v1/multi-agent-execution.md §"Execution loop" + §"Handoff state machine"
30
+ * @see schemas/run-event-payloads.schema.json §coreWorkflowChainEvent
31
+ */
32
+
33
+ import { describe, it, expect } from 'vitest';
34
+ import { driver } from '../lib/driver.js';
35
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
36
+ import { pollUntilTerminal } from '../lib/polling.js';
37
+
38
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
39
+
40
+ interface DiscoveryDoc {
41
+ capabilities?: {
42
+ multiAgent?: {
43
+ executionModel?: {
44
+ supported?: unknown;
45
+ version?: unknown;
46
+ };
47
+ };
48
+ };
49
+ }
50
+
51
+ async function readDiscovery(): Promise<DiscoveryDoc | null> {
52
+ try {
53
+ const res = await driver.get('/.well-known/openwop');
54
+ if (res.status !== 200) return null;
55
+ return res.json as DiscoveryDoc;
56
+ } catch {
57
+ return null;
58
+ }
59
+ }
60
+
61
+ describe.skipIf(HTTP_SKIP)('multi-agent-handoff-state-machine: advertisement shape (RFC 0037 §C)', () => {
62
+ it('capabilities.multiAgent.executionModel (when present) conforms to RFC 0037 §C', async () => {
63
+ const d = await readDiscovery();
64
+ if (d === null) return; // discovery unavailable — skip
65
+ const executionModel = d.capabilities?.multiAgent?.executionModel;
66
+ if (executionModel === undefined) return; // host doesn't advertise — soft-skip
67
+ expect(
68
+ typeof executionModel.supported,
69
+ driver.describe(
70
+ 'RFCS/0037-multi-agent-execution-model.md §C',
71
+ 'capabilities.multiAgent.executionModel.supported MUST be boolean when present',
72
+ ),
73
+ ).toBe('boolean');
74
+ expect(
75
+ typeof executionModel.version,
76
+ driver.describe(
77
+ 'RFCS/0037-multi-agent-execution-model.md §C',
78
+ 'capabilities.multiAgent.executionModel.version MUST be integer when present',
79
+ ),
80
+ ).toBe('number');
81
+ const v = executionModel.version as number;
82
+ expect(
83
+ Number.isInteger(v) && v >= 1 && v <= 4,
84
+ driver.describe(
85
+ 'RFCS/0037-multi-agent-execution-model.md §C',
86
+ 'version MUST be an integer in [1, 4] (1 = Phase 1 only; Phases 2-4 lift the ceiling additively)',
87
+ ),
88
+ ).toBe(true);
89
+ });
90
+ });
91
+
92
+ // Behavioral assertion: when a host advertises capabilities.multiAgent.executionModel.supported,
93
+ // it MUST emit the 7-state handoff state machine's transition events as `core.workflowChain.event`
94
+ // records with causationId chained per the spec §"Transition events" table. The happy-path
95
+ // fixture (supervisor → next-worker → child completed with outputMapping non-empty) drives 4
96
+ // of the 7 transitions: dispatch.began → dispatch.succeeded → child.completed → output.harvested.
97
+
98
+ interface RunEvent { type: string; eventId?: string; causationId?: string; payload?: Record<string, unknown>; }
99
+
100
+ const PARENT_FIXTURE = 'conformance-multi-agent-handoff';
101
+ const CHILD_FIXTURE = 'conformance-multi-agent-handoff-child';
102
+ const BEHAVIORAL_SKIP = HTTP_SKIP || !isFixtureAdvertised(PARENT_FIXTURE) || !isFixtureAdvertised(CHILD_FIXTURE);
103
+
104
+ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-handoff-state-machine: behavioral 4-event causation chain (RFC 0037 §"Handoff state machine")', () => {
105
+ it('happy-path: dispatch.began → dispatch.succeeded → child.completed → output.harvested fire in causation order', async () => {
106
+ const d = await readDiscovery();
107
+ const advertised = d?.capabilities?.multiAgent?.executionModel?.supported === true;
108
+ if (!advertised) return; // soft-skip — host honest about not implementing
109
+
110
+ const create = await driver.post('/v1/runs', { workflowId: PARENT_FIXTURE });
111
+ expect(create.status).toBe(201);
112
+ const runId = (create.json as { runId: string }).runId;
113
+
114
+ const terminal = await pollUntilTerminal(runId);
115
+ expect(terminal.status, driver.describe(
116
+ 'spec/v1/multi-agent-execution.md §"Execution loop"',
117
+ 'parent run with supervisor → next-worker → terminate MUST reach terminal `completed`',
118
+ )).toBe('completed');
119
+
120
+ const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
121
+ expect(eventsRes.status).toBe(200);
122
+ const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []);
123
+ const chainEvents = events.filter((e) => e.type === 'core.workflowChain.event');
124
+
125
+ expect(chainEvents.length, driver.describe(
126
+ 'RFCS/0037-multi-agent-execution-model.md §"Conformance"',
127
+ 'happy-path fixture MUST produce 4 core.workflowChain.event records (dispatch.began, dispatch.succeeded, child.completed, output.harvested)',
128
+ )).toBe(4);
129
+
130
+ const phases = chainEvents.map((e) => (e.payload as { phase?: string } | undefined)?.phase);
131
+ expect(phases, driver.describe(
132
+ 'spec/v1/multi-agent-execution.md §"Transition events"',
133
+ 'phase order MUST be dispatch.began → dispatch.succeeded → child.completed → output.harvested',
134
+ )).toEqual(['dispatch.began', 'dispatch.succeeded', 'child.completed', 'output.harvested']);
135
+
136
+ // Causation chain: each transition's causationId MUST equal the prior transition's eventId.
137
+ // dispatch.began causes back to a runOrchestrator.decided; the inner 3 chain through each other.
138
+ for (let i = 1; i < chainEvents.length; i++) {
139
+ const prior = chainEvents[i - 1];
140
+ const cur = chainEvents[i];
141
+ expect(cur?.causationId, driver.describe(
142
+ 'spec/v1/multi-agent-execution.md §"Transition events"',
143
+ `core.workflowChain.event #${i} (${phases[i]}) MUST have causationId === prior event's eventId`,
144
+ )).toBe(prior?.eventId);
145
+ }
146
+
147
+ // dispatch.began causationId MUST chain back to a runOrchestrator.decided event.
148
+ const dispatchBegan = chainEvents[0];
149
+ expect(dispatchBegan?.causationId).toBeDefined();
150
+ const decidedEvent = events.find((e) => e.eventId === dispatchBegan?.causationId);
151
+ expect(decidedEvent?.type, driver.describe(
152
+ 'spec/v1/multi-agent-execution.md §"Transition events"',
153
+ 'dispatch.began causationId MUST point at the runOrchestrator.decided event that named this worker',
154
+ )).toBe('runOrchestrator.decided');
155
+
156
+ // output.harvested.harvestedKeys MUST list the outputMapping keys harvested.
157
+ const harvested = chainEvents[3]?.payload as { harvestedKeys?: string[] } | undefined;
158
+ expect(harvested?.harvestedKeys, driver.describe(
159
+ 'spec/v1/multi-agent-execution.md §"Transition events"',
160
+ 'output.harvested payload MUST list harvested parent-variable keys (the fixture\'s outputMapping is { parentResult: \'childOutcome\' })',
161
+ )).toEqual(['parentResult']);
162
+ });
163
+ });
164
+
165
+ // Cross-host promotion path per RFCS/0001 §"Promotion to Accepted": once a non-steward host
166
+ // advertises capabilities.multiAgent.executionModel.supported + the behavioral assertion above
167
+ // passes against it, RFC 0037 Phase 1 graduates Active → Accepted.
@@ -0,0 +1,124 @@
1
+ /**
2
+ * multi-agent-memory-lifecycle — RFC 0039 §B advertisement-shape + behavioral stubs.
3
+ *
4
+ * Status: ACTIVE (advertisement-shape; behavioral stubs deferred to a
5
+ * host that advertises both `capabilities.memory.supported: true` AND
6
+ * `capabilities.multiAgent.executionModel.version >= 2`). Phase 1 hosts
7
+ * + Phase 2 hosts without memory + Phase 2 hosts with memory but no
8
+ * MAE-3 snapshot implementation all soft-skip cleanly.
9
+ *
10
+ * Closes the conformance gate for RFC 0039 §B (MAE-2 cross-run TTL +
11
+ * MAE-3 replay snapshot). Behavioral assertions require a host that
12
+ * actually advertises the MemoryAdapter surface; the reference
13
+ * workflow-engine sample advertises `capabilities.memory.supported:
14
+ * false` so this scenario soft-skips there. The Postgres reference
15
+ * host advertises memory.supported: true; once it adopts RFC 0039
16
+ * Phase 2 the behavioral assertions below light up.
17
+ *
18
+ * Asserts (advertisement-shape — always-on when discovery is reachable):
19
+ *
20
+ * 1. capabilities.multiAgent.executionModel.crossChildMemoryConcurrency
21
+ * (when advertised) MUST be one of {"strict", "advisory"} per
22
+ * RFC 0039 §B + schemas/capabilities.schema.json.
23
+ *
24
+ * 2. When a host advertises BOTH multiAgent.executionModel.version >= 2
25
+ * AND memory.supported: true, the host MUST honor the MAE-2 +
26
+ * MAE-3 contracts (behavioral assertions below).
27
+ *
28
+ * Behavioral assertions (currently registered as `it.todo` placeholders until a host
29
+ * advertises the conjunction and exposes the required test seam):
30
+ *
31
+ * 3. MAE-2 cross-run TTL: a child writing MemoryEntry { ttl: 5 } at
32
+ * parent-clock T+10s has `expiresAt` reflecting T+15s (child
33
+ * write time + 5s), NOT parent-start + 5s. Implementation requires
34
+ * a host-side test seam to drive the cross-run write + read; once
35
+ * a memory-advertising host wires the seam the assertion runs.
36
+ *
37
+ * 4. MAE-3 replay snapshot refusal: a host that advertises Phase 2 +
38
+ * memory MUST either (a) serve the fork from a past event-log
39
+ * index returning memory state as-of that index, OR (b) refuse
40
+ * with error.code: "replay_memory_snapshot_unavailable" per
41
+ * spec/v1/rest-endpoints.md §"Common error codes". Silent
42
+ * substitution of current memory is non-conformant.
43
+ *
44
+ * @see RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B
45
+ * @see spec/v1/multi-agent-execution.md §"Agent memory lifecycle across sub-runs"
46
+ * @see spec/v1/agent-memory.md §"TTL semantics" (which the child-write-time MAE-2 anchoring extends to the cross-run case)
47
+ */
48
+
49
+ import { describe, it, expect } from 'vitest';
50
+ import { driver } from '../lib/driver.js';
51
+
52
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
53
+
54
+ interface DiscoveryDoc {
55
+ capabilities?: {
56
+ memory?: { supported?: unknown };
57
+ multiAgent?: {
58
+ executionModel?: {
59
+ supported?: unknown;
60
+ version?: unknown;
61
+ crossChildMemoryConcurrency?: unknown;
62
+ };
63
+ };
64
+ };
65
+ }
66
+
67
+ async function readDiscovery(): Promise<DiscoveryDoc | null> {
68
+ try {
69
+ const res = await driver.get('/.well-known/openwop');
70
+ if (res.status !== 200) return null;
71
+ return res.json as DiscoveryDoc;
72
+ } catch {
73
+ return null;
74
+ }
75
+ }
76
+
77
+ describe.skipIf(HTTP_SKIP)('multi-agent-memory-lifecycle: advertisement shape (RFC 0039 §B)', () => {
78
+ it('crossChildMemoryConcurrency (when advertised) MUST be one of {strict, advisory}', async (ctx) => {
79
+ const d = await readDiscovery();
80
+ if (d === null) {
81
+ ctx.skip();
82
+ return;
83
+ }
84
+ const ccmc = d.capabilities?.multiAgent?.executionModel?.crossChildMemoryConcurrency;
85
+ if (ccmc === undefined) {
86
+ ctx.skip(); // optional advertisement — host hasn't opted in
87
+ return;
88
+ }
89
+ expect(
90
+ ccmc === 'strict' || ccmc === 'advisory',
91
+ driver.describe(
92
+ 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B',
93
+ 'crossChildMemoryConcurrency MUST be one of {strict, advisory} when present; values outside the closed enum are non-conformant',
94
+ ),
95
+ ).toBe(true);
96
+ });
97
+ });
98
+
99
+ describe.skipIf(HTTP_SKIP)('multi-agent-memory-lifecycle: behavioral (RFC 0039 §B MAE-2 + MAE-3)', () => {
100
+ // Behavioral assertion lands when a memory-advertising Phase 2 host
101
+ // exposes a host-side test seam for cross-run memory writes (e.g.,
102
+ // POST /v1/host/sample/test/memory/cross-run-ttl-roundtrip). The
103
+ // assertion drives:
104
+ // 1. Parent starts at parent-clock T+0
105
+ // 2. Child dispatched at T+10s, writes MemoryEntry { key: 'k', value: 'v', ttl: 5 }
106
+ // 3. Parent reads MemoryEntry { key: 'k' } at T+12s; expiresAt MUST be
107
+ // approximately T+15s (child write at T+10 + ttl 5), not T+5s.
108
+ // Until a memory-advertising Phase 2 host wires the seam, the contract
109
+ // is documentation-only — surfaced as `todo` so test reporters track
110
+ // the gap rather than reporting a vacuous PASS.
111
+ it.todo('MAE-2 cross-run TTL: child write expiresAt MUST be anchored at child write time, not parent start');
112
+
113
+ // Behavioral assertion lands when the host implements the snapshot
114
+ // mechanism per RFC 0039 §B. The assertion drives:
115
+ // 1. Run a workflow that writes MemoryEntry { key: 'k', value: 'v1' } at index 10.
116
+ // 2. Write MemoryEntry { key: 'k', value: 'v2' } at index 20.
117
+ // 3. POST /v1/runs/{runId}:fork { fromSeq: 15 }.
118
+ // 4. Forked run reads MemoryEntry { key: 'k' }; MUST return 'v1' (not 'v2').
119
+ // 5. Alternative compliance: fork refused with
120
+ // error.code: 'replay_memory_snapshot_unavailable' AND
121
+ // details.fromSeq === 15.
122
+ // Silent substitution of v2 (current state) is non-conformant.
123
+ it.todo('MAE-3 replay snapshot: fork from past index MUST return memory-as-of-index OR refuse with replay_memory_snapshot_unavailable');
124
+ });
@@ -85,3 +85,61 @@ describe('multi-region-idempotency: capability shape', () => {
85
85
  }
86
86
  });
87
87
  });
88
+
89
+ // RFC 0036 — granular `multiRegion` sub-block advertisement shape. Hosts that
90
+ // opt into the granular advertisement (separate from the categorical `crossRegion`
91
+ // claim) MUST conform to the shape below: supported is boolean (required); when
92
+ // supported is true, replicationLagBoundMs is integer [0, 60000] and
93
+ // partitionRecoveryStrategy is either the categorical enum or an x-host-<host>-<key>
94
+ // extension namespace string. Hosts that don't advertise multiRegion stay on the
95
+ // categorical crossRegion claim (above); both forms are compatible.
96
+
97
+ interface MultiRegionCaps {
98
+ supported?: unknown;
99
+ replicationLagBoundMs?: unknown;
100
+ partitionRecoveryStrategy?: unknown;
101
+ }
102
+
103
+ describe('multi-region-idempotency: granular multiRegion advertisement shape (RFC 0036 §A)', () => {
104
+ it('capabilities.idempotency.multiRegion (when present) conforms to RFC 0036 §A', async () => {
105
+ const disco = await driver.get('/.well-known/openwop');
106
+ const idem =
107
+ (disco.json as { capabilities?: { idempotency?: IdempotencyCaps & { multiRegion?: MultiRegionCaps } } })
108
+ .capabilities?.idempotency;
109
+ const mr = idem?.multiRegion;
110
+ if (mr === undefined) return; // host doesn't advertise the granular block — soft-skip
111
+
112
+ expect(
113
+ typeof mr.supported,
114
+ driver.describe(
115
+ 'RFCS/0036-multi-region-and-cross-engine-guarantees.md §A',
116
+ 'capabilities.idempotency.multiRegion.supported MUST be boolean when present',
117
+ ),
118
+ ).toBe('boolean');
119
+
120
+ if (mr.supported === true) {
121
+ if (mr.replicationLagBoundMs !== undefined) {
122
+ const n = mr.replicationLagBoundMs as number;
123
+ expect(
124
+ Number.isInteger(n) && n >= 0 && n <= 60000,
125
+ driver.describe(
126
+ 'RFCS/0036-multi-region-and-cross-engine-guarantees.md §A',
127
+ 'replicationLagBoundMs MUST be integer in [0, 60000] when supported is true',
128
+ ),
129
+ ).toBe(true);
130
+ }
131
+ if (mr.partitionRecoveryStrategy !== undefined) {
132
+ const s = mr.partitionRecoveryStrategy as string;
133
+ const isCategorical = s === 'last-writer-wins' || s === 'first-writer-wins';
134
+ const isExtension = /^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$/.test(s);
135
+ expect(
136
+ isCategorical || isExtension,
137
+ driver.describe(
138
+ 'RFCS/0036-multi-region-and-cross-engine-guarantees.md §A',
139
+ 'partitionRecoveryStrategy MUST be one of {last-writer-wins, first-writer-wins} OR match ^x-host-<host>-<key>$',
140
+ ),
141
+ ).toBe(true);
142
+ }
143
+ }
144
+ });
145
+ });