@openwop/openwop-conformance 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +156 -1
- package/README.md +3 -2
- package/api/asyncapi.yaml +8 -0
- package/api/openapi.yaml +371 -1
- package/api/redocly.yaml +15 -0
- package/coverage.md +26 -5
- package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
- package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
- package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
- package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
- package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
- package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
- package/fixtures/conformance-envelope-recovery-applied.json +39 -0
- package/fixtures/conformance-envelope-refusal.json +38 -0
- package/fixtures/conformance-envelope-retry-attempted.json +39 -0
- package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
- package/fixtures/conformance-envelope-truncated.json +39 -0
- package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
- package/fixtures/conformance-model-capability-insufficient.json +25 -0
- package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
- package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
- package/fixtures/conformance-multi-agent-handoff.json +49 -0
- package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
- package/fixtures/conformance-prompt-end-to-end.json +33 -0
- package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
- package/fixtures/openwop-smoke-cost-emit.json +37 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
- package/fixtures.md +45 -0
- package/package.json +1 -1
- package/schemas/README.md +5 -0
- package/schemas/agent-manifest.schema.json +16 -0
- package/schemas/capabilities.schema.json +390 -0
- package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
- package/schemas/envelopes/clarification.request.schema.json +9 -0
- package/schemas/envelopes/error.schema.json +4 -0
- package/schemas/envelopes/schema.request.schema.json +4 -0
- package/schemas/envelopes/schema.response.schema.json +1 -1
- package/schemas/node-pack-manifest.schema.json +28 -0
- package/schemas/orchestrator-decision.schema.json +12 -0
- package/schemas/prompt-kind.schema.json +8 -0
- package/schemas/prompt-pack-manifest.schema.json +80 -0
- package/schemas/prompt-ref.schema.json +40 -0
- package/schemas/prompt-template.schema.json +149 -0
- package/schemas/registry-version-manifest.schema.json +5 -0
- package/schemas/run-ancestry-response.schema.json +54 -0
- package/schemas/run-event-payloads.schema.json +513 -11
- package/schemas/run-event.schema.json +17 -1
- package/schemas/run-snapshot.schema.json +3 -2
- package/schemas/workflow-definition.schema.json +19 -1
- package/src/lib/driver.ts +15 -0
- package/src/lib/env.ts +51 -0
- package/src/lib/event-log-query.ts +62 -0
- package/src/lib/fixtures.ts +38 -1
- package/src/lib/host-toggle.ts +54 -0
- package/src/lib/llm-cache-key-recipe.ts +68 -0
- package/src/lib/multi-agent-capabilities.ts +10 -0
- package/src/lib/otel-scrape.ts +59 -0
- package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
- package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
- package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
- package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
- package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
- package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
- package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
- package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
- package/src/scenarios/blob-presign-expiry.test.ts +42 -9
- package/src/scenarios/blob-roundtrip.test.ts +0 -0
- package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
- package/src/scenarios/cost-attribution.test.ts +124 -11
- package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
- package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
- package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
- package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
- package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
- package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
- package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
- package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
- package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
- package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
- package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
- package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
- package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
- package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
- package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
- package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
- package/src/scenarios/envelope-truncated.test.ts +136 -0
- package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
- package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
- package/src/scenarios/fixtures-gating.test.ts +139 -1
- package/src/scenarios/fixtures-valid.test.ts +123 -15
- package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
- package/src/scenarios/model-capability-insufficient.test.ts +221 -0
- package/src/scenarios/model-capability-substituted.test.ts +203 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
- package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
- package/src/scenarios/multi-region-idempotency.test.ts +58 -0
- package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
- package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
- package/src/scenarios/pack-registry-publish.test.ts +231 -51
- package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
- package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
- package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
- package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
- package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
- package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
- package/src/scenarios/prompt-pack-install.test.ts +187 -0
- package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
- package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
- package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
- package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
- package/src/scenarios/prompt-template-shape.test.ts +359 -0
- package/src/scenarios/provider-usage.test.ts +185 -0
- package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
- package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
- package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
- package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
- package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
- package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
- package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
- package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
- package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
- package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
- package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
- package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
- package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
- package/src/scenarios/spec-corpus-validity.test.ts +34 -6
- package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
- package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
- package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
- package/src/scenarios/table-cursor-pagination.test.ts +47 -9
- package/src/scenarios/table-schema-enforcement.test.ts +46 -9
- package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
- package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* multi-agent-confidence-escalation — RFC 0039 §A behavioral.
|
|
3
|
+
*
|
|
4
|
+
* Status: ACTIVE (advertisement-shape + behavioral). RFC 0039 Phase 2
|
|
5
|
+
* filed Draft → graduated Active 2026-05-22 in the same commit chain as
|
|
6
|
+
* this scenario. Capability-gated on
|
|
7
|
+
* `capabilities.multiAgent.executionModel.supported: true` AND
|
|
8
|
+
* `capabilities.multiAgent.executionModel.version >= 2` AND fixture
|
|
9
|
+
* availability. Hosts that advertise only Phase 1 (version: 1) soft-skip
|
|
10
|
+
* cleanly — the confidence-floor MUST applies only at version >= 2.
|
|
11
|
+
*
|
|
12
|
+
* Asserts (behavioral when host advertises Phase 2):
|
|
13
|
+
*
|
|
14
|
+
* 1. Advertisement shape: confidenceEscalationFloor (when present) MUST be
|
|
15
|
+
* a number in [0.5, 1.0]; floor < 0.5 is non-conformant per RFC 0039 §A.
|
|
16
|
+
*
|
|
17
|
+
* 2. A run driven by the fixture's low-confidence (0.3) mockDispatchPlan
|
|
18
|
+
* reaches a `waiting-clarification` terminal-suspension status — NOT
|
|
19
|
+
* `completed`. The clarification interrupt MUST surface so the operator
|
|
20
|
+
* can confirm-or-adjust the supervisor's marginal decision.
|
|
21
|
+
*
|
|
22
|
+
* 3. The parent run's event log contains exactly ONE
|
|
23
|
+
* `core.workflowChain.confidence-escalated` event, with:
|
|
24
|
+
* - payload.confidence === 0.3
|
|
25
|
+
* - payload.floor in [0.5, 1.0] (whatever floor the host advertised
|
|
26
|
+
* — spec default 0.5, operator stricter is permitted)
|
|
27
|
+
* - payload.escalationKind === 'clarify' (the reference host emits
|
|
28
|
+
* clarify; hosts choosing 'escalate' would also be conformant)
|
|
29
|
+
* - payload.workerId === the dispatch's first nextWorkerIds entry
|
|
30
|
+
* - payload.originalDecision carries the verbatim OrchestratorDecision
|
|
31
|
+
* AND causationId chains back to the `runOrchestrator.decided` event
|
|
32
|
+
* that emitted the low-confidence decision.
|
|
33
|
+
*
|
|
34
|
+
* 4. The event log contains ZERO `core.workflowChain.event` records — the
|
|
35
|
+
* escalation fired BEFORE any dispatch.began event per RFC 0039 §A
|
|
36
|
+
* ("the escalation event MUST appear in the run event log BEFORE the
|
|
37
|
+
* interrupt fires AND BEFORE any `core.workflowChain.event` with
|
|
38
|
+
* `phase: 'dispatch.began'` for the escalated decision's intended
|
|
39
|
+
* next-worker"). This is the load-bearing test that distinguishes
|
|
40
|
+
* Phase 2 from Phase 1: Phase 1 hosts dispatch unconditionally; Phase 2
|
|
41
|
+
* hosts gate on confidence.
|
|
42
|
+
*
|
|
43
|
+
* @see RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A
|
|
44
|
+
* @see spec/v1/multi-agent-execution.md §"Confidence escalation (RFC 0039 Phase 2)"
|
|
45
|
+
* @see schemas/run-event-payloads.schema.json §coreWorkflowChainConfidenceEscalated
|
|
46
|
+
*/
|
|
47
|
+
|
|
48
|
+
import { describe, it, expect } from 'vitest';
|
|
49
|
+
import { driver } from '../lib/driver.js';
|
|
50
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
51
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
52
|
+
|
|
53
|
+
// Workflow id posted to /v1/runs by the behavioral suite below.
const FIXTURE = 'conformance-multi-agent-confidence-escalation';

// True when no OPENWOP_BASE_URL is configured; every HTTP-backed suite is skipped.
const HTTP_SKIP = !process.env['OPENWOP_BASE_URL'];

// The behavioral suite is skipped when HTTP is unavailable, or when
// isFixtureAdvertised(FIXTURE) is false. The fixture lookup is not evaluated
// at all when HTTP_SKIP is already true.
const BEHAVIORAL_SKIP = HTTP_SKIP ? true : !isFixtureAdvertised(FIXTURE);
|
|
56
|
+
|
|
57
|
+
/**
 * The subset of a run-event record this scenario reads from
 * `GET /v1/runs/{runId}/events`. Only `type` is required; ids and payload are
 * optional so a missing field surfaces as an assertion failure, not a cast error.
 */
interface RunEvent {
  type: string;
  eventId?: string;
  causationId?: string;
  payload?: Record<string, unknown>;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Execution-model advertisement fields read by this scenario. Every field is
 * `unknown` because the scenario validates the advertised values at runtime.
 */
interface ExecutionModelAdvertisement {
  supported?: unknown;
  version?: unknown;
  confidenceEscalationFloor?: unknown;
}

/** `capabilities.multiAgent` slice of the discovery document. */
interface MultiAgentAdvertisement {
  executionModel?: ExecutionModelAdvertisement;
}

/** Slice of the `/.well-known/openwop` discovery document consumed here. */
interface DiscoveryDoc {
  capabilities?: {
    multiAgent?: MultiAgentAdvertisement;
  };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Fetches the discovery document from `/.well-known/openwop`.
 *
 * @returns the response body cast to `DiscoveryDoc`, or `null` when the request
 *   throws, the status is not 200, or the body is not a JSON object.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const res = await driver.get('/.well-known/openwop');
    if (res.status !== 200) return null;
    const body: unknown = res.json;
    // Callers only guard against `null`. Without this check a 200 response with
    // an empty or non-object body would leak through as `undefined` (or a
    // primitive) and crash on the first property access.
    if (typeof body !== 'object' || body === null) return null;
    return body as DiscoveryDoc;
  } catch {
    // Best-effort: a failed request is reported as "no discovery document".
    return null;
  }
}
|
|
80
|
+
|
|
81
|
+
// Advertisement-only check: validates the advertised confidenceEscalationFloor
// without starting a run. Returns early (test passes) when discovery is
// unavailable or the host does not advertise the field.
describe.skipIf(HTTP_SKIP)('multi-agent-confidence-escalation: capability shape (RFC 0039 §A)', () => {
  it('confidenceEscalationFloor (when advertised) MUST be in [0.5, 1.0]', async () => {
    const discovery = await readDiscovery();
    if (discovery === null) return;

    const model = discovery.capabilities?.multiAgent?.executionModel;
    if (model === undefined) return;

    const advertisedFloor = model.confidenceEscalationFloor;
    if (advertisedFloor === undefined) return;

    // Must be a finite number inside the closed interval [0.5, 1.0].
    const floorIsConformant =
      typeof advertisedFloor === 'number' &&
      Number.isFinite(advertisedFloor) &&
      advertisedFloor >= 0.5 &&
      advertisedFloor <= 1.0;
    const failureMessage = driver.describe(
      'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A',
      'confidenceEscalationFloor MUST be number in [0.5, 1.0]; values below the spec floor are non-conformant',
    );

    expect(floorIsConformant, failureMessage).toBe(true);
  });
});
|
|
98
|
+
|
|
99
|
+
// Behavioral check: drives the low-confidence fixture and verifies that the host
// suspends for clarification, emits exactly one confidence-escalated event with
// the payload fields listed in the scenario header, chains it to the
// runOrchestrator.decided event, and emits no core.workflowChain.event records.
describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral (RFC 0039 §A)', () => {
  it('happy-path: low-confidence decision → confidence-escalated event + clarification interrupt + zero dispatch events', async () => {
    const d = await readDiscovery();
    const supported = d?.capabilities?.multiAgent?.executionModel?.supported === true;
    const versionRaw = d?.capabilities?.multiAgent?.executionModel?.version;
    const version = typeof versionRaw === 'number' ? versionRaw : 0;
    if (!supported || version < 2) return; // soft-skip — Phase 1 hosts pass via this absence

    const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
    expect(create.status).toBe(201);
    const runId = (create.json as { runId: string }).runId;

    const terminal = await pollUntilTerminal(runId);
    // Phase 2 escalation suspends the parent — NOT a terminal `completed`.
    // The run is expected to settle in `waiting-clarification`, carrying an
    // open clarification interrupt.
    expect(
      terminal.status,
      driver.describe(
        'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A + spec/v1/interrupt.md',
        'a host emitting `interrupt.kind: "clarification"` MUST surface the run as `waiting-clarification` per spec/v1/interrupt.md §"Interrupt kinds"; low-confidence decision MUST NOT reach `completed` because no dispatch fired',
      ),
    ).toBe('waiting-clarification');

    const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
    expect(eventsRes.status).toBe(200);
    const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []);

    const escalated = events.filter((e) => e.type === 'core.workflowChain.confidence-escalated');
    expect(escalated.length, driver.describe(
      'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A',
      'low-confidence decision MUST emit exactly one core.workflowChain.confidence-escalated event',
    )).toBe(1);

    const ev = escalated[0]!;
    const payload = (ev.payload ?? {}) as {
      confidence?: number;
      floor?: number;
      escalationKind?: string;
      workerId?: unknown;
      originalDecision?: unknown;
    };
    expect(payload.confidence, 'payload.confidence echoes the decision').toBe(0.3);
    expect(
      typeof payload.floor === 'number' && payload.floor >= 0.5 && payload.floor <= 1.0,
      'payload.floor is the host-advertised floor (in [0.5, 1.0])',
    ).toBe(true);
    expect(
      payload.escalationKind === 'clarify' || payload.escalationKind === 'escalate',
      'payload.escalationKind ∈ {clarify, escalate}',
    ).toBe(true);

    // The scenario header lists workerId and originalDecision among the asserted
    // payload fields; check their presence and shape.
    expect(
      typeof payload.workerId === 'string' && payload.workerId.length > 0,
      'payload.workerId MUST name the worker the escalated decision intended to dispatch',
    ).toBe(true);
    const originalDecision = payload.originalDecision;
    expect(
      typeof originalDecision === 'object' && originalDecision !== null,
      'payload.originalDecision MUST carry the OrchestratorDecision that was escalated',
    ).toBe(true);
    // NOTE(review): the `nextWorkerIds` field name is taken from the scenario
    // header; confirm against schemas/orchestrator-decision.schema.json. The
    // equality is only checked when the decision actually exposes the array.
    const nextWorkerIds = (originalDecision as { nextWorkerIds?: unknown } | null | undefined)?.nextWorkerIds;
    if (Array.isArray(nextWorkerIds)) {
      expect(
        payload.workerId,
        'payload.workerId MUST equal the first nextWorkerIds entry of the original decision',
      ).toBe(nextWorkerIds[0]);
    }

    // Causation chain: escalation event causes back to the runOrchestrator.decided
    // that named the worker. The id must be present before the lookup — otherwise
    // `undefined === undefined` would match any event that lacks an eventId.
    expect(
      typeof ev.causationId === 'string' && ev.causationId.length > 0,
      'confidence-escalated event MUST carry a causationId',
    ).toBe(true);
    const decidedEvent = events.find((e) => e.eventId === ev.causationId);
    expect(
      decidedEvent?.type,
      'confidence-escalated causationId MUST point at the runOrchestrator.decided that surfaced the low-confidence decision',
    ).toBe('runOrchestrator.decided');

    // Load-bearing: NO dispatch event fired. Phase 2 gates BEFORE the loop.
    const chainEvents = events.filter((e) => e.type === 'core.workflowChain.event');
    expect(
      chainEvents.length,
      driver.describe(
        'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A',
        'low-confidence decision MUST NOT produce any core.workflowChain.event records — the escalation fires before any dispatch.began per the spec ordering',
      ),
    ).toBe(0);
  });
});
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* multi-agent-handoff-state-machine — RFC 0037 Phase 1 advertisement-shape + behavioral.
|
|
3
|
+
*
|
|
4
|
+
* Status: ACTIVE (advertisement-shape + behavioral). RFC 0037 filed
|
|
5
|
+
* 2026-05-21 as Draft; this scenario lands the matching conformance gate.
|
|
6
|
+
* Capability-gated on `capabilities.multiAgent.executionModel.supported: true`
|
|
7
|
+
* AND fixture-gated on the `conformance-multi-agent-handoff` parent + child
|
|
8
|
+
* fixtures (the behavioral suite is skipped unless the host advertises both).
|
|
9
|
+
*
|
|
10
|
+
* Asserts (Phase 1 — execution-loop + handoff state machine per spec/v1/multi-agent-execution.md):
|
|
11
|
+
*
|
|
12
|
+
* 1. Advertisement shape: when capabilities.multiAgent.executionModel.supported
|
|
13
|
+
* is present, version MUST be integer in [1, 4]; supported MUST be boolean.
|
|
14
|
+
*
|
|
15
|
+
* 2. Behavioral (gated on supported: true + fixture availability): a
|
|
16
|
+
* supervisor → next-worker → child-completed run emits the 4 expected
|
|
17
|
+
* `core.workflowChain.event` records in causation order:
|
|
18
|
+
* - dispatch.began (causationId → runOrchestrator.decided eventId)
|
|
19
|
+
* - dispatch.succeeded (causationId → dispatch.began eventId)
|
|
20
|
+
* - child.completed (causationId → dispatch.succeeded eventId)
|
|
21
|
+
* - output.harvested (causationId → child.completed eventId; harvestedKeys present
|
|
22
|
+
* when the dispatch config carried outputMapping)
|
|
23
|
+
*
|
|
24
|
+
* 3. Behavioral negative (specified here; not yet exercised by a test in this file): failed-child path emits dispatch.began → dispatch.succeeded
|
|
25
|
+
* → child.failed (NO output.harvested — per spec/v1/multi-agent-execution.md
|
|
26
|
+
* §"Handoff state machine" + RFC 0022 §B).
|
|
27
|
+
*
|
|
28
|
+
* @see RFCS/0037-multi-agent-execution-model.md
|
|
29
|
+
* @see spec/v1/multi-agent-execution.md §"Execution loop" + §"Handoff state machine"
|
|
30
|
+
* @see schemas/run-event-payloads.schema.json §coreWorkflowChainEvent
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { describe, it, expect } from 'vitest';
|
|
34
|
+
import { driver } from '../lib/driver.js';
|
|
35
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
36
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
37
|
+
|
|
38
|
+
// True when no OPENWOP_BASE_URL is configured; the HTTP-backed suites are skipped.
const HTTP_SKIP = !process.env['OPENWOP_BASE_URL'];
|
|
39
|
+
|
|
40
|
+
/**
 * Execution-model advertisement fields read by this scenario. Both are
 * `unknown` because their types are exactly what the shape test verifies.
 */
interface ExecutionModelAdvertisement {
  supported?: unknown;
  version?: unknown;
}

/** Slice of the `/.well-known/openwop` discovery document consumed here. */
interface DiscoveryDoc {
  capabilities?: {
    multiAgent?: {
      executionModel?: ExecutionModelAdvertisement;
    };
  };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Fetches the discovery document from `/.well-known/openwop`.
 *
 * @returns the response body cast to `DiscoveryDoc`, or `null` when the request
 *   throws, the status is not 200, or the body is not a JSON object.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const res = await driver.get('/.well-known/openwop');
    if (res.status !== 200) return null;
    const body: unknown = res.json;
    // Callers only guard against `null`. Without this check a 200 response with
    // an empty or non-object body would leak through as `undefined` (or a
    // primitive) and crash on the first property access.
    if (typeof body !== 'object' || body === null) return null;
    return body as DiscoveryDoc;
  } catch {
    // Best-effort: a failed request is reported as "no discovery document".
    return null;
  }
}
|
|
60
|
+
|
|
61
|
+
// Advertisement-only check: when the host publishes an executionModel object,
// `supported` must be a boolean and `version` an integer in [1, 4]. Returns
// early (test passes) when discovery or the advertisement is absent.
describe.skipIf(HTTP_SKIP)('multi-agent-handoff-state-machine: advertisement shape (RFC 0037 §C)', () => {
  it('capabilities.multiAgent.executionModel (when present) conforms to RFC 0037 §C', async () => {
    const discovery = await readDiscovery();
    if (discovery === null) return; // discovery unavailable — skip

    const model = discovery.capabilities?.multiAgent?.executionModel;
    if (model === undefined) return; // host doesn't advertise — soft-skip

    const supportedType = typeof model.supported;
    expect(
      supportedType,
      driver.describe(
        'RFCS/0037-multi-agent-execution-model.md §C',
        'capabilities.multiAgent.executionModel.supported MUST be boolean when present',
      ),
    ).toBe('boolean');

    const versionType = typeof model.version;
    expect(
      versionType,
      driver.describe(
        'RFCS/0037-multi-agent-execution-model.md §C',
        'capabilities.multiAgent.executionModel.version MUST be integer when present',
      ),
    ).toBe('number');

    // The cast is safe here: the typeof assertion above has already thrown for
    // any non-number value.
    const advertisedVersion = model.version as number;
    const versionInRange =
      Number.isInteger(advertisedVersion) && advertisedVersion >= 1 && advertisedVersion <= 4;
    expect(
      versionInRange,
      driver.describe(
        'RFCS/0037-multi-agent-execution-model.md §C',
        'version MUST be an integer in [1, 4] (1 = Phase 1 only; Phases 2-4 lift the ceiling additively)',
      ),
    ).toBe(true);
  });
});
|
|
91
|
+
|
|
92
|
+
// Behavioral assertion: when a host advertises capabilities.multiAgent.executionModel.supported,
|
|
93
|
+
// it MUST emit the 7-state handoff state machine's transition events as `core.workflowChain.event`
|
|
94
|
+
// records with causationId chained per the spec §"Transition events" table. The happy-path
|
|
95
|
+
// fixture (supervisor → next-worker → child completed with outputMapping non-empty) drives 4
|
|
96
|
+
// of the 7 transitions: dispatch.began → dispatch.succeeded → child.completed → output.harvested.
|
|
97
|
+
|
|
98
|
+
/**
 * The subset of a run-event record this scenario reads from
 * `GET /v1/runs/{runId}/events`. Only `type` is required; the ids used for the
 * causation-chain checks and the payload are optional.
 */
interface RunEvent {
  type: string;
  eventId?: string;
  causationId?: string;
  payload?: Record<string, unknown>;
}
|
|
99
|
+
|
|
100
|
+
// Workflow ids of the parent (supervisor) fixture and the child it dispatches.
const PARENT_FIXTURE = 'conformance-multi-agent-handoff';
const CHILD_FIXTURE = 'conformance-multi-agent-handoff-child';

// The behavioral suite runs only when HTTP is available AND both fixtures are
// advertised. Evaluation short-circuits left to right, so the fixture lookups
// are skipped when HTTP_SKIP is already true.
const BEHAVIORAL_SKIP = !(
  !HTTP_SKIP &&
  isFixtureAdvertised(PARENT_FIXTURE) &&
  isFixtureAdvertised(CHILD_FIXTURE)
);
|
|
103
|
+
|
|
104
|
+
// Behavioral check: runs the parent fixture to completion and verifies that the
// four handoff transitions appear as core.workflowChain.event records, in order,
// each chained to its predecessor through causationId, with the first chained to
// a runOrchestrator.decided event.
describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-handoff-state-machine: behavioral 4-event causation chain (RFC 0037 §"Handoff state machine")', () => {
  it('happy-path: dispatch.began → dispatch.succeeded → child.completed → output.harvested fire in causation order', async () => {
    const d = await readDiscovery();
    const advertised = d?.capabilities?.multiAgent?.executionModel?.supported === true;
    if (!advertised) return; // soft-skip — host honest about not implementing

    const create = await driver.post('/v1/runs', { workflowId: PARENT_FIXTURE });
    expect(create.status).toBe(201);
    const runId = (create.json as { runId: string }).runId;

    const terminal = await pollUntilTerminal(runId);
    expect(terminal.status, driver.describe(
      'spec/v1/multi-agent-execution.md §"Execution loop"',
      'parent run with supervisor → next-worker → terminate MUST reach terminal `completed`',
    )).toBe('completed');

    const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
    expect(eventsRes.status).toBe(200);
    const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []);
    const chainEvents = events.filter((e) => e.type === 'core.workflowChain.event');

    expect(chainEvents.length, driver.describe(
      'RFCS/0037-multi-agent-execution-model.md §"Conformance"',
      'happy-path fixture MUST produce 4 core.workflowChain.event records (dispatch.began, dispatch.succeeded, child.completed, output.harvested)',
    )).toBe(4);

    const phases = chainEvents.map((e) => (e.payload as { phase?: string } | undefined)?.phase);
    expect(phases, driver.describe(
      'spec/v1/multi-agent-execution.md §"Transition events"',
      'phase order MUST be dispatch.began → dispatch.succeeded → child.completed → output.harvested',
    )).toEqual(['dispatch.began', 'dispatch.succeeded', 'child.completed', 'output.harvested']);

    // Causation chain: each transition's causationId MUST equal the prior transition's eventId.
    // dispatch.began causes back to a runOrchestrator.decided; the inner 3 chain through each other.
    for (let i = 1; i < chainEvents.length; i++) {
      const prior = chainEvents[i - 1];
      const cur = chainEvents[i];
      // The prior id must exist before it is compared. Otherwise a host that
      // omits both eventId and causationId would satisfy `undefined === undefined`.
      const priorId = prior?.eventId;
      expect(
        typeof priorId === 'string' && priorId.length > 0,
        driver.describe(
          'spec/v1/multi-agent-execution.md §"Transition events"',
          `core.workflowChain.event #${i - 1} (${phases[i - 1]}) MUST carry an eventId so the next transition can chain to it`,
        ),
      ).toBe(true);
      expect(cur?.causationId, driver.describe(
        'spec/v1/multi-agent-execution.md §"Transition events"',
        `core.workflowChain.event #${i} (${phases[i]}) MUST have causationId === prior event's eventId`,
      )).toBe(prior?.eventId);
    }

    // dispatch.began causationId MUST chain back to a runOrchestrator.decided event.
    const dispatchBegan = chainEvents[0];
    expect(dispatchBegan?.causationId).toBeDefined();
    const decidedEvent = events.find((e) => e.eventId === dispatchBegan?.causationId);
    expect(decidedEvent?.type, driver.describe(
      'spec/v1/multi-agent-execution.md §"Transition events"',
      'dispatch.began causationId MUST point at the runOrchestrator.decided event that named this worker',
    )).toBe('runOrchestrator.decided');

    // output.harvested.harvestedKeys MUST list the outputMapping keys harvested.
    const harvested = chainEvents[3]?.payload as { harvestedKeys?: string[] } | undefined;
    expect(harvested?.harvestedKeys, driver.describe(
      'spec/v1/multi-agent-execution.md §"Transition events"',
      'output.harvested payload MUST list harvested parent-variable keys (the fixture\'s outputMapping is { parentResult: \'childOutcome\' })',
    )).toEqual(['parentResult']);
  });
});
|
|
164
|
+
|
|
165
|
+
// Cross-host promotion path per RFCS/0001 §"Promotion to Accepted": once a non-steward host
|
|
166
|
+
// advertises capabilities.multiAgent.executionModel.supported + the behavioral assertion above
|
|
167
|
+
// passes against it, RFC 0037 Phase 1 graduates Active → Accepted.
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* multi-agent-memory-lifecycle — RFC 0039 §B advertisement-shape + behavioral stubs.
|
|
3
|
+
*
|
|
4
|
+
* Status: ACTIVE (advertisement-shape; behavioral stubs deferred to a
|
|
5
|
+
* host that advertises both `capabilities.memory.supported: true` AND
|
|
6
|
+
* `capabilities.multiAgent.executionModel.version >= 2`). Phase 1 hosts
|
|
7
|
+
* + Phase 2 hosts without memory + Phase 2 hosts with memory but no
|
|
8
|
+
* MAE-3 snapshot implementation all soft-skip cleanly.
|
|
9
|
+
*
|
|
10
|
+
* Closes the conformance gate for RFC 0039 §B (MAE-2 cross-run TTL +
|
|
11
|
+
* MAE-3 replay snapshot). Behavioral assertions require a host that
|
|
12
|
+
* actually advertises the MemoryAdapter surface; the reference
|
|
13
|
+
* workflow-engine sample advertises `capabilities.memory.supported:
|
|
14
|
+
* false` so this scenario soft-skips there. The Postgres reference
|
|
15
|
+
* host advertises memory.supported: true; once it adopts RFC 0039
|
|
16
|
+
* Phase 2 the behavioral assertions below light up.
|
|
17
|
+
*
|
|
18
|
+
* Asserts (advertisement-shape — always-on when discovery is reachable):
|
|
19
|
+
*
|
|
20
|
+
* 1. capabilities.multiAgent.executionModel.crossChildMemoryConcurrency
|
|
21
|
+
* (when advertised) MUST be one of {"strict", "advisory"} per
|
|
22
|
+
* RFC 0039 §B + schemas/capabilities.schema.json.
|
|
23
|
+
*
|
|
24
|
+
* 2. When a host advertises BOTH multiAgent.executionModel.version >= 2
|
|
25
|
+
* AND memory.supported: true, the host MUST honor the MAE-2 +
|
|
26
|
+
* MAE-3 contracts (behavioral assertions below).
|
|
27
|
+
*
|
|
28
|
+
* Behavioral assertions (capability-gated; soft-skip when no host
|
|
29
|
+
* advertises the conjunction):
|
|
30
|
+
*
|
|
31
|
+
* 3. MAE-2 cross-run TTL: a child writing MemoryEntry { ttl: 5 } at
|
|
32
|
+
* parent-clock T+10s has `expiresAt` reflecting T+15s (child
|
|
33
|
+
* write time + 5s), NOT parent-start + 5s. Implementation requires
|
|
34
|
+
* a host-side test seam to drive the cross-run write + read; once
|
|
35
|
+
* a memory-advertising host wires the seam the assertion runs.
|
|
36
|
+
*
|
|
37
|
+
* 4. MAE-3 replay snapshot refusal: a host that advertises Phase 2 +
|
|
38
|
+
* memory MUST either (a) serve the fork from a past event-log
|
|
39
|
+
* index returning memory state as-of that index, OR (b) refuse
|
|
40
|
+
* with error.code: "replay_memory_snapshot_unavailable" per
|
|
41
|
+
* spec/v1/rest-endpoints.md §"Common error codes". Silent
|
|
42
|
+
* substitution of current memory is non-conformant.
|
|
43
|
+
*
|
|
44
|
+
* @see RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B
|
|
45
|
+
* @see spec/v1/multi-agent-execution.md §"Agent memory lifecycle across sub-runs"
|
|
46
|
+
* @see spec/v1/agent-memory.md §"TTL semantics" (which the child-write-time MAE-2 anchoring extends to the cross-run case)
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
import { describe, it, expect } from 'vitest';
|
|
50
|
+
import { driver } from '../lib/driver.js';
|
|
51
|
+
|
|
52
|
+
// True when no OPENWOP_BASE_URL is configured; the HTTP-backed suites are skipped.
const HTTP_SKIP = !process.env['OPENWOP_BASE_URL'];
|
|
53
|
+
|
|
54
|
+
/**
 * Execution-model advertisement fields read by this scenario. All are
 * `unknown` because the scenario validates the advertised values at runtime.
 */
interface ExecutionModelAdvertisement {
  supported?: unknown;
  version?: unknown;
  crossChildMemoryConcurrency?: unknown;
}

/** `capabilities.memory` slice of the discovery document. */
interface MemoryAdvertisement {
  supported?: unknown;
}

/** Slice of the `/.well-known/openwop` discovery document consumed here. */
interface DiscoveryDoc {
  capabilities?: {
    memory?: MemoryAdvertisement;
    multiAgent?: {
      executionModel?: ExecutionModelAdvertisement;
    };
  };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Fetches the discovery document from `/.well-known/openwop`.
 *
 * @returns the response body cast to `DiscoveryDoc`, or `null` when the request
 *   throws, the status is not 200, or the body is not a JSON object.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const res = await driver.get('/.well-known/openwop');
    if (res.status !== 200) return null;
    const body: unknown = res.json;
    // Callers only guard against `null`. Without this check a 200 response with
    // an empty or non-object body would leak through as `undefined` (or a
    // primitive) and crash on the first property access.
    if (typeof body !== 'object' || body === null) return null;
    return body as DiscoveryDoc;
  } catch {
    // Best-effort: a failed request is reported as "no discovery document".
    return null;
  }
}
|
|
76
|
+
|
|
77
|
+
/**
 * RFC 0039 §B — shape check for the optional `crossChildMemoryConcurrency`
 * advertisement. The whole suite is skipped when HTTP_SKIP is set; the test
 * itself skips when discovery cannot be read or the field is not advertised.
 */
describe.skipIf(HTTP_SKIP)('multi-agent-memory-lifecycle: advertisement shape (RFC 0039 §B)', () => {
  it('crossChildMemoryConcurrency (when advertised) MUST be one of {strict, advisory}', async (ctx) => {
    const discovery = await readDiscovery();
    if (discovery === null) {
      // No discovery document available — nothing to assert against.
      ctx.skip();
      return;
    }

    const executionModel = discovery.capabilities?.multiAgent?.executionModel;
    const concurrencyMode = executionModel?.crossChildMemoryConcurrency;
    if (concurrencyMode === undefined) {
      // The advertisement is optional; the host has not opted in.
      ctx.skip();
      return;
    }

    // Closed enum: any other value (including non-strings) fails the check.
    const allowedModes: readonly unknown[] = ['strict', 'advisory'];
    const failureMessage = driver.describe(
      'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B',
      'crossChildMemoryConcurrency MUST be one of {strict, advisory} when present; values outside the closed enum are non-conformant',
    );
    expect(allowedModes.includes(concurrencyMode), failureMessage).toBe(true);
  });
});
|
|
98
|
+
|
|
99
|
+
/**
 * RFC 0039 §B — behavioral contracts MAE-2 and MAE-3. Both are registered as
 * `todo` so reporters show the gap instead of a vacuous PASS.
 */
describe.skipIf(HTTP_SKIP)('multi-agent-memory-lifecycle: behavioral (RFC 0039 §B MAE-2 + MAE-3)', () => {
  // MAE-2 — cross-run TTL anchoring.
  //
  // Needs a memory-advertising Phase 2 host that exposes a host-side test
  // seam for cross-run memory writes (e.g.,
  // POST /v1/host/sample/test/memory/cross-run-ttl-roundtrip).
  // Planned scenario:
  //   1. Parent starts at parent-clock T+0.
  //   2. Child is dispatched at T+10s and writes
  //      MemoryEntry { key: 'k', value: 'v', ttl: 5 }.
  //   3. Parent reads MemoryEntry { key: 'k' } at T+12s; expiresAt MUST be
  //      approximately T+15s (child write at T+10 + ttl 5), not T+5s.
  // Until such a host wires the seam, the contract is documentation-only.
  it.todo('MAE-2 cross-run TTL: child write expiresAt MUST be anchored at child write time, not parent start');

  // MAE-3 — replay snapshot.
  //
  // Needs a host that implements the snapshot mechanism per RFC 0039 §B.
  // Planned scenario:
  //   1. Run a workflow that writes MemoryEntry { key: 'k', value: 'v1' } at index 10.
  //   2. Write MemoryEntry { key: 'k', value: 'v2' } at index 20.
  //   3. POST /v1/runs/{runId}:fork { fromSeq: 15 }.
  //   4. The forked run reads MemoryEntry { key: 'k' }; it MUST return 'v1' (not 'v2').
  //   5. Alternative compliance: the fork is refused with
  //      error.code: 'replay_memory_snapshot_unavailable' AND
  //      details.fromSeq === 15.
  // Silently substituting v2 (the current state) is non-conformant.
  it.todo('MAE-3 replay snapshot: fork from past index MUST return memory-as-of-index OR refuse with replay_memory_snapshot_unavailable');
});
|
|
@@ -85,3 +85,61 @@ describe('multi-region-idempotency: capability shape', () => {
|
|
|
85
85
|
}
|
|
86
86
|
});
|
|
87
87
|
});
|
|
88
|
+
|
|
89
|
+
// RFC 0036 — granular `multiRegion` sub-block advertisement shape. Hosts that
|
|
90
|
+
// opt into the granular advertisement (separate from the categorical `crossRegion`
|
|
91
|
+
// claim) MUST conform to the shape below: supported is boolean (required); when
|
|
92
|
+
// supported is true, replicationLagBoundMs is integer [0, 60000] and
|
|
93
|
+
// partitionRecoveryStrategy is either the categorical enum or an x-host-<host>-<key>
|
|
94
|
+
// extension namespace string. Hosts that don't advertise multiRegion stay on the
|
|
95
|
+
// categorical crossRegion claim (above); both forms are compatible.
|
|
96
|
+
|
|
97
|
+
/**
 * Granular `capabilities.idempotency.multiRegion` block as read from the
 * discovery document. Fields are `unknown` because the tests below validate
 * their runtime types rather than trusting the host.
 */
interface MultiRegionCaps {
  /** Asserted to be a boolean. */
  supported?: unknown;
  /** When `supported` is true and this is present: asserted to be an integer in [0, 60000]. */
  replicationLagBoundMs?: unknown;
  /** When `supported` is true and this is present: a categorical value or an `x-host-<host>-<key>` string. */
  partitionRecoveryStrategy?: unknown;
}
|
|
102
|
+
|
|
103
|
+
/**
 * RFC 0036 §A — shape checks for the optional granular
 * `capabilities.idempotency.multiRegion` advertisement.
 *
 * When the host advertises the block, the test asserts:
 *   - `supported` is a boolean;
 *   - if `supported === true` and `replicationLagBoundMs` is present, it is an
 *     integer in [0, 60000];
 *   - if `supported === true` and `partitionRecoveryStrategy` is present, it
 *     is a string that is either `last-writer-wins` / `first-writer-wins` or
 *     matches the `x-host-<host>-<key>` extension pattern.
 *
 * When the block is absent the test is reported as skipped (not passed).
 */
describe('multi-region-idempotency: granular multiRegion advertisement shape (RFC 0036 §A)', () => {
  const rfcRef = 'RFCS/0036-multi-region-and-cross-engine-guarantees.md §A';
  const extensionStrategyPattern = /^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$/;

  it('capabilities.idempotency.multiRegion (when present) conforms to RFC 0036 §A', async (ctx) => {
    const disco = await driver.get('/.well-known/openwop');
    const idem =
      (disco.json as { capabilities?: { idempotency?: IdempotencyCaps & { multiRegion?: MultiRegionCaps | null } } })
        .capabilities?.idempotency;
    const mr = idem?.multiRegion;
    if (mr === undefined) {
      // Host doesn't advertise the granular block. Report a real skip so the
      // run does not show a vacuous PASS.
      ctx.skip();
      return;
    }

    // `mr?.` keeps an explicit `multiRegion: null` from crashing with a raw
    // TypeError; it fails this assertion with the RFC-referenced message instead.
    expect(
      typeof mr?.supported,
      driver.describe(rfcRef, 'capabilities.idempotency.multiRegion.supported MUST be boolean when present'),
    ).toBe('boolean');

    if (mr !== null && mr.supported === true) {
      const lagBound = mr.replicationLagBoundMs;
      if (lagBound !== undefined) {
        const lagBoundValid =
          typeof lagBound === 'number' && Number.isInteger(lagBound) && lagBound >= 0 && lagBound <= 60000;
        expect(
          lagBoundValid,
          driver.describe(rfcRef, 'replicationLagBoundMs MUST be integer in [0, 60000] when supported is true'),
        ).toBe(true);
      }

      const strategy = mr.partitionRecoveryStrategy;
      if (strategy !== undefined) {
        const isCategorical = strategy === 'last-writer-wins' || strategy === 'first-writer-wins';
        // RegExp#test string-coerces its argument, so without the typeof guard
        // a value such as ['x-host-acme-merge'] would be accepted.
        const isExtension = typeof strategy === 'string' && extensionStrategyPattern.test(strategy);
        expect(
          isCategorical || isExtension,
          driver.describe(
            rfcRef,
            'partitionRecoveryStrategy MUST be one of {last-writer-wins, first-writer-wins} OR match ^x-host-<host>-<key>$',
          ),
        ).toBe(true);
      }
    }
  });
});
|