@openwop/openwop-conformance 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +132 -1
- package/README.md +3 -2
- package/api/asyncapi.yaml +8 -0
- package/api/openapi.yaml +371 -1
- package/coverage.md +26 -6
- package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
- package/fixtures/conformance-envelope-recovery-applied.json +39 -0
- package/fixtures/conformance-envelope-refusal.json +38 -0
- package/fixtures/conformance-envelope-retry-attempted.json +39 -0
- package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
- package/fixtures/conformance-envelope-truncated.json +39 -0
- package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
- package/fixtures/conformance-model-capability-insufficient.json +25 -0
- package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
- package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
- package/fixtures/conformance-multi-agent-handoff.json +49 -0
- package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
- package/fixtures/conformance-prompt-end-to-end.json +33 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
- package/fixtures/openwop-smoke-cost-emit.json +37 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
- package/fixtures.md +39 -0
- package/package.json +1 -1
- package/schemas/README.md +5 -0
- package/schemas/agent-manifest.schema.json +16 -0
- package/schemas/capabilities.schema.json +384 -1
- package/schemas/envelopes/clarification.request.schema.json +9 -0
- package/schemas/envelopes/error.schema.json +4 -0
- package/schemas/envelopes/schema.request.schema.json +4 -0
- package/schemas/envelopes/schema.response.schema.json +1 -1
- package/schemas/node-pack-manifest.schema.json +28 -0
- package/schemas/orchestrator-decision.schema.json +12 -0
- package/schemas/prompt-kind.schema.json +8 -0
- package/schemas/prompt-pack-manifest.schema.json +80 -0
- package/schemas/prompt-ref.schema.json +40 -0
- package/schemas/prompt-template.schema.json +149 -0
- package/schemas/registry-version-manifest.schema.json +5 -0
- package/schemas/run-ancestry-response.schema.json +54 -0
- package/schemas/run-event-payloads.schema.json +479 -11
- package/schemas/run-event.schema.json +15 -1
- package/schemas/run-snapshot.schema.json +3 -2
- package/schemas/workflow-definition.schema.json +19 -1
- package/src/lib/llm-cache-key-recipe.ts +68 -0
- package/src/scenarios/aiEnvelope.contractRefusal.test.ts +104 -13
- package/src/scenarios/aiEnvelope.correlationReplay.test.ts +32 -15
- package/src/scenarios/aiEnvelope.redaction.test.ts +6 -5
- package/src/scenarios/aiEnvelope.schemaDrift.test.ts +5 -5
- package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +211 -12
- package/src/scenarios/aiEnvelope.universalKinds.test.ts +7 -7
- package/src/scenarios/blob-presign-expiry.test.ts +7 -7
- package/src/scenarios/cache-ttl-expiry.test.ts +6 -6
- package/src/scenarios/cost-attribution.test.ts +124 -11
- package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
- package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
- package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
- package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
- package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
- package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
- package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
- package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
- package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
- package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
- package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
- package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
- package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
- package/src/scenarios/envelope-truncated.test.ts +136 -0
- package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
- package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
- package/src/scenarios/fixtures-valid.test.ts +123 -15
- package/src/scenarios/kv-ttl-expiry.test.ts +7 -7
- package/src/scenarios/model-capability-insufficient.test.ts +221 -0
- package/src/scenarios/model-capability-substituted.test.ts +203 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +201 -0
- package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
- package/src/scenarios/multi-region-idempotency.test.ts +58 -0
- package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
- package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
- package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
- package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
- package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
- package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
- package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
- package/src/scenarios/prompt-pack-install.test.ts +187 -0
- package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
- package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
- package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
- package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
- package/src/scenarios/prompt-template-shape.test.ts +359 -0
- package/src/scenarios/queue-ack-nack-dlq.test.ts +7 -7
- package/src/scenarios/queue-publish-consume-roundtrip.test.ts +7 -7
- package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
- package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
- package/src/scenarios/replay-llm-cache-key.test.ts +1 -40
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
- package/src/scenarios/sandbox-capability-gate-respected.test.ts +27 -0
- package/src/scenarios/sandbox-memory-cap.test.ts +58 -0
- package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +30 -0
- package/src/scenarios/sandbox-no-host-env-leak.test.ts +27 -0
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +88 -0
- package/src/scenarios/sandbox-no-host-process-escape.test.ts +31 -0
- package/src/scenarios/sandbox-no-network-escape.test.ts +28 -0
- package/src/scenarios/sandbox-timeout-cap.test.ts +58 -0
- package/src/scenarios/search-bm25-roundtrip.test.ts +7 -7
- package/src/scenarios/spec-corpus-validity.test.ts +34 -6
- package/src/scenarios/sql-transaction-atomicity.test.ts +6 -6
- package/src/scenarios/stream-subscribe-from-beginning.test.ts +7 -7
- package/src/scenarios/subworkflow-input-mapping.test.ts +70 -4
- package/src/scenarios/table-cursor-pagination.test.ts +7 -7
- package/src/scenarios/table-schema-enforcement.test.ts +7 -7
- package/src/scenarios/vector-knn-roundtrip.test.ts +7 -7
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* multi-agent-handoff-state-machine — RFC 0037 Phase 1 advertisement-shape + behavioral.
|
|
3
|
+
*
|
|
4
|
+
* Status: ACTIVE (advertisement-shape + behavioral). RFC 0037 filed
|
|
5
|
+
* 2026-05-21 as Draft; this scenario lands the matching conformance gate.
|
|
6
|
+
* Capability-gated on `capabilities.multiAgent.executionModel.supported: true`
|
|
7
|
+
* AND fixture-gated on the `conformance-multi-agent-handoff` parent + child
|
|
8
|
+
* fixtures (when those land; current scenario is shape + soft-skip until then).
|
|
9
|
+
*
|
|
10
|
+
* Asserts (Phase 1 — execution-loop + handoff state machine per spec/v1/multi-agent-execution.md):
|
|
11
|
+
*
|
|
12
|
+
* 1. Advertisement shape: when capabilities.multiAgent.executionModel.supported
|
|
13
|
+
* is present, version MUST be integer in [1, 4]; supported MUST be boolean.
|
|
14
|
+
*
|
|
15
|
+
* 2. Behavioral (gated on supported: true + fixture availability): a
|
|
16
|
+
* supervisor → next-worker → child-completed run emits the 4 expected
|
|
17
|
+
* `core.workflowChain.event` records in causation order:
|
|
18
|
+
* - dispatch.began (causationId → runOrchestrator.decided eventId)
|
|
19
|
+
* - dispatch.succeeded (causationId → dispatch.began eventId)
|
|
20
|
+
* - child.completed (causationId → dispatch.succeeded eventId)
|
|
21
|
+
* - output.harvested (causationId → child.completed eventId; harvestedKeys present
|
|
22
|
+
* when the dispatch config carried outputMapping)
|
|
23
|
+
*
|
|
24
|
+
* 3. Behavioral negative (contract documented here; not yet asserted by a test in this file): failed-child path emits dispatch.began → dispatch.succeeded
|
|
25
|
+
* → child.failed (NO output.harvested — per spec/v1/multi-agent-execution.md
|
|
26
|
+
* §"Handoff state machine" + RFC 0022 §B).
|
|
27
|
+
*
|
|
28
|
+
* @see RFCS/0037-multi-agent-execution-model.md
|
|
29
|
+
* @see spec/v1/multi-agent-execution.md §"Execution loop" + §"Handoff state machine"
|
|
30
|
+
* @see schemas/run-event-payloads.schema.json §coreWorkflowChainEvent
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { describe, it, expect } from 'vitest';
|
|
34
|
+
import { driver } from '../lib/driver.js';
|
|
35
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
36
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
37
|
+
|
|
38
|
+
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
39
|
+
|
|
40
|
+
/**
 * The `capabilities.multiAgent.executionModel` advertisement as this scenario
 * reads it. Both fields are `unknown` because the tests below check their
 * runtime types themselves.
 */
interface ExecutionModelAdvertisement {
  supported?: unknown;
  version?: unknown;
}

/**
 * Minimal view of the `/.well-known/openwop` discovery document: only the
 * path down to `capabilities.multiAgent.executionModel` is modelled, and
 * every level is optional.
 */
interface DiscoveryDoc {
  capabilities?: {
    multiAgent?: {
      executionModel?: ExecutionModelAdvertisement;
    };
  };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Fetches the discovery document from `/.well-known/openwop`.
 *
 * @returns the response body (cast to `DiscoveryDoc`) on HTTP 200; `null`
 *   for any other status or when the request throws.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const { status, json } = await driver.get('/.well-known/openwop');
    return status === 200 ? (json as DiscoveryDoc) : null;
  } catch {
    // Best-effort read: callers treat `null` as "discovery unavailable".
    return null;
  }
}
|
|
60
|
+
|
|
61
|
+
// Advertisement-shape gate for RFC 0037 §C. Runs only when OPENWOP_BASE_URL is
// set. When discovery is unreachable or the host does not advertise
// `capabilities.multiAgent.executionModel`, the test is reported as SKIPPED
// (via the vitest test context) rather than returning early, so reporters do
// not record a vacuous PASS for a host that was never actually checked.
describe.skipIf(HTTP_SKIP)('multi-agent-handoff-state-machine: advertisement shape (RFC 0037 §C)', () => {
  it('capabilities.multiAgent.executionModel (when present) conforms to RFC 0037 §C', async (ctx) => {
    const d = await readDiscovery();
    if (d === null) {
      ctx.skip(); // discovery unavailable
      return;
    }
    const executionModel = d.capabilities?.multiAgent?.executionModel;
    if (executionModel === undefined) {
      ctx.skip(); // host doesn't advertise the block
      return;
    }

    // `supported` must be a boolean.
    expect(
      typeof executionModel.supported,
      driver.describe(
        'RFCS/0037-multi-agent-execution-model.md §C',
        'capabilities.multiAgent.executionModel.supported MUST be boolean when present',
      ),
    ).toBe('boolean');

    // `version` must be a number...
    expect(
      typeof executionModel.version,
      driver.describe(
        'RFCS/0037-multi-agent-execution-model.md §C',
        'capabilities.multiAgent.executionModel.version MUST be integer when present',
      ),
    ).toBe('number');

    // ...and specifically an integer within the closed range [1, 4].
    const v = executionModel.version as number;
    expect(
      Number.isInteger(v) && v >= 1 && v <= 4,
      driver.describe(
        'RFCS/0037-multi-agent-execution-model.md §C',
        'version MUST be an integer in [1, 4] (1 = Phase 1 only; Phases 2-4 lift the ceiling additively)',
      ),
    ).toBe(true);
  });
});
|
|
91
|
+
|
|
92
|
+
// Behavioral assertion: when a host advertises capabilities.multiAgent.executionModel.supported,
|
|
93
|
+
// it MUST emit the 7-state handoff state machine's transition events as `core.workflowChain.event`
|
|
94
|
+
// records with causationId chained per the spec §"Transition events" table. The happy-path
|
|
95
|
+
// fixture (supervisor → next-worker → child completed with outputMapping non-empty) drives 4
|
|
96
|
+
// of the 7 transitions: dispatch.began → dispatch.succeeded → child.completed → output.harvested.
|
|
97
|
+
|
|
98
|
+
/** Subset of a run-event record that the behavioral assertions inspect. */
interface RunEvent {
  type: string;
  eventId?: string;
  causationId?: string;
  payload?: Record<string, unknown>;
}

// Workflow ids of the parent and child fixtures that drive the happy path.
const PARENT_FIXTURE = 'conformance-multi-agent-handoff';
const CHILD_FIXTURE = 'conformance-multi-agent-handoff-child';

// The behavioral suite is skipped when there is no HTTP target, or when either
// fixture is not advertised. Evaluation order and short-circuiting match a
// plain `||` chain: no fixture lookup happens when HTTP_SKIP is set.
const BEHAVIORAL_SKIP =
  HTTP_SKIP ||
  ![PARENT_FIXTURE, CHILD_FIXTURE].every((fixtureId) => isFixtureAdvertised(fixtureId));
|
|
103
|
+
|
|
104
|
+
// Behavioral gate: drives the parent fixture to completion and checks the four
// `core.workflowChain.event` records and their causation links.
//
// Changes relative to the previous revision:
//  - a host that does not advertise `executionModel.supported: true` is now
//    reported as SKIPPED (vitest test context) instead of a silent PASS;
//  - each link of the causation chain first requires the prior event to carry
//    a non-empty `eventId`. Without that, a host omitting both `eventId` and
//    `causationId` satisfied `toBe(undefined)` and passed vacuously.
describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-handoff-state-machine: behavioral 4-event causation chain (RFC 0037 §"Handoff state machine")', () => {
  it('happy-path: dispatch.began → dispatch.succeeded → child.completed → output.harvested fire in causation order', async (ctx) => {
    const d = await readDiscovery();
    const advertised = d?.capabilities?.multiAgent?.executionModel?.supported === true;
    if (!advertised) {
      ctx.skip(); // host honest about not implementing
      return;
    }

    // Start the parent run and wait for it to reach a terminal status.
    const create = await driver.post('/v1/runs', { workflowId: PARENT_FIXTURE });
    expect(create.status).toBe(201);
    const runId = (create.json as { runId: string }).runId;

    const terminal = await pollUntilTerminal(runId);
    expect(terminal.status, driver.describe(
      'spec/v1/multi-agent-execution.md §"Execution loop"',
      'parent run with supervisor → next-worker → terminate MUST reach terminal `completed`',
    )).toBe('completed');

    // Pull the run's event log and keep only the workflow-chain records.
    const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
    expect(eventsRes.status).toBe(200);
    const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []);
    const chainEvents = events.filter((e) => e.type === 'core.workflowChain.event');

    expect(chainEvents.length, driver.describe(
      'RFCS/0037-multi-agent-execution-model.md §"Conformance"',
      'happy-path fixture MUST produce 4 core.workflowChain.event records (dispatch.began, dispatch.succeeded, child.completed, output.harvested)',
    )).toBe(4);

    const phases = chainEvents.map((e) => (e.payload as { phase?: string } | undefined)?.phase);
    expect(phases, driver.describe(
      'spec/v1/multi-agent-execution.md §"Transition events"',
      'phase order MUST be dispatch.began → dispatch.succeeded → child.completed → output.harvested',
    )).toEqual(['dispatch.began', 'dispatch.succeeded', 'child.completed', 'output.harvested']);

    // Causation chain: each transition's causationId MUST equal the prior transition's eventId.
    // dispatch.began causes back to a runOrchestrator.decided; the inner 3 chain through each other.
    for (let i = 1; i < chainEvents.length; i++) {
      const prior = chainEvents[i - 1];
      const cur = chainEvents[i];
      // Guard against the vacuous case where both ids are missing.
      expect(
        typeof prior?.eventId === 'string' && prior.eventId.length > 0,
        driver.describe(
          'spec/v1/multi-agent-execution.md §"Transition events"',
          `core.workflowChain.event #${i - 1} (${phases[i - 1]}) MUST carry a non-empty eventId for the next transition to chain to`,
        ),
      ).toBe(true);
      expect(cur?.causationId, driver.describe(
        'spec/v1/multi-agent-execution.md §"Transition events"',
        `core.workflowChain.event #${i} (${phases[i]}) MUST have causationId === prior event's eventId`,
      )).toBe(prior?.eventId);
    }

    // dispatch.began causationId MUST chain back to a runOrchestrator.decided event.
    const dispatchBegan = chainEvents[0];
    expect(dispatchBegan?.causationId).toBeDefined();
    const decidedEvent = events.find((e) => e.eventId === dispatchBegan?.causationId);
    expect(decidedEvent?.type, driver.describe(
      'spec/v1/multi-agent-execution.md §"Transition events"',
      'dispatch.began causationId MUST point at the runOrchestrator.decided event that named this worker',
    )).toBe('runOrchestrator.decided');

    // output.harvested.harvestedKeys MUST list the outputMapping keys harvested.
    // Index 3 is output.harvested: the phase order was asserted above.
    const harvested = chainEvents[3]?.payload as { harvestedKeys?: string[] } | undefined;
    expect(harvested?.harvestedKeys, driver.describe(
      'spec/v1/multi-agent-execution.md §"Transition events"',
      'output.harvested payload MUST list harvested parent-variable keys (the fixture\'s outputMapping is { parentResult: \'childOutcome\' })',
    )).toEqual(['parentResult']);
  });
});
|
|
164
|
+
|
|
165
|
+
// Cross-host promotion path per RFCs/0001 §"Promotion to Accepted": once a non-steward host
|
|
166
|
+
// advertises capabilities.multiAgent.executionModel.supported + the behavioral assertion above
|
|
167
|
+
// passes against it, RFC 0037 Phase 1 graduates Active → Accepted.
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* multi-agent-memory-lifecycle — RFC 0039 §B advertisement-shape + behavioral stubs.
|
|
3
|
+
*
|
|
4
|
+
* Status: ACTIVE (advertisement-shape; behavioral stubs deferred to a
|
|
5
|
+
* host that advertises both `capabilities.memory.supported: true` AND
|
|
6
|
+
* `capabilities.multiAgent.executionModel.version >= 2`). Phase 1 hosts
|
|
7
|
+
* + Phase 2 hosts without memory + Phase 2 hosts with memory but no
|
|
8
|
+
* MAE-3 snapshot implementation all soft-skip cleanly.
|
|
9
|
+
*
|
|
10
|
+
* Closes the conformance gate for RFC 0039 §B (MAE-2 cross-run TTL +
|
|
11
|
+
* MAE-3 replay snapshot). Behavioral assertions require a host that
|
|
12
|
+
* actually advertises the MemoryAdapter surface; the reference
|
|
13
|
+
* workflow-engine sample advertises `capabilities.memory.supported:
|
|
14
|
+
* false` so this scenario soft-skips there. The Postgres reference
|
|
15
|
+
* host advertises memory.supported: true; once it adopts RFC 0039
|
|
16
|
+
* Phase 2 the behavioral assertions below light up.
|
|
17
|
+
*
|
|
18
|
+
* Asserts (advertisement-shape — always-on when discovery is reachable):
|
|
19
|
+
*
|
|
20
|
+
* 1. capabilities.multiAgent.executionModel.crossChildMemoryConcurrency
|
|
21
|
+
* (when advertised) MUST be one of {"strict", "advisory"} per
|
|
22
|
+
* RFC 0039 §B + schemas/capabilities.schema.json.
|
|
23
|
+
*
|
|
24
|
+
* 2. When a host advertises BOTH multiAgent.executionModel.version >= 2
|
|
25
|
+
* AND memory.supported: true, the host MUST honor the MAE-2 +
|
|
26
|
+
* MAE-3 contracts (behavioral assertions below).
|
|
27
|
+
*
|
|
28
|
+
* Behavioral assertions (capability-gated; soft-skip when no host
|
|
29
|
+
* advertises the conjunction):
|
|
30
|
+
*
|
|
31
|
+
* 3. MAE-2 cross-run TTL: a child writing MemoryEntry { ttl: 5 } at
|
|
32
|
+
* parent-clock T+10s has `expiresAt` reflecting T+15s (child
|
|
33
|
+
* write time + 5s), NOT parent-start + 5s. Implementation requires
|
|
34
|
+
* a host-side test seam to drive the cross-run write + read; once
|
|
35
|
+
* a memory-advertising host wires the seam the assertion runs.
|
|
36
|
+
*
|
|
37
|
+
* 4. MAE-3 replay snapshot refusal: a host that advertises Phase 2 +
|
|
38
|
+
* memory MUST either (a) serve the fork from a past event-log
|
|
39
|
+
* index returning memory state as-of that index, OR (b) refuse
|
|
40
|
+
* with error.code: "replay_memory_snapshot_unavailable" per
|
|
41
|
+
* spec/v1/rest-endpoints.md §"Common error codes". Silent
|
|
42
|
+
* substitution of current memory is non-conformant.
|
|
43
|
+
*
|
|
44
|
+
* @see RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B
|
|
45
|
+
* @see spec/v1/multi-agent-execution.md §"Agent memory lifecycle across sub-runs"
|
|
46
|
+
* @see spec/v1/agent-memory.md §"TTL semantics" (which the child-write-time MAE-2 anchoring extends to the cross-run case)
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
import { describe, it, expect } from 'vitest';
|
|
50
|
+
import { driver } from '../lib/driver.js';
|
|
51
|
+
|
|
52
|
+
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
53
|
+
|
|
54
|
+
/**
 * The `capabilities.multiAgent.executionModel` advertisement as this scenario
 * reads it. Fields stay `unknown`; the tests validate the runtime values.
 */
interface ExecutionModelAdvertisement {
  supported?: unknown;
  version?: unknown;
  crossChildMemoryConcurrency?: unknown;
}

/**
 * Minimal view of the `/.well-known/openwop` discovery document: the
 * `memory` and `multiAgent.executionModel` capability blocks only, all optional.
 */
interface DiscoveryDoc {
  capabilities?: {
    memory?: { supported?: unknown };
    multiAgent?: {
      executionModel?: ExecutionModelAdvertisement;
    };
  };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Fetches the discovery document from `/.well-known/openwop`.
 *
 * @returns the response body (cast to `DiscoveryDoc`) on HTTP 200; `null`
 *   for any other status or when the request throws.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const { status, json } = await driver.get('/.well-known/openwop');
    return status === 200 ? (json as DiscoveryDoc) : null;
  } catch {
    // Best-effort read: the caller skips the test when this yields `null`.
    return null;
  }
}
|
|
76
|
+
|
|
77
|
+
// Advertisement-shape gate for RFC 0039 §B. Runs only when OPENWOP_BASE_URL is
// set; reports SKIPPED when discovery is unreachable or the optional
// `crossChildMemoryConcurrency` field is absent.
describe.skipIf(HTTP_SKIP)('multi-agent-memory-lifecycle: advertisement shape (RFC 0039 §B)', () => {
  it('crossChildMemoryConcurrency (when advertised) MUST be one of {strict, advisory}', async (ctx) => {
    const discovery = await readDiscovery();
    if (discovery === null) {
      ctx.skip();
      return;
    }

    const concurrency =
      discovery.capabilities?.multiAgent?.executionModel?.crossChildMemoryConcurrency;
    if (concurrency === undefined) {
      ctx.skip(); // optional advertisement — host hasn't opted in
      return;
    }

    // Closed enum: only these two string values are accepted.
    const allowed: readonly unknown[] = ['strict', 'advisory'];
    expect(
      allowed.includes(concurrency),
      driver.describe(
        'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B',
        'crossChildMemoryConcurrency MUST be one of {strict, advisory} when present; values outside the closed enum are non-conformant',
      ),
    ).toBe(true);
  });
});
|
|
98
|
+
|
|
99
|
+
describe.skipIf(HTTP_SKIP)('multi-agent-memory-lifecycle: behavioral (RFC 0039 §B MAE-2 + MAE-3)', () => {
  // MAE-2 — cross-run TTL anchoring.
  // Becomes a real assertion once a memory-advertising Phase 2 host exposes a
  // host-side test seam for cross-run memory writes (e.g.
  // POST /v1/host/sample/test/memory/cross-run-ttl-roundtrip). Planned flow:
  //   1. Parent starts at parent-clock T+0.
  //   2. Child is dispatched at T+10s and writes
  //      MemoryEntry { key: 'k', value: 'v', ttl: 5 }.
  //   3. Parent reads MemoryEntry { key: 'k' } at T+12s; expiresAt MUST be
  //      approximately T+15s (child write at T+10 + ttl 5), not T+5s.
  // Registered as `todo` so reporters track the gap instead of showing a
  // vacuous PASS while the contract is documentation-only.
  it.todo('MAE-2 cross-run TTL: child write expiresAt MUST be anchored at child write time, not parent start');

  // MAE-3 — replay snapshot.
  // Becomes a real assertion once the host implements the snapshot mechanism
  // per RFC 0039 §B. Planned flow:
  //   1. Run a workflow that writes MemoryEntry { key: 'k', value: 'v1' } at index 10.
  //   2. Write MemoryEntry { key: 'k', value: 'v2' } at index 20.
  //   3. POST /v1/runs/{runId}:fork { fromSeq: 15 }.
  //   4. The forked run reads MemoryEntry { key: 'k' }; it MUST return 'v1' (not 'v2').
  //   5. Alternative compliance: the fork is refused with
  //      error.code: 'replay_memory_snapshot_unavailable' AND details.fromSeq === 15.
  // Silently substituting v2 (current state) is non-conformant.
  it.todo('MAE-3 replay snapshot: fork from past index MUST return memory-as-of-index OR refuse with replay_memory_snapshot_unavailable');
});
|
|
@@ -85,3 +85,61 @@ describe('multi-region-idempotency: capability shape', () => {
|
|
|
85
85
|
}
|
|
86
86
|
});
|
|
87
87
|
});
|
|
88
|
+
|
|
89
|
+
// RFC 0036 — granular `multiRegion` sub-block advertisement shape. Hosts that
|
|
90
|
+
// opt into the granular advertisement (separate from the categorical `crossRegion`
|
|
91
|
+
// claim) MUST conform to the shape below: supported is boolean (required); when
|
|
92
|
+
// supported is true, replicationLagBoundMs is integer [0, 60000] and
|
|
93
|
+
// partitionRecoveryStrategy is either the categorical enum or an x-host-<host>-<key>
|
|
94
|
+
// extension namespace string. Hosts that don't advertise multiRegion stay on the
|
|
95
|
+
// categorical crossRegion claim (above); both forms are compatible.
|
|
96
|
+
|
|
97
|
+
/**
 * Granular `capabilities.idempotency.multiRegion` advertisement as read from
 * discovery. Every field is `unknown` because the test below validates the
 * runtime shape itself.
 */
interface MultiRegionCaps {
  /** Checked to be a boolean. */
  supported?: unknown;
  /** When `supported` is true and this is present: checked to be an integer in [0, 60000]. */
  replicationLagBoundMs?: unknown;
  /** When `supported` is true and this is present: checked against the categorical enum or the `x-host-` pattern. */
  partitionRecoveryStrategy?: unknown;
}
|
|
102
|
+
|
|
103
|
+
// Shape gate for the granular `capabilities.idempotency.multiRegion` block
// (RFC 0036 §A). Returns early when the host does not advertise the block.
//
// Fix relative to the previous revision: `partitionRecoveryStrategy` is no
// longer cast to `string` before the pattern check. `RegExp.prototype.test`
// coerces its argument to a string, so a non-string value such as
// `['x-host-acme-foo']` used to be accepted. The extension branch now requires
// an actual string.
describe('multi-region-idempotency: granular multiRegion advertisement shape (RFC 0036 §A)', () => {
  it('capabilities.idempotency.multiRegion (when present) conforms to RFC 0036 §A', async () => {
    const disco = await driver.get('/.well-known/openwop');
    const idem =
      (disco.json as { capabilities?: { idempotency?: IdempotencyCaps & { multiRegion?: MultiRegionCaps } } })
        .capabilities?.idempotency;
    const mr = idem?.multiRegion;
    if (mr === undefined) return; // host doesn't advertise the granular block — soft-skip

    expect(
      typeof mr.supported,
      driver.describe(
        'RFCS/0036-multi-region-and-cross-engine-guarantees.md §A',
        'capabilities.idempotency.multiRegion.supported MUST be boolean when present',
      ),
    ).toBe('boolean');

    // The remaining fields are only constrained when the host claims support.
    if (mr.supported !== true) return;

    if (mr.replicationLagBoundMs !== undefined) {
      // Number.isInteger returns false for any non-number, so no typeof guard is needed.
      const n = mr.replicationLagBoundMs as number;
      expect(
        Number.isInteger(n) && n >= 0 && n <= 60000,
        driver.describe(
          'RFCS/0036-multi-region-and-cross-engine-guarantees.md §A',
          'replicationLagBoundMs MUST be integer in [0, 60000] when supported is true',
        ),
      ).toBe(true);
    }

    if (mr.partitionRecoveryStrategy !== undefined) {
      const s = mr.partitionRecoveryStrategy;
      const isCategorical = s === 'last-writer-wins' || s === 'first-writer-wins';
      // typeof guard: prevents RegExp.test from string-coercing arrays/objects.
      const isExtension = typeof s === 'string' && /^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$/.test(s);
      expect(
        isCategorical || isExtension,
        driver.describe(
          'RFCS/0036-multi-region-and-cross-engine-guarantees.md §A',
          'partitionRecoveryStrategy MUST be one of {last-writer-wins, first-writer-wins} OR match ^x-host-<host>-<key>$',
        ),
      ).toBe(true);
    }
  });
});
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* node-module-required-capabilities-shape — RFC 0031 §B authoring conformance.
|
|
3
|
+
*
|
|
4
|
+
* Capability-gated on `capabilities.modelCapabilities.supported: true`.
|
|
5
|
+
*
|
|
6
|
+
* SHOULD-tier scenario — verifies that every NodeModule in the host's pack
|
|
7
|
+
* registry whose `typeId` is in the `core.ai.*` namespace declares
|
|
8
|
+
* `requiredModelCapabilities`. Treated as a soft-fail; failures are
|
|
9
|
+
* surfaced as findings rather than blocking the suite.
|
|
10
|
+
*
|
|
11
|
+
* Reads the host's node catalog (via `GET /v1/host/sample/node-catalog`
|
|
12
|
+
* — vendor-prefixed per `spec/v1/host-extensions.md`). Hosts that don't
|
|
13
|
+
* expose the catalog endpoint soft-skip cleanly; the conformance check
|
|
14
|
+
* cannot enumerate NodeModules without a catalog surface.
|
|
15
|
+
*
|
|
16
|
+
* @see RFCS/0031-envelope-variants-and-model-capabilities.md §B + §C
|
|
17
|
+
* @see spec/v1/node-packs.md §"Model-capability declarations on NodeModules"
|
|
18
|
+
* @see schemas/node-pack-manifest.schema.json §NodeModule
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { describe, it, expect, beforeAll } from 'vitest';
|
|
22
|
+
import { driver } from '../lib/driver.js';
|
|
23
|
+
|
|
24
|
+
/**
 * Capability identifiers reserved by the spec (RFC 0031 §C). An entry of
 * `requiredModelCapabilities` found in this set is accepted without further
 * pattern checks.
 */
const RESERVED_IDENTIFIERS: ReadonlySet<string> = new Set<string>([
  'structured-output',
  'discriminator-enum',
  'long-context',
  'reasoning',
  'function-calling',
]);

/**
 * Pattern for host-private extension identifiers (`x-host-<host>-<key>`), per
 * `host-extensions.md §"Canonical-prefix table"` and RFC 0031 §C
 * "Reservation policy".
 */
const HOST_EXTENSION_RE = /^x-host-[a-z0-9][a-z0-9-]*-[a-z0-9][a-z0-9-]*$/;
|
|
36
|
+
|
|
37
|
+
/**
 * One entry of the host node catalog, reduced to the fields this scenario
 * reads. The two capability-related fields are `unknown` because the tests
 * inspect their runtime shape.
 */
interface CatalogNode {
  /** Node type identifier; the first test matches it against `core.ai.*`. */
  typeId: string;
  source?: 'local' | 'pack';
  /** Checked with `Array.isArray` by the tests. */
  requiredModelCapabilities?: unknown;
  /** Treated as an object carrying a `provider` field when present. */
  fallbackModel?: unknown;
}
|
|
43
|
+
|
|
44
|
+
/** A capability block whose only inspected field is `supported`. */
interface SupportedFlag {
  supported?: unknown;
}

/**
 * Minimal view of the `/.well-known/openwop` discovery document: just the two
 * capability blocks the setup hook reads.
 */
interface DiscoveryDoc {
  capabilities?: {
    modelCapabilities?: SupportedFlag;
    aiProviders?: SupportedFlag;
  };
}
|
|
50
|
+
|
|
51
|
+
// Module-level state populated once by the `beforeAll` hook and read by the tests.

/** Non-null when the suite should soft-skip; holds the human-readable reason. */
let SKIP_REASON: string | null = null;

/** Nodes taken from the host's node catalog; stays empty until the hook fills it. */
let CATALOG: CatalogNode[] = [];

/** Entries of `capabilities.aiProviders.supported` when that field is an array. */
let SUPPORTED_PROVIDERS: ReadonlySet<string> = new Set();
|
|
54
|
+
|
|
55
|
+
// One-time setup: reads discovery and the node catalog, and either fills the
// module-level CATALOG / SUPPORTED_PROVIDERS or records why the suite must
// soft-skip in SKIP_REASON.
beforeAll(async () => {
  const discovery = await driver.get('/.well-known/openwop');
  if (discovery.status !== 200) {
    SKIP_REASON = 'discovery doc unreachable';
    return;
  }

  const { modelCapabilities, aiProviders } = (discovery.json as DiscoveryDoc).capabilities ?? {};

  // Capability gate: the scenario only applies to hosts advertising model capabilities.
  if (modelCapabilities?.supported !== true) {
    SKIP_REASON = 'host does not advertise capabilities.modelCapabilities.supported: true';
    return;
  }

  // Remember the advertised providers only when the field is an array.
  const providers = aiProviders?.supported;
  if (Array.isArray(providers)) {
    SUPPORTED_PROVIDERS = new Set(providers as string[]);
  }

  const catalog = await driver.get('/v1/host/sample/node-catalog');
  switch (catalog.status) {
    case 200:
      break;
    case 404:
      SKIP_REASON = 'host does not expose /v1/host/sample/node-catalog';
      return;
    default:
      SKIP_REASON = `node-catalog returned ${catalog.status}`;
      return;
  }

  CATALOG = (catalog.json as { nodes?: CatalogNode[] }).nodes ?? [];
});
|
|
80
|
+
|
|
81
|
+
// Catalog-shape checks for the `requiredModelCapabilities` / `fallbackModel`
// fields of the NodeModules listed in CATALOG. Every case returns early
// (passing) when the setup phase recorded a SKIP_REASON.
describe('node-module-required-capabilities-shape: authoring convention (RFC 0031 §B)', () => {
  it('every NodeModule with typeId matching `core.ai.*` declares non-empty `requiredModelCapabilities` (SHOULD-tier)', () => {
    // Only this first case logs the skip reason; the remaining cases skip quietly.
    if (SKIP_REASON) {
      // eslint-disable-next-line no-console
      console.warn(`[node-module-required-capabilities-shape] skip: ${SKIP_REASON}`);
      return;
    }

    const aiNodes = CATALOG.filter((node) => /^core\.ai\./.test(node.typeId));

    // typeIds of core.ai.* nodes whose requiredModelCapabilities is either
    // not an array or an empty array.
    const undeclared: string[] = aiNodes
      .filter((node) => {
        const declared = node.requiredModelCapabilities;
        return !(Array.isArray(declared) && declared.length !== 0);
      })
      .map((node) => node.typeId);

    // SHOULD-tier finding: reported as a warning only, never as a failure.
    if (undeclared.length > 0) {
      const finding = `[node-module-required-capabilities-shape] RFC 0031 §B SHOULD: ${undeclared.length} core.ai.* NodeModule(s) omit requiredModelCapabilities: ${undeclared.join(', ')}`;
      // eslint-disable-next-line no-console
      console.warn(finding);
    }

    // The hard assertion of this case: at least one core.ai.* node was
    // inspected, so the SHOULD check above was not vacuous.
    const context = driver.describe(
      'RFC 0031 §B',
      'host MUST advertise at least one core.ai.* NodeModule in the node catalog (otherwise the SHOULD has no surface to bind to)',
    );
    expect(aiNodes.length, context).toBeGreaterThan(0);
  });

  it('every declared identifier MUST match the spec-reserved set OR the `x-host-<host>-<key>` extension pattern', () => {
    if (SKIP_REASON) return;

    // An identifier is acceptable when it is a string that is either in
    // RESERVED_IDENTIFIERS or matched by HOST_EXTENSION_RE.
    const isPermitted = (id: unknown): boolean =>
      typeof id === 'string' && (RESERVED_IDENTIFIERS.has(id) || HOST_EXTENSION_RE.test(id));

    const violations: Array<{ typeId: string; identifier: string }> = [];
    for (const { typeId, requiredModelCapabilities: declared } of CATALOG) {
      // Nodes without an array-valued declaration have nothing to validate.
      if (!Array.isArray(declared)) continue;
      for (const id of declared) {
        if (isPermitted(id)) continue;
        // Non-string entries are reported by their String() rendering.
        violations.push({ typeId, identifier: String(id) });
      }
    }

    const context = driver.describe(
      'RFC 0031 §C "Reservation policy"',
      'every requiredModelCapabilities identifier MUST be spec-reserved OR match x-host-<host>-<key>',
    );
    expect(violations, context).toEqual([]);
  });

  it('NodeModule.fallbackModel.provider (when declared) MUST be in `capabilities.aiProviders.supported[]`', () => {
    if (SKIP_REASON) return;

    // With an empty SUPPORTED_PROVIDERS set no provider is ever flagged.
    const hasProviderList = SUPPORTED_PROVIDERS.size > 0;

    const violations: Array<{ typeId: string; provider: string }> = [];
    for (const node of CATALOG) {
      const fallback = node.fallbackModel;
      // Only object-valued fallbackModel entries can carry a provider.
      const provider = fallback && typeof fallback === 'object'
        ? (fallback as { provider?: unknown }).provider
        : undefined;
      if (typeof provider === 'string' && hasProviderList && !SUPPORTED_PROVIDERS.has(provider)) {
        violations.push({ typeId: node.typeId, provider });
      }
    }

    const context = driver.describe(
      'RFC 0031 §B',
      'every fallbackModel.provider MUST appear in capabilities.aiProviders.supported[]',
    );
    expect(violations, context).toEqual([]);
  });

  it('a NodeModule declaring `requiredModelCapabilities` without `fallbackModel` is conformant — refusal-only (no substitution) is the default posture', () => {
    if (SKIP_REASON) return;

    // Selects nodes that declare a non-empty requiredModelCapabilities array
    // and whose fallbackModel is undefined or null.
    const isRefusalOnly = (node: CatalogNode): boolean => {
      const declared = node.requiredModelCapabilities;
      if (!Array.isArray(declared)) return false;
      if (!(declared.length > 0)) return false;
      return node.fallbackModel === undefined || node.fallbackModel === null;
    };

    // A catalog with zero such nodes passes. For each selected node the
    // field must be strictly undefined, so an explicit `null` fails here.
    // A node carrying `{}` is never selected and is therefore not examined
    // by this case.
    CATALOG.filter(isRefusalOnly).forEach((node) => {
      const context = driver.describe(
        'RFC 0031 §B',
        `${node.typeId}: refusal-only posture MUST surface as absent fallbackModel (not as {} or null wrapper)`,
      );
      expect(node.fallbackModel, context).toBeUndefined();
    });
  });
});
|