@openwop/openwop-conformance 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +241 -0
- package/api/asyncapi.yaml +481 -0
- package/api/openapi.yaml +830 -0
- package/api/redocly.yaml +8 -0
- package/coverage.md +80 -0
- package/dist/cli.js +161 -0
- package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
- package/fixtures/conformance-agent-identity.json +27 -0
- package/fixtures/conformance-agent-low-confidence.json +29 -0
- package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
- package/fixtures/conformance-agent-memory-redaction.json +32 -0
- package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
- package/fixtures/conformance-agent-memory-ttl.json +31 -0
- package/fixtures/conformance-agent-pack-export.json +26 -0
- package/fixtures/conformance-agent-pack-install.json +26 -0
- package/fixtures/conformance-agent-pack-provenance.json +31 -0
- package/fixtures/conformance-agent-reasoning.json +29 -0
- package/fixtures/conformance-approval.json +27 -0
- package/fixtures/conformance-cancellable.json +33 -0
- package/fixtures/conformance-cap-breach.json +27 -0
- package/fixtures/conformance-capability-missing.json +23 -0
- package/fixtures/conformance-channel-ttl.json +60 -0
- package/fixtures/conformance-clarification.json +30 -0
- package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
- package/fixtures/conformance-conversation-lifecycle.json +32 -0
- package/fixtures/conformance-conversation-replay.json +33 -0
- package/fixtures/conformance-conversation-vs-clarification.json +26 -0
- package/fixtures/conformance-delay.json +33 -0
- package/fixtures/conformance-dispatch-loop.json +38 -0
- package/fixtures/conformance-failure.json +23 -0
- package/fixtures/conformance-idempotent.json +30 -0
- package/fixtures/conformance-identity.json +32 -0
- package/fixtures/conformance-interrupt-auth-required.json +28 -0
- package/fixtures/conformance-interrupt-external-event.json +33 -0
- package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
- package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
- package/fixtures/conformance-interrupt-quorum.json +30 -0
- package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
- package/fixtures/conformance-message-reducer.json +31 -0
- package/fixtures/conformance-multi-node.json +21 -0
- package/fixtures/conformance-noop.json +23 -0
- package/fixtures/conformance-orchestrator-dispatch.json +47 -0
- package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
- package/fixtures/conformance-orchestrator-terminate.json +44 -0
- package/fixtures/conformance-stream-text.json +26 -0
- package/fixtures/conformance-subworkflow-child.json +21 -0
- package/fixtures/conformance-subworkflow-parent.json +49 -0
- package/fixtures/conformance-version-fold.json +23 -0
- package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
- package/fixtures/pack-manifests/pack-private-example.json +26 -0
- package/fixtures.md +404 -0
- package/package.json +48 -0
- package/schemas/README.md +75 -0
- package/schemas/agent-manifest.schema.json +107 -0
- package/schemas/agent-ref.schema.json +53 -0
- package/schemas/capabilities.schema.json +287 -0
- package/schemas/channel-written-payload.schema.json +55 -0
- package/schemas/conversation-event.schema.json +120 -0
- package/schemas/conversation-turn.schema.json +72 -0
- package/schemas/debug-bundle.schema.json +196 -0
- package/schemas/dispatch-config.schema.json +46 -0
- package/schemas/error-envelope.schema.json +25 -0
- package/schemas/memory-entry.schema.json +36 -0
- package/schemas/memory-list-options.schema.json +21 -0
- package/schemas/node-pack-manifest.schema.json +235 -0
- package/schemas/orchestrator-decision.schema.json +60 -0
- package/schemas/run-event-payloads.schema.json +663 -0
- package/schemas/run-event.schema.json +116 -0
- package/schemas/run-options.schema.json +81 -0
- package/schemas/run-orchestrator-decided-event.schema.json +20 -0
- package/schemas/run-snapshot.schema.json +121 -0
- package/schemas/suspend-request.schema.json +182 -0
- package/schemas/workflow-definition.schema.json +430 -0
- package/src/cli.ts +187 -0
- package/src/lib/a2a-fake-peer.ts +233 -0
- package/src/lib/canaries.ts +186 -0
- package/src/lib/driver.ts +96 -0
- package/src/lib/env.ts +49 -0
- package/src/lib/fixtures.ts +93 -0
- package/src/lib/mcp-fake-server.ts +185 -0
- package/src/lib/multi-agent-capabilities.ts +155 -0
- package/src/lib/multiProcess.ts +141 -0
- package/src/lib/otel-collector.ts +312 -0
- package/src/lib/paths.ts +198 -0
- package/src/lib/polling.ts +81 -0
- package/src/lib/profiles.ts +258 -0
- package/src/lib/sse.ts +172 -0
- package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
- package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
- package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
- package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
- package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
- package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
- package/src/scenarios/agentMessageReducer.test.ts +57 -0
- package/src/scenarios/agentMetadata.test.ts +56 -0
- package/src/scenarios/agentPackExport.test.ts +45 -0
- package/src/scenarios/agentPackInstall.test.ts +50 -0
- package/src/scenarios/agentPackProvenance.test.ts +53 -0
- package/src/scenarios/agentReasoningEvents.test.ts +72 -0
- package/src/scenarios/append-ordering.test.ts +91 -0
- package/src/scenarios/approval-payload.test.ts +120 -0
- package/src/scenarios/audit-log-integrity.test.ts +106 -0
- package/src/scenarios/auth.test.ts +55 -0
- package/src/scenarios/byok-roundtrip.test.ts +166 -0
- package/src/scenarios/cancellation.test.ts +68 -0
- package/src/scenarios/cap-breach.test.ts +149 -0
- package/src/scenarios/channel-ttl.test.ts +70 -0
- package/src/scenarios/configurable-schema.test.ts +76 -0
- package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
- package/src/scenarios/conversationLifecycle.test.ts +64 -0
- package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
- package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
- package/src/scenarios/cost-attribution.test.ts +207 -0
- package/src/scenarios/debugBundle.test.ts +222 -0
- package/src/scenarios/discovery.test.ts +147 -0
- package/src/scenarios/dispatchLoop.test.ts +52 -0
- package/src/scenarios/errors.test.ts +144 -0
- package/src/scenarios/eventOrdering.test.ts +144 -0
- package/src/scenarios/failure-path.test.ts +46 -0
- package/src/scenarios/fixtures-gating.test.ts +137 -0
- package/src/scenarios/fixtures-valid.test.ts +140 -0
- package/src/scenarios/highConcurrency.test.ts +263 -0
- package/src/scenarios/idempotency.test.ts +83 -0
- package/src/scenarios/idempotencyRetry.test.ts +130 -0
- package/src/scenarios/identity-passthrough.test.ts +54 -0
- package/src/scenarios/interrupt-approval.test.ts +97 -0
- package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
- package/src/scenarios/interrupt-clarification.test.ts +45 -0
- package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
- package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
- package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
- package/src/scenarios/interruptRace.test.ts +176 -0
- package/src/scenarios/maliciousManifest.test.ts +154 -0
- package/src/scenarios/mcp-discoverability.test.ts +129 -0
- package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
- package/src/scenarios/multi-node-ordering.test.ts +60 -0
- package/src/scenarios/multi-region-idempotency.test.ts +52 -0
- package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
- package/src/scenarios/orchestratorDispatch.test.ts +66 -0
- package/src/scenarios/orchestratorTermination.test.ts +54 -0
- package/src/scenarios/otel-emission.test.ts +113 -0
- package/src/scenarios/otel-trace-propagation.test.ts +90 -0
- package/src/scenarios/pack-registry-publish.test.ts +93 -0
- package/src/scenarios/pack-registry.test.ts +328 -0
- package/src/scenarios/pause-resume.test.ts +109 -0
- package/src/scenarios/policies.test.ts +162 -0
- package/src/scenarios/profileDerivation.test.ts +335 -0
- package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
- package/src/scenarios/rate-limit-envelope.test.ts +97 -0
- package/src/scenarios/redaction.test.ts +254 -0
- package/src/scenarios/redactionAdversarial.test.ts +162 -0
- package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
- package/src/scenarios/replay-fork.test.ts +216 -0
- package/src/scenarios/replayDeterminism.test.ts +171 -0
- package/src/scenarios/route-coverage.test.ts +129 -0
- package/src/scenarios/runs-lifecycle.test.ts +65 -0
- package/src/scenarios/runtime-capabilities.test.ts +118 -0
- package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
- package/src/scenarios/staleClaim.test.ts +223 -0
- package/src/scenarios/stream-modes-buffer.test.ts +148 -0
- package/src/scenarios/stream-modes-mixed.test.ts +149 -0
- package/src/scenarios/stream-modes.test.ts +139 -0
- package/src/scenarios/streamReconnect.test.ts +162 -0
- package/src/scenarios/subworkflow.test.ts +126 -0
- package/src/scenarios/version-negotiation.test.ts +157 -0
- package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
- package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
- package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
- package/src/scenarios/wasm-pack-load.test.ts +75 -0
- package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
- package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
- package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
- package/src/setup.ts +173 -0
- package/vitest.config.ts +17 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Channel TTL scenarios (G5 / C3) — exercises `conformance-channel-ttl`.
|
|
3
|
+
*
|
|
4
|
+
* Workflow writes 3 entries (values a/b/c) to channel `events` with
|
|
5
|
+
* ttlMs=200, waits 300ms via `core.delay`, then writes a 4th (value d).
|
|
6
|
+
*
|
|
7
|
+
* Verifies:
|
|
8
|
+
* 1. Run reaches terminal `completed`.
|
|
9
|
+
* 2. After the post-TTL write, the `events` channel state contains
|
|
10
|
+
* exactly one entry.
|
|
11
|
+
* 3. The remaining entry has value `"d"` (the 3 priors were dropped at
|
|
12
|
+
* write time because their `_ts` predated `now - ttlMs`).
|
|
13
|
+
* 4. The remaining entry preserves the `_ts` timestamp produced at write.
|
|
14
|
+
*
|
|
15
|
+
* Spec references:
|
|
16
|
+
* - channels-and-reducers.md §append + §TTL
|
|
17
|
+
* - node-packs.md §Reserved Core openwop typeIds → core.channelWrite
|
|
18
|
+
* - spec gap G5
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { describe, it, expect } from 'vitest';
|
|
22
|
+
import { driver } from '../lib/driver.js';
|
|
23
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
24
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
25
|
+
|
|
26
|
+
const WORKFLOW_ID = 'conformance-channel-ttl';
|
|
27
|
+
const SKIP_NO_FIXTURE = !isFixtureAdvertised(WORKFLOW_ID);
|
|
28
|
+
|
|
29
|
+
/**
 * One element of a channel's stored state as this scenario reads it back
 * from the terminal snapshot.
 */
type ChannelEntry = {
  /** Payload that was written to the channel (the fixture writes "a".."d"). */
  value: unknown;
  /** Numeric write timestamp attached to the entry. */
  _ts: number;
};
|
|
33
|
+
|
|
34
|
+
describe.skipIf(SKIP_NO_FIXTURE)('channel-ttl: conformance-channel-ttl drops entries older than ttlMs at write time', () => {
  /**
   * Starts the TTL fixture, waits for it to reach a terminal state, then
   * inspects the `events` channel exposed through the terminal snapshot's
   * `variables`. Exactly one entry (value "d") with a positive numeric `_ts`
   * must remain.
   */
  it('after the post-TTL write, the channel contains exactly one entry with value "d"', async () => {
    const create = await driver.post('/v1/runs', { workflowId: WORKFLOW_ID });
    expect(create.status).toBe(201);
    const { runId } = create.json as { runId: string };

    const terminal = await pollUntilTerminal(runId);
    expect(terminal.status, driver.describe(
      'fixtures.md conformance-channel-ttl §Terminal status',
      'fixture MUST reach terminal `completed`',
    )).toBe('completed');

    const variables = terminal.variables ?? {};
    const events: unknown = variables.events;

    expect(Array.isArray(events), driver.describe(
      'channels-and-reducers.md §append',
      'channel state MUST be stored as an array of {value, _ts} entries',
    )).toBe(true);

    // The expectation above has already failed the test for non-arrays; this
    // guard exists only to narrow the type so no `!` assertions are needed.
    if (!Array.isArray(events)) return;
    const entries: ChannelEntry[] = events;

    expect(entries.length, driver.describe(
      'channels-and-reducers.md §TTL — write-time filter',
      'after the post-TTL write, exactly 1 entry MUST remain (the 3 priors aged out)',
    )).toBe(1);

    // Safe to index: the length assertion above throws unless one entry exists.
    const survivor = entries[0];

    expect(survivor.value, driver.describe(
      'fixtures.md conformance-channel-ttl §Topology',
      'the surviving entry MUST be the post-delay write (value "d")',
    )).toBe('d');

    expect(typeof survivor._ts, driver.describe(
      'channels-and-reducers.md §append entry shape',
      'each channel entry MUST carry a numeric `_ts` write timestamp',
    )).toBe('number');
    expect(survivor._ts, driver.describe(
      'channels-and-reducers.md §append entry shape',
      'the `_ts` write timestamp MUST be greater than 0',
    )).toBeGreaterThan(0);
  });
});
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Track 13: per-workflow `configurableSchema` (run-options.md v1.1).
|
|
3
|
+
*
|
|
4
|
+
* Verifies that hosts which surface `configurableSchema` on a workflow
|
|
5
|
+
* definition (a) reject runs whose `configurable` violates the schema
|
|
6
|
+
* with `validation_error`, and (b) surface the schema on
|
|
7
|
+
* `GET /v1/workflows/{workflowId}`.
|
|
8
|
+
*
|
|
9
|
+
* Capability gating: skips when no advertised fixture declares
|
|
10
|
+
* `configurableSchema`. Hosts that don't yet support per-workflow
|
|
11
|
+
* configurableSchema omit the field from the manifest, in
|
|
12
|
+
* which case this scenario skips.
|
|
13
|
+
*
|
|
14
|
+
* @see spec/v1/run-options.md §"Per-workflow configurableSchema"
|
|
15
|
+
* @see schemas/workflow-definition.schema.json §configurableSchema
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { describe, it, expect } from 'vitest';
|
|
19
|
+
import { driver } from '../lib/driver.js';
|
|
20
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
21
|
+
|
|
22
|
+
// Re-use `conformance-identity` if it advertises a configurableSchema;
|
|
23
|
+
// otherwise the scenario soft-skips. Hosts adopting v1.1 SHOULD seed a
|
|
24
|
+
// dedicated `conformance-configurable-schema` fixture; until that lands,
|
|
25
|
+
// reuse what's available.
|
|
26
|
+
const CANDIDATES = ['conformance-configurable-schema', 'conformance-identity'] as const;
|
|
27
|
+
|
|
28
|
+
/**
 * Walks `CANDIDATES` in order and returns the first id that is advertised
 * and whose `GET /v1/workflows/{id}` response is a 200 carrying an
 * object-valued `configurableSchema`.
 *
 * @returns the selected fixture id, or `null` when no candidate qualifies.
 */
async function pickFixture(): Promise<string | null> {
  for (const candidate of CANDIDATES) {
    if (isFixtureAdvertised(candidate)) {
      const response = await driver.get(`/v1/workflows/${encodeURIComponent(candidate)}`);
      if (response.status === 200) {
        const { configurableSchema } = response.json as { configurableSchema?: unknown };
        if (configurableSchema && typeof configurableSchema === 'object') {
          return candidate;
        }
      }
    }
  }
  return null;
}
|
|
38
|
+
|
|
39
|
+
describe('configurable-schema: per-workflow schema enforced', () => {
  /**
   * Picks a fixture that declares `configurableSchema`, confirms the schema
   * is surfaced on the workflow definition, then posts a run whose
   * `configurable` carries an unexpected key and expects a `validation_error`
   * rejection. Soft-skips (warns and returns) when no fixture qualifies.
   */
  it('manifest surfaces configurableSchema; mismatched configurable is rejected', async () => {
    const fixture = await pickFixture();
    if (!fixture) {
      // eslint-disable-next-line no-console
      console.warn(
        '[configurable-schema] no advertised fixture declares configurableSchema; skipping',
      );
      return;
    }

    const manifest = await driver.get(`/v1/workflows/${encodeURIComponent(fixture)}`);
    const schema = (manifest.json as { configurableSchema?: Record<string, unknown> })
      .configurableSchema;
    expect(schema, driver.describe(
      'run-options.md §"Per-workflow configurableSchema"',
      'GET /v1/workflows/{workflowId} MUST surface configurableSchema when the workflow declares one',
    )).toBeDefined();

    // Provide a deliberately invalid override: a key the fixture schema is
    // not expected to declare, carrying an object value. The intent is that
    // the schema rejects it either as an unknown key
    // (additionalProperties: false) or as a wrongly-typed value.
    const create = await driver.post('/v1/runs', {
      workflowId: fixture,
      configurable: { __conformance_invalid_key__: { unexpected: true } },
    });

    // `toContain` reports the actual status on failure, matching the form
    // the other scenarios use for multi-status expectations.
    expect([400, 422], driver.describe(
      'run-options.md §"Per-workflow configurableSchema"',
      'configurable that violates the workflow schema MUST be rejected with 400/422',
    )).toContain(create.status);

    const body = create.json as { error?: string };
    expect(body.error, driver.describe(
      'run-options.md §"Per-workflow configurableSchema"',
      'the rejection MUST carry error `validation_error`',
    )).toBe('validation_error');
  });
});
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Agent Shift Phase 4 — capability-gate refusal contract.
|
|
3
|
+
*
|
|
4
|
+
* Verifies that a host which does NOT advertise
|
|
5
|
+
* `capabilities.conversationPrimitive: true` MUST refuse a workflow
|
|
6
|
+
* registration that declares `core.conversationGate` nodes, with
|
|
7
|
+
* `validation_error` per spec/v1/capabilities.md §`conversationPrimitive`.
|
|
8
|
+
*
|
|
9
|
+
* This scenario is the COMPLEMENT of the other Phase-4 scenarios — it
|
|
10
|
+
* runs ONLY when conversation primitive is NOT advertised, to verify the
|
|
11
|
+
* refusal contract is honored.
|
|
12
|
+
*
|
|
13
|
+
* Fixture-gated: requires `conformance-conversation-capability-negotiation`.
|
|
14
|
+
*
|
|
15
|
+
* @see spec/v1/capabilities.md §`conversationPrimitive`
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { describe, it, expect } from 'vitest';
|
|
19
|
+
import { driver } from '../lib/driver.js';
|
|
20
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
21
|
+
import { isConversationPrimitiveSupported } from '../lib/multi-agent-capabilities.js';
|
|
22
|
+
|
|
23
|
+
const FIXTURE = 'conformance-conversation-capability-negotiation';
|
|
24
|
+
// Inverted gate: this scenario runs when host does NOT advertise the
|
|
25
|
+
// capability, to verify the refusal contract.
|
|
26
|
+
const SKIP = isConversationPrimitiveSupported() || !isFixtureAdvertised(FIXTURE);
|
|
27
|
+
|
|
28
|
+
describe.skipIf(SKIP)('conversationCapabilityNegotiation: refusal contract', () => {
  /**
   * Posts a run for the conversation-bearing fixture against a host that
   * does not advertise the conversation primitive and expects a refusal
   * carrying a machine-readable error code.
   */
  it('host without conversationPrimitive capability refuses conversation-bearing workflow', async () => {
    const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
    // The host MUST reject — either because the workflow never registered
    // (404) or at validation time (400/422). What MUST NOT happen is a
    // successful 201 followed by silent fallback.
    expect([400, 404, 422]).toContain(create.status);

    // Accept every envelope shape used across these scenarios: a string
    // `error`, a nested `error.code`, or a top-level `code`.
    const body = (create.json ?? {}) as {
      error?: string | { code?: string };
      code?: string;
    };
    const code =
      typeof body.error === 'string'
        ? body.error
        : (body.error?.code ?? body.code ?? '');

    expect(typeof code).toBe('string');
    expect(code, 'refusal MUST carry a non-empty error code').not.toBe('');

    // NOTE(review): a 404 means the workflow was never registered, so its
    // code is not pinned here; confirm the expected 404 code against the spec.
    if (create.status !== 404) {
      expect(
        code,
        'capabilities.md §`conversationPrimitive`: refusal MUST use `validation_error`',
      ).toBe('validation_error');
    }
  });
});
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Agent Shift Phase 4 — conversation primitive lifecycle.
|
|
3
|
+
*
|
|
4
|
+
* Verifies the open → exchange → close lifecycle:
|
|
5
|
+
* 1. `conversation.opened` emitted on `core.conversationGate.open`.
|
|
6
|
+
* 2. `conversation.exchanged` emitted on resume after a single turn.
|
|
7
|
+
* 3. `conversation.closed` emitted on `core.conversationGate.close`.
|
|
8
|
+
* 4. All three events share the same `conversationId`.
|
|
9
|
+
* 5. Per CO-3: no `conversation.exchanged` events follow
|
|
10
|
+
* `conversation.closed` for the same conversationId.
|
|
11
|
+
*
|
|
12
|
+
* Capability-gated: skips when host doesn't advertise
|
|
13
|
+
* `capabilities.conversationPrimitive: true`. Fixture-gated: requires
|
|
14
|
+
* `conformance-conversation-lifecycle`.
|
|
15
|
+
*
|
|
16
|
+
* @see spec/v1/interrupt.md §`conversation.start`
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { describe, it, expect } from 'vitest';
|
|
20
|
+
import { driver } from '../lib/driver.js';
|
|
21
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
22
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
23
|
+
import { isConversationPrimitiveSupported } from '../lib/multi-agent-capabilities.js';
|
|
24
|
+
|
|
25
|
+
const FIXTURE = 'conformance-conversation-lifecycle';
|
|
26
|
+
const SKIP = !isConversationPrimitiveSupported() || !isFixtureAdvertised(FIXTURE);
|
|
27
|
+
|
|
28
|
+
describe.skipIf(SKIP)('conversationLifecycle: open → exchange → close round-trip', () => {
  /**
   * Runs the lifecycle fixture to a terminal state and checks its event log:
   * opened, exchanged and closed events are all present, all share one
   * `conversationId`, and no exchange follows the close of that conversation.
   */
  it('emits all three lifecycle events with matching conversationId; no exchanges after close', async () => {
    type LifecycleEvent = { type: string; payload?: Record<string, unknown> };

    const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
    expect(create.status).toBe(201);
    const { runId } = create.json as { runId: string };

    // The fixture's exchange step requires resume input. Host-internal
    // mock auto-resumes for conformance.
    await pollUntilTerminal(runId);

    const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
    const list = (events.json as { events?: LifecycleEvent[] }).events ?? [];

    const ofType = (type: string): LifecycleEvent[] => list.filter((e) => e.type === type);
    const opened = ofType('conversation.opened');
    const exchanged = ofType('conversation.exchanged');
    const closed = ofType('conversation.closed');

    expect(opened.length, 'conversation.opened MUST be emitted').toBeGreaterThan(0);
    // The scenario promises all three lifecycle events, so the exchange
    // must be asserted as well.
    expect(exchanged.length, 'conversation.exchanged MUST be emitted').toBeGreaterThan(0);
    expect(closed.length, 'conversation.closed MUST be emitted').toBeGreaterThan(0);

    // All three event types MUST share the same conversationId for the
    // fixture's single conversation.
    const convId: unknown = opened[0].payload?.conversationId;
    expect(typeof convId).toBe('string');
    for (const ev of [...opened, ...exchanged, ...closed]) {
      expect(ev.payload?.conversationId).toBe(convId);
    }

    // CO-3: no conversation.exchanged may follow conversation.closed for
    // the same conversationId. Log position is used as the ordering.
    const closedIdx = list.findIndex(
      (e) => e.type === 'conversation.closed' && e.payload?.conversationId === convId,
    );
    const exchangedAfterClose = list
      .slice(closedIdx + 1)
      .filter((e) => e.type === 'conversation.exchanged' && e.payload?.conversationId === convId);
    expect(exchangedAfterClose.length).toBe(0);
  });
});
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Agent Shift Phase 4 — replay-fork of a conversation produces identical log.
|
|
3
|
+
*
|
|
4
|
+
* Verifies that running `:fork` on a conversation-bearing run yields
|
|
5
|
+
* a child run whose conversation log (folded via the `message` reducer)
|
|
6
|
+
* is byte-equal to the source run's. Replay determinism is required
|
|
7
|
+
* for audit + debug-bundle consistency.
|
|
8
|
+
*
|
|
9
|
+
* Capability-gated: skips when host doesn't advertise conversation
|
|
10
|
+
* primitive OR doesn't advertise replay-fork. Fixture-gated: requires
|
|
11
|
+
* `conformance-conversation-replay`.
|
|
12
|
+
*
|
|
13
|
+
* @see spec/v1/replay.md
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { describe, it, expect } from 'vitest';
|
|
17
|
+
import { driver } from '../lib/driver.js';
|
|
18
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
19
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
20
|
+
import { isConversationPrimitiveSupported } from '../lib/multi-agent-capabilities.js';
|
|
21
|
+
|
|
22
|
+
const FIXTURE = 'conformance-conversation-replay';
|
|
23
|
+
const SKIP = !isConversationPrimitiveSupported() || !isFixtureAdvertised(FIXTURE);
|
|
24
|
+
|
|
25
|
+
describe.skipIf(SKIP)('conversationReplayDeterminism: replay-fork preserves conversation log', () => {
  /**
   * Completes a source run, forks it in `replay` mode, completes the fork,
   * and compares the JSON serialisation of both snapshots' `channels`.
   * Returns early without asserting when the fork endpoint answers 404 or 501.
   */
  it('forked run yields byte-equal conversation channel projection', async () => {
    // Reads the `channels` member of a run snapshot.
    const readChannels = async (id: string): Promise<Record<string, unknown> | undefined> => {
      const snapshot = await driver.get(`/v1/runs/${encodeURIComponent(id)}`);
      return (snapshot.json as { channels?: Record<string, unknown> }).channels;
    };

    const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
    expect(created.status).toBe(201);
    const sourceRunId = (created.json as { runId: string }).runId;

    const sourceTerminal = await pollUntilTerminal(sourceRunId);
    expect(sourceTerminal.status).toBe('completed');

    const sourceConv = await readChannels(sourceRunId);

    const forkResponse = await driver.post(`/v1/runs/${encodeURIComponent(sourceRunId)}:fork`, {
      mode: 'replay',
    });
    // Host doesn't support replay-fork: nothing further to verify.
    if (forkResponse.status === 404 || forkResponse.status === 501) {
      return;
    }
    expect([200, 201]).toContain(forkResponse.status);

    const forkedRunId = (forkResponse.json as { runId: string }).runId;
    const forkedTerminal = await pollUntilTerminal(forkedRunId);
    expect(forkedTerminal.status).toBe('completed');

    const forkedConv = await readChannels(forkedRunId);

    // NOTE(review): if neither snapshot exposes `channels`, both sides
    // serialise to `undefined` and this passes without comparing anything —
    // confirm the snapshot shape before relying on it.
    expect(JSON.stringify(forkedConv)).toBe(JSON.stringify(sourceConv));
  });
});
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Agent Shift Phase 4 — `conversation.exchange` differs from `clarification.requested`.
|
|
3
|
+
*
|
|
4
|
+
* Verifies that `core.conversationGate.exchange` produces
|
|
5
|
+
* `conversation.exchanged` events in the run log — distinct from the
|
|
6
|
+
* pre-MAS `clarification.requested` / `clarification.resolved` shape.
|
|
7
|
+
* Hosts MUST NOT emit `clarification.requested` for conversation.exchange
|
|
8
|
+
* suspends; the two surfaces are independent.
|
|
9
|
+
*
|
|
10
|
+
* Capability-gated: skips when host doesn't advertise
|
|
11
|
+
* `capabilities.conversationPrimitive: true`. Fixture-gated: requires
|
|
12
|
+
* `conformance-conversation-vs-clarification`.
|
|
13
|
+
*
|
|
14
|
+
* @see spec/v1/interrupt.md §`conversation.exchange` vs `clarification`
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { describe, it, expect } from 'vitest';
|
|
18
|
+
import { driver } from '../lib/driver.js';
|
|
19
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
20
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
21
|
+
import { isConversationPrimitiveSupported } from '../lib/multi-agent-capabilities.js';
|
|
22
|
+
|
|
23
|
+
const FIXTURE = 'conformance-conversation-vs-clarification';
|
|
24
|
+
const SKIP = !isConversationPrimitiveSupported() || !isFixtureAdvertised(FIXTURE);
|
|
25
|
+
|
|
26
|
+
describe.skipIf(SKIP)('conversationVsLegacySuspend: distinct event surfaces', () => {
  /**
   * Runs the fixture to a terminal state and checks that its event log
   * holds at least one `conversation.*` event and no `clarification.*` events.
   */
  it('conversation suspend emits conversation.* events, not clarification.*', async () => {
    const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
    expect(created.status).toBe(201);
    const { runId } = created.json as { runId: string };

    await pollUntilTerminal(runId);

    const response = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
    const log = (response.json as { events?: Array<{ type: string }> }).events ?? [];

    // Counts log entries whose type begins with the given prefix.
    const countWithPrefix = (prefix: string): number =>
      log.reduce((total, entry) => (entry.type.startsWith(prefix) ? total + 1 : total), 0);

    const conversationCount = countWithPrefix('conversation.');
    const clarificationCount = countWithPrefix('clarification.');

    expect(conversationCount).toBeGreaterThan(0);
    expect(
      clarificationCount,
      'conversation.exchange MUST NOT emit clarification.* events',
    ).toBe(0);
  });
});
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost attribution scenarios (G6 / O4) — covered by the v1.0 conformance baseline.
|
|
3
|
+
*
|
|
4
|
+
* The runtime side of G6 is expected to provide:
|
|
5
|
+
* - OPENWOP_COST_ATTRIBUTE_NAMES allowlist (6 attributes)
|
|
6
|
+
* - sanitizeCostForOtel() pure function with redaction enforcement
|
|
7
|
+
* - cost-attribute application wired into the host's span recorder
|
|
8
|
+
* - RunSnapshot.metrics.openwopCost rollup exposed via GET /v1/runs/{runId}
|
|
9
|
+
*
|
|
10
|
+
* Two scenarios:
|
|
11
|
+
* 1. Forward-compat shape check on any run's metrics.openwopCost (passes if
|
|
12
|
+
* the field is absent — spec-allowed — AND if present validates the
|
|
13
|
+
* canonical shape).
|
|
14
|
+
* 2. End-to-end content roundtrip via the `openwop-smoke-cost-emit` fixture
|
|
15
|
+
* workflow + `conformance.cost.emit` fixture node. The scenario
|
|
16
|
+
* detects fixture availability via the `404 workflow_not_found`
|
|
17
|
+
* error envelope and passes trivially (a soft skip) when absent. When present,
|
|
18
|
+
* asserts the canary cost shape lands in `metrics.openwopCost` end-to-end.
|
|
19
|
+
*
|
|
20
|
+
* Two scenarios remain `it.todo` because they need observable-span
|
|
21
|
+
* access — the conformance suite is black-box and can only see what the
|
|
22
|
+
* REST + event-log surfaces expose. Hosts should cover runtime-side
|
|
23
|
+
* enforcement in host-specific observability tests.
|
|
24
|
+
*
|
|
25
|
+
* Spec references:
|
|
26
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/observability.md §"AI cost"
|
|
27
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/schemas/run-snapshot.schema.json §metrics.openwopCost
|
|
28
|
+
* - conformance/fixtures.md §O4 cost-attribution fixture
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { describe, it, expect } from 'vitest';
|
|
32
|
+
import { driver } from '../lib/driver.js';
|
|
33
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
34
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
35
|
+
|
|
36
|
+
const NOOP_WORKFLOW_ID = 'conformance-noop';
|
|
37
|
+
const COST_EMIT_WORKFLOW_ID = 'openwop-smoke-cost-emit';
|
|
38
|
+
const SKIP_NO_NOOP = !isFixtureAdvertised(NOOP_WORKFLOW_ID);
|
|
39
|
+
const SKIP_NO_COST_EMIT = !isFixtureAdvertised(COST_EMIT_WORKFLOW_ID);
|
|
40
|
+
|
|
41
|
+
describe.skipIf(SKIP_NO_NOOP)('cost-attribution: metrics.openwopCost forward-compat shape (G6)', () => {
  /**
   * Asserts that `value` is an integer >= 0, naming the offending
   * `metrics.openwopCost` field in the failure message.
   */
  const expectNonNegativeInteger = (value: unknown, field: string): void => {
    expect(
      Number.isInteger(value),
      `metrics.openwopCost.${field} MUST be an integer`,
    ).toBe(true);
    expect(
      value as number,
      `metrics.openwopCost.${field} MUST be >= 0`,
    ).toBeGreaterThanOrEqual(0);
  };

  it('on any run, IF metrics.openwopCost is present, its shape MUST match the spec', async () => {
    // Use the noop fixture so we don't depend on AI nodes. The fixture
    // doesn't emit recordCost, so metrics.openwopCost will typically be
    // absent — that's allowed. The assertion is forward-compat: when
    // present, the structure MUST be the canonical one.
    // The id is the same constant that gates this suite, so the skip check
    // and the run can never drift apart.
    const create = await driver.post('/v1/runs', { workflowId: NOOP_WORKFLOW_ID });
    expect(create.status).toBe(201);
    const { runId } = create.json as { runId: string };

    const terminal = await pollUntilTerminal(runId);
    const openwopCost = terminal.metrics?.openwopCost;

    // Spec-allowed — the noop fixture has no cost emission, so there is
    // nothing to validate and the scenario passes trivially.
    if (openwopCost === undefined) {
      return;
    }

    // When present, validate the canonical shape per
    // run-snapshot.schema.json §metrics.openwopCost.
    if ('usd' in openwopCost) {
      expect(typeof openwopCost.usd, 'metrics.openwopCost.usd MUST be a number').toBe('number');
      expect(openwopCost.usd!, 'metrics.openwopCost.usd MUST be >= 0').toBeGreaterThanOrEqual(0);
    }
    if ('tokens' in openwopCost && openwopCost.tokens) {
      if ('input' in openwopCost.tokens) {
        expectNonNegativeInteger(openwopCost.tokens.input, 'tokens.input');
      }
      if ('output' in openwopCost.tokens) {
        expectNonNegativeInteger(openwopCost.tokens.output, 'tokens.output');
      }
    }
    if ('duration_ms' in openwopCost) {
      expectNonNegativeInteger(openwopCost.duration_ms, 'duration_ms');
    }
    if ('model' in openwopCost) {
      expect(typeof openwopCost.model, 'metrics.openwopCost.model MUST be a string').toBe('string');
    }
    if ('provider' in openwopCost) {
      expect(typeof openwopCost.provider, 'metrics.openwopCost.provider MUST be a string').toBe('string');
    }
  });
});
|
|
90
|
+
|
|
91
|
+
// Reference hosts MAY expose the same fixture node id with different
|
|
92
|
+
// canary numbers. These
|
|
93
|
+
// scenarios assert shape conformance + non-negative-integer/number
|
|
94
|
+
// constraints, not exact numeric equality, so any host's canary values pass.
|
|
95
|
+
|
|
96
|
+
/**
 * End-to-end cost round-trip through the cost-emit fixture workflow.
 *
 * Two scenarios, each starting its own run of the fixture:
 *   1. the terminal snapshot MUST carry `metrics.openwopCost`, and every
 *      field present on it MUST have the expected type / range;
 *   2. the run's event log MUST contain exactly one `node.completed` event
 *      for the cost-emitting node.
 *
 * The whole suite is skipped when the fixture is not advertised. Each
 * scenario additionally returns early if the host answers 404 / 422 on run
 * creation.
 */
describe.skipIf(SKIP_NO_COST_EMIT)('cost-attribution: end-to-end roundtrip via conformance.cost.emit (G6 / O4)', () => {
  // Node id the event-log scenario looks for in `node.completed` events.
  const COST_EMIT_NODE_ID = 'emit-cost';

  // Statuses treated as "host does not expose the fixture workflow".
  const isFixtureAbsent = (status: number): boolean => status === 404 || status === 422;

  it('metrics.openwopCost MUST carry the canary cost shape after the fixture node runs', async () => {
    const create = await driver.post('/v1/runs', {
      workflowId: COST_EMIT_WORKFLOW_ID,
    });

    // Fixture absent — host does not opt into OPENWOP_CONFORMANCE_FIXTURES.
    // That's spec-allowed. NOTE: this returns early, so the scenario is
    // reported as passed, not as skipped.
    if (isFixtureAbsent(create.status)) {
      return;
    }

    expect(create.status, driver.describe(
      'rest-endpoints.md POST /v1/runs',
      'starting openwop-smoke-cost-emit MUST succeed when OPENWOP_CONFORMANCE_FIXTURES=1 is advertised',
    )).toBe(201);
    const runId = (create.json as { runId: string }).runId;

    const terminal = await pollUntilTerminal(runId);
    expect(terminal.status, driver.describe(
      'observability.md §AI cost',
      'cost-emit fixture run MUST reach terminal completed',
    )).toBe('completed');

    const openwopCost = terminal.metrics?.openwopCost;
    expect(openwopCost, driver.describe(
      'run-snapshot.schema.json §metrics.openwopCost',
      'metrics.openwopCost MUST be populated after a node calls ctx.recordCost()',
    )).toBeDefined();
    // toBeDefined() lets null through; reject it explicitly so the `in`
    // checks below cannot throw a bare TypeError.
    expect(openwopCost, driver.describe(
      'run-snapshot.schema.json §metrics.openwopCost',
      'metrics.openwopCost MUST be populated after a node calls ctx.recordCost()',
    )).not.toBeNull();
    if (openwopCost == null) {
      // Not reachable once the two assertions above have passed; present
      // only so the compiler narrows the type for the rest of the test.
      throw new Error('metrics.openwopCost is missing');
    }

    // Provider — the fixture canary is a stable string. Host-defined
    // overrides are spec-allowed; we assert shape rather than exact match.
    if ('provider' in openwopCost) {
      expect(typeof openwopCost.provider).toBe('string');
      expect((openwopCost.provider as string).length).toBeGreaterThan(0);
    }

    // Model — same: shape, not exact.
    if ('model' in openwopCost) {
      expect(typeof openwopCost.model).toBe('string');
    }

    // Tokens — MUST be non-negative integers when present.
    if ('tokens' in openwopCost && openwopCost.tokens) {
      if ('input' in openwopCost.tokens) {
        expect(Number.isInteger(openwopCost.tokens.input), driver.describe(
          'observability.md §openwop.cost.tokens.input',
          'tokens.input MUST be a non-negative integer',
        )).toBe(true);
        expect(openwopCost.tokens.input!).toBeGreaterThanOrEqual(0);
      }
      if ('output' in openwopCost.tokens) {
        expect(Number.isInteger(openwopCost.tokens.output), driver.describe(
          'observability.md §openwop.cost.tokens.output',
          'tokens.output MUST be a non-negative integer',
        )).toBe(true);
        expect(openwopCost.tokens.output!).toBeGreaterThanOrEqual(0);
      }
    }

    // USD — MUST be non-negative number (fractional allowed).
    if ('usd' in openwopCost) {
      expect(typeof openwopCost.usd, driver.describe(
        'observability.md §openwop.cost.usd',
        'usd MUST be a number',
      )).toBe('number');
      expect(openwopCost.usd!, driver.describe(
        'observability.md §openwop.cost.usd',
        'usd MUST be >= 0',
      )).toBeGreaterThanOrEqual(0);
    }
  });

  it('cost-emit fixture run MUST emit a node.completed event for the cost-emitting node', async () => {
    const create = await driver.post('/v1/runs', {
      workflowId: COST_EMIT_WORKFLOW_ID,
    });
    // Same early exit as the scenario above: the test passes without
    // asserting anything when the host answers 404 / 422.
    if (isFixtureAbsent(create.status)) {
      return;
    }
    expect(create.status, 'POST /v1/runs for the cost-emit fixture MUST return 201').toBe(201);
    const runId = (create.json as { runId: string }).runId;

    // Wait for the run to finish so the event log is complete before reading it.
    await pollUntilTerminal(runId);

    const eventsResp = await driver.get(`/v1/runs/${runId}/events`);
    expect(eventsResp.status, 'GET /v1/runs/{runId}/events MUST return 200').toBe(200);
    const events = (eventsResp.json as { events: Array<{ type: string; nodeId?: string }> })
      .events;
    // Fail with a readable message, not a TypeError from `.filter`, when
    // the payload has no events array.
    expect(Array.isArray(events), 'events response MUST carry an `events` array').toBe(true);

    const completed = events.filter(
      (e) => e.type === 'node.completed' && e.nodeId === COST_EMIT_NODE_ID,
    );
    expect(completed.length, driver.describe(
      'event-log.md §node.completed',
      'cost-emit fixture node MUST emit exactly one node.completed event',
    )).toBe(1);
  });
});
|
|
198
|
+
|
|
199
|
+
/**
 * Placeholder suite for the cost-attribution scenarios that are not
 * implemented yet. Each title is registered with `it.todo`, so it appears
 * in the report without running any test body.
 */
describe('cost-attribution: G6 / O4 (still deferred — observable-span access required)', () => {
  // Registered in this order; both titles state what they are blocked on.
  const deferredScenarios = [
    'the OTel span attribute set MUST NOT contain any key outside OPENWOP_COST_ATTRIBUTE_NAMES (redaction) — BLOCKED on observable-span access; runtime enforcement belongs in host-specific observability tests',
    'credential-shaped fields in the upstream provider response MUST NOT appear in any OTel attribute or in metrics.openwopCost (regression test for G6 close-criteria allowlist enforcement) — BLOCKED on observable-span access; sanitizer-level redaction is unit-tested today',
  ];

  for (const title of deferredScenarios) {
    it.todo(title);
  }
});
|