@openwop/openwop-conformance 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +65 -0
  2. package/README.md +2 -2
  3. package/api/redocly.yaml +15 -0
  4. package/coverage.md +2 -1
  5. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  6. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  7. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  8. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  9. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  10. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  11. package/fixtures.md +6 -0
  12. package/package.json +1 -1
  13. package/schemas/capabilities.schema.json +16 -0
  14. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  15. package/schemas/run-event-payloads.schema.json +35 -1
  16. package/schemas/run-event.schema.json +2 -0
  17. package/src/lib/driver.ts +15 -0
  18. package/src/lib/env.ts +51 -0
  19. package/src/lib/event-log-query.ts +62 -0
  20. package/src/lib/fixtures.ts +38 -1
  21. package/src/lib/host-toggle.ts +54 -0
  22. package/src/lib/multi-agent-capabilities.ts +10 -0
  23. package/src/lib/otel-scrape.ts +59 -0
  24. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  25. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  26. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +128 -10
  27. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +236 -21
  28. package/src/scenarios/aiEnvelope.redaction.test.ts +204 -24
  29. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +158 -19
  30. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +59 -8
  31. package/src/scenarios/aiEnvelope.universalKinds.test.ts +100 -9
  32. package/src/scenarios/blob-presign-expiry.test.ts +35 -2
  33. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  34. package/src/scenarios/cache-ttl-expiry.test.ts +28 -2
  35. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  36. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  37. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  38. package/src/scenarios/fixtures-gating.test.ts +139 -1
  39. package/src/scenarios/kv-ttl-expiry.test.ts +33 -2
  40. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  41. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  42. package/src/scenarios/provider-usage.test.ts +185 -0
  43. package/src/scenarios/queue-ack-nack-dlq.test.ts +57 -3
  44. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +43 -3
  45. package/src/scenarios/replay-llm-cache-key.test.ts +166 -25
  46. package/src/scenarios/search-bm25-roundtrip.test.ts +47 -2
  47. package/src/scenarios/sql-transaction-atomicity.test.ts +31 -2
  48. package/src/scenarios/stream-subscribe-from-beginning.test.ts +39 -2
  49. package/src/scenarios/subworkflow-input-mapping.test.ts +77 -7
  50. package/src/scenarios/table-cursor-pagination.test.ts +40 -2
  51. package/src/scenarios/table-schema-enforcement.test.ts +39 -2
  52. package/src/scenarios/vector-knn-roundtrip.test.ts +43 -3
  53. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
package/src/lib/env.ts CHANGED
@@ -25,6 +25,28 @@
25
25
  * hosts go strict-mode green without falsifying capability claims.
26
26
  * Example for SQLite:
27
27
  * OPENWOP_OPTED_OUT_PROFILES=openwop-production,openwop-auth-mtls
28
+ *
29
+ * OPENWOP_OPTED_OUT_FIXTURES — comma-separated fixture ids (or
30
+ * trailing-`*` globs) the host operator has DELIBERATELY chosen
31
+ * not to honor. Applied in `lib/fixtures.ts` by filtering matching
32
+ * entries out of the cached advertised-fixture set, so any
33
+ * scenario gated via `isFixtureAdvertised(...)` skips cleanly.
34
+ * Use when a host auto-loads every `conformance-*.json` on disk
35
+ * (so the fixture id IS in the discovery doc) but the host doesn't
36
+ * implement the gated feature. Symmetric to `OPENWOP_OPTED_OUT_
37
+ * PROFILES` for the fixture-id axis. Example for SQLite:
38
+ * OPENWOP_OPTED_OUT_FIXTURES=conformance-dispatch-*,conformance-subworkflow-input-mapping*
39
+ *
40
+ * OPENWOP_OPTED_OUT_SCENARIOS — comma-separated scenario ids that
41
+ * individual tests consult to skip themselves where neither
42
+ * profile-opt-out nor fixture-opt-out is fine-grained enough
43
+ * (e.g., OTel trace-inheritance across `core.subWorkflow` —
44
+ * `conformance-subworkflow-parent` is correctly advertised because
45
+ * non-OTel subworkflow scenarios pass, but the host doesn't
46
+ * propagate traceparent across the dispatch boundary). Use
47
+ * `isScenarioOptedOut(scenarioId)` from `env.ts` in the test's
48
+ * skip predicate. Reserved for cases where the suite-wide
49
+ * skip mechanisms can't carry the granularity.
28
50
  */
29
51
 
30
52
  export interface ConformanceEnv {
@@ -84,3 +106,32 @@ export function loadEnv(): ConformanceEnv {
84
106
  };
85
107
  return cached;
86
108
  }
109
+
110
/**
 * Reports whether the operator listed `scenarioId` in the
 * `OPENWOP_OPTED_OUT_SCENARIOS` environment variable (comma-separated
 * scenario ids; whitespace around each entry is ignored).
 *
 * Intended for a test's `describe.skipIf` predicate when neither
 * profile opt-out nor fixture opt-out is granular enough. This helper
 * logs nothing; callers format and emit their own skip message.
 *
 * `process.env` is re-read on every call (no memoization), so unit tests
 * can mutate the variable between cases without resetting a cache.
 *
 * Blank entries (e.g. from a trailing or doubled comma) are ignored, so
 * an empty `scenarioId` is never reported as opted out.
 *
 * @param scenarioId - Scenario id to look up; compared by exact match.
 * @returns `true` when a non-blank entry equals `scenarioId`.
 */
export function isScenarioOptedOut(scenarioId: string): boolean {
  const raw = process.env.OPENWOP_OPTED_OUT_SCENARIOS?.trim() ?? '';
  if (raw.length === 0) return false;
  return raw
    .split(',')
    .map((entry) => entry.trim())
    // Skip blanks so "a,,b" or "a," cannot match an empty scenario id.
    .some((entry) => entry.length > 0 && entry === scenarioId);
}
131
+
132
/**
 * Test-only hook: drops the memoized `loadEnv()` result so the next
 * `loadEnv()` call re-reads `process.env`. Needed by any test that
 * mutates, mid-suite, the environment variables `loadEnv()` consumes.
 */
export function __resetEnvCacheForTests(): void {
  cached = null;
}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Driver helpers for the test-only event-log query seam
3
+ * (`GET /v1/host/sample/test/runs/:runId/events`).
4
+ *
5
+ * Used by aiEnvelope engine-projection scenarios that verify the
6
+ * spec-prescribed events the host MUST emit on each envelope outcome
7
+ * (per RFC 0021 §A point 1-7 + interrupt.md + capabilities.md
8
+ * §"cap.breached"). All operations soft-skip on HTTP 404 — hosts
9
+ * without the seam keep the existing advertisement-shape coverage.
10
+ *
11
+ * Reset semantics: callers SHOULD `resetTestSeam()` in their test's
12
+ * `afterEach` (or scope each test to a unique runId) to keep state
13
+ * from leaking across scenarios.
14
+ */
15
+
16
+ import { driver } from './driver.js';
17
+
18
/**
 * One event record as returned in the `events` array of the test-only
 * event-log seam. All fields are read-only; the last three are optional
 * and may be absent on any given event.
 */
export interface TestEvent {
  readonly eventId: string;
  readonly runId: string;
  /** Event type name, e.g. `cap.breached` or `node.failed`. */
  readonly type: string;
  /** Type-specific payload; its shape depends on `type`. */
  readonly payload: Record<string, unknown>;
  readonly timestamp: string;
  readonly sequence: number;
  readonly causationId?: string;
  readonly nodeId?: string;
  readonly contentTrust?: 'trusted' | 'untrusted';
}
29
+
30
/**
 * Result of an event-log query.
 *  - `ok: true` carries the matched events.
 *  - `seam_unavailable` means the host answered HTTP 404.
 *  - `http_error` carries any other non-200 status.
 */
export type QueryOutcome =
  | { ok: true; events: TestEvent[] }
  | { ok: false; reason: 'seam_unavailable' }
  | { ok: false; reason: 'http_error'; status: number };
34
+
35
/**
 * Query the test-only event log for a run, with optional filters.
 *
 * Each non-empty filter field is forwarded as a query-string parameter
 * of the same name. The response is mapped as follows:
 *  - HTTP 404 → `{ ok: false, reason: 'seam_unavailable' }`
 *  - any other non-200 → `{ ok: false, reason: 'http_error', status }`
 *  - HTTP 200 → `{ ok: true, events }`
 *
 * A 200 response whose body is missing, or whose `events` member is not
 * an array, yields an empty event list instead of throwing, so a
 * malformed host response surfaces as an assertion failure in the caller.
 *
 * @param runId - Run whose events are requested (URL-encoded into the path).
 * @param filter - Optional equality filters; empty or absent fields are omitted.
 */
export async function queryTestEvents(
  runId: string,
  filter: { type?: string; correlationId?: string; causationId?: string; nodeId?: string } = {},
): Promise<QueryOutcome> {
  const qs = new URLSearchParams();
  for (const key of ['type', 'correlationId', 'causationId', 'nodeId'] as const) {
    const value = filter[key];
    if (value) qs.set(key, value);
  }
  const query = qs.toString();
  const url = `/v1/host/sample/test/runs/${encodeURIComponent(runId)}/events${query ? '?' + query : ''}`;
  const res = await driver.get(url);
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };
  // Tolerate a null or odd-shaped body rather than crashing on property access.
  const body = res.json as { events?: unknown } | null | undefined;
  const rawEvents = body?.events;
  return { ok: true, events: Array.isArray(rawEvents) ? (rawEvents as TestEvent[]) : [] };
}
52
+
53
/**
 * Reset the test-only event log + capability overlay (suite teardown)
 * by POSTing an empty body to the seam's reset endpoint. The response
 * is not inspected, so the promise resolves regardless of HTTP status.
 */
export async function resetTestSeam(): Promise<void> {
  const resetPath = '/v1/host/sample/test/reset';
  await driver.post(resetPath, {});
}
57
+
58
/**
 * Probe whether the event-log seam is exposed by querying a placeholder
 * run id. Resolves `true` only when that query succeeds; use it to
 * soft-skip a scenario early.
 */
export async function isEventLogSeamAvailable(): Promise<boolean> {
  const { ok } = await queryTestEvents('__probe__');
  return ok;
}
@@ -26,6 +26,16 @@
26
26
  * This module is sync. The async fetch lives in `setup.ts` which calls
27
27
  * `setAdvertisedFixtures(...)` from a top-level `await`.
28
28
  *
29
+ * Honest opt-out (symmetric to `OPENWOP_OPTED_OUT_PROFILES`):
30
+ * `OPENWOP_OPTED_OUT_FIXTURES` (CSV, supports trailing `*` glob)
31
+ * subtracts matching fixture-ids from the cached set even when the
32
+ * host advertises them. Operators use this when the host happens to
33
+ * carry a fixture file (e.g., it auto-loads every `conformance-*.json`
34
+ * on disk) but does NOT implement the underlying feature — so the
35
+ * gated scenario should skip instead of running and failing. The
36
+ * subtraction happens at cache-population time, so the predicate
37
+ * remains a single sync set lookup at scenario-evaluation time.
38
+ *
29
39
  * @see spec/v1/capabilities.md §`fixtures`
30
40
  * @see spec/v1/profiles.md §`openwop-fixtures`
31
41
  * @see RFCS/0003-fixture-gating.md
@@ -35,19 +45,46 @@ import type { DiscoveryPayload } from './profiles.js';
35
45
 
36
46
  let _advertisedFixtures: ReadonlySet<string> | null = null;
37
47
 
48
/**
 * Builds the "is this fixture-id opted out?" predicate from the
 * `OPENWOP_OPTED_OUT_FIXTURES` environment variable.
 *
 * The variable is a comma-separated list; entries are trimmed and blank
 * ones dropped. An entry ending in `*` matches every id that starts with
 * the text before the `*`; any other entry matches only the identical id.
 * When the variable is unset or blank the predicate always answers `false`.
 */
function loadOptedOutPredicate(): (id: string) => boolean {
  const csv = (process.env.OPENWOP_OPTED_OUT_FIXTURES ?? '').trim();
  if (csv === '') return () => false;

  const entries = csv
    .split(',')
    .map((piece) => piece.trim())
    .filter((piece) => piece !== '');
  const isGlob = (entry: string): boolean => entry.endsWith('*');

  // Trailing-`*` entries become prefixes; everything else is an exact id.
  const globPrefixes = entries.filter(isGlob).map((entry) => entry.slice(0, -1));
  const exactIds = new Set(entries.filter((entry) => !isGlob(entry)));

  return (id) => exactIds.has(id) || globPrefixes.some((prefix) => id.startsWith(prefix));
}
68
+
38
69
/**
 * Fills the advertised-fixture cache from a discovery-doc payload.
 *
 * Malformed input never throws: a null/undefined payload, or one whose
 * `fixtures` member is not an array, collapses to "no fixtures
 * advertised", and non-string or empty entries are dropped. This keeps
 * the suite resilient against host bugs in the discovery surface.
 *
 * Ids matched by `OPENWOP_OPTED_OUT_FIXTURES` are removed here, before
 * storage, so later lookups remain a single sync set-membership test.
 */
export function setAdvertisedFixtures(c: DiscoveryPayload | null | undefined): void {
  if (c == null || !Array.isArray(c.fixtures)) {
    _advertisedFixtures = new Set();
    return;
  }
  const optedOut = loadOptedOutPredicate();
  const kept = new Set<string>();
  for (const entry of c.fixtures) {
    if (typeof entry !== 'string' || entry.length === 0) continue;
    if (optedOut(entry)) continue;
    kept.add(entry);
  }
  _advertisedFixtures = kept;
}
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Capability-toggle harness primitive — driver helper for the
3
+ * env-gated test-seam endpoint at
4
+ * `POST /v1/host/sample/test/capability-toggle`.
5
+ *
6
+ * Lets refusal-case scenarios (RFC 0022 §C HVMAP-1a-refusal,
7
+ * HVMAP-2-refusal, etc.) flip a capability flag off temporarily,
8
+ * exercise the host's refusal path, then restore the default.
9
+ *
10
+ * All operations soft-skip on HTTP 404 — hosts that don't expose the
11
+ * seam keep the existing advertisement-shape coverage intact.
12
+ *
13
+ * Reset semantics: callers MUST `resetHostCapabilities()` in their
14
+ * test's `afterEach` (or equivalent) to keep state from leaking
15
+ * across scenarios.
16
+ */
17
+
18
+ import { driver } from './driver.js';
19
+
20
/**
 * Result of a capability-toggle call.
 *  - `ok: true` carries the overlay map returned by the host.
 *  - `seam_unavailable` means the host answered HTTP 404.
 *  - `http_error` carries any other non-200 status.
 */
export type ToggleOutcome =
  | { ok: true; overlay: Record<string, boolean> }
  | { ok: false; reason: 'seam_unavailable' }
  | { ok: false; reason: 'http_error'; status: number };
24
+
25
/** Path of the env-gated capability-toggle test seam. */
const CAPABILITY_TOGGLE_PATH = '/v1/host/sample/test/capability-toggle';

/**
 * POSTs `body` to the capability-toggle seam and maps the response to a
 * `ToggleOutcome`: HTTP 404 → `seam_unavailable`, any other non-200 →
 * `http_error` with the status, HTTP 200 → the returned overlay. A 200
 * with a missing body or no `overlay` member yields an empty overlay
 * instead of throwing.
 */
async function postCapabilityToggle(body: Record<string, unknown>): Promise<ToggleOutcome> {
  const res = await driver.post(CAPABILITY_TOGGLE_PATH, body);
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };
  const parsed = res.json as { overlay?: Record<string, boolean> } | null | undefined;
  return { ok: true, overlay: parsed?.overlay ?? {} };
}

/**
 * Set a capability flag's overlay value. `value: null` removes the
 * overlay entry (restoring the host's hard-coded default).
 *
 * @param name - Capability flag name sent to the host.
 * @param value - Overlay value, or `null` to drop the overlay entry.
 */
export async function setHostCapability(
  name: string,
  value: boolean | null,
): Promise<ToggleOutcome> {
  return postCapabilityToggle({ name, value });
}

/** Clear ALL capability overlay entries on the host. */
export async function resetHostCapabilities(): Promise<ToggleOutcome> {
  return postCapabilityToggle({ reset: true });
}
46
+
47
/**
 * Probe whether the host exposes the capability-toggle seam at all, by
 * asking it to drop the overlay entry for a placeholder flag name.
 * Resolves `true` only when that call succeeds. Use this to soft-skip a
 * scenario early when the host lacks the toggle (the refusal contract is
 * still spec-normative; the test just can't drive it from outside).
 */
export async function isToggleAvailable(): Promise<boolean> {
  const { ok } = await setHostCapability('__probe__', null);
  return ok;
}
@@ -37,6 +37,9 @@ interface AgentCaps {
37
37
  | {
38
38
  verbosity: 'summary' | 'full' | 'off' | undefined;
39
39
  tokenLimit: number | undefined;
40
+ /** RFC 0024. When true, host may emit `agent.reasoning.delta`
41
+ * events in addition to the closing `agent.reasoned`. */
42
+ streaming: boolean;
40
43
  }
41
44
  | undefined;
42
45
  }
@@ -84,6 +87,7 @@ export function setMultiAgentCapabilities(c: DiscoveryPayload | null | undefined
84
87
  typeof (reasoningRaw as Record<string, unknown>).tokenLimit === 'number'
85
88
  ? ((reasoningRaw as Record<string, unknown>).tokenLimit as number)
86
89
  : undefined,
90
+ streaming: asBoolean((reasoningRaw as Record<string, unknown>).streaming),
87
91
  }
88
92
  : undefined;
89
93
  _agentCaps = {
@@ -113,6 +117,12 @@ export function getReasoningVerbosity(): 'summary' | 'full' | 'off' | undefined
113
117
  return _agentCaps?.reasoning?.verbosity;
114
118
  }
115
119
 
120
/**
 * RFC 0024 — reports whether the cached agent capabilities have
 * `reasoning.streaming` set to `true`, i.e. the host emits incremental
 * `agent.reasoning.delta` events while a reasoning block is still open.
 * Returns `false` when no capabilities or no reasoning block are cached.
 */
export function isReasoningStreamingSupported(): boolean {
  const reasoning = _agentCaps?.reasoning;
  return reasoning?.streaming === true;
}
125
+
116
126
  /** Phase 2 — host supports the named modelClass. */
117
127
  export function hasModelClass(modelClass: string): boolean {
118
128
  return _agentCaps?.modelClasses.has(modelClass) === true;
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Driver helpers for the OTel + debug-bundle test seams (E.2 + E.3).
3
+ *
4
+ * Used by aiEnvelope + cost-attribution scenarios that need to verify
5
+ * span-attribute redaction (no BYOK canary in OTel attributes) and
6
+ * debug-bundle export shape.
7
+ */
8
+
9
+ import { driver } from './driver.js';
10
+
11
/**
 * One span record as returned in the `spans` array of the test-only OTel
 * seam. `envelopeId` and `runId` are optional and may be absent.
 */
export interface TestSpan {
  readonly spanId: string;
  readonly name: string;
  /** Span attributes, limited to primitive values. */
  readonly attributes: Record<string, string | number | boolean>;
  readonly envelopeId?: string;
  readonly runId?: string;
  readonly timestamp: string;
}
19
+
20
/**
 * Debug-bundle export for one run, as returned in the `bundle` member of
 * the debug-bundle seam response.
 */
export interface DebugBundle {
  readonly runId: string;
  /** Events included in the bundle; left untyped by this helper. */
  readonly events: unknown[];
  readonly spans: TestSpan[];
  readonly exportedAt: string;
}
26
+
27
/**
 * Result of an OTel / debug-bundle seam call carrying data of type `T`.
 *  - `ok: true` carries the data.
 *  - `seam_unavailable` means the host answered HTTP 404.
 *  - `http_error` carries a non-200 status.
 */
export type ScrapeOutcome<T> =
  | { ok: true; data: T }
  | { ok: false; reason: 'seam_unavailable' }
  | { ok: false; reason: 'http_error'; status: number };
31
+
32
/**
 * Query the test-only OTel span store, with optional filters.
 *
 * Each non-empty filter field is forwarded as a query-string parameter
 * of the same name. HTTP 404 maps to `seam_unavailable`, any other
 * non-200 to `http_error`, and HTTP 200 to the returned spans.
 *
 * A 200 response whose body is missing, or whose `spans` member is not
 * an array, yields an empty span list instead of throwing.
 *
 * @param filter - Optional equality filters; empty or absent fields are omitted.
 */
export async function queryTestSpans(
  filter: { envelopeId?: string; runId?: string; name?: string } = {},
): Promise<ScrapeOutcome<TestSpan[]>> {
  const qs = new URLSearchParams();
  for (const key of ['envelopeId', 'runId', 'name'] as const) {
    const value = filter[key];
    if (value) qs.set(key, value);
  }
  const query = qs.toString();
  const url = `/v1/host/sample/test/otel/spans${query ? '?' + query : ''}`;
  const res = await driver.get(url);
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };
  // Tolerate a null or odd-shaped body rather than crashing on property access.
  const body = res.json as { spans?: unknown } | null | undefined;
  const rawSpans = body?.spans;
  return { ok: true, data: Array.isArray(rawSpans) ? (rawSpans as TestSpan[]) : [] };
}
46
+
47
/**
 * Ask the host to export a debug bundle for `runId` via the test seam.
 *
 * HTTP 404 maps to `seam_unavailable` and any other non-200 to
 * `http_error` with the real status. A 200 response that carries no
 * `bundle` (including a missing/null body) is reported as `http_error`
 * with a synthetic status of 500, because the host claimed success but
 * returned nothing usable.
 *
 * @param runId - Run whose debug bundle should be exported.
 */
export async function exportDebugBundle(runId: string): Promise<ScrapeOutcome<DebugBundle>> {
  const res = await driver.post('/v1/host/sample/test/debug-bundle/export', { runId });
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };
  // Optional chaining keeps a null body on the "no bundle" path instead of throwing.
  const bundle = (res.json as { bundle?: DebugBundle } | null | undefined)?.bundle;
  if (!bundle) return { ok: false, reason: 'http_error', status: 500 };
  return { ok: true, data: bundle };
}
55
+
56
/**
 * Probe whether the OTel span seam is exposed by querying spans for a
 * placeholder run id. Resolves `true` only when that query succeeds.
 */
export async function isOtelSeamAvailable(): Promise<boolean> {
  const { ok } = await queryTestSpans({ runId: '__probe__' });
  return ok;
}
@@ -0,0 +1,193 @@
1
+ /**
2
+ * RFC 0024 — streaming `agent.reasoning.delta` events.
3
+ *
4
+ * Verifies that hosts advertising `capabilities.agents.reasoning.streaming: true`
5
+ * emit incremental `agent.reasoning.delta` events while a reasoning
6
+ * block is still open, followed by exactly one closing `agent.reasoned`
7
+ * event carrying the full authoritative content.
8
+ *
9
+ * Capability-gated: skips when the host doesn't advertise
10
+ * `capabilities.agents.supported: true` AND
11
+ * `capabilities.agents.reasoning.streaming: true`, OR when reasoning
12
+ * verbosity is `'off'`.
13
+ *
14
+ * Driven by the `core.conformance.mock-agent` typeId (RFC 0023)
15
+ * extended with `mockReasoning.streamChunks` per RFC 0024 §"Conformance"
16
+ * (see `schemas/core-conformance-mock-agent-config.schema.json`).
17
+ *
18
+ * @see RFCS/0024-agent-reasoning-streaming.md
19
+ * @see schemas/run-event-payloads.schema.json §`agentReasoningDelta`
20
+ * @see schemas/capabilities.schema.json §`agents.reasoning.streaming`
21
+ */
22
+
23
+ import { describe, it, expect } from 'vitest';
24
+ import { driver } from '../lib/driver.js';
25
+ import { pollUntilTerminal } from '../lib/polling.js';
26
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
27
+ import {
28
+ isAgentSupported,
29
+ isReasoningStreamingSupported,
30
+ getReasoningVerbosity,
31
+ } from '../lib/multi-agent-capabilities.js';
32
+
33
/** Workflow id of the fixture every test in this file runs. */
const FIXTURE = 'conformance-agent-reasoning-streaming';

/**
 * The fixture's `streamChunks`, in emission order. Must be kept in sync
 * with `conformance/fixtures/conformance-agent-reasoning-streaming.json`:
 * when the fixture changes, this constant changes with it.
 */
const EXPECTED_CHUNKS = [
  'Let me think about this. ',
  'First, the user is asking a question. ',
  'Therefore, I should respond clearly.',
] as const;

/** Concatenation of all expected chunks — the closing event's content. */
const EXPECTED_FULL = EXPECTED_CHUNKS.join('');

/**
 * Skip unless the host supports agents, advertises reasoning streaming,
 * has reasoning verbosity other than `'off'`, and advertises the fixture.
 */
const SKIP = !(
  isAgentSupported() &&
  isReasoningStreamingSupported() &&
  getReasoningVerbosity() !== 'off' &&
  isFixtureAdvertised(FIXTURE)
);
49
+
50
describe.skipIf(SKIP)('agentReasoningStreaming: RFC 0024 incremental + closing event contract', () => {
  type LoggedEvent = { type: string; payload?: Record<string, unknown> };

  const RFC_REF = 'RFCS/0024-agent-reasoning-streaming.md §Proposal';

  /**
   * Starts a fresh run of the fixture, waits for it to reach a terminal
   * state, and returns the raw JSON body of the run's event log.
   *
   * Asserts both HTTP statuses, so a failed create or fetch is reported
   * as such rather than as a TypeError in the caller's assertions.
   */
  async function fetchEventLogOfFreshRun(): Promise<unknown> {
    const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
    expect(create.status).toBe(201);
    const runId = (create.json as { runId: string }).runId;

    await pollUntilTerminal(runId);

    const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
    expect(events.status).toBe(200);
    return events.json;
  }

  /** Same as `fetchEventLogOfFreshRun`, unwrapped to the `events` array. */
  async function fetchEventsOfFreshRun(): Promise<LoggedEvent[]> {
    return ((await fetchEventLogOfFreshRun()) as { events: LoggedEvent[] }).events;
  }

  it('emits N agent.reasoning.delta events followed by exactly one closing agent.reasoned', async () => {
    const list = await fetchEventsOfFreshRun();

    const deltas = list.filter((e) => e.type === 'agent.reasoning.delta');
    const finals = list.filter((e) => e.type === 'agent.reasoned');

    expect(
      deltas.length,
      driver.describe(
        RFC_REF,
        'streaming host MUST emit one agent.reasoning.delta per streamChunks entry',
      ),
    ).toBe(EXPECTED_CHUNKS.length);
    expect(
      finals.length,
      driver.describe(
        RFC_REF,
        'streaming host MUST emit exactly one closing agent.reasoned event after the deltas',
      ),
    ).toBe(1);
  });

  it('agent.reasoning.delta `sequence` starts at 0 and increments by 1 within the block', async () => {
    const list = await fetchEventsOfFreshRun();

    const deltas = list.filter((e) => e.type === 'agent.reasoning.delta');
    const sequences = deltas
      .map((e) => e.payload?.sequence)
      .filter((s): s is number => typeof s === 'number');

    expect(
      sequences,
      driver.describe(
        RFC_REF,
        '`sequence` MUST start at 0 and increment by 1 per delta within a block',
      ),
    ).toEqual(EXPECTED_CHUNKS.map((_, i) => i));
  });

  it('closing agent.reasoned.reasoning is the concatenation of the deltas (authoritative)', async () => {
    const list = await fetchEventsOfFreshRun();

    const finalEvent = list.find((e) => e.type === 'agent.reasoned');
    expect(finalEvent, 'closing agent.reasoned must be present').toBeDefined();
    const reasoning = finalEvent?.payload?.reasoning;
    expect(typeof reasoning, 'closing event MUST carry a reasoning string').toBe('string');
    // The mock-agent's contract: closing reasoning equals concat(streamChunks).
    // Real hosts MAY transform at finalize (summary truncation, redaction);
    // for the mock-agent fixture, no transform applies — exact equality.
    expect(
      reasoning,
      driver.describe(
        RFC_REF,
        'closing agent.reasoned.reasoning is authoritative; for the mock-agent fixture, equals delta concatenation',
      ),
    ).toBe(EXPECTED_FULL);
  });

  it('agentId is consistent across all streaming + closing events in a block', async () => {
    const list = await fetchEventsOfFreshRun();

    const relevant = list.filter(
      (e) => e.type === 'agent.reasoning.delta' || e.type === 'agent.reasoned',
    );
    const agentIds = relevant.map((e) => e.payload?.agentId);

    // An event with no agentId must not be silently ignored: dropping it
    // would let a host pass while only some events carry the id.
    expect(
      agentIds.every((id) => typeof id === 'string' && id.length > 0),
      driver.describe(
        RFC_REF,
        'every `agent.reasoning.delta` and the closing `agent.reasoned` MUST carry a non-empty string agentId',
      ),
    ).toBe(true);
    expect(
      new Set(agentIds).size,
      driver.describe(
        RFC_REF,
        'agentId MUST be consistent across all `agent.reasoning.delta` events AND the closing `agent.reasoned` for a given block',
      ),
    ).toBe(1);
  });

  it('all agent.reasoning.delta events arrive BEFORE the closing agent.reasoned', async () => {
    // This test accepts either a bare array or an `{ events }` wrapper.
    const list = (await fetchEventLogOfFreshRun()) as
      | Array<{ type: string }>
      | { events: Array<{ type: string }> };
    const arr = Array.isArray(list) ? list : list.events;

    const closingIdx = arr.findIndex((e) => e.type === 'agent.reasoned');
    expect(closingIdx, 'closing event present').toBeGreaterThan(-1);
    const lastDeltaIdx = arr.map((e) => e.type).lastIndexOf('agent.reasoning.delta');

    // Guard against vacuous pass: a host advertising streaming but
    // emitting ZERO deltas would otherwise pass `-1 < closingIdx`
    // trivially. The fixture configures 3 streamChunks, so at least
    // one delta MUST appear in the event log.
    expect(
      lastDeltaIdx,
      driver.describe(
        RFC_REF,
        'streaming host MUST emit at least one `agent.reasoning.delta` for a fixture with non-empty `streamChunks`',
      ),
    ).toBeGreaterThan(-1);
    expect(
      lastDeltaIdx,
      driver.describe(
        RFC_REF,
        'every `agent.reasoning.delta` MUST precede the closing `agent.reasoned` for the same block',
      ),
    ).toBeLessThan(closingIdx);
  });
});
@@ -161,13 +161,101 @@ describe('aiEnvelope.capBreached: behavioral cap enforcement (FINAL v1.1)', () =
161
161
  });
162
162
  });
163
163
 
164
- describe('aiEnvelope.capBreached: engine-integration placeholders', () => {
165
- // These require the engine to project `breached` outcomes onto the
166
- // existing `cap.breached` event surface per
167
- // capabilities.md §"Engine-enforced limits and the cap.breached event".
168
- // The pure-function acceptor surfaces the `breached` outcome with
169
- // capKind; the engine projects it to the event log.
170
- it.todo('project breached outcome onto cap.breached { kind: "envelopes" } event');
171
- it.todo('cap.breached payload includes limit, observed, and (for node-scoped kinds) nodeId per capabilities.md');
172
- it.todo('cap.breached node.failed terminal transition');
164
+ // E.1 engine-projection via the test-only event-log seam. The acceptor
165
+ // returns the breached outcome; the seam projects it onto cap.breached +
166
+ // node.failed per capabilities.md §"Engine-enforced limits". Tests
167
+ // soft-skip on HTTP 404 when the seam isn't exposed.
168
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
169
+
170
describe('aiEnvelope.capBreached: engine projection via event-log seam (capabilities.md §"cap.breached")', () => {
  /** Builds a run id that is unique per test invocation. */
  const freshRunId = (label: string): string =>
    `r-${label}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

  // Every test wraps its body in try/finally so the seam is reset even
  // when the test soft-skips part-way or an assertion fails; otherwise
  // projected events would leak into later scenarios.

  it('breached outcome projects to cap.breached { kind: "envelopes" } event with causationId chain', async () => {
    if (!(await isEventLogSeamAvailable())) return;
    const runId = freshRunId('cap-env');
    const correlationId = `${runId}:node-1:turn-0:cap-env`;
    try {
      const r = await accept(
        {
          type: 'error',
          schemaVersion: 1,
          envelopeId: 'env-proj-cap-env',
          correlationId,
          payload: { code: 'x', message: 'y' },
          meta: baseMeta,
        },
        {
          counters: { envelopesPerTurn: { current: 32, cap: 32 } },
          projectTo: { runId, nodeId: 'node-1' },
        },
      );
      if (r.status === 404) return;
      expect(r.body.status).toBe('breached');

      const events = await queryTestEvents(runId, { type: 'cap.breached' });
      if (!events.ok) return;
      expect(
        events.events.length,
        driver.describe('capabilities.md §"Engine-enforced limits and the cap.breached event"', 'breached outcome MUST project to exactly one cap.breached event'),
      ).toBe(1);
      const evt = events.events[0]!;
      expect(evt.payload.kind).toBe('envelopes');
      expect(evt.causationId).toBe(correlationId);
    } finally {
      await resetTestSeam();
    }
  });

  it('cap.breached payload includes limit, observed, and nodeId per capabilities.md', async () => {
    if (!(await isEventLogSeamAvailable())) return;
    const runId = freshRunId('cap-payload');
    try {
      await accept(
        {
          type: 'clarification.request',
          schemaVersion: 1,
          envelopeId: 'env-proj-cap-clar',
          correlationId: `${runId}:node-2:turn-0:cap`,
          payload: { questions: [{ id: 'q1', question: 'why?' }] },
          meta: baseMeta,
        },
        {
          counters: { clarificationRounds: { current: 5, cap: 5 } },
          projectTo: { runId, nodeId: 'node-2' },
        },
      );
      const events = await queryTestEvents(runId, { type: 'cap.breached' });
      // Soft-skip when the query fails or nothing was projected.
      if (!events.ok || events.events.length === 0) return;
      const evt = events.events[0]!;
      expect(evt.payload.kind).toBe('clarification');
      expect(
        typeof evt.payload.limit,
        driver.describe('capabilities.md §"cap.breached"', 'payload.limit MUST be present as a number'),
      ).toBe('number');
      // The test title promises `observed`; assert it alongside `limit`.
      expect(
        typeof evt.payload.observed,
        driver.describe('capabilities.md §"cap.breached"', 'payload.observed MUST be present as a number'),
      ).toBe('number');
      expect(evt.payload.nodeId).toBe('node-2');
    } finally {
      await resetTestSeam();
    }
  });

  it('cap.breached MUST be paired with a terminal node.failed transition', async () => {
    if (!(await isEventLogSeamAvailable())) return;
    const runId = freshRunId('cap-fail');
    try {
      await accept(
        {
          type: 'schema.request',
          schemaVersion: 1,
          envelopeId: 'env-proj-cap-fail',
          correlationId: `${runId}:node-3:turn-0:cap`,
          payload: { envelopeType: 'vendor.acme.foo' },
          meta: baseMeta,
        },
        {
          counters: { schemaRounds: { current: 3, cap: 3 } },
          projectTo: { runId, nodeId: 'node-3' },
        },
      );
      const breached = await queryTestEvents(runId, { type: 'cap.breached' });
      const failed = await queryTestEvents(runId, { type: 'node.failed' });
      if (!breached.ok || !failed.ok) return;
      expect(breached.events.length).toBe(1);
      expect(
        failed.events.length,
        driver.describe('capabilities.md §"cap.breached"', 'cap.breached MUST be paired with a terminal node.failed event'),
      ).toBe(1);
      // Optional chaining: a payload without `error` fails the assertion
      // instead of throwing a TypeError.
      const failure = failed.events[0]!.payload.error as { code?: string } | undefined;
      expect(failure?.code).toBe('cap_breached');
    } finally {
      await resetTestSeam();
    }
  });
});