npm - @openwop/openwop-conformance - Versions diffs - 1.0.0 - Mend

@openwop/openwop-conformance 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (175) hide show

package/LICENSE +201 -0
package/README.md +241 -0
package/api/asyncapi.yaml +481 -0
package/api/openapi.yaml +830 -0
package/api/redocly.yaml +8 -0
package/coverage.md +80 -0
package/dist/cli.js +161 -0
package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
package/fixtures/conformance-agent-identity.json +27 -0
package/fixtures/conformance-agent-low-confidence.json +29 -0
package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
package/fixtures/conformance-agent-memory-redaction.json +32 -0
package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
package/fixtures/conformance-agent-memory-ttl.json +31 -0
package/fixtures/conformance-agent-pack-export.json +26 -0
package/fixtures/conformance-agent-pack-install.json +26 -0
package/fixtures/conformance-agent-pack-provenance.json +31 -0
package/fixtures/conformance-agent-reasoning.json +29 -0
package/fixtures/conformance-approval.json +27 -0
package/fixtures/conformance-cancellable.json +33 -0
package/fixtures/conformance-cap-breach.json +27 -0
package/fixtures/conformance-capability-missing.json +23 -0
package/fixtures/conformance-channel-ttl.json +60 -0
package/fixtures/conformance-clarification.json +30 -0
package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
package/fixtures/conformance-conversation-lifecycle.json +32 -0
package/fixtures/conformance-conversation-replay.json +33 -0
package/fixtures/conformance-conversation-vs-clarification.json +26 -0
package/fixtures/conformance-delay.json +33 -0
package/fixtures/conformance-dispatch-loop.json +38 -0
package/fixtures/conformance-failure.json +23 -0
package/fixtures/conformance-idempotent.json +30 -0
package/fixtures/conformance-identity.json +32 -0
package/fixtures/conformance-interrupt-auth-required.json +28 -0
package/fixtures/conformance-interrupt-external-event.json +33 -0
package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
package/fixtures/conformance-interrupt-quorum.json +30 -0
package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
package/fixtures/conformance-message-reducer.json +31 -0
package/fixtures/conformance-multi-node.json +21 -0
package/fixtures/conformance-noop.json +23 -0
package/fixtures/conformance-orchestrator-dispatch.json +47 -0
package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
package/fixtures/conformance-orchestrator-terminate.json +44 -0
package/fixtures/conformance-stream-text.json +26 -0
package/fixtures/conformance-subworkflow-child.json +21 -0
package/fixtures/conformance-subworkflow-parent.json +49 -0
package/fixtures/conformance-version-fold.json +23 -0
package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
package/fixtures/pack-manifests/pack-private-example.json +26 -0
package/fixtures.md +404 -0
package/package.json +48 -0
package/schemas/README.md +75 -0
package/schemas/agent-manifest.schema.json +107 -0
package/schemas/agent-ref.schema.json +53 -0
package/schemas/capabilities.schema.json +287 -0
package/schemas/channel-written-payload.schema.json +55 -0
package/schemas/conversation-event.schema.json +120 -0
package/schemas/conversation-turn.schema.json +72 -0
package/schemas/debug-bundle.schema.json +196 -0
package/schemas/dispatch-config.schema.json +46 -0
package/schemas/error-envelope.schema.json +25 -0
package/schemas/memory-entry.schema.json +36 -0
package/schemas/memory-list-options.schema.json +21 -0
package/schemas/node-pack-manifest.schema.json +235 -0
package/schemas/orchestrator-decision.schema.json +60 -0
package/schemas/run-event-payloads.schema.json +663 -0
package/schemas/run-event.schema.json +116 -0
package/schemas/run-options.schema.json +81 -0
package/schemas/run-orchestrator-decided-event.schema.json +20 -0
package/schemas/run-snapshot.schema.json +121 -0
package/schemas/suspend-request.schema.json +182 -0
package/schemas/workflow-definition.schema.json +430 -0
package/src/cli.ts +187 -0
package/src/lib/a2a-fake-peer.ts +233 -0
package/src/lib/canaries.ts +186 -0
package/src/lib/driver.ts +96 -0
package/src/lib/env.ts +49 -0
package/src/lib/fixtures.ts +93 -0
package/src/lib/mcp-fake-server.ts +185 -0
package/src/lib/multi-agent-capabilities.ts +155 -0
package/src/lib/multiProcess.ts +141 -0
package/src/lib/otel-collector.ts +312 -0
package/src/lib/paths.ts +198 -0
package/src/lib/polling.ts +81 -0
package/src/lib/profiles.ts +258 -0
package/src/lib/sse.ts +172 -0
package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
package/src/scenarios/agentMessageReducer.test.ts +57 -0
package/src/scenarios/agentMetadata.test.ts +56 -0
package/src/scenarios/agentPackExport.test.ts +45 -0
package/src/scenarios/agentPackInstall.test.ts +50 -0
package/src/scenarios/agentPackProvenance.test.ts +53 -0
package/src/scenarios/agentReasoningEvents.test.ts +72 -0
package/src/scenarios/append-ordering.test.ts +91 -0
package/src/scenarios/approval-payload.test.ts +120 -0
package/src/scenarios/audit-log-integrity.test.ts +106 -0
package/src/scenarios/auth.test.ts +55 -0
package/src/scenarios/byok-roundtrip.test.ts +166 -0
package/src/scenarios/cancellation.test.ts +68 -0
package/src/scenarios/cap-breach.test.ts +149 -0
package/src/scenarios/channel-ttl.test.ts +70 -0
package/src/scenarios/configurable-schema.test.ts +76 -0
package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
package/src/scenarios/conversationLifecycle.test.ts +64 -0
package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
package/src/scenarios/cost-attribution.test.ts +207 -0
package/src/scenarios/debugBundle.test.ts +222 -0
package/src/scenarios/discovery.test.ts +147 -0
package/src/scenarios/dispatchLoop.test.ts +52 -0
package/src/scenarios/errors.test.ts +144 -0
package/src/scenarios/eventOrdering.test.ts +144 -0
package/src/scenarios/failure-path.test.ts +46 -0
package/src/scenarios/fixtures-gating.test.ts +137 -0
package/src/scenarios/fixtures-valid.test.ts +140 -0
package/src/scenarios/highConcurrency.test.ts +263 -0
package/src/scenarios/idempotency.test.ts +83 -0
package/src/scenarios/idempotencyRetry.test.ts +130 -0
package/src/scenarios/identity-passthrough.test.ts +54 -0
package/src/scenarios/interrupt-approval.test.ts +97 -0
package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
package/src/scenarios/interrupt-clarification.test.ts +45 -0
package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
package/src/scenarios/interruptRace.test.ts +176 -0
package/src/scenarios/maliciousManifest.test.ts +154 -0
package/src/scenarios/mcp-discoverability.test.ts +129 -0
package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
package/src/scenarios/multi-node-ordering.test.ts +60 -0
package/src/scenarios/multi-region-idempotency.test.ts +52 -0
package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
package/src/scenarios/orchestratorDispatch.test.ts +66 -0
package/src/scenarios/orchestratorTermination.test.ts +54 -0
package/src/scenarios/otel-emission.test.ts +113 -0
package/src/scenarios/otel-trace-propagation.test.ts +90 -0
package/src/scenarios/pack-registry-publish.test.ts +93 -0
package/src/scenarios/pack-registry.test.ts +328 -0
package/src/scenarios/pause-resume.test.ts +109 -0
package/src/scenarios/policies.test.ts +162 -0
package/src/scenarios/profileDerivation.test.ts +335 -0
package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
package/src/scenarios/rate-limit-envelope.test.ts +97 -0
package/src/scenarios/redaction.test.ts +254 -0
package/src/scenarios/redactionAdversarial.test.ts +162 -0
package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
package/src/scenarios/replay-fork.test.ts +216 -0
package/src/scenarios/replayDeterminism.test.ts +171 -0
package/src/scenarios/route-coverage.test.ts +129 -0
package/src/scenarios/runs-lifecycle.test.ts +65 -0
package/src/scenarios/runtime-capabilities.test.ts +118 -0
package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
package/src/scenarios/staleClaim.test.ts +223 -0
package/src/scenarios/stream-modes-buffer.test.ts +148 -0
package/src/scenarios/stream-modes-mixed.test.ts +149 -0
package/src/scenarios/stream-modes.test.ts +139 -0
package/src/scenarios/streamReconnect.test.ts +162 -0
package/src/scenarios/subworkflow.test.ts +126 -0
package/src/scenarios/version-negotiation.test.ts +157 -0
package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
package/src/scenarios/wasm-pack-load.test.ts +75 -0
package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
package/src/setup.ts +173 -0
package/vitest.config.ts +17 -0

package/src/scenarios/staleClaim.test.ts ADDED Viewed

@@ -0,0 +1,223 @@
+/**
+ * Stale-claim recovery scenario per spec/v1/scale-profiles.md
+ * §"Replay semantics" + spec/v1/storage-adapters.md §"Claim acquisition."
+ *
+ * When a process holding a run claim dies without releasing the claim,
+ * another process that boots later (after the claim TTL has expired)
+ * MUST pick up the run and resume execution. The conformance contract:
+ *
+ *   - Process A starts a long-running run, writes some events,
+ *     SIGKILLs (claim left as held + expires_at populated).
+ *   - After CLAIM_TTL_MS elapses, claim is "stale" by definition.
+ *   - Process B boots pointing at the same DB; resume-on-startup
+ *     re-acquires the claim and finishes the run.
+ *   - The run's terminal status is observable through process B's
+ *     HTTP surface.
+ *
+ * **`@multi-process`** — needs `child_process.spawn` to drive two host
+ * processes against a shared SQLite file. Skipped against hosts that
+ * aren't the SQLite reference (no shared-storage contract).
+ *
+ * **`@timing-sensitive`** — relies on a configurable claim TTL, so it
+ * only applies to hosts that expose the TTL via env. The scenario is
+ * opt-in: it is skipped unless `OPENWOP_RUN_STALE_CLAIM=1` is set.
+ * `OPENWOP_STALE_CLAIM_HOST_DIR` selects the host package to spawn
+ * (default: `examples/hosts/sqlite`).
+ *
+ * @see lib/multiProcess.ts — spawnHost helper
+ * @see examples/hosts/sqlite/src/server.ts — heartbeat + resume
+ */
+import { describe, it, expect, afterEach } from 'vitest';
+import { mkdtempSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { spawnHost, type SpawnedHost } from '../lib/multiProcess.js';
+// Default off: the scenario runs only when OPENWOP_RUN_STALE_CLAIM is '1'.
+// OPENWOP_STALE_CLAIM_HOST_DIR names the host package dir (relative to repo
+// root) to spawn; that host must expose the OPENWOP_CLAIM_TTL_MS /
+// OPENWOP_HEARTBEAT_INTERVAL_MS / OPENWOP_SQLITE_PATH env vars (the reference SQLite host does).
+const HOST_PACKAGE_DIR = process.env.OPENWOP_STALE_CLAIM_HOST_DIR ?? 'examples/hosts/sqlite'; // falls back to the SQLite reference host
+const RUN_THIS_SCENARIO = process.env.OPENWOP_RUN_STALE_CLAIM === '1'; // explicit opt-in gate for the describe block below
+const APIKEY_A = 'openwop-stale-claim-A'; // bearer token used with host A
+const APIKEY_B = 'openwop-stale-claim-B'; // bearer token used with host B
+const PORT_A = 4801; // port passed to spawnHost for host A
+const PORT_B = 4802; // port passed to spawnHost for host B
+const CLAIM_TTL_MS = 2000; // passed to both hosts; the test also waits CLAIM_TTL_MS + 1000 after killing A
+const HEARTBEAT_INTERVAL_MS = 500; // passed to both hosts as heartbeatIntervalMs
+interface RunSnapshot { // fields of the GET /v1/runs/{runId} body that this test reads
+  status?: string;
+  runId?: string;
+}
+interface PollResponse { // fields of the GET /v1/runs/{runId}/events/poll body that this test reads
+  events?: Array<{ type?: string; nodeId?: string | null; data?: unknown }>;
+  isComplete?: boolean;
+}
+/**
+ * Fetches the current snapshot of a run from a host's HTTP surface.
+ *
+ * @param baseUrl - Origin of the host to query.
+ * @param apiKey - Bearer token sent in the Authorization header.
+ * @param runId - Run identifier; URL-encoded before being placed in the path.
+ * @returns The parsed JSON body, cast (not validated) to RunSnapshot.
+ * @throws Error when the response status is not OK.
+ */
+async function fetchSnapshot(baseUrl: string, apiKey: string, runId: string): Promise<RunSnapshot> {
+  const url = `${baseUrl}/v1/runs/${encodeURIComponent(runId)}`;
+  const headers = { Authorization: `Bearer ${apiKey}` };
+  const response = await fetch(url, { headers });
+  if (response.ok) {
+    const body: unknown = await response.json();
+    return body as RunSnapshot;
+  }
+  throw new Error(`GET /v1/runs/${runId} failed: ${response.status}`);
+}
+/**
+ * Polls the event log of a run once via the events/poll endpoint.
+ *
+ * @param baseUrl - Origin of the host to query.
+ * @param apiKey - Bearer token sent in the Authorization header.
+ * @param runId - Run identifier; URL-encoded before being placed in the path.
+ * @returns The parsed JSON body, cast (not validated) to PollResponse.
+ * @throws Error when the response status is not OK.
+ */
+async function fetchEvents(
+  baseUrl: string,
+  apiKey: string,
+  runId: string,
+): Promise<PollResponse> {
+  const pollUrl = `${baseUrl}/v1/runs/${encodeURIComponent(runId)}/events/poll`;
+  const response = await fetch(pollUrl, {
+    headers: { Authorization: `Bearer ${apiKey}` },
+  });
+  if (response.ok) {
+    const body: unknown = await response.json();
+    return body as PollResponse;
+  }
+  throw new Error(`poll failed: ${response.status}`);
+}
+/**
+ * Repeatedly fetches a run snapshot until its status satisfies `predicate`
+ * or the time budget runs out.
+ *
+ * @param baseUrl - Origin of the host to query.
+ * @param apiKey - Bearer token forwarded to fetchSnapshot.
+ * @param runId - Run identifier to poll.
+ * @param predicate - Called with each string status; a truthy result ends the poll.
+ * @param timeoutMs - Budget in milliseconds, checked before each fetch.
+ * @returns The first snapshot whose status matched.
+ * @throws Error when no status matched before the deadline; also propagates
+ *   any error thrown by fetchSnapshot.
+ */
+async function pollUntilStatus(
+  baseUrl: string,
+  apiKey: string,
+  runId: string,
+  predicate: (s: string) => boolean,
+  timeoutMs: number,
+): Promise<RunSnapshot> {
+  const pause = () => new Promise((resolve) => setTimeout(resolve, 200));
+  const deadline = Date.now() + timeoutMs;
+  let snapshot: RunSnapshot = {};
+  for (;;) {
+    // Deadline is checked only before a fetch, so an in-flight request is never cut short.
+    if (!(Date.now() < deadline)) break;
+    snapshot = await fetchSnapshot(baseUrl, apiKey, runId);
+    const current = snapshot.status;
+    if (typeof current === 'string' && predicate(current)) {
+      return snapshot;
+    }
+    await pause();
+  }
+  throw new Error(
+    `pollUntilStatus did not match predicate within ${timeoutMs}ms; last status: ${snapshot.status}`,
+  );
+}
+// Opt-in scenario: host A starts a run and is killed, the claim TTL lapses,
+// then host B boots on the same SQLite file and must finish the run.
+describe.skipIf(!RUN_THIS_SCENARIO)(
+  'staleClaim: orphaned run resumes on a second host process per spec/v1/storage-adapters.md',
+  () => {
+    let dbDir: string | null = null;
+    let hostA: SpawnedHost | null = null;
+    let hostB: SpawnedHost | null = null;
+    // Tear down whatever the test left behind; every step is best-effort so
+    // one cleanup failure cannot hide the test's own result.
+    afterEach(async () => {
+      if (hostA) {
+        await hostA.kill().catch(() => {});
+        hostA = null;
+      }
+      if (hostB) {
+        await hostB.shutdown().catch(() => {});
+        hostB = null;
+      }
+      if (dbDir !== null) {
+        try {
+          rmSync(dbDir, { recursive: true, force: true });
+        } catch {
+          // best-effort cleanup
+        }
+        dbDir = null;
+      }
+    });
+    it(
+      'process B picks up the orphaned run after process A dies + claim expires',
+      async () => {
+        // Phase 1: shared DB file in a temp dir.
+        dbDir = mkdtempSync(join(tmpdir(), 'openwop-stale-claim-'));
+        const dbPath = join(dbDir, 'host.sqlite');
+        // Phase 2: spawn host A and start a long-running cancellable run.
+        hostA = await spawnHost({
+          packageDir: HOST_PACKAGE_DIR,
+          port: PORT_A,
+          apiKey: APIKEY_A,
+          dbPath,
+          claimTtlMs: CLAIM_TTL_MS,
+          heartbeatIntervalMs: HEARTBEAT_INTERVAL_MS,
+        });
+        await hostA.ready();
+        const createRes = await fetch(`${hostA.baseUrl}/v1/runs`, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+            Authorization: `Bearer ${APIKEY_A}`,
+          },
+          body: JSON.stringify({
+            workflowId: 'conformance-cancellable',
+            inputs: { delayMs: 5000 },
+          }),
+        });
+        expect(createRes.status).toBe(201);
+        const { runId } = (await createRes.json()) as { runId: string };
+        // Phase 3: wait until A reports the run as `running`.
+        await pollUntilStatus(hostA.baseUrl, APIKEY_A, runId, (s) => s === 'running', 5000);
+        // Phase 4: SIGKILL A. The kill MUST NOT release the claim —
+        // graceful shutdown is the OPPOSITE behavior.
+        await hostA.kill();
+        hostA = null;
+        // Phase 5: wait for the claim TTL to lapse. With CLAIM_TTL_MS=2000
+        // we wait ~3s to be safely past expiry.
+        await new Promise((r) => setTimeout(r, CLAIM_TTL_MS + 1000));
+        // Phase 6: spawn host B at the SAME DB. Its resume-on-startup
+        // MUST find the orphaned run, claim it, and dispatch.
+        hostB = await spawnHost({
+          packageDir: HOST_PACKAGE_DIR,
+          port: PORT_B,
+          apiKey: APIKEY_B,
+          dbPath,
+          claimTtlMs: CLAIM_TTL_MS,
+          heartbeatIntervalMs: HEARTBEAT_INTERVAL_MS,
+        });
+        await hostB.ready();
+        // Phase 7: poll until B reports the run as terminal. The run
+        // restarts from the beginning of the delay node (5s) on B,
+        // plus a small slack window — generous timeout is fine.
+        const terminal = await pollUntilStatus(
+          hostB.baseUrl,
+          APIKEY_B,
+          runId,
+          (s) => s === 'completed' || s === 'failed' || s === 'cancelled',
+          15_000,
+        );
+        expect(terminal.status, 'orphaned run MUST resume to a terminal status under host B').toBe(
+          'completed',
+        );
+        // Phase 8: verify the event log is non-trivial. The assertion
+        // accepts either `run.resumed` (the SQLite host's resume marker)
+        // or `run.started`, because other hosts MAY mark a resume
+        // differently. There is deliberately no length guard: an empty
+        // event log for a completed run must FAIL here rather than skip
+        // the check and pass vacuously.
+        const events = await fetchEvents(hostB.baseUrl, APIKEY_B, runId);
+        expect(Array.isArray(events.events), 'events poll MUST return an events array').toBe(true);
+        const types = (events.events ?? []).map((e) => e.type);
+        expect(
+          types.includes('run.resumed') || types.includes('run.started'),
+          'event log MUST contain at least run.started; resume hosts SHOULD also emit run.resumed',
+        ).toBe(true);
+      },
+      60_000,
+    );
+  },
+);
+// Ungated smoke check: confirms the multiProcess helper is importable and
+// callable even when the full stale-claim scenario above is skipped.
+describe('staleClaim lib: spawnHost surface contract', () => {
+  it('spawnHost is exported and has the expected shape', () => {
+    const exportedKind = typeof spawnHost;
+    expect(exportedKind).toBe('function');
+  });
+});

package/src/scenarios/stream-modes-buffer.test.ts ADDED Viewed

@@ -0,0 +1,148 @@
+/**
+ * SSE buffering scenarios (G1 / S3) — exercises `?bufferMs=` aggregation
+ * hint against the existing `conformance-delay` fixture.
+ *
+ * Verifies:
+ *   1. Server accepts `bufferMs` in [0..5000] without error.
+ *   2. Out-of-range `bufferMs` returns 400 with `validation_error`.
+ *   3. Buffered mode emits at least one `event: batch` SSE frame whose
+ *      data is a JSON array of `RunEventDoc`.
+ *   4. Force-flush on terminal: the run.completed event arrives bundled
+ *      in a batch, not held back to the next interval.
+ *   5. Total event count in buffered mode equals the unbuffered mode
+ *      count (no events dropped).
+ *
+ * Spec references:
+ *   - stream-modes.md §Aggregation hint
+ *   - spec gap G1
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { subscribe } from '../lib/sse.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+const WORKFLOW_ID = 'conformance-delay'; // fixture that every test in this file starts a run of
+const SKIP_NO_FIXTURE = !isFixtureAdvertised(WORKFLOW_ID); // true => the describe block below is skipped
+interface RunEventDoc { // fields of a batched run event that these tests read
+  readonly type: string;
+  readonly sequence: number;
+}
+// Exercises the `?bufferMs=` aggregation hint on the SSE events endpoint.
+// Every test asserts that run creation returned 201 before using `runId`,
+// so a failed POST surfaces as itself rather than as a confusing failure
+// against `/v1/runs/undefined/events`.
+describe.skipIf(SKIP_NO_FIXTURE)('stream-modes-buffer: ?bufferMs= aggregation hint', () => {
+  it('accepts bufferMs in range and emits at least one event: batch frame', async () => {
+    const create = await driver.post('/v1/runs', { workflowId: WORKFLOW_ID });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const result = await subscribe(
+      `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=updates&bufferMs=200`,
+      { timeoutMs: 30_000 },
+    );
+    expect(result.status, driver.describe(
+      'stream-modes.md §Aggregation hint',
+      'GET /v1/runs/{runId}/events with valid bufferMs MUST return 200 SSE',
+    )).toBe(200);
+    const batchEvents = result.events.filter((e) => e.event === 'batch');
+    expect(batchEvents.length, driver.describe(
+      'stream-modes.md §Aggregation hint',
+      'buffered mode MUST emit at least one `event: batch` SSE frame',
+    )).toBeGreaterThan(0);
+    // Each batch's data is a JSON array of RunEventDoc.
+    for (const batch of batchEvents) {
+      const parsed = JSON.parse(batch.data);
+      expect(Array.isArray(parsed), driver.describe(
+        'stream-modes.md §batch data shape',
+        'event: batch data MUST parse to a JSON array',
+      )).toBe(true);
+      expect(parsed.length).toBeGreaterThan(0);
+      for (const event of parsed) {
+        expect(typeof event.sequence).toBe('number');
+        expect(typeof event.type).toBe('string');
+      }
+    }
+  });
+  it('rejects out-of-range bufferMs with 400 validation_error', async () => {
+    const create = await driver.post('/v1/runs', { workflowId: WORKFLOW_ID });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const result = await subscribe(
+      `/v1/runs/${encodeURIComponent(runId)}/events?bufferMs=99999`,
+      { timeoutMs: 5_000 },
+    );
+    expect(result.status, driver.describe(
+      'stream-modes.md §Aggregation hint range',
+      'bufferMs > 5000 MUST return 400',
+    )).toBe(400);
+    // Drain the run so it doesn't stall the test runner.
+    await pollUntilTerminal(runId);
+  });
+  it('forces flush on terminal — run.completed arrives bundled in a batch BEFORE the timer fires', async () => {
+    const create = await driver.post('/v1/runs', { workflowId: WORKFLOW_ID });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    // Use a long bufferMs (4000ms) so the only flush before terminal
+    // would come from the force-flush rule. We measure elapsed time
+    // from subscribe-start to terminal-arrival; if force-flush works,
+    // it arrives in well under bufferMs/2 (i.e., the run completes +
+    // force-flush fires + we observe it before any timer-based flush
+    // could have happened). Without force-flush, terminal would either
+    // arrive AFTER bufferMs (timer-based delivery) OR not at all
+    // (stream closed before the timer fired).
+    const BUFFER_MS = 4000;
+    const startedAt = Date.now();
+    const result = await subscribe(
+      `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=updates&bufferMs=${BUFFER_MS}`,
+      { timeoutMs: 30_000 },
+    );
+    const elapsedMs = Date.now() - startedAt;
+    const batchEvents = result.events.filter((e) => e.event === 'batch');
+    const allFlattened: RunEventDoc[] = batchEvents.flatMap(
+      (b) => JSON.parse(b.data) as RunEventDoc[],
+    );
+    const hasTerminal = allFlattened.some(
+      (e) => e.type === 'run.completed' || e.type === 'run.failed' || e.type === 'run.cancelled',
+    );
+    expect(hasTerminal, driver.describe(
+      'stream-modes.md §Aggregation hint — force-flush triggers',
+      'terminal events MUST be force-flushed; the stream MUST NOT close before delivering run.completed',
+    )).toBe(true);
+    // Force-flush fires immediately on terminal; without it, terminal
+    // would arrive ~bufferMs after the run actually completed. We allow
+    // bufferMs/2 as headroom for cold-start latency on the conformance
+    // server, but failing here proves the timer fired before terminal
+    // arrived (i.e., force-flush is broken).
+    expect(elapsedMs, driver.describe(
+      'stream-modes.md §Aggregation hint — force-flush is immediate',
+      `terminal SHOULD arrive in well under bufferMs (${BUFFER_MS}ms); observed ${elapsedMs}ms — if elapsed is close to bufferMs, force-flush is not firing`,
+    )).toBeLessThan(BUFFER_MS / 2);
+  });
+  it('bufferMs=0 behaves identically to omitting (per-event mode)', async () => {
+    const create = await driver.post('/v1/runs', { workflowId: WORKFLOW_ID });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const result = await subscribe(
+      `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=updates&bufferMs=0`,
+      { timeoutMs: 30_000 },
+    );
+    // Assert the stream was accepted first: a rejected request yields no
+    // events at all, which would make the "zero batch frames" check below
+    // pass vacuously.
+    expect(result.status, driver.describe(
+      'stream-modes.md §Aggregation hint',
+      'GET /v1/runs/{runId}/events with valid bufferMs MUST return 200 SSE',
+    )).toBe(200);
+    const batchEvents = result.events.filter((e) => e.event === 'batch');
+    expect(batchEvents.length, driver.describe(
+      'stream-modes.md §Aggregation hint — bufferMs=0 sentinel',
+      'bufferMs=0 MUST behave identically to omitting (no batch frames)',
+    )).toBe(0);
+  });
+});

package/src/scenarios/stream-modes-mixed.test.ts ADDED Viewed

@@ -0,0 +1,149 @@
+/**
+ * Mixed-mode SSE scenarios (G2 / S4) — exercises comma-separated
+ * `?streamMode=` against the existing `conformance-delay` fixture.
+ *
+ * Verifies:
+ *   1. Server accepts `streamMode=updates,messages` (mixed subset).
+ *   2. Server rejects `streamMode=values,updates` with 400 +
+ *      `unsupported_stream_mode` error envelope (values is exclusive).
+ *   3. Server rejects `streamMode=updates,bogus` (one bad mode → whole
+ *      list fails).
+ *   4. Mixed mode sees AT LEAST every event the corresponding single
+ *      mode would see (union semantics).
+ *
+ * Spec references:
+ *   - stream-modes.md §Mixed mode (closes S4)
+ *   - spec gap G2
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { subscribe, type SseEvent } from '../lib/sse.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+const WORKFLOW_ID = 'conformance-delay'; // fixture that every test in this file starts a run of
+const SKIP_NO_FIXTURE = !isFixtureAdvertised(WORKFLOW_ID); // true => the describe block below is skipped
+/**
+ * Projects a list of SSE frames onto their `event` fields, preserving order.
+ *
+ * @param events - Frames collected from a subscription.
+ * @returns One `event` value per input frame.
+ */
+function eventTypes(events: readonly SseEvent[]): string[] {
+  const toEventName = ({ event }: SseEvent): string => event;
+  return events.map(toEventName);
+}
+// Exercises comma-separated `?streamMode=` lists on the SSE events endpoint.
+// Every test asserts that run creation returned 201 before using `runId`,
+// so a failed POST surfaces as itself rather than as a confusing failure
+// against `/v1/runs/undefined/events`.
+describe.skipIf(SKIP_NO_FIXTURE)('stream-modes-mixed: comma-separated subsets', () => {
+  it('accepts streamMode=updates,messages and emits a server-closed stream', async () => {
+    const create = await driver.post('/v1/runs', {
+      workflowId: WORKFLOW_ID,
+      inputs: { delayMs: 500 },
+    });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const result = await subscribe(
+      `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=updates,messages`,
+      { timeoutMs: 15_000 },
+    );
+    expect(result.status, driver.describe(
+      'stream-modes.md §Mixed mode',
+      'streamMode=updates,messages MUST return 200',
+    )).toBe(200);
+    expect(result.closedBy, driver.describe(
+      'stream-modes.md §Mixed mode + §updates',
+      'server MUST close the stream on terminal run event',
+    )).toBe('server');
+    const types = eventTypes(result.events);
+    expect(types, driver.describe(
+      'stream-modes.md §Mixed mode (union semantics)',
+      'mixed updates,messages MUST include run.completed (admitted by updates)',
+    )).toContain('run.completed');
+  });
+  it('rejects streamMode=values,updates with 400 + unsupported_stream_mode', async () => {
+    const create = await driver.post('/v1/runs', {
+      workflowId: WORKFLOW_ID,
+      inputs: { delayMs: 100 },
+    });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const res = await driver.get(
+      `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=values,updates`,
+    );
+    expect(res.status, driver.describe(
+      'stream-modes.md §Mixed mode',
+      'values combined with another mode MUST return 400',
+    )).toBe(400);
+    const body = res.json as
+      | { error?: string; message?: string; details?: { supported?: string[] } }
+      | undefined;
+    expect(body?.error, driver.describe(
+      'stream-modes.md §Mode selection error envelope + error-envelope.schema.json',
+      'unsupported_stream_mode error envelope MUST carry an `error` string discriminator',
+    )).toBe('unsupported_stream_mode');
+    expect(typeof body?.message, driver.describe(
+      'error-envelope.schema.json',
+      'error envelope MUST carry a human-readable `message` string',
+    )).toBe('string');
+    expect(Array.isArray(body?.details?.supported), driver.describe(
+      'stream-modes.md §Mode selection error envelope',
+      'error body MUST carry `details.supported` array (NOT top-level — `details` is the canonical contextual-data slot per error-envelope.schema.json)',
+    )).toBe(true);
+    // Let the run finish so it is not left in flight after the test.
+    await pollUntilTerminal(runId);
+  });
+  it('rejects streamMode=updates,bogus (one bad mode fails the whole list)', async () => {
+    const create = await driver.post('/v1/runs', {
+      workflowId: WORKFLOW_ID,
+      inputs: { delayMs: 100 },
+    });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const res = await driver.get(
+      `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=updates,bogus`,
+    );
+    expect(res.status, driver.describe(
+      'stream-modes.md §Mixed mode + §Mode selection',
+      'partial-unknown lists MUST return 400',
+    )).toBe(400);
+    // Let the run finish so it is not left in flight after the test.
+    await pollUntilTerminal(runId);
+  });
+  it('mixed mode union: updates,debug sees every event updates sees', async () => {
+    // Run twice — once with updates only, once with updates,debug.
+    // The mixed-mode response MUST be a superset of the updates-only
+    // response (union semantics).
+    const r1 = await driver.post('/v1/runs', {
+      workflowId: WORKFLOW_ID,
+      inputs: { delayMs: 500 },
+    });
+    expect(r1.status).toBe(201);
+    const runId1 = (r1.json as { runId: string }).runId;
+    const updatesOnly = await subscribe(
+      `/v1/runs/${encodeURIComponent(runId1)}/events?streamMode=updates`,
+      { timeoutMs: 15_000 },
+    );
+    const r2 = await driver.post('/v1/runs', {
+      workflowId: WORKFLOW_ID,
+      inputs: { delayMs: 500 },
+    });
+    expect(r2.status).toBe(201);
+    const runId2 = (r2.json as { runId: string }).runId;
+    const mixed = await subscribe(
+      `/v1/runs/${encodeURIComponent(runId2)}/events?streamMode=updates,debug`,
+      { timeoutMs: 15_000 },
+    );
+    const updatesTypes = new Set(eventTypes(updatesOnly.events));
+    const mixedTypes = new Set(eventTypes(mixed.events));
+    // An empty baseline would make the superset loop below run zero
+    // iterations and pass without checking anything.
+    expect(
+      updatesTypes.size,
+      'updates-only baseline produced no events; the superset check would be vacuous',
+    ).toBeGreaterThan(0);
+    for (const t of updatesTypes) {
+      expect(mixedTypes.has(t), driver.describe(
+        'stream-modes.md §Mixed mode (union)',
+        `updates,debug MUST include every event type updates produces (missing: ${t})`,
+      )).toBe(true);
+    }
+  });
+});

package/src/scenarios/stream-modes.test.ts ADDED Viewed

@@ -0,0 +1,139 @@
+/**
+ * Stream-mode scenarios — exercises `GET /v1/runs/{runId}/events` SSE
+ * with different `streamMode` query parameters per stream-modes.md.
+ *
+ * Uses the `conformance-delay` fixture with a short delay (1s) so the
+ * stream has well-defined start + completion bounds without making
+ * tests slow.
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { subscribe, type SseEvent } from '../lib/sse.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+// Id of the fixture workflow that every run in this file is started against.
+const WORKFLOW_ID = 'conformance-delay';
+// True when isFixtureAdvertised(WORKFLOW_ID) is falsy; handed to
+// describe.skipIf so each suite below is skipped in that case.
+const SKIP_NO_FIXTURE = !isFixtureAdvertised(WORKFLOW_ID);
+/**
+ * Starts a run of the fixture workflow via `POST /v1/runs`.
+ *
+ * @param delayMs value forwarded as `inputs.delayMs` in the request body
+ * @returns the `runId` read from the response JSON
+ * @throws Error when the response status is anything other than 201
+ */
+async function startDelayRun(delayMs: number): Promise<string> {
+  const payload = { workflowId: WORKFLOW_ID, inputs: { delayMs } };
+  const response = await driver.post('/v1/runs', payload);
+  if (response.status === 201) {
+    return (response.json as { runId: string }).runId;
+  }
+  // Surface both the status and the raw body so a failed setup is easy to diagnose.
+  throw new Error(`Failed to start ${WORKFLOW_ID} run: ${response.status} ${response.text}`);
+}
+/**
+ * Projects a list of SSE events onto their `event` field, keeping order.
+ *
+ * @param events events to read
+ * @returns the `event` value of each entry, in the same order
+ */
+function eventTypes(events: readonly SseEvent[]): string[] {
+  return events.map(({ event }) => event);
+}
+// Suite: the `updates` stream of a short delay run must be closed by the
+// server and must contain the run's start and completion events.
+describe.skipIf(SKIP_NO_FIXTURE)('stream-modes: updates (default) closes on terminal event', () => {
+  it('emits at least run.started + run.completed and server closes the stream', async () => {
+    const specRef = 'stream-modes.md §updates';
+    const runId = await startDelayRun(1_000);
+    const streamUrl = `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=updates`;
+    const stream = await subscribe(streamUrl, { timeoutMs: 15_000 });
+    expect(
+      stream.closedBy,
+      driver.describe(specRef, 'server MUST close the connection on terminal run event'),
+    ).toBe('server');
+    const seenTypes = eventTypes(stream.events);
+    // Each entry pairs a required event type with the requirement text
+    // reported when that type is absent from the stream.
+    const requiredEvents: Array<[string, string]> = [
+      ['run.started', 'updates stream MUST include run.started'],
+      ['run.completed', 'updates stream MUST include run.completed for a successful run'],
+    ];
+    for (const [eventName, requirement] of requiredEvents) {
+      expect(seenTypes, driver.describe(specRef, requirement)).toContain(eventName);
+    }
+  });
+});
+// Suite: requesting an unknown stream mode must yield a 400 whose body has
+// string `error` and `message` fields and a `details.supported` array.
+describe.skipIf(SKIP_NO_FIXTURE)('stream-modes: invalid streamMode is rejected', () => {
+  it('returns 400 and a structured error body', async () => {
+    // Only the fields this test inspects; everything is optional because the
+    // body is checked at runtime rather than trusted.
+    type ErrorEnvelope = {
+      error?: unknown;
+      message?: unknown;
+      details?: { supported?: unknown };
+    };
+    const runId = await startDelayRun(1_000);
+    const eventsPath = `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=does-not-exist`;
+    const response = await driver.get(eventsPath);
+    expect(
+      response.status,
+      driver.describe('stream-modes.md §Mode selection', 'unsupported streamMode MUST return 400'),
+    ).toBe(400);
+    const envelope = response.json as ErrorEnvelope | undefined;
+    expect(
+      typeof envelope?.error,
+      driver.describe(
+        'stream-modes.md §Mode selection + error-envelope.schema.json',
+        'unsupported_stream_mode error body MUST include `error` string discriminator',
+      ),
+    ).toBe('string');
+    expect(
+      typeof envelope?.message,
+      driver.describe(
+        'error-envelope.schema.json',
+        'error envelope MUST include a human-readable `message` string',
+      ),
+    ).toBe('string');
+    const supportedModes = envelope?.details?.supported;
+    expect(
+      Array.isArray(supportedModes),
+      driver.describe(
+        'stream-modes.md §Mode selection',
+        'error body MUST include `details.supported` array of mode names (under `details` per error-envelope.schema.json)',
+      ),
+    ).toBe(true);
+  });
+});
+// Suite: the `values` stream must deliver at least one event and must be
+// closed by the server.
+describe.skipIf(SKIP_NO_FIXTURE)('stream-modes: values mode is reachable + closes on terminal', () => {
+  it('returns 200 + emits at least one event + server-closes per stream-modes.md §values', async () => {
+    const specRef = 'stream-modes.md §values';
+    const runId = await startDelayRun(1_000);
+    const streamUrl = `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=values`;
+    const { closedBy, events } = await subscribe(streamUrl, { timeoutMs: 15_000 });
+    // Payload shape is deliberately not asserted: the state.snapshot schema
+    // is implementation-shaped per spec gap S1. Only the canonical parts are
+    // checked: who closed the stream, and that something was emitted.
+    expect(
+      closedBy,
+      driver.describe(specRef, 'server MUST close the connection on terminal run event'),
+    ).toBe('server');
+    expect(
+      events.length,
+      driver.describe(specRef, 'values mode MUST emit at least one event before terminal'),
+    ).toBeGreaterThan(0);
+  });
+});
+// Suite: a `debug` stream must carry at least as many events as an `updates`
+// stream of an equivalent run, and must be closed by the server.
+describe.skipIf(SKIP_NO_FIXTURE)('stream-modes: debug emits at least as many events as updates', () => {
+  it('debug stream is a superset of updates per stream-modes.md mode-mapping', async () => {
+    // Starts a fresh 1s delay run and collects its event stream in `mode`.
+    const collect = async (mode: string) => {
+      const runId = await startDelayRun(1_000);
+      return subscribe(
+        `/v1/runs/${encodeURIComponent(runId)}/events?streamMode=${mode}`,
+        { timeoutMs: 15_000 },
+      );
+    };
+    const updatesStream = await collect('updates');
+    const debugStream = await collect('debug');
+    // Both runs use the same fixture and input, so the events seen in
+    // `updates` mode should be a subset of those seen in `debug` mode.
+    // Only the event counts are compared here.
+    expect(debugStream.events.length, driver.describe(
+      'stream-modes.md mode-to-event mapping',
+      'debug stream event count MUST be >= updates stream event count',
+    )).toBeGreaterThanOrEqual(updatesStream.events.length);
+    expect(debugStream.closedBy, driver.describe(
+      'stream-modes.md §debug',
+      'debug stream MUST close on terminal event',
+    )).toBe('server');
+  });
+});