npm - @openwop/openwop-conformance - Versions diffs - 1.0.0 - Mend

@openwop/openwop-conformance 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (175) hide show

package/LICENSE +201 -0
package/README.md +241 -0
package/api/asyncapi.yaml +481 -0
package/api/openapi.yaml +830 -0
package/api/redocly.yaml +8 -0
package/coverage.md +80 -0
package/dist/cli.js +161 -0
package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
package/fixtures/conformance-agent-identity.json +27 -0
package/fixtures/conformance-agent-low-confidence.json +29 -0
package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
package/fixtures/conformance-agent-memory-redaction.json +32 -0
package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
package/fixtures/conformance-agent-memory-ttl.json +31 -0
package/fixtures/conformance-agent-pack-export.json +26 -0
package/fixtures/conformance-agent-pack-install.json +26 -0
package/fixtures/conformance-agent-pack-provenance.json +31 -0
package/fixtures/conformance-agent-reasoning.json +29 -0
package/fixtures/conformance-approval.json +27 -0
package/fixtures/conformance-cancellable.json +33 -0
package/fixtures/conformance-cap-breach.json +27 -0
package/fixtures/conformance-capability-missing.json +23 -0
package/fixtures/conformance-channel-ttl.json +60 -0
package/fixtures/conformance-clarification.json +30 -0
package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
package/fixtures/conformance-conversation-lifecycle.json +32 -0
package/fixtures/conformance-conversation-replay.json +33 -0
package/fixtures/conformance-conversation-vs-clarification.json +26 -0
package/fixtures/conformance-delay.json +33 -0
package/fixtures/conformance-dispatch-loop.json +38 -0
package/fixtures/conformance-failure.json +23 -0
package/fixtures/conformance-idempotent.json +30 -0
package/fixtures/conformance-identity.json +32 -0
package/fixtures/conformance-interrupt-auth-required.json +28 -0
package/fixtures/conformance-interrupt-external-event.json +33 -0
package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
package/fixtures/conformance-interrupt-quorum.json +30 -0
package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
package/fixtures/conformance-message-reducer.json +31 -0
package/fixtures/conformance-multi-node.json +21 -0
package/fixtures/conformance-noop.json +23 -0
package/fixtures/conformance-orchestrator-dispatch.json +47 -0
package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
package/fixtures/conformance-orchestrator-terminate.json +44 -0
package/fixtures/conformance-stream-text.json +26 -0
package/fixtures/conformance-subworkflow-child.json +21 -0
package/fixtures/conformance-subworkflow-parent.json +49 -0
package/fixtures/conformance-version-fold.json +23 -0
package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
package/fixtures/pack-manifests/pack-private-example.json +26 -0
package/fixtures.md +404 -0
package/package.json +48 -0
package/schemas/README.md +75 -0
package/schemas/agent-manifest.schema.json +107 -0
package/schemas/agent-ref.schema.json +53 -0
package/schemas/capabilities.schema.json +287 -0
package/schemas/channel-written-payload.schema.json +55 -0
package/schemas/conversation-event.schema.json +120 -0
package/schemas/conversation-turn.schema.json +72 -0
package/schemas/debug-bundle.schema.json +196 -0
package/schemas/dispatch-config.schema.json +46 -0
package/schemas/error-envelope.schema.json +25 -0
package/schemas/memory-entry.schema.json +36 -0
package/schemas/memory-list-options.schema.json +21 -0
package/schemas/node-pack-manifest.schema.json +235 -0
package/schemas/orchestrator-decision.schema.json +60 -0
package/schemas/run-event-payloads.schema.json +663 -0
package/schemas/run-event.schema.json +116 -0
package/schemas/run-options.schema.json +81 -0
package/schemas/run-orchestrator-decided-event.schema.json +20 -0
package/schemas/run-snapshot.schema.json +121 -0
package/schemas/suspend-request.schema.json +182 -0
package/schemas/workflow-definition.schema.json +430 -0
package/src/cli.ts +187 -0
package/src/lib/a2a-fake-peer.ts +233 -0
package/src/lib/canaries.ts +186 -0
package/src/lib/driver.ts +96 -0
package/src/lib/env.ts +49 -0
package/src/lib/fixtures.ts +93 -0
package/src/lib/mcp-fake-server.ts +185 -0
package/src/lib/multi-agent-capabilities.ts +155 -0
package/src/lib/multiProcess.ts +141 -0
package/src/lib/otel-collector.ts +312 -0
package/src/lib/paths.ts +198 -0
package/src/lib/polling.ts +81 -0
package/src/lib/profiles.ts +258 -0
package/src/lib/sse.ts +172 -0
package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
package/src/scenarios/agentMessageReducer.test.ts +57 -0
package/src/scenarios/agentMetadata.test.ts +56 -0
package/src/scenarios/agentPackExport.test.ts +45 -0
package/src/scenarios/agentPackInstall.test.ts +50 -0
package/src/scenarios/agentPackProvenance.test.ts +53 -0
package/src/scenarios/agentReasoningEvents.test.ts +72 -0
package/src/scenarios/append-ordering.test.ts +91 -0
package/src/scenarios/approval-payload.test.ts +120 -0
package/src/scenarios/audit-log-integrity.test.ts +106 -0
package/src/scenarios/auth.test.ts +55 -0
package/src/scenarios/byok-roundtrip.test.ts +166 -0
package/src/scenarios/cancellation.test.ts +68 -0
package/src/scenarios/cap-breach.test.ts +149 -0
package/src/scenarios/channel-ttl.test.ts +70 -0
package/src/scenarios/configurable-schema.test.ts +76 -0
package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
package/src/scenarios/conversationLifecycle.test.ts +64 -0
package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
package/src/scenarios/cost-attribution.test.ts +207 -0
package/src/scenarios/debugBundle.test.ts +222 -0
package/src/scenarios/discovery.test.ts +147 -0
package/src/scenarios/dispatchLoop.test.ts +52 -0
package/src/scenarios/errors.test.ts +144 -0
package/src/scenarios/eventOrdering.test.ts +144 -0
package/src/scenarios/failure-path.test.ts +46 -0
package/src/scenarios/fixtures-gating.test.ts +137 -0
package/src/scenarios/fixtures-valid.test.ts +140 -0
package/src/scenarios/highConcurrency.test.ts +263 -0
package/src/scenarios/idempotency.test.ts +83 -0
package/src/scenarios/idempotencyRetry.test.ts +130 -0
package/src/scenarios/identity-passthrough.test.ts +54 -0
package/src/scenarios/interrupt-approval.test.ts +97 -0
package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
package/src/scenarios/interrupt-clarification.test.ts +45 -0
package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
package/src/scenarios/interruptRace.test.ts +176 -0
package/src/scenarios/maliciousManifest.test.ts +154 -0
package/src/scenarios/mcp-discoverability.test.ts +129 -0
package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
package/src/scenarios/multi-node-ordering.test.ts +60 -0
package/src/scenarios/multi-region-idempotency.test.ts +52 -0
package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
package/src/scenarios/orchestratorDispatch.test.ts +66 -0
package/src/scenarios/orchestratorTermination.test.ts +54 -0
package/src/scenarios/otel-emission.test.ts +113 -0
package/src/scenarios/otel-trace-propagation.test.ts +90 -0
package/src/scenarios/pack-registry-publish.test.ts +93 -0
package/src/scenarios/pack-registry.test.ts +328 -0
package/src/scenarios/pause-resume.test.ts +109 -0
package/src/scenarios/policies.test.ts +162 -0
package/src/scenarios/profileDerivation.test.ts +335 -0
package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
package/src/scenarios/rate-limit-envelope.test.ts +97 -0
package/src/scenarios/redaction.test.ts +254 -0
package/src/scenarios/redactionAdversarial.test.ts +162 -0
package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
package/src/scenarios/replay-fork.test.ts +216 -0
package/src/scenarios/replayDeterminism.test.ts +171 -0
package/src/scenarios/route-coverage.test.ts +129 -0
package/src/scenarios/runs-lifecycle.test.ts +65 -0
package/src/scenarios/runtime-capabilities.test.ts +118 -0
package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
package/src/scenarios/staleClaim.test.ts +223 -0
package/src/scenarios/stream-modes-buffer.test.ts +148 -0
package/src/scenarios/stream-modes-mixed.test.ts +149 -0
package/src/scenarios/stream-modes.test.ts +139 -0
package/src/scenarios/streamReconnect.test.ts +162 -0
package/src/scenarios/subworkflow.test.ts +126 -0
package/src/scenarios/version-negotiation.test.ts +157 -0
package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
package/src/scenarios/wasm-pack-load.test.ts +75 -0
package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
package/src/setup.ts +173 -0
package/vitest.config.ts +17 -0

package/src/scenarios/approval-payload.test.ts ADDED Viewed

@@ -0,0 +1,120 @@
+/**
+ * Approval-payload scenarios — `capabilities.md` §"interrupt" +
+ * `interrupt.md` §"`ApprovalResume`" + `schemas/run-event-payloads
+ * .schema.json#$defs/approvalReceived`.
+ *
+ * Vendor-neutral DISCOVERY-SHAPE contracts for the approval payload
+ * vocabulary. These run against every host's `/.well-known/openwop`
+ * surface — they don't drive an actual approval flow, which would
+ * require a configured workflow + RBAC + interactive interrupt
+ * resolution (outside the black-box contract surface this suite
+ * asserts).
+ *
+ * Why discovery-shape only:
+ *
+ *   The wire vocabulary (action enum, refineFeedback object shape,
+ *   decidedBy contract) is the cross-implementation contract. The
+ *   round-trip path (configure → trigger → resolve → assert event
+ *   shape) needs server fixtures the conformance suite doesn't
+ *   currently provide. Hosts MUST run their own integration tests
+ *   against their resolution endpoints.
+ *
+ *   Per-action required-fields scenarios (`refine` MUST carry
+ *   `refineFeedback.scope`; `edit-accept` MUST carry
+ *   `editedArtifactData`) are deferred pending a future test-mode
+ *   capability that lets conformance suites trigger an
+ *   `awaiting_approval` state without going through the full
+ *   workflow registration + run-create flow.
+ *
+ * Scenario gating:
+ *
+ *   - **Vocabulary advertisement** runs against every host. Asserts
+ *     that any approval-related capability the host advertises uses
+ *     the spec-documented action vocabulary, not the legacy
+ *     pre-correction `'edit'` form.
+ *
+ *   - **Interrupt-payload retrieval** is a future scenario gated
+ *     on test-mode capability (see CHANGELOG entry).
+ *
+ * @see interrupt.md §"`ApprovalResume`"
+ * @see schemas/run-event-payloads.schema.json#$defs/approvalReceived
+ * @see schemas/suspend-request.schema.json (actions[] enum)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/** Resume actions a client may submit against an approval suspend. */
+const CANONICAL_ACTIONS = [
+  'accept',
+  'reject',
+  'refine',
+  'edit-accept',
+  'ask',
+] as const;
+/** Actions carried on the emitted approval event: `timeout` appears, `ask` does not. */
+const CANONICAL_EVENT_ACTIONS = [
+  'accept',
+  'reject',
+  'refine',
+  'edit-accept',
+  'timeout',
+] as const;
+/** Documented `refineFeedback` scope values. */
+const CANONICAL_REFINE_SCOPES = [
+  'whole',
+  'section',
+  'items',
+] as const;
+describe('approval-payload: vocabulary discovery contract', () => {
+  it('host capability declaration does not regress on the legacy `edit` form (§7 drift pin)', async () => {
+    // History: spec commit 0e0171b (2026-04-30) briefly named the
+    // edit-accept action `'edit'` before it was corrected to
+    // `'edit-accept'`. A host that snapshotted the spec inside that
+    // ~30-minute window MAY still expose `'edit'` somewhere in its
+    // capability declaration, which signals that the declaration
+    // should be re-derived from the corrected spec.
+    //
+    // Action vocabularies in /.well-known/openwop are a
+    // `runtimeCapabilities` extension rather than a v1 mandate, so
+    // zero occurrences is the expected outcome for most hosts.
+    const discovery = await driver.get('/.well-known/openwop', { authenticated: false });
+    expect(discovery.status).toBe(200);
+    // The whole payload is serialised and scanned for the quoted token
+    // `"edit"`. Requiring the closing quote directly after `edit`
+    // keeps the canonical `"edit-accept"` from being counted.
+    const serialized = JSON.stringify(discovery.json ?? {});
+    const legacyMatches = serialized.match(/"edit"/g);
+    const legacyCount = legacyMatches === null ? 0 : legacyMatches.length;
+    expect(legacyCount, driver.describe(
+      'interrupt.md §"`ApprovalResume`"',
+      'capability declaration MUST NOT contain the legacy `"edit"` action token (use `"edit-accept"` per spec)',
+    )).toBe(0);
+  });
+  it('canonical action vocabulary is documented in spec (assertion-free reference)', () => {
+    // Pins the resume-action vocabulary (accept / reject / refine /
+    // edit-accept / ask) so later scenarios can gate on it. Hosts are
+    // not required to advertise it.
+    const expectedResumeActions = new Set(['accept', 'reject', 'refine', 'edit-accept', 'ask']);
+    expect(CANONICAL_ACTIONS).toHaveLength(5);
+    expect(new Set(CANONICAL_ACTIONS)).toEqual(expectedResumeActions);
+  });
+  it('canonical event action vocabulary differs from resume (timeout instead of ask)', () => {
+    // The two vocabularies are deliberately asymmetric:
+    //   - `'ask'` is a resume action that does NOT exit the suspend
+    //     (per interrupt.md), so it never shows up in the
+    //     approval.received event vocabulary.
+    //   - `'timeout'` is emitted by the host when the suspend window
+    //     elapses; clients cannot submit it as a resume action.
+    // This test keeps that asymmetry from drifting.
+    const expectedEventActions = new Set(['accept', 'reject', 'refine', 'edit-accept', 'timeout']);
+    const eventActions: readonly string[] = CANONICAL_EVENT_ACTIONS;
+    const resumeActions: readonly string[] = CANONICAL_ACTIONS;
+    expect(CANONICAL_EVENT_ACTIONS).toHaveLength(5);
+    expect(new Set(CANONICAL_EVENT_ACTIONS)).toEqual(expectedEventActions);
+    // Resume-only token (ask) MUST NOT appear in event vocabulary.
+    expect(eventActions).not.toContain('ask');
+    // Event-only token (timeout) MUST NOT appear in resume vocabulary.
+    expect(resumeActions).not.toContain('timeout');
+  });
+  it('refineFeedback scope vocabulary pin (§7 audit, A.5 prereq)', () => {
+    // `whole` / `section` / `items` MUST stay a stable set in v1.x.
+    // A new scope is an additive change (clients tolerate unknown
+    // values); a semantic change requires a spec discussion.
+    const expectedScopes = new Set(['whole', 'section', 'items']);
+    expect(CANONICAL_REFINE_SCOPES).toHaveLength(3);
+    expect(new Set(CANONICAL_REFINE_SCOPES)).toEqual(expectedScopes);
+  });
+});

package/src/scenarios/audit-log-integrity.test.ts ADDED Viewed

@@ -0,0 +1,106 @@
+/**
+ * Track 13: audit-log integrity profile (auth-profiles.md v1.1).
+ *
+ * Verifies that hosts claiming the `openwop-audit-log-integrity` profile:
+ *   1. Surface `capabilities.auth.auditLogIntegrity.hashChain: true`.
+ *   2. Expose `GET /v1/audit/verify` which returns `{chainValid, checkpoints, anomalies}`.
+ *   3. Report `chainValid: true` for an unmodified range.
+ *   4. Surface at least one signed checkpoint with a non-empty `signature`.
+ *
+ * Tamper detection (mutating an entry then asserting `chainValid: false`)
+ * requires admin access to the host's audit store and is NOT exercised
+ * by this black-box suite. Hosts SHOULD implement a separate internal
+ * test for tamper detection — see auth-profiles.md.
+ *
+ * @see spec/v1/auth-profiles.md §"Audit-log integrity"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/**
+ * Shape of `capabilities.auth.auditLogIntegrity` as read from the
+ * discovery document by this suite. All fields are optional at the
+ * type level; the scenarios below assert which ones must be present.
+ */
+interface AuditIntegrityCaps {
+  // Asserted to be `true` for hosts claiming the profile.
+  hashChain?: boolean;
+  // Asserted to be defined; the assertion message names ed25519 as canonical.
+  checkpointSignatureAlgorithm?: string;
+  // Asserted to be a string.
+  checkpointPublicKey?: string;
+  // Not asserted in this file — presumably the checkpoint cadence by
+  // entry count; confirm against auth-profiles.md.
+  checkpointIntervalEntries?: number;
+  // Not asserted in this file — presumably the checkpoint cadence by
+  // elapsed time; confirm against auth-profiles.md.
+  checkpointIntervalSeconds?: number;
+}
+/** Subset of `capabilities.auth` from the discovery document that this suite reads. */
+interface AuthCaps {
+  // Profile identifiers; searched for `openwop-audit-log-integrity`.
+  profiles?: string[];
+  auditLogIntegrity?: AuditIntegrityCaps;
+}
+/**
+ * Reports whether the host's discovery document lists the
+ * `openwop-audit-log-integrity` profile under
+ * `capabilities.auth.profiles`.
+ *
+ * A missing discovery body is treated as "not advertised" instead of
+ * throwing a TypeError, so callers can take their skip path cleanly.
+ *
+ * @returns `true` only when `profiles` is an array containing the profile id.
+ */
+async function isProfileAdvertised(): Promise<boolean> {
+  const disco = await driver.get('/.well-known/openwop');
+  // Other scenarios treat `json` as possibly undefined, so guard every hop.
+  const body = disco.json as { capabilities?: { auth?: AuthCaps } } | null | undefined;
+  const profiles = body?.capabilities?.auth?.profiles;
+  return Array.isArray(profiles) && profiles.includes('openwop-audit-log-integrity');
+}
+describe('audit-log-integrity: profile shape', () => {
+  it('host that claims the profile advertises required capability fields', async () => {
+    // Hosts that do not claim the profile are out of scope: warn and pass.
+    const advertised = await isProfileAdvertised();
+    if (!advertised) {
+      // eslint-disable-next-line no-console
+      console.warn('[audit-log-integrity] profile not advertised; skipping');
+      return;
+    }
+    const disco = await driver.get('/.well-known/openwop');
+    const body = disco.json as { capabilities?: { auth?: AuthCaps } };
+    const integrity: AuditIntegrityCaps = body.capabilities?.auth?.auditLogIntegrity ?? {};
+    const { hashChain, checkpointSignatureAlgorithm, checkpointPublicKey } = integrity;
+    expect(hashChain, driver.describe(
+      'auth-profiles.md §"Audit-log integrity"',
+      "openwop-audit-log-integrity profile MUST advertise auditLogIntegrity.hashChain: true",
+    )).toBe(true);
+    expect(checkpointSignatureAlgorithm, driver.describe(
+      'auth-profiles.md §"Audit-log integrity" §"Key management"',
+      'checkpointSignatureAlgorithm MUST be present (canonical: ed25519)',
+    )).toBeDefined();
+    // The public key only has its type checked here, not its content.
+    expect(typeof checkpointPublicKey).toBe('string');
+  });
+});
+describe('audit-log-integrity: verify endpoint returns chainValid', () => {
+  it('GET /v1/audit/verify on a recent range reports chainValid: true', async () => {
+    // Hosts that do not claim the profile are out of scope: warn and pass.
+    const advertised = await isProfileAdvertised();
+    if (!advertised) {
+      // eslint-disable-next-line no-console
+      console.warn('[audit-log-integrity] profile not advertised; skipping');
+      return;
+    }
+    const verify = await driver.get('/v1/audit/verify?fromSeq=0&toSeq=100');
+    // A host that claims the profile but answers 404 here violates the
+    // profile claim. Assert that first so the failure carries the spec
+    // citation rather than the generic 200 mismatch below.
+    expect(verify.status, driver.describe(
+      'auth-profiles.md §"Audit-log integrity" §"Verification endpoint"',
+      'claiming openwop-audit-log-integrity profile REQUIRES exposing GET /v1/audit/verify',
+    )).not.toBe(404);
+    expect(verify.status).toBe(200);
+    const report = verify.json as {
+      fromSeq?: number;
+      toSeq?: number;
+      chainValid?: boolean;
+      checkpoints?: Array<{ checkpoint?: string; merkleRoot?: string; signature?: string }>;
+      anomalies?: unknown[];
+    };
+    expect(report.chainValid, driver.describe(
+      'auth-profiles.md §"Audit-log integrity"',
+      'unmodified audit range MUST report chainValid: true',
+    )).toBe(true);
+    const { anomalies, checkpoints } = report;
+    // `anomalies` must be an array, and an empty one; the -1 fallback
+    // makes a missing array fail the length check too.
+    expect(Array.isArray(anomalies)).toBe(true);
+    expect(anomalies?.length ?? -1).toBe(0);
+    // Checkpoints are only inspected when the host returned some, and
+    // only the first one is checked for a non-empty signature.
+    if (Array.isArray(checkpoints) && checkpoints.length > 0) {
+      const firstCheckpoint = checkpoints[0];
+      expect(typeof firstCheckpoint.signature, 'checkpoint signature MUST be a non-empty string').toBe('string');
+      expect((firstCheckpoint.signature ?? '').length).toBeGreaterThan(0);
+    }
+  });
+});

package/src/scenarios/auth.test.ts ADDED Viewed

@@ -0,0 +1,55 @@
+/**
+ * Auth scenarios — credential rejection contracts.
+ *
+ * Tests that an authenticated endpoint (run create, `POST /v1/runs`)
+ * returns the canonical 401 envelope when called with no credential or
+ * an invalid credential. Per auth.md §3.
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/** Path the auth scenarios POST a run-create body to when probing credential rejection. */
+const KNOWN_AUTHED_PATH = '/v1/runs';
+describe('auth: missing credential', () => {
+  it('returns 401 with canonical error envelope per auth.md §3', async () => {
+    // `authenticated: false` with no headers — presumably the driver
+    // then sends no Authorization header; confirm in lib/driver.
+    const requestBody = { workflowId: 'conformance-noop' };
+    const response = await driver.post(KNOWN_AUTHED_PATH, requestBody, { authenticated: false });
+    expect(response.status, driver.describe(
+      'auth.md §3',
+      'request without Authorization header MUST return 401',
+    )).toBe(401);
+    // The envelope must carry both a machine code and a human message.
+    const envelope = response.json as { error?: unknown; message?: unknown } | undefined;
+    const errorType = typeof envelope?.error;
+    const messageType = typeof envelope?.message;
+    expect(errorType, driver.describe(
+      'auth.md §3 + rest-endpoints.md error envelope',
+      'response body MUST include `error` (machine code) string',
+    )).toBe('string');
+    expect(messageType, driver.describe(
+      'auth.md §3 + rest-endpoints.md error envelope',
+      'response body MUST include `message` (human description) string',
+    )).toBe('string');
+  });
+});
+describe('auth: invalid credential', () => {
+  it('returns 401 (not 200, not 403) per auth.md §3', async () => {
+    // Supply a syntactically plausible but bogus bearer token by hand;
+    // `authenticated: false` is kept alongside the explicit header.
+    const bogusHeaders = { Authorization: 'Bearer hk_definitely_not_a_real_key_12345' };
+    const requestBody = { workflowId: 'conformance-noop' };
+    const response = await driver.post(KNOWN_AUTHED_PATH, requestBody, {
+      authenticated: false,
+      headers: bogusHeaders,
+    });
+    expect(response.status, driver.describe(
+      'auth.md §3',
+      'request with invalid Authorization MUST return 401, not 403',
+    )).toBe(401);
+  });
+});

package/src/scenarios/byok-roundtrip.test.ts ADDED Viewed

@@ -0,0 +1,166 @@
+/**
+ * BYOK end-to-end roundtrip scenarios (`openwop-byok` profile).
+ *
+ * Companion to `redaction.test.ts` + `redactionAdversarial.test.ts`,
+ * which assert credentialRefs DON'T leak (negative tests). This file
+ * asserts credentialRefs DO resolve and DO get used (positive test) —
+ * with redaction-safe verification via SHA-256 hashing.
+ *
+ * Every scenario here passes trivially when the host returns 404/422
+ * from the start-run call (production deployments don't advertise the
+ * fixture surface). Hosts that opt into `OPENWOP_CONFORMANCE_FIXTURES=1`
+ * AND pre-provision a secret under `openwop-conformance-canary-secret`
+ * expose the surface and the scenarios run end-to-end.
+ *
+ * The scenarios assert shape + non-empty + redaction, not exact value
+ * equality — any host-defined canary value works as long as it's
+ * non-empty and the host's `secrets.resolve` returns it intact.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/run-options.md §"Credential references"
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/auth.md §"Secret resolution"
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/observability.md §"Redaction"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+/** SHA-256 hex regex — 64 lowercase hex chars exactly. */
+const SHA256_HEX_RE = /^[0-9a-f]{64}$/;
+/** Workflow id of the BYOK roundtrip fixture. */
+const BYOK_WORKFLOW_ID = 'openwop-smoke-byok-roundtrip';
+// Evaluated once at module load; `true` skips the whole suite below.
+// Presumably reflects whether the host advertises the fixture — see lib/fixtures.
+const SKIP_NO_FIXTURE = !isFixtureAdvertised(BYOK_WORKFLOW_ID);
+describe.skipIf(SKIP_NO_FIXTURE)('byok: end-to-end credentialRef resolution roundtrip (openwop-byok profile)', () => {
+  it('the canary fixture run MUST resolve a host-provisioned secret and emit SHA-256 hex', async () => {
+    const create = await driver.post('/v1/runs', {
+      workflowId: BYOK_WORKFLOW_ID,
+    });
+    // Fixture absent OR canary not provisioned — host doesn't opt in.
+    // Scenario passes trivially.
+    if (create.status === 404 || create.status === 422) {
+      return;
+    }
+    expect(create.status, driver.describe(
+      'rest-endpoints.md POST /v1/runs',
+      'starting openwop-smoke-byok-roundtrip MUST succeed when OPENWOP_CONFORMANCE_FIXTURES=1 is advertised AND openwop-conformance-canary-secret is provisioned',
+    )).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const terminal = await pollUntilTerminal(runId);
+    expect(terminal.status, driver.describe(
+      'auth.md §"Secret resolution"',
+      'BYOK fixture run MUST reach terminal completed when canary is provisioned',
+    )).toBe('completed');
+    // The fixture writes secretSha256 + secretLength as node outputs.
+    // Where they surface is host-specific: under the `resolve-secret`
+    // node key or flat, in either `outputs` or `variables`. Reaching
+    // `completed` already shows secrets.resolve returned a value; the
+    // shape checks below are additional proof when the fields are
+    // exposed on the run.
+    const variables = terminal.variables ?? {};
+    const outputs =
+      (terminal as { outputs?: Record<string, unknown> }).outputs ?? {};
+    const isRecord = (value: unknown): value is Record<string, unknown> =>
+      typeof value === 'object' && value !== null;
+    // Probe the locations in priority order and keep the first object
+    // that actually carries a fixture field. Both maps default to `{}`,
+    // so selecting on mere presence would always stop at `outputs` and
+    // never consult `variables`.
+    const locations: unknown[] = [
+      outputs['resolve-secret'],
+      variables['resolve-secret'],
+      outputs,
+      variables,
+    ];
+    const candidate = locations
+      .filter(isRecord)
+      .find((entry) => 'secretSha256' in entry || 'secretLength' in entry);
+    if (candidate === undefined) {
+      // Host doesn't expose node outputs in variables/outputs map —
+      // some hosts only expose them on the events stream. Skip the
+      // shape check; the run-completed assertion above is sufficient.
+      return;
+    }
+    const { secretSha256, secretLength } = candidate;
+    if (typeof secretSha256 === 'string') {
+      expect(secretSha256, driver.describe(
+        'auth.md §"Secret resolution"',
+        'fixture-emitted SHA-256 hex MUST be 64 lowercase hex chars',
+      )).toMatch(SHA256_HEX_RE);
+    }
+    if (typeof secretLength === 'number') {
+      expect(secretLength, driver.describe(
+        'auth.md §"Secret resolution"',
+        'resolved canary length MUST be > 0 (non-empty)',
+      )).toBeGreaterThan(0);
+    }
+  });
+  it('BYOK fixture run MUST emit a node.completed event for the resolve step', async () => {
+    const create = await driver.post('/v1/runs', {
+      workflowId: BYOK_WORKFLOW_ID,
+    });
+    // Host doesn't opt in — pass trivially.
+    if (create.status === 404 || create.status === 422) {
+      return;
+    }
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    await pollUntilTerminal(runId);
+    // Encode the host-issued id so it cannot alter the request path.
+    const eventsResp = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
+    expect(eventsResp.status).toBe(200);
+    const events = (eventsResp.json as { events: Array<{ type: string; nodeId?: string }> })
+      .events;
+    const completed = events.filter(
+      (e) => e.type === 'node.completed' && e.nodeId === 'resolve-secret',
+    );
+    expect(completed.length, driver.describe(
+      'event-log.md §node.completed',
+      'BYOK fixture node MUST emit exactly one node.completed event when secrets.resolve succeeds',
+    )).toBe(1);
+  });
+  it('BYOK fixture run event log MUST NOT echo the resolved secret value (redaction)', async () => {
+    const create = await driver.post('/v1/runs', {
+      workflowId: BYOK_WORKFLOW_ID,
+    });
+    // Host doesn't opt in — pass trivially.
+    if (create.status === 404 || create.status === 422) {
+      return;
+    }
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    await pollUntilTerminal(runId);
+    // Encode the host-issued id so it cannot alter the request path.
+    const eventsResp = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
+    expect(eventsResp.status).toBe(200);
+    // Defense-in-depth heuristic; redaction.test.ts holds the canonical
+    // assertion. The canary value is host-defined, so this test cannot
+    // search for it directly. Instead it scans the serialised event log
+    // for keys that would plausibly carry a raw secret.
+    const dump = JSON.stringify(eventsResp.json);
+    // `secretSha256: <hash>` and `secretLength: <n>` ARE allowed in the
+    // log (hash + length, not the raw value). Flagged instead:
+    //   - a `value` key holding a string of 8+ chars that is followed
+    //     later in the log by `secretSha256`;
+    //   - a `password`, `plaintext` or `raw_secret` key holding a
+    //     string of 8+ chars.
+    // NOTE(review): the first pattern's `.*` spans the entire
+    // serialised log, so a long `value` string in an unrelated earlier
+    // event would also match — confirm that breadth is intended.
+    const suspiciousPatterns = [
+      /"value"\s*:\s*"[^"]{8,}".*"secretSha256"/,
+      /"password"\s*:\s*"[^"]{8,}"/,
+      /"plaintext"\s*:\s*"[^"]{8,}"/,
+      /"raw_secret"\s*:\s*"[^"]{8,}"/,
+    ];
+    for (const pat of suspiciousPatterns) {
+      expect(dump, driver.describe(
+        'observability.md §"Redaction"',
+        `event log MUST NOT contain a payload matching ${pat} — secret.echo fixture only emits hash + length`,
+      )).not.toMatch(pat);
+    }
+  });
+});

package/src/scenarios/cancellation.test.ts ADDED Viewed

@@ -0,0 +1,68 @@
+/**
+ * Cancellation scenarios — exercises `POST /v1/runs/{runId}/cancel`
+ * mid-flight using the `conformance-cancellable` fixture.
+ *
+ * The fixture sleeps `delayMs` (caller-supplied). The test starts a
+ * run with delayMs=10s, polls until `running`, issues cancel, and
+ * verifies terminal `cancelled` within 5s.
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilStatus } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+/** Workflow id of the cancellable fixture; the run sleeps for the caller-supplied `delayMs`. */
+const WORKFLOW_ID = 'conformance-cancellable';
+// Evaluated once at module load; `true` skips the in-flight cancel suite.
+// Presumably reflects whether the host advertises the fixture — see lib/fixtures.
+const SKIP_NO_FIXTURE = !isFixtureAdvertised(WORKFLOW_ID);
+describe.skipIf(SKIP_NO_FIXTURE)('cancellation: in-flight :cancel reaches terminal `cancelled`', () => {
+  it('POST /v1/runs/{runId}/cancel returns 200 and run terminates as cancelled', async () => {
+    // Start a run that sleeps for 10s so there is time to cancel it.
+    const startResponse = await driver.post('/v1/runs', {
+      workflowId: WORKFLOW_ID,
+      inputs: { delayMs: 10_000 },
+    });
+    expect(startResponse.status, driver.describe(
+      'rest-endpoints.md',
+      'POST /v1/runs MUST return 201 on accepted run',
+    )).toBe(201);
+    const { runId } = startResponse.json as { runId: string };
+    // Cancel only once the run is `running`, so the request hits a live
+    // executor rather than the dispatch queue. Boot gets up to 5s.
+    await pollUntilStatus(runId, 'running', { timeoutMs: 5_000 });
+    const cancelPath = `/v1/runs/${encodeURIComponent(runId)}/cancel`;
+    const cancelResponse = await driver.post(cancelPath, {
+      reason: 'conformance-cancellation-test',
+    });
+    expect(cancelResponse.status, driver.describe(
+      'rest-endpoints.md POST /v1/runs/{runId}/cancel',
+      'cancel MUST return 200 on accepted cancellation',
+    )).toBe(200);
+    // The cancel response may report either the finished or the
+    // in-progress state; a missing status counts as a failure.
+    const reportedStatus = (cancelResponse.json as { status?: string }).status ?? '';
+    const acceptedStatuses = ['cancelled', 'cancelling'];
+    expect(
+      acceptedStatuses.includes(reportedStatus),
+      driver.describe(
+        'rest-endpoints.md POST /v1/runs/{runId}/cancel',
+        'cancel response status MUST be one of `cancelled` or `cancelling`',
+      ),
+    ).toBe(true);
+    const finalSnapshot = await pollUntilStatus(runId, 'cancelled', { timeoutMs: 5_000 });
+    expect(finalSnapshot.status, driver.describe(
+      'fixtures.md conformance-cancellable §Terminal status',
+      'fixture MUST reach terminal `cancelled` within 5s of cancel',
+    )).toBe('cancelled');
+  });
+});
+describe('cancellation: cancelling an unknown run returns 404', () => {
+  it('POST /v1/runs/{nonexistentId}/cancel returns 404', async () => {
+    // Although the titles say 404, the assertion also accepts 403.
+    const unknownRunPath = '/v1/runs/openwop-conformance-no-such-run/cancel';
+    const { status } = await driver.post(unknownRunPath, {});
+    const isForbiddenOrNotFound = status === 403 || status === 404;
+    expect(
+      isForbiddenOrNotFound,
+      driver.describe('rest-endpoints.md', 'cancel on unknown run MUST return 404 or 403'),
+    ).toBe(true);
+  });
+});

package/src/scenarios/cap-breach.test.ts ADDED Viewed

@@ -0,0 +1,149 @@
+/**
+ * Cap-breach scenarios (G4 / CC-1) — exercises `conformance-cap-breach`
+ * fixture with `RunOptions.configurable.recursionLimit: 3` to trigger the
+ * per-run nodeExecutionCount cap.
+ *
+ * Verifies:
+ *   1. Run reaches terminal `failed` with `error.code = "recursion_limit_exceeded"`.
+ *   2. `cap.breached` event is emitted with `kind: "node-executions"` payload
+ *      containing `limit`, `observed`, and `nodeId`.
+ *   3. `cap.breached` precedes `run.failed` in the event log (the breach is
+ *      detected BEFORE the over-limit node fires, so `node.started` for the
+ *      over-limit node MUST NOT appear).
+ *
+ * Spec references:
+ *   - run-options.md §recursionLimit
+ *   - observability.md §cap.breached
+ *   - schemas/run-event-payloads.schema.json §capBreached
+ *   - docs/spec gap G4
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+const WORKFLOW_ID = 'conformance-cap-breach'; // fixture workflow every run in this file is created from
+const RECURSION_LIMIT = 3; // sent as configurable.recursionLimit; also the expected cap.breached payload.limit
+const SKIP_NO_FIXTURE = !isFixtureAdvertised(WORKFLOW_ID); // true when isFixtureAdvertised reports false; passed to describe.skipIf below
+/**
+ * Run-event record as this suite reads it from the events poll response.
+ * Only `type`, `sequence` and `payload` are inspected by the tests below;
+ * the identifier fields are declared but not read here.
+ */
+type RunEvent = Readonly<{
+  /** Event kind, e.g. `cap.breached`, `run.failed`, `node.started`. */
+  type: string;
+  /** Position in the run's event log; compared to establish ordering. */
+  sequence: number;
+  /** Event-specific body; cast to a concrete shape by the tests before use. */
+  payload?: unknown;
+  eventId: string;
+  runId: string;
+  nodeId?: string;
+}>;
+describe.skipIf(SKIP_NO_FIXTURE)('cap-breach: conformance-cap-breach fixture fails with recursion_limit_exceeded', () => {
+  /**
+   * Creates a run of the cap-breach fixture with `configurable.recursionLimit`
+   * set to `RECURSION_LIMIT`, asserts the host answered 201, and returns the
+   * new run id. Shared by both tests so that a rejected creation fails with
+   * a clear message instead of surfacing later as a polling error.
+   */
+  async function startCappedRun(): Promise<string> {
+    const create = await driver.post('/v1/runs', {
+      workflowId: WORKFLOW_ID,
+      configurable: { recursionLimit: RECURSION_LIMIT },
+    });
+    expect(create.status, driver.describe(
+      'rest-endpoints.md POST /v1/runs',
+      'run creation MUST accept the request even when configurable.recursionLimit is below the workflow size',
+    )).toBe(201);
+    return (create.json as { runId: string }).runId;
+  }
+  /**
+   * Polls the run's event log from sequence 0, asserts the poll answered 200,
+   * and returns the `events` array (empty when the body carries none).
+   */
+  async function fetchRunEvents(runId: string): Promise<RunEvent[]> {
+    const eventsRes = await driver.get(
+      `/v1/runs/${encodeURIComponent(runId)}/events/poll?lastSequence=0&timeout=1`,
+    );
+    expect(eventsRes.status, driver.describe(
+      'rest-endpoints.md',
+      'events poll for an existing run MUST return 200',
+    )).toBe(200);
+    return (eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? [];
+  }
+  it('emits cap.breached + transitions to terminal failed when configurable.recursionLimit is exceeded', async () => {
+    const runId = await startCappedRun();
+    const terminal = await pollUntilTerminal(runId);
+    expect(terminal.status, driver.describe(
+      'fixtures.md conformance-cap-breach §Terminal status',
+      'fixture MUST reach terminal `failed` when recursion limit is exceeded',
+    )).toBe('failed');
+    expect(terminal.error?.code, driver.describe(
+      'run-options.md §recursionLimit',
+      'RunSnapshot.error.code MUST equal "recursion_limit_exceeded"',
+    )).toBe('recursion_limit_exceeded');
+    expect(typeof terminal.error?.message, driver.describe(
+      'rest-endpoints.md RunSnapshot.error.message',
+      'RunSnapshot.error.message MUST be a string',
+    )).toBe('string');
+    const events = await fetchRunEvents(runId);
+    const capBreachEvents = events.filter((e) => e.type === 'cap.breached');
+    expect(capBreachEvents.length, driver.describe(
+      'observability.md §cap.breached',
+      'exactly one cap.breached event MUST be emitted on recursion-limit exceedance',
+    )).toBe(1);
+    // Optional chaining keeps the index access sound under
+    // `noUncheckedIndexedAccess`; the length assertion above already
+    // guarantees the element exists at runtime.
+    const payload = capBreachEvents[0]?.payload as
+      | { kind?: string; limit?: number; observed?: number; nodeId?: string }
+      | undefined;
+    expect(payload?.kind, driver.describe(
+      'run-event-payloads.schema.json §capBreached.kind',
+      'cap.breached payload MUST carry kind="node-executions"',
+    )).toBe('node-executions');
+    expect(payload?.limit, driver.describe(
+      'run-event-payloads.schema.json §capBreached.limit',
+      'cap.breached payload MUST carry the resolved limit (3 from configurable.recursionLimit)',
+    )).toBe(RECURSION_LIMIT);
+    expect(typeof payload?.observed, driver.describe(
+      'run-event-payloads.schema.json §capBreached.observed',
+      'cap.breached payload MUST carry the observed count as a number',
+    )).toBe('number');
+    expect(payload?.observed, driver.describe(
+      'run-event-payloads.schema.json §capBreached.observed',
+      'cap.breached observed count MUST be greater than the resolved limit',
+    )).toBeGreaterThan(RECURSION_LIMIT);
+    expect(typeof payload?.nodeId, driver.describe(
+      'run-event-payloads.schema.json §capBreached.nodeId',
+      'cap.breached payload MUST carry the offending nodeId for node-executions kind',
+    )).toBe('string');
+  });
+  it('cap.breached precedes run.failed in the event sequence (breach detected before over-limit node fires)', async () => {
+    const runId = await startCappedRun();
+    await pollUntilTerminal(runId);
+    const events = await fetchRunEvents(runId);
+    const capBreach = events.find((e) => e.type === 'cap.breached');
+    const runFailed = events.find((e) => e.type === 'run.failed');
+    expect(capBreach, 'cap.breached MUST be emitted').toBeDefined();
+    expect(runFailed, 'run.failed MUST be emitted').toBeDefined();
+    // The two presence assertions above throw when either event is missing,
+    // so the non-null assertions here cannot observe `undefined`.
+    expect(capBreach!.sequence, driver.describe(
+      'observability.md §event ordering',
+      'cap.breached MUST precede run.failed in sequence (breach detected BEFORE over-limit node fires)',
+    )).toBeLessThan(runFailed!.sequence);
+    // Count node.started events. With recursionLimit=3 and the breach
+    // detected BEFORE the 4th node fires, AT MOST 3 node.started events
+    // SHOULD appear (the over-limit node MUST NOT receive node.started).
+    // We assert a range rather than equality to tolerate transient pre-
+    // breach node failures (e.g. a `node.failed` cutting the chain
+    // short) — those would emit fewer than `RECURSION_LIMIT` started
+    // events while still satisfying the invariant.
+    const nodeStarted = events.filter((e) => e.type === 'node.started');
+    expect(nodeStarted.length, driver.describe(
+      'run-options.md §recursionLimit',
+      'at most `limit` node.started events MUST be emitted; the over-limit node MUST NOT receive node.started',
+    )).toBeLessThanOrEqual(RECURSION_LIMIT);
+    expect(nodeStarted.length, driver.describe(
+      'run-options.md §recursionLimit',
+      'at least one node MUST start before the breach (otherwise the workflow never executed)',
+    )).toBeGreaterThan(0);
+  });
+});