npm - @openwop/openwop-conformance - Versions diffs - 1.5.0 → 1.6.1 - Mend

@openwop/openwop-conformance 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

package/CHANGELOG.md +27 -0
package/README.md +2 -2
package/api/asyncapi.yaml +25 -4
package/api/openapi.yaml +371 -0
package/coverage.md +31 -4
package/fixtures/conformance-phase4-nondet-tool.json +53 -0
package/fixtures/conformance-phase4-replay-divergence.json +40 -0
package/fixtures.md +5 -3
package/package.json +1 -1
package/schemas/README.md +4 -0
package/schemas/annotation-create.schema.json +37 -0
package/schemas/annotation.schema.json +56 -0
package/schemas/capabilities.schema.json +191 -3
package/schemas/credential-reference.schema.json +21 -0
package/schemas/node-pack-manifest.schema.json +112 -1
package/schemas/run-diff-response.schema.json +64 -0
package/schemas/run-event-payloads.schema.json +104 -2
package/schemas/run-event.schema.json +8 -1
package/schemas/run-snapshot.schema.json +11 -0
package/src/lib/behavior-gate.ts +51 -0
package/src/lib/driver.ts +13 -1
package/src/lib/feedback.ts +31 -0
package/src/lib/saml-idp.ts +179 -0
package/src/scenarios/approval-gate-events.test.ts +61 -0
package/src/scenarios/approval-gate-flow.test.ts +68 -0
package/src/scenarios/auth-saml-profile.test.ts +119 -0
package/src/scenarios/auth-scim-profile.test.ts +65 -0
package/src/scenarios/authorization-fail-closed.test.ts +80 -0
package/src/scenarios/authorization-roles-shape.test.ts +83 -0
package/src/scenarios/connector-manifest-validity.test.ts +142 -0
package/src/scenarios/credential-payload-redaction.test.ts +93 -0
package/src/scenarios/credentials-capability-shape.test.ts +90 -0
package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
package/src/scenarios/experimental-tier-shape.test.ts +192 -0
package/src/scenarios/feedback-capability-shape.test.ts +35 -0
package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
package/src/scenarios/feedback-record-and-list.test.ts +32 -0
package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
package/src/scenarios/identity-owner-shape.test.ts +64 -0
package/src/scenarios/multi-agent-confidence-escalation.test.ts +13 -12
package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
package/src/scenarios/oauth-capability-shape.test.ts +97 -0
package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
package/src/scenarios/pack-registry-isolation.test.ts +108 -0
package/src/scenarios/pack-registry-publish.test.ts +1 -1
package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
package/src/scenarios/redaction.test.ts +4 -1
package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
package/src/scenarios/run-diff.test.ts +143 -0
package/src/scenarios/sandbox-capability-gate-respected.test.ts +7 -1
package/src/scenarios/sandbox-memory-cap.test.ts +7 -5
package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +7 -1
package/src/scenarios/sandbox-no-host-env-leak.test.ts +5 -1
package/src/scenarios/sandbox-no-host-fs-escape.test.ts +9 -1
package/src/scenarios/sandbox-no-host-process-escape.test.ts +5 -1
package/src/scenarios/sandbox-no-network-escape.test.ts +5 -1
package/src/scenarios/sandbox-timeout-cap.test.ts +7 -5
package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
package/src/scenarios/spec-corpus-validity.test.ts +6 -3

package/src/scenarios/feedback-correction-redaction.test.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * feedback-correction-redaction — RFC 0056 §E + SECURITY/invariants.yaml
+ * `annotation-content-redaction`. `signal.correction` and `note` are
+ * untrusted user content; secret-shaped material MUST be redacted under
+ * SR-1 before persistence/listing/export.
+ *
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §E
+ * @see SECURITY/invariants.yaml — annotation-content-redaction
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readFeedbackCap, seedRun } from '../lib/feedback.js';
+// Secret-shaped marker planted in the untrusted fields; it must never come
+// back verbatim from the annotation list.
+const CANARY = 'sk-canary-rfc0056-do-not-leak-abc123';
+/**
+ * Scenario: secret-shaped user content submitted through the annotation
+ * endpoint must not be echoed back by the annotation list (RFC 0056 §E, SR-1).
+ *
+ * Returns without asserting (soft-skip) when the host does not advertise
+ * feedback, when `seedRun` yields no run id, or when the POST answers 501/404.
+ */
+describe('feedback-correction-redaction (RFC 0056 §E)', () => {
+  it('secret-shaped material in correction/note is redacted in the annotation list', async () => {
+    const cap = await readFeedbackCap();
+    if (cap?.supported !== true) return;
+    const runId = await seedRun('feedback-redact');
+    if (!runId) return;
+    // The canary goes into both untrusted fields: `signal.correction` and `note`.
+    const post = await driver.post(`/v1/runs/${runId}/annotations`, {
+      signal: { kind: 'correction', correction: `please use ${CANARY}` },
+      note: CANARY,
+    });
+    if (post.status === 501 || post.status === 404) return;
+    expect(post.status).toBe(201);
+    const list = await driver.get(`/v1/runs/${runId}/annotations`);
+    // Without this check a failed GET (no JSON body) would serialize to `{}`
+    // and the canary assertion below would pass without proving anything.
+    expect(
+      list.status,
+      'annotation list must be readable (200) before its content can prove redaction',
+    ).toBe(200);
+    expect(
+      JSON.stringify(list.json ?? {}).includes(CANARY),
+      driver.describe('RFC 0056 §E', 'secret-shaped material MUST be redacted before persistence/listing (SR-1)'),
+    ).toBe(false);
+  });
+});

package/src/scenarios/feedback-cross-tenant-isolation.test.ts ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * feedback-cross-tenant-isolation — RFC 0056 §E + SECURITY/invariants.yaml
+ * `annotation-cross-tenant-isolation`. A run's annotation list MUST contain
+ * only that run's annotations (mirrors CTI-1).
+ *
+ * The run-scoped check runs against any feedback host. The full cross-tenant
+ * proof (tenant B cannot read tenant A's run) needs a multi-tenant auth seam
+ * not yet standardized for this surface — that half soft-skips, mirroring
+ * `kv-cross-tenant-isolation`'s seam gate.
+ *
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §E
+ * @see SECURITY/invariants.yaml — annotation-cross-tenant-isolation
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readFeedbackCap, seedRun } from '../lib/feedback.js';
+/**
+ * Scenario: the annotation list of a run contains only annotations whose
+ * `target.runId` is that run (run-scoped half of the isolation invariant).
+ *
+ * Returns without asserting (soft-skip) when the host does not advertise
+ * feedback, when `seedRun` yields no run id, or when the POST answers 501/404.
+ */
+describe('feedback-cross-tenant-isolation (RFC 0056 §E)', () => {
+  it('a run\'s annotation list contains only that run\'s annotations', async () => {
+    const cap = await readFeedbackCap();
+    if (cap?.supported !== true) return;
+    const runId = await seedRun('feedback-cti');
+    if (!runId) return;
+    const post = await driver.post(`/v1/runs/${runId}/annotations`, { signal: { kind: 'label', label: 'cti-probe' } });
+    if (post.status === 501 || post.status === 404) return;
+    expect(post.status).toBe(201);
+    const list = await driver.get(`/v1/runs/${runId}/annotations`);
+    // A failed GET or an empty list would make the loop below iterate zero
+    // times and pass vacuously, so both are pinned first.
+    expect(
+      list.status,
+      'annotation list must be readable (200) before its entries can be checked',
+    ).toBe(200);
+    const ann = (list.json as { annotations?: Array<{ target?: { runId?: string } }> } | undefined)?.annotations ?? [];
+    expect(
+      ann.length,
+      'the annotation that was just accepted with 201 must appear in the run\'s list',
+    ).toBeGreaterThan(0);
+    for (const a of ann) {
+      expect(
+        a.target?.runId,
+        driver.describe('RFC 0056 §E', 'an annotation list MUST contain only this run\'s annotations (CTI-1)'),
+      ).toBe(runId);
+    }
+  });
+});

package/src/scenarios/feedback-fork-not-copied.test.ts ADDED Viewed

@@ -0,0 +1,40 @@
+/**
+ * feedback-fork-not-copied — RFC 0056 §D. Annotations are a per-run
+ * side-store, NOT replayable event-log entries — so a fork of an annotated
+ * run starts with ZERO annotations. Gated on feedback + fork; soft-skips
+ * when either is unavailable.
+ *
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §D
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { readFeedbackCap, seedRun } from '../lib/feedback.js';
+/**
+ * Scenario: annotate a run, fork it, and verify the fork's annotation list
+ * is empty (RFC 0056 §D).
+ *
+ * Returns without asserting (soft-skip) when feedback is not advertised, no
+ * run id is seeded, the POST answers 501/404, the run does not settle within
+ * the poll timeout, or the fork request does not answer 200/201 with a runId.
+ */
+describe('feedback-fork-not-copied (RFC 0056 §D)', () => {
+  it('a fork of an annotated run starts with zero annotations', async () => {
+    const cap = await readFeedbackCap();
+    if (cap?.supported !== true) return;
+    const runId = await seedRun('feedback-fork');
+    if (!runId) return;
+    const post = await driver.post(`/v1/runs/${runId}/annotations`, { signal: { kind: 'flag' } });
+    if (post.status === 501 || post.status === 404) return;
+    expect(post.status).toBe(201);
+    try {
+      await pollUntilTerminal(runId, { timeoutMs: 10_000 });
+    } catch {
+      return; // run didn't reach terminal in time — soft-skip
+    }
+    const fork = await driver.post(`/v1/runs/${runId}:fork`, { fromSeq: 0, mode: 'branch' });
+    if (fork.status !== 200 && fork.status !== 201) return; // fork unsupported — soft-skip
+    const forkId = (fork.json as { runId?: string } | undefined)?.runId;
+    if (!forkId) return;
+    const list = await driver.get(`/v1/runs/${forkId}/annotations`);
+    // An error response has no `annotations` array and would collapse to `[]`
+    // below, so the status is pinned before the emptiness check.
+    expect(
+      list.status,
+      'annotation list of the fork must be readable (200) before its emptiness is asserted',
+    ).toBe(200);
+    const ann = (list.json as { annotations?: unknown[] } | undefined)?.annotations ?? [];
+    expect(
+      ann.length,
+      driver.describe('RFC 0056 §D', 'annotations are a side-store and MUST NOT be copied into a fork'),
+    ).toBe(0);
+  });
+});

package/src/scenarios/feedback-on-terminal-run.test.ts ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * feedback-on-terminal-run — RFC 0056 §C. An annotation on a COMPLETED run
+ * is accepted (proves feedback is non-blocking and post-hoc). Gated on
+ * `capabilities.feedback.supported`; soft-skips when a run can't be seeded.
+ *
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §C
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { readFeedbackCap, seedRun } from '../lib/feedback.js';
+/**
+ * Scenario: once a seeded run has settled, posting an annotation to it is
+ * still answered with 201 (RFC 0056 §C).
+ *
+ * Returns without asserting (soft-skip) when feedback is not advertised, no
+ * run id is seeded, the run does not settle within the poll timeout, or the
+ * POST answers 501/404.
+ */
+describe('feedback-on-terminal-run (RFC 0056 §C)', () => {
+  it('annotating a terminal run is accepted', async () => {
+    const feedback = await readFeedbackCap();
+    const advertised = feedback?.supported === true;
+    if (!advertised) return;
+
+    const settledRunId = await seedRun('feedback-terminal');
+    if (!settledRunId) return;
+
+    try {
+      await pollUntilTerminal(settledRunId, { timeoutMs: 10_000 });
+    } catch {
+      // The run never settled inside the timeout window; nothing to assert.
+      return;
+    }
+
+    const response = await driver.post(`/v1/runs/${settledRunId}/annotations`, { signal: { kind: 'flag' }, note: 'post-hoc review' });
+    const endpointUnavailable = response.status === 404 || response.status === 501;
+    if (endpointUnavailable) return;
+
+    expect(
+      response.status,
+      driver.describe('RFC 0056 §C', 'a host MUST accept an annotation on a terminal run'),
+    ).toBe(201);
+  });
+});

package/src/scenarios/feedback-record-and-list.test.ts ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * feedback-record-and-list — RFC 0056 §C. POST an annotation, then GET
+ * lists it back. Gated on `capabilities.feedback.supported` + the
+ * `conformance-a` seed fixture; soft-skips otherwise.
+ *
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §C
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readFeedbackCap, seedRun } from '../lib/feedback.js';
+/**
+ * Scenario: POST an annotation, then GET the run's annotation list and find
+ * the created `annotationId` in it (RFC 0056 §C).
+ *
+ * Returns without asserting (soft-skip) when feedback is not advertised, no
+ * run id is seeded, or the POST answers 501/404.
+ */
+describe('feedback-record-and-list (RFC 0056 §C)', () => {
+  it('POST an annotation then GET returns it', async () => {
+    const cap = await readFeedbackCap();
+    if (cap?.supported !== true) return;
+    const runId = await seedRun('feedback-rl');
+    if (!runId) return;
+    const post = await driver.post(`/v1/runs/${runId}/annotations`, { signal: { kind: 'rating', rating: 5 } });
+    if (post.status === 501 || post.status === 404) return;
+    expect(
+      post.status,
+      driver.describe('RFC 0056 §C', 'POST annotation returns 201 with the persisted annotation'),
+    ).toBe(201);
+    // The body may be absent even on a 201; read it defensively so a missing
+    // body fails the assertion below instead of throwing a TypeError.
+    const created = post.json as { annotationId?: string } | undefined;
+    expect(typeof created?.annotationId).toBe('string');
+    const list = await driver.get(`/v1/runs/${runId}/annotations`);
+    expect(list.status).toBe(200);
+    const ann = (list.json as { annotations?: Array<{ annotationId?: string }> } | undefined)?.annotations ?? [];
+    expect(ann.some((a) => a.annotationId === created?.annotationId)).toBe(true);
+  });
+});

package/src/scenarios/feedback-unsupported-501.test.ts ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * feedback-unsupported-501 — RFC 0056 §C. A host that does NOT advertise
+ * `capabilities.feedback.supported` MUST return `501 capability_not_provided`
+ * on the annotation endpoints (the honest signal, per `capabilities.md`) —
+ * not silently 404 the route.
+ *
+ * Soft-skips when the host advertises feedback (501 is N/A) or when the
+ * route is entirely absent (404/405 — host predates RFC 0056).
+ *
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §C
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readFeedbackCap } from '../lib/feedback.js';
+/**
+ * Scenario: a host that does not advertise feedback answers the annotation
+ * POST with 501 and the `capability_not_provided` error code (RFC 0056 §C).
+ *
+ * Returns without asserting (soft-skip) when feedback IS advertised, or when
+ * the probe answers 404/405.
+ */
+describe('feedback-unsupported-501 (RFC 0056 §C)', () => {
+  it('POST annotations returns 501 capability_not_provided when feedback is unadvertised', async () => {
+    const feedback = await readFeedbackCap();
+    const advertised = feedback?.supported === true;
+    if (advertised) return; // 501 only applies to hosts without feedback
+
+    // The run id is a fixed probe value; no run is seeded for this scenario.
+    const probe = await driver.post('/v1/runs/probe-run-rfc0056/annotations', {
+      signal: { kind: 'flag' },
+    });
+    const routeMissing = probe.status === 405 || probe.status === 404;
+    if (routeMissing) return; // no annotation route at all
+
+    expect(
+      probe.status,
+      driver.describe('rest-endpoints.md / RFC 0056 §C', 'unadvertised feedback MUST return 501, not 404'),
+    ).toBe(501);
+
+    const envelope = probe.json as { error?: string } | undefined;
+    expect(envelope?.error).toBe('capability_not_provided');
+  });
+});

package/src/scenarios/identity-owner-shape.test.ts ADDED Viewed

@@ -0,0 +1,64 @@
+/**
+ * identity-owner-shape — RFC 0048 §C verification.
+ *
+ * Status: DRAFT. RFC 0048 (tenant·workspace·principal identity model) is
+ * `Draft`. The optional `RunSnapshot.owner` triple has landed in
+ * `schemas/run-snapshot.schema.json`.
+ *
+ * Server-free schema validation of the owner triple:
+ *   - Positive: `{ tenant }` and `{ tenant, workspace, principal }` validate.
+ *   - Negative: missing `tenant` (required), or an unknown property, is rejected.
+ *
+ * The owner subschema is self-contained (no external $ref), so it compiles
+ * standalone via ajv.
+ *
+ * @see RFCS/0048-tenant-workspace-principal-identity-model.md
+ * @see schemas/run-snapshot.schema.json properties.owner
+ */
+import { describe, it, expect } from 'vitest';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import Ajv2020 from 'ajv/dist/2020.js';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+// Minimal view of run-snapshot.schema.json used by this scenario: only the
+// dialect URI (`$schema`) and the optional `owner` property subschema are read.
+interface SnapshotSchema {
+  $schema: string;
+  properties: { owner?: Record<string, unknown> };
+}
+// Read and parsed synchronously at module load. The cast is unchecked: the
+// file's shape is trusted here and probed by the first test case.
+const snapshot = JSON.parse(
+  readFileSync(join(SCHEMAS_DIR, 'run-snapshot.schema.json'), 'utf8'),
+) as SnapshotSchema;
+/**
+ * Server-free checks of the `owner` subschema: it exists, accepts the
+ * tenant-only and full-triple forms, and rejects a missing `tenant` or an
+ * unknown property.
+ */
+describe('category: identity owner-triple shape (RFC 0048 §C)', () => {
+  it('run-snapshot.schema.json defines an optional owner triple', () => {
+    const ownerDefinition = snapshot.properties.owner;
+    expect(
+      ownerDefinition,
+      'RFC 0048 §C: RunSnapshot MUST define an optional `owner` object',
+    ).toBeDefined();
+  });
+
+  // Compile the owner subschema on its own, carrying over the parent's
+  // `$schema` dialect so ajv treats it as a standalone document.
+  const validator = new Ajv2020({ allErrors: true, strict: false });
+  const standaloneOwner = Object.assign(
+    { $schema: snapshot.$schema },
+    snapshot.properties.owner as Record<string, unknown>,
+  );
+  const validate = validator.compile(standaloneOwner);
+
+  it('positive: tenant-only owner validates', () => {
+    const accepted = validate({ tenant: 'acme' });
+    expect(accepted, JSON.stringify(validate.errors)).toBe(true);
+  });
+
+  it('positive: full triple validates', () => {
+    const accepted = validate({ tenant: 'acme', workspace: 'ws-eng', principal: 'user_42' });
+    expect(accepted, JSON.stringify(validate.errors)).toBe(true);
+  });
+
+  it('negative: owner missing tenant is rejected (tenant is required)', () => {
+    const accepted = validate({ workspace: 'ws-eng' });
+    expect(accepted).toBe(false);
+  });
+
+  it('negative: unknown owner property is rejected (additionalProperties:false)', () => {
+    const accepted = validate({ tenant: 'acme', role: 'admin' });
+    expect(accepted).toBe(false);
+  });
+});

package/src/scenarios/multi-agent-confidence-escalation.test.ts CHANGED Viewed

@@ -1,15 +1,16 @@
 /**
  * multi-agent-confidence-escalation — RFC 0039 §A behavioral.
  *
- * Status: ACTIVE (advertisement-shape + behavioral). RFC 0039 Phase 2
- * filed Draft → graduated Active 2026-05-22 in the same commit chain as
- * this scenario. Capability-gated on
+ * Status: ACTIVE (advertisement-shape + behavioral). RFC 0039
+ * (multi-agent execution model `version: 2`) filed Draft → graduated
+ * Active 2026-05-22 in the same commit chain as this scenario.
+ * Capability-gated on
  * `capabilities.multiAgent.executionModel.supported: true` AND
  * `capabilities.multiAgent.executionModel.version >= 2` AND fixture
- * availability. Hosts that advertise only Phase 1 (version: 1) soft-skip
- * cleanly — the confidence-floor MUST applies only at version >= 2.
+ * availability. Hosts that advertise only `version: 1` soft-skip
+ * cleanly — the confidence-floor MUST applies only at `version >= 2`.
  *
- * Asserts (behavioral when host advertises Phase 2):
+ * Asserts (behavioral when host advertises `version >= 2`):
  *
  *   1. Advertisement shape: confidenceEscalationFloor (when present) MUST be
  *      a number in [0.5, 1.0]; floor < 0.5 is non-conformant per RFC 0039 §A.
@@ -37,11 +38,11 @@
  *      interrupt fires AND BEFORE any `core.workflowChain.event` with
  *      `phase: 'dispatch.began'` for the escalated decision's intended
  *      next-worker"). This is the load-bearing test that distinguishes
- *      Phase 2 from Phase 1: Phase 1 hosts dispatch unconditionally; Phase 2
- *      hosts gate on confidence.
+ *      `version: 2` from `version: 1`: `version: 1` hosts dispatch
+ *      unconditionally; `version: 2` hosts gate on confidence.
  *
  * @see RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A
- * @see spec/v1/multi-agent-execution.md §"Confidence escalation (RFC 0039 Phase 2)"
+ * @see spec/v1/multi-agent-execution.md §"Confidence escalation (RFC 0039)"
  * @see schemas/run-event-payloads.schema.json §coreWorkflowChainConfidenceEscalated
  */
@@ -103,14 +104,14 @@ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral
     const supported = d?.capabilities?.multiAgent?.executionModel?.supported === true;
     const versionRaw = d?.capabilities?.multiAgent?.executionModel?.version;
     const version = typeof versionRaw === 'number' ? versionRaw : 0;
-    if (!supported || version < 2) return; // soft-skip — Phase 1 hosts pass via this absence
+    if (!supported || version < 2) return; // soft-skip — `version: 1` hosts pass via this absence
     const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
     expect(create.status).toBe(201);
     const runId = (create.json as { runId: string }).runId;
     const terminal = await pollUntilTerminal(runId);
-    // Phase 2 escalation suspends the parent — NOT a terminal `completed`.
+    // RFC 0039 escalation suspends the parent — NOT a terminal `completed`.
     // The conformance pollUntilTerminal returns when the run reaches any
     // settled status. RFC 0039 §A gives hosts a choice: clarify-kind
     // escalation (→ waiting-clarification) OR escalate-kind approval
@@ -188,7 +189,7 @@ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral
       'confidence-escalated causationId MUST point at the runOrchestrator.decided that surfaced the low-confidence decision',
     ).toBe('runOrchestrator.decided');
-    // Load-bearing: NO dispatch event fired. Phase 2 gates BEFORE the loop.
+    // Load-bearing: NO dispatch event fired. RFC 0039 gates BEFORE the loop.
     const chainEvents = events.filter((e) => e.type === 'core.workflowChain.event');
     expect(
       chainEvents.length,

package/src/scenarios/multi-agent-memory-lifecycle.test.ts CHANGED Viewed

@@ -108,17 +108,92 @@ describe.skipIf(HTTP_SKIP)('multi-agent-memory-lifecycle: behavioral (RFC 0039
   // Until a memory-advertising Phase 2 host wires the seam, the contract
   // is documentation-only — surfaced as `todo` so test reporters track
   // the gap rather than reporting a vacuous PASS.
-  it.todo('MAE-2 cross-run TTL: child write expiresAt MUST be anchored at child write time, not parent start');
+  // MAE-2 is still out of stable profile via RFC 0042 §B (experimental
+  // tier): RFC 0039 §B Half B (MAE-2 + MAE-3) landed on MyndHyve
+  // 2026-05-23 via commit `a51f7bbd` (`snapshotAtSeq()` +
+  // `crossChildMemoryConcurrency: 'strict'`). The MAE-2 cross-run-ttl-
+  // roundtrip seam (POST /v1/host/sample/test/memory/cross-run-ttl-
+  // roundtrip) is still open per host-sample-test-seams.md §"Open seams"
+  // — no host has wired the seam endpoint yet, so the behavioral
+  // assertion stays `it.skip`. Hosts that implement Half B SHOULD
+  // advertise `multiAgent.executionModel.tier: 'experimental'` per
+  // RFC 0042 §A until the seam contract is wired.
+  it.skip('MAE-2 cross-run TTL: child write expiresAt MUST be anchored at child write time, not parent start — out of stable profile via RFC 0042');
-  // Behavioral assertion lands when the host implements the snapshot
-  // mechanism per RFC 0039 §B. The assertion drives:
-  //   1. Run a workflow that writes MemoryEntry { key: 'k', value: 'v1' } at index 10.
-  //   2. Write MemoryEntry { key: 'k', value: 'v2' } at index 20.
-  //   3. POST /v1/runs/{runId}:fork { fromSeq: 15 }.
-  //   4. Forked run reads MemoryEntry { key: 'k' }; MUST return 'v1' (not 'v2').
-  //   5. Alternative compliance: fork refused with
-  //      error.code: 'replay_memory_snapshot_unavailable' AND
-  //      details.fromSeq === 15.
-  // Silent substitution of v2 (current state) is non-conformant.
-  it.todo('MAE-3 replay snapshot: fork from past index MUST return memory-as-of-index OR refuse with replay_memory_snapshot_unavailable');
+  // MAE-3 flipped to behavioral 2026-05-25 — MyndHyve workflow-runtime
+  // revision `00206-tdh` advertises Phase 2 + memory and honors the
+  // POST /v1/runs/{runId}:fork mode:replay contract per
+  // host-sample-test-seams.md §"Canonical-endpoint conformance hooks"
+  // §9. The seam reuses the canonical fork endpoint plus the
+  // OPENWOP_TEST_EXPIRED_REPLAY_RUN_ID env-var convention (parallel
+  // naming to OPENWOP_TEST_EXPIRED_RUN_ID used by
+  // production-retention-expiry). Soft-skips on Phase 1 hosts, Phase 2
+  // hosts without memory, and hosts that have not seeded the env var.
+  it('MAE-3 replay snapshot refusal: fork mode:replay against a past-retention runId MUST return 422 replay_memory_snapshot_unavailable with documented envelope; silent substitution is non-conformant', async (ctx) => {
+    // Spec citation shared by every assertion message in this case.
+    const MAE3_REF = 'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3';
+
+    const discovery = await readDiscovery();
+    if (discovery === null) {
+      ctx.skip();
+      return;
+    }
+
+    // Gate: execution-model version >= 2, memory advertised, and an expired
+    // run id supplied through the environment; otherwise skip.
+    const modelVersion = discovery.capabilities?.multiAgent?.executionModel?.version;
+    const atLeastV2 = typeof modelVersion === 'number' && modelVersion >= 2;
+    const hasMemory = discovery.capabilities?.memory?.supported === true;
+    const expiredRunId = process.env.OPENWOP_TEST_EXPIRED_REPLAY_RUN_ID;
+    if (!atLeastV2 || !hasMemory || !expiredRunId) {
+      ctx.skip();
+      return;
+    }
+
+    const fromSeq = 0;
+    const forkPath = `/v1/runs/${encodeURIComponent(expiredRunId)}:fork`;
+    const res = await driver.post(forkPath, { mode: 'replay', fromSeq });
+
+    expect(
+      res.status,
+      driver.describe(
+        MAE3_REF,
+        'fork mode:replay against a past-retention runId MUST refuse with 422; silent substitution of current memory is non-conformant',
+      ),
+    ).toBe(422);
+
+    const envelope = res.json as {
+      error?: unknown;
+      details?: { fromSeq?: unknown; sourceRunId?: unknown; reason?: unknown };
+    } | null;
+    const details = envelope?.details;
+
+    expect(
+      envelope?.error,
+      driver.describe(
+        MAE3_REF,
+        'refusal envelope error code MUST be "replay_memory_snapshot_unavailable" (distinct from the pre-flight invalid_from_seq gate)',
+      ),
+    ).toBe('replay_memory_snapshot_unavailable');
+    expect(
+      details?.fromSeq,
+      driver.describe(MAE3_REF, 'refusal envelope details.fromSeq MUST echo the requested fromSeq'),
+    ).toBe(fromSeq);
+    expect(
+      details?.sourceRunId,
+      driver.describe(MAE3_REF, 'refusal envelope details.sourceRunId MUST echo the runId from the URL'),
+    ).toBe(expiredRunId);
+
+    const reason = details?.reason;
+    const reasonIsDocumented = reason === 'retention_expired' || reason === 'event_log_unavailable';
+    expect(
+      reasonIsDocumented,
+      driver.describe(
+        MAE3_REF,
+        'refusal envelope details.reason MUST be one of {"retention_expired", "event_log_unavailable"}',
+      ),
+    ).toBe(true);
+  });
 });

package/src/scenarios/multi-region-idempotency-behavior.test.ts ADDED Viewed

@@ -0,0 +1,203 @@
+/**
+ * multi-region-idempotency-behavior — RFC 0036 §C convergence-rule behavioral probe.
+ *
+ * Companion to `multi-region-idempotency.test.ts` which carries the
+ * advertisement-shape probes. This file exercises the canonical convergence
+ * algorithm specified by `spec/v1/idempotency.md` §"Multi-region idempotency
+ * annex" via the host-extension test seam at:
+ *
+ *   POST /v1/host/sample/test/multi-region/simulate-partition
+ *
+ * The seam is conformance-only (host-extension namespace), gated on the
+ * host's `OPENWOP_TEST_MULTI_REGION_SIMULATOR=true` env var. The seam itself
+ * is OPTIONAL — hosts that don't expose it soft-skip; hosts that DO expose
+ * it MUST honor the annex's convergence rule:
+ *
+ *   1. Given ≥2 conflicting `ConflictClaim` records sharing
+ *      `(tenantId, endpoint, key)`, the host's resolver MUST return the
+ *      lex-min `runId` as the winner.
+ *   2. Every region (including the winner's) gets a cache redirect entry
+ *      pointing at the winner's runId.
+ *   3. The loser's cancel reason MUST be the canonical string
+ *      `cross_region_dedup_loss`.
+ *   4. The resolver MUST be order-invariant — shuffling the input claims
+ *      MUST produce the same winner.
+ *   5. Cross-region partition simulation: same idempotency-key submitted
+ *      to 2+ regions simultaneously converges to ONE survivor per the
+ *      lex-min rule, with no coordination required.
+ *
+ * @see RFCS/0036-multi-region-and-cross-engine-guarantees.md §C
+ * @see spec/v1/idempotency.md §"Multi-region idempotency annex"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
+// One competing claim submitted to the simulate-partition seam. The scenarios
+// below send claims that share (tenantId, endpoint, key) and differ in
+// runId/region, plus one deliberately mismatched pair.
+interface ConflictClaim {
+  runId: string;
+  tenantId: string;
+  endpoint: string;
+  key: string;
+  region: string;
+}
+// Response body read back from the seam. Every field is optional because the
+// body is cast from untyped JSON and may be missing or partial.
+interface ConvergenceResult {
+  // Claim the resolver picked as the survivor.
+  winner?: ConflictClaim;
+  // Remaining claims; the scenarios expect N-1 entries for N claims.
+  losers?: ConflictClaim[];
+  // Redirect entries; the scenarios expect every `redirectToRunId` to equal the winner's runId.
+  cacheRedirects?: Array<{ region: string; cacheKey: string; redirectToRunId: string }>;
+  // Cancel reason reported for losers; expected to be `cross_region_dedup_loss`.
+  loserCancelReason?: string;
+}
+/**
+ * Posts `claims` to the multi-region simulate-partition test seam.
+ *
+ * @param claims Conflicting claims sent as `{ claims }` in the request body.
+ * @returns The HTTP status and the response JSON viewed as a
+ *   `ConvergenceResult`; an absent body becomes `{}`.
+ */
+async function simulatePartition(claims: ConflictClaim[]): Promise<{ status: number; body: ConvergenceResult }> {
+  const seamPath = '/v1/host/sample/test/multi-region/simulate-partition';
+  const response = await driver.post(seamPath, { claims });
+  const body = (response.json as ConvergenceResult) ?? {};
+  return { status: response.status, body };
+}
+/**
+ * Behavioral probes of the convergence rule through the simulate-partition
+ * seam. Each case skips when the seam answers 404.
+ */
+describe.skipIf(HTTP_SKIP)('multi-region-idempotency-behavior: convergence rule (RFC 0036 §C)', () => {
+  const CONVERGENCE_REF = 'idempotency.md §"Convergence rule"';
+
+  // Builds a claim against `POST /v1/runs`; property order matches the wire
+  // shape used throughout (runId, tenantId, endpoint, key, region).
+  const claim = (runId: string, key: string, region: string, tenantId = 't1'): ConflictClaim => ({
+    runId,
+    tenantId,
+    endpoint: 'POST /v1/runs',
+    key,
+    region,
+  });
+
+  it('two-region conflict resolves to the lex-min runId per annex §"Convergence rule"', async (ctx) => {
+    const { status, body } = await simulatePartition([
+      claim('run-b-east', 'idem-1', 'us-east-1'),
+      claim('run-a-west', 'idem-1', 'eu-west-1'),
+    ]);
+    if (status === 404) {
+      // The host does not expose the simulator seam.
+      ctx.skip();
+      return;
+    }
+    expect(
+      status,
+      driver.describe(
+        'idempotency.md §"Multi-region idempotency annex"',
+        'simulate-partition seam MUST return 200 when ≥2 conflicting claims are submitted',
+      ),
+    ).toBe(200);
+    expect(
+      body.winner?.runId,
+      driver.describe(CONVERGENCE_REF, 'winner MUST be the lex-min runId (run-a-west < run-b-east)'),
+    ).toBe('run-a-west');
+  });
+
+  it('three-region partition resolves to a single winner', async (ctx) => {
+    const { status, body } = await simulatePartition([
+      claim('zzz-3', 'idem-2', 'r1'),
+      claim('aaa-1', 'idem-2', 'r2'),
+      claim('mmm-2', 'idem-2', 'r3'),
+    ]);
+    if (status === 404) {
+      ctx.skip();
+      return;
+    }
+    expect(status).toBe(200);
+    expect(
+      body.winner?.runId,
+      driver.describe(CONVERGENCE_REF, 'winner MUST be the lex-min runId across all conflicting claims'),
+    ).toBe('aaa-1');
+    expect(
+      body.losers?.length,
+      driver.describe(CONVERGENCE_REF, 'losers array MUST contain N-1 entries when N claims conflict'),
+    ).toBe(2);
+  });
+
+  it('every region gets a cache redirect entry pointing at the winner', async (ctx) => {
+    const { status, body } = await simulatePartition([
+      claim('run-x', 'idem-3', 'r1'),
+      claim('run-a', 'idem-3', 'r2'),
+    ]);
+    if (status === 404) {
+      ctx.skip();
+      return;
+    }
+    expect(status).toBe(200);
+    const redirects = body.cacheRedirects ?? [];
+    expect(
+      redirects.length,
+      driver.describe(CONVERGENCE_REF, 'cacheRedirects MUST contain one entry per claim (including the winner)'),
+    ).toBe(2);
+    for (const { redirectToRunId } of redirects) {
+      expect(
+        redirectToRunId,
+        driver.describe(CONVERGENCE_REF, 'every cache redirect MUST point at the winner runId'),
+      ).toBe('run-a');
+    }
+  });
+
+  it('loser cancel reason MUST be the canonical `cross_region_dedup_loss` string', async (ctx) => {
+    const { status, body } = await simulatePartition([
+      claim('run-b', 'idem-4', 'r1'),
+      claim('run-a', 'idem-4', 'r2'),
+    ]);
+    if (status === 404) {
+      ctx.skip();
+      return;
+    }
+    expect(status).toBe(200);
+    expect(
+      body.loserCancelReason,
+      driver.describe(CONVERGENCE_REF, 'loserCancelReason MUST be the canonical `cross_region_dedup_loss` string'),
+    ).toBe('cross_region_dedup_loss');
+  });
+
+  it('resolver is order-invariant — shuffled inputs produce the same winner', async (ctx) => {
+    const claimC = claim('c', 'idem-5', 'r1');
+    const claimA = claim('a', 'idem-5', 'r2');
+    const claimB = claim('b', 'idem-5', 'r3');
+    const submitted: ConflictClaim[] = [claimC, claimA, claimB];
+
+    const first = await simulatePartition(submitted);
+    if (first.status === 404) {
+      ctx.skip();
+      return;
+    }
+    expect(first.status).toBe(200);
+    // Rotated order, then fully reversed order.
+    const rotated = await simulatePartition([claimB, claimC, claimA]);
+    expect(rotated.status).toBe(200);
+    const reversed = await simulatePartition([...submitted].reverse());
+    expect(reversed.status).toBe(200);
+
+    expect(
+      first.body.winner?.runId,
+      driver.describe(
+        'idempotency.md §"Convergence rule" — determinism',
+        'resolver MUST be order-invariant; all permutations MUST produce the same lex-min winner',
+      ),
+    ).toBe('a');
+    expect(rotated.body.winner?.runId).toBe('a');
+    expect(reversed.body.winner?.runId).toBe('a');
+  });
+
+  it('mismatched tuple rejects with 400 validation_error', async (ctx) => {
+    // Same key, different tenants: the claims do not share one dedup tuple.
+    const { status } = await simulatePartition([
+      claim('r1', 'idem-6', 'r1', 't1'),
+      claim('r2', 'idem-6', 'r2', 't2'),
+    ]);
+    if (status === 404) {
+      ctx.skip();
+      return;
+    }
+    expect(
+      status,
+      driver.describe(
+        CONVERGENCE_REF,
+        'claims with non-matching (tenantId, endpoint, key) MUST be rejected — it would be a programming error in the caller',
+      ),
+    ).toBe(400);
+  });
+});