npm - @openwop/openwop-conformance - Versions diffs - 1.0.0 - Mend

@openwop/openwop-conformance 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (175) hide show

package/LICENSE +201 -0
package/README.md +241 -0
package/api/asyncapi.yaml +481 -0
package/api/openapi.yaml +830 -0
package/api/redocly.yaml +8 -0
package/coverage.md +80 -0
package/dist/cli.js +161 -0
package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
package/fixtures/conformance-agent-identity.json +27 -0
package/fixtures/conformance-agent-low-confidence.json +29 -0
package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
package/fixtures/conformance-agent-memory-redaction.json +32 -0
package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
package/fixtures/conformance-agent-memory-ttl.json +31 -0
package/fixtures/conformance-agent-pack-export.json +26 -0
package/fixtures/conformance-agent-pack-install.json +26 -0
package/fixtures/conformance-agent-pack-provenance.json +31 -0
package/fixtures/conformance-agent-reasoning.json +29 -0
package/fixtures/conformance-approval.json +27 -0
package/fixtures/conformance-cancellable.json +33 -0
package/fixtures/conformance-cap-breach.json +27 -0
package/fixtures/conformance-capability-missing.json +23 -0
package/fixtures/conformance-channel-ttl.json +60 -0
package/fixtures/conformance-clarification.json +30 -0
package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
package/fixtures/conformance-conversation-lifecycle.json +32 -0
package/fixtures/conformance-conversation-replay.json +33 -0
package/fixtures/conformance-conversation-vs-clarification.json +26 -0
package/fixtures/conformance-delay.json +33 -0
package/fixtures/conformance-dispatch-loop.json +38 -0
package/fixtures/conformance-failure.json +23 -0
package/fixtures/conformance-idempotent.json +30 -0
package/fixtures/conformance-identity.json +32 -0
package/fixtures/conformance-interrupt-auth-required.json +28 -0
package/fixtures/conformance-interrupt-external-event.json +33 -0
package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
package/fixtures/conformance-interrupt-quorum.json +30 -0
package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
package/fixtures/conformance-message-reducer.json +31 -0
package/fixtures/conformance-multi-node.json +21 -0
package/fixtures/conformance-noop.json +23 -0
package/fixtures/conformance-orchestrator-dispatch.json +47 -0
package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
package/fixtures/conformance-orchestrator-terminate.json +44 -0
package/fixtures/conformance-stream-text.json +26 -0
package/fixtures/conformance-subworkflow-child.json +21 -0
package/fixtures/conformance-subworkflow-parent.json +49 -0
package/fixtures/conformance-version-fold.json +23 -0
package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
package/fixtures/pack-manifests/pack-private-example.json +26 -0
package/fixtures.md +404 -0
package/package.json +48 -0
package/schemas/README.md +75 -0
package/schemas/agent-manifest.schema.json +107 -0
package/schemas/agent-ref.schema.json +53 -0
package/schemas/capabilities.schema.json +287 -0
package/schemas/channel-written-payload.schema.json +55 -0
package/schemas/conversation-event.schema.json +120 -0
package/schemas/conversation-turn.schema.json +72 -0
package/schemas/debug-bundle.schema.json +196 -0
package/schemas/dispatch-config.schema.json +46 -0
package/schemas/error-envelope.schema.json +25 -0
package/schemas/memory-entry.schema.json +36 -0
package/schemas/memory-list-options.schema.json +21 -0
package/schemas/node-pack-manifest.schema.json +235 -0
package/schemas/orchestrator-decision.schema.json +60 -0
package/schemas/run-event-payloads.schema.json +663 -0
package/schemas/run-event.schema.json +116 -0
package/schemas/run-options.schema.json +81 -0
package/schemas/run-orchestrator-decided-event.schema.json +20 -0
package/schemas/run-snapshot.schema.json +121 -0
package/schemas/suspend-request.schema.json +182 -0
package/schemas/workflow-definition.schema.json +430 -0
package/src/cli.ts +187 -0
package/src/lib/a2a-fake-peer.ts +233 -0
package/src/lib/canaries.ts +186 -0
package/src/lib/driver.ts +96 -0
package/src/lib/env.ts +49 -0
package/src/lib/fixtures.ts +93 -0
package/src/lib/mcp-fake-server.ts +185 -0
package/src/lib/multi-agent-capabilities.ts +155 -0
package/src/lib/multiProcess.ts +141 -0
package/src/lib/otel-collector.ts +312 -0
package/src/lib/paths.ts +198 -0
package/src/lib/polling.ts +81 -0
package/src/lib/profiles.ts +258 -0
package/src/lib/sse.ts +172 -0
package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
package/src/scenarios/agentMessageReducer.test.ts +57 -0
package/src/scenarios/agentMetadata.test.ts +56 -0
package/src/scenarios/agentPackExport.test.ts +45 -0
package/src/scenarios/agentPackInstall.test.ts +50 -0
package/src/scenarios/agentPackProvenance.test.ts +53 -0
package/src/scenarios/agentReasoningEvents.test.ts +72 -0
package/src/scenarios/append-ordering.test.ts +91 -0
package/src/scenarios/approval-payload.test.ts +120 -0
package/src/scenarios/audit-log-integrity.test.ts +106 -0
package/src/scenarios/auth.test.ts +55 -0
package/src/scenarios/byok-roundtrip.test.ts +166 -0
package/src/scenarios/cancellation.test.ts +68 -0
package/src/scenarios/cap-breach.test.ts +149 -0
package/src/scenarios/channel-ttl.test.ts +70 -0
package/src/scenarios/configurable-schema.test.ts +76 -0
package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
package/src/scenarios/conversationLifecycle.test.ts +64 -0
package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
package/src/scenarios/cost-attribution.test.ts +207 -0
package/src/scenarios/debugBundle.test.ts +222 -0
package/src/scenarios/discovery.test.ts +147 -0
package/src/scenarios/dispatchLoop.test.ts +52 -0
package/src/scenarios/errors.test.ts +144 -0
package/src/scenarios/eventOrdering.test.ts +144 -0
package/src/scenarios/failure-path.test.ts +46 -0
package/src/scenarios/fixtures-gating.test.ts +137 -0
package/src/scenarios/fixtures-valid.test.ts +140 -0
package/src/scenarios/highConcurrency.test.ts +263 -0
package/src/scenarios/idempotency.test.ts +83 -0
package/src/scenarios/idempotencyRetry.test.ts +130 -0
package/src/scenarios/identity-passthrough.test.ts +54 -0
package/src/scenarios/interrupt-approval.test.ts +97 -0
package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
package/src/scenarios/interrupt-clarification.test.ts +45 -0
package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
package/src/scenarios/interruptRace.test.ts +176 -0
package/src/scenarios/maliciousManifest.test.ts +154 -0
package/src/scenarios/mcp-discoverability.test.ts +129 -0
package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
package/src/scenarios/multi-node-ordering.test.ts +60 -0
package/src/scenarios/multi-region-idempotency.test.ts +52 -0
package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
package/src/scenarios/orchestratorDispatch.test.ts +66 -0
package/src/scenarios/orchestratorTermination.test.ts +54 -0
package/src/scenarios/otel-emission.test.ts +113 -0
package/src/scenarios/otel-trace-propagation.test.ts +90 -0
package/src/scenarios/pack-registry-publish.test.ts +93 -0
package/src/scenarios/pack-registry.test.ts +328 -0
package/src/scenarios/pause-resume.test.ts +109 -0
package/src/scenarios/policies.test.ts +162 -0
package/src/scenarios/profileDerivation.test.ts +335 -0
package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
package/src/scenarios/rate-limit-envelope.test.ts +97 -0
package/src/scenarios/redaction.test.ts +254 -0
package/src/scenarios/redactionAdversarial.test.ts +162 -0
package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
package/src/scenarios/replay-fork.test.ts +216 -0
package/src/scenarios/replayDeterminism.test.ts +171 -0
package/src/scenarios/route-coverage.test.ts +129 -0
package/src/scenarios/runs-lifecycle.test.ts +65 -0
package/src/scenarios/runtime-capabilities.test.ts +118 -0
package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
package/src/scenarios/staleClaim.test.ts +223 -0
package/src/scenarios/stream-modes-buffer.test.ts +148 -0
package/src/scenarios/stream-modes-mixed.test.ts +149 -0
package/src/scenarios/stream-modes.test.ts +139 -0
package/src/scenarios/streamReconnect.test.ts +162 -0
package/src/scenarios/subworkflow.test.ts +126 -0
package/src/scenarios/version-negotiation.test.ts +157 -0
package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
package/src/scenarios/wasm-pack-load.test.ts +75 -0
package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
package/src/setup.ts +173 -0
package/vitest.config.ts +17 -0

package/src/scenarios/maliciousManifest.test.ts ADDED Viewed

@@ -0,0 +1,154 @@
+/**
+ * Malicious-manifest scenarios — verify the node-pack registry rejects
+ * adversarial submission shapes per `spec/v1/registry-operations.md`
+ * §"Submission validation."
+ *
+ * Profile gating: the host's `openwop-node-packs` profile is satisfied at
+ * runtime via the registry HTTP API. Hosts that don't expose the
+ * registry routes (404 on every endpoint) are treated as skip-equivalent
+ * here: each registry-facing test returns early without asserting.
+ *
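+ * Surfaces in scope (the tests below exercise the GET-side name and
+ * version checks, the `.sig` 404 contract, and an error-catalog sanity
+ * check; the PUT-publish surfaces are listed for reference):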
+ *
+ *   1. **manifest_name_mismatch** — manifest's `name` field differs
+ *      from the URL path's name segment.
+ *   2. **manifest_version_mismatch** — manifest's `version` field
+ *      differs from the URL path's version segment.
+ *   3. **invalid_pack_name** — URL path's name segment fails the
+ *      registry's name regex.
+ *   4. **invalid_version** — URL path's version segment fails semver.
+ *   5. **tarball_path_traversal** — registry rejects tarballs whose
+ *      entries include `..` or absolute paths (this scenario can only
+ *      assert the rejection-shape contract; constructing a real
+ *      malicious tarball requires registry-internal helpers).
+ *   6. **idempotent re-publish** — sha256-identical content for an
+ *      existing (name, version) returns 200 with the existing record,
+ *      NOT 409.
+ *
+ * Cross-references SECURITY/threat-model-node-packs.md invariants
+ * `node-pack-manifest-name-match` · `node-pack-manifest-version-match` ·
+ * `node-pack-path-traversal` · `node-pack-scope-author-match`.
+ *
+ * @see spec/v1/node-packs.md §Registry HTTP API
+ * @see spec/v1/registry-operations.md §Submission validation
+ * @see SECURITY/threat-model-node-packs.md
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/** Result of probing whether the host exposes a node-pack registry. */
+interface RegistryProbe {
+  /** True only when the probe saw a 404 whose JSON body has a string `error`. */
+  available: boolean;
+}
+/**
+ * Detects a live registry by requesting a pack that is not expected to exist.
+ *
+ * The registry counts as present when the lookup answers 404 with a JSON
+ * object carrying a string `error` field. A 404 without such a body, or any
+ * other status, is reported as unavailable.
+ *
+ * @returns `{ available: true }` for a structured 404, else `{ available: false }`.
+ */
+async function probeRegistry(): Promise<RegistryProbe> {
+  const response = await driver.get('/v1/packs/probe-no-such-pack/-/0.0.0.json');
+  // Anything other than 404 is not the "registry present, pack absent" signal.
+  if (response.status !== 404) return { available: false };
+  const payload: unknown = response.json;
+  // A generic 404 page (no JSON object) means the /v1/packs/* routes are not wired up.
+  if (typeof payload !== 'object' || payload === null) return { available: false };
+  const { error } = payload as { error?: unknown };
+  return { available: typeof error === 'string' };
+}
+describe('malicious-manifest: pack-name validation per spec/v1/node-packs.md §Naming', () => {
+  // Requests manifests under malformed pack names and accepts either a 400
+  // (name rejected) or a 404 (name treated as unknown). Returns early when
+  // the registry probe reports no registry.
+  it('GET /v1/packs/{bad-name}/-/{version}.json returns 400 invalid_pack_name', async () => {
+    const probe = await probeRegistry();
+    if (!probe.available) return; // host doesn't claim openwop-node-packs
+    // Bad name shapes exercised here:
+    //   - Uppercase letters plus whitespace (`Bad Name`)
+    //   - Whitespace inside the name (`name with spaces`)
+    //   - Single character (`a`) — presumably below the registry's minimum
+    //     length / missing a scope; confirm against the name regex.
+    const badNames = ['Bad Name', 'name with spaces', 'a'];
+    for (const badName of badNames) {
+      // The version segment is a well-formed semver (`1.0.0`) so that a 400
+      // can only be attributed to the name, never to version validation.
+      const res = await driver.get(
+        `/v1/packs/${encodeURIComponent(badName)}/-/1.0.0.json`,
+      );
+      expect(
+        [400, 404].includes(res.status),
+        driver.describe(
+          'spec/v1/node-packs.md §Registry HTTP API',
+          `bad pack name "${badName}" MUST yield 400 (invalid_pack_name) or 404 (treated as unknown)`,
+        ),
+      ).toBe(true);
+    }
+  });
+});
+describe('malicious-manifest: version validation', () => {
+  // Requests manifests under malformed version segments and accepts either a
+  // 400 (version rejected) or a 404. Returns early when the registry probe
+  // reports no registry.
+  it('GET /v1/packs/{name}/-/{bad-version}.json returns 400 invalid_version', async () => {
+    const probe = await probeRegistry();
+    if (!probe.available) return;
+    // Every entry must be an INVALID semver string. `1.0.0` is a valid
+    // version and would turn this into a plain lookup (200 if the pack
+    // exists), so the two-component `1.0` is used instead.
+    const badVersions = ['not-semver', '1', '1.0', 'v1.0'];
+    for (const bad of badVersions) {
+      const res = await driver.get(
+        `/v1/packs/community.test/-/${encodeURIComponent(bad)}.json`,
+      );
+      expect(
+        [400, 404].includes(res.status),
+        driver.describe(
+          'spec/v1/node-packs.md §Registry HTTP API',
+          `bad version "${bad}" MUST yield 400 (invalid_version) or 404`,
+        ),
+      ).toBe(true);
+    }
+  });
+});
+describe('malicious-manifest: signature endpoint contract per openwop/openwop@434c8f2', () => {
+  // Asserts that asking for the signature of a pack that does not exist
+  // yields a 404, and that any string error code in the body is non-empty.
+  // Returns early when the registry probe reports no registry.
+  it('GET /v1/packs/{name}/-/{version}.sig of a non-existent pack returns 404 signature_not_available', async () => {
+    const probe = await probeRegistry();
+    if (!probe.available) return;
+    // Use a well-formed semver version: the status assertion below is strict
+    // (404 only), so a malformed version segment could legitimately draw a
+    // 400 invalid_version and fail this test for the wrong reason.
+    const res = await driver.get('/v1/packs/community.no-such-pack/-/1.0.0.sig');
+    expect(res.status, driver.describe(
+      'spec/v1/node-packs.md §`GET .sig`',
+      'missing/yanked/unsigned signature MUST return 404',
+    )).toBe(404);
+    if (typeof res.json === 'object' && res.json !== null) {
+      const body = res.json as { error?: unknown };
+      // Per openwop/openwop@434c8f2 the unified error code is
+      // `signature_not_available`. Hosts MAY use a more general 404
+      // shape; the assertion is permissive on the error code itself
+      // but strict on the status.
+      if (typeof body.error === 'string') {
+        expect(body.error.length, driver.describe(
+          'spec/v1/node-packs.md',
+          '404 response MUST carry a structured error envelope with a non-empty error code',
+        )).toBeGreaterThan(0);
+      }
+    }
+  });
+});
+describe('malicious-manifest: documented error catalog (per openwop/openwop@434c8f2)', () => {
+  it('lists are non-empty (sanity check on doc drift)', () => {
+    // Guard against silent truncation of the PUT-publish tarball error
+    // catalog: the literal list below is pinned to its expected size, so
+    // editing the list without updating the count fails here. The entries
+    // mirror the error codes named in node-packs.md §Registry HTTP API.
+    const tarballErrorCodes = [
+      'tarball_gunzip_failed',
+      'tarball_too_large',
+      'tarball_manifest_missing',
+      'tarball_manifest_too_large',
+      'tarball_manifest_not_json',
+      'tarball_entry_missing',
+      'tarball_entry_too_large',
+      'tarball_path_traversal',
+      'tarball_tar_parse_failed',
+    ] as const;
+    const failureNote = driver.describe(
+      'spec/v1/node-packs.md',
+      'documented tarball-error catalog is non-empty',
+    );
+    const catalogSize = tarballErrorCodes.length;
+    expect(catalogSize, failureNote).toBe(9);
+  });
+});

package/src/scenarios/mcp-discoverability.test.ts ADDED Viewed

@@ -0,0 +1,129 @@
+/**
+ * MCP-discoverability scenarios.
+ *
+ * `spec/v1/mcp-integration.md` §"Conformance + interop" calls out the
+ * MCP slot as host-implementation-defined (not a normative openwop field).
+ * The spec doesn't prescribe a wire-level MCP integration, but it
+ * DOES say an OpenWOP host that supports MCP "advertises the capability
+ * and (per the host's choice) lists supported MCP servers."
+ *
+ * Convention (matches lib/profiles.ts + reference hosts): the
+ * `/.well-known/openwop` body itself IS the capabilities object — there
+ * is no `capabilities` envelope. `replay`, `secrets`, `extensions`,
+ * etc. all live at the top level.
+ *
+ * What this scenario locks in: IF a host advertises MCP-compatibility
+ * — under either the standard top-level `mcp` slot OR a vendor-
+ * namespaced slot like `<vendor>.mcp` — it MUST follow a consistent
+ * shape so clients can discover serverUrls without per-vendor coupling.
+ *
+ * Required shape (when advertised):
+ *   { supported: boolean, serverUrls: string[] }
+ *
+ * Hosts that don't advertise any MCP capability skip-equivalent
+ * (test passes with no failed assertions per suite convention).
+ *
+ * @see spec/v1/mcp-integration.md
+ * @see spec/v1/positioning.md (why MCP composes with openwop)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/** Raw MCP capability block as found in the discovery body; fields are unvalidated. */
+interface McpAdvertisement {
+  supported?: unknown;
+  serverUrls?: unknown;
+}
+/** An MCP capability block together with the dotted path it was found under. */
+interface DiscoveredMcp {
+  path: string;
+  ad: McpAdvertisement;
+}
+/**
+ * Gathers every MCP advertisement present in a discovery document.
+ *
+ * Looks at the top-level `mcp` slot first, then at each other top-level
+ * object value for a nested `mcp` object (reported as `<key>.mcp`).
+ *
+ * @param discovery Parsed discovery body; non-objects yield an empty list.
+ * @returns Advertisements in discovery order, standard slot first.
+ */
+function collectMcpAdvertisements(discovery: unknown): DiscoveredMcp[] {
+  const isObjectLike = (candidate: unknown): candidate is Record<string, unknown> =>
+    typeof candidate === 'object' && candidate !== null;
+  if (!isObjectLike(discovery)) return [];
+  const found: DiscoveredMcp[] = [];
+  // Standard slot: `mcp` at the top level of the discovery body.
+  const standardSlot = discovery.mcp;
+  if (isObjectLike(standardSlot)) {
+    found.push({ path: 'mcp', ad: standardSlot as McpAdvertisement });
+  }
+  // Vendor-namespaced slots: any other top-level object that nests an `mcp` object.
+  for (const [slot, namespace] of Object.entries(discovery)) {
+    if (slot === 'mcp' || !isObjectLike(namespace)) continue;
+    if (!('mcp' in namespace)) continue;
+    const nested = namespace.mcp;
+    if (isObjectLike(nested)) {
+      found.push({ path: `${slot}.mcp`, ad: nested as McpAdvertisement });
+    }
+  }
+  return found;
+}
+/**
+ * Fetches `/.well-known/openwop` without authentication and extracts its MCP
+ * advertisements.
+ *
+ * @returns The collected advertisements, or an empty list when the discovery
+ *   request does not answer 200.
+ */
+async function fetchMcpAdvertisements(): Promise<DiscoveredMcp[]> {
+  const discovery = await driver.get('/.well-known/openwop', { authenticated: false });
+  return discovery.status === 200 ? collectMcpAdvertisements(discovery.json) : [];
+}
+describe('mcp: discoverability shape', () => {
+  // Shape check: `supported` is boolean, and when it is `true`, `serverUrls`
+  // is a non-empty array of strings.
+  it('any advertised MCP capability has well-formed shape ({supported, serverUrls})', async () => {
+    const found = await fetchMcpAdvertisements();
+    if (found.length === 0) return; // skip-equivalent: host does not advertise MCP
+    for (const { path, ad } of found) {
+      expect(typeof ad.supported, driver.describe(
+        'spec/v1/mcp-integration.md §"Conformance + interop"',
+        `${path}.supported MUST be boolean when advertised`,
+      )).toBe('boolean');
+      // Only a positive advertisement is required to list server URLs.
+      if (ad.supported !== true) continue;
+      const urls = ad.serverUrls;
+      expect(Array.isArray(urls), driver.describe(
+        'spec/v1/mcp-integration.md',
+        `${path}.serverUrls MUST be an array when supported:true`,
+      )).toBe(true);
+      if (!Array.isArray(urls)) continue;
+      expect(urls.length, driver.describe(
+        'spec/v1/mcp-integration.md',
+        `${path}.serverUrls MUST be non-empty when supported:true`,
+      )).toBeGreaterThan(0);
+      for (const entry of urls) {
+        expect(typeof entry, driver.describe(
+          'spec/v1/mcp-integration.md',
+          `${path}.serverUrls entries MUST be strings`,
+        )).toBe('string');
+      }
+    }
+  });
+  // Format check: each string entry is either host-relative (leading slash)
+  // or an absolute http(s) URL. Non-string entries are left to the shape test.
+  it('serverUrls are valid URL paths or absolute URLs', async () => {
+    const found = await fetchMcpAdvertisements();
+    if (found.length === 0) return; // skip-equivalent
+    const acceptedPrefixes = ['/', 'http://', 'https://'];
+    for (const { path, ad } of found) {
+      const urls = ad.serverUrls;
+      if (ad.supported !== true || !Array.isArray(urls)) continue;
+      for (const url of urls) {
+        if (typeof url !== 'string') continue;
+        // Anything that matches none of the accepted prefixes is ambiguous
+        // to a client trying to connect.
+        const recognised = acceptedPrefixes.some((prefix) => url.startsWith(prefix));
+        expect(recognised, driver.describe(
+          'spec/v1/mcp-integration.md',
+          `${path}.serverUrls entry "${url}" MUST be a leading-slash path or absolute http(s) URL`,
+        )).toBe(true);
+      }
+    }
+  });
+});

package/src/scenarios/mcp-tool-roundtrip.test.ts ADDED Viewed

@@ -0,0 +1,149 @@
+/**
+ * Track 6: MCP tool-call roundtrip conformance.
+ *
+ * Verifies that the host's MCP integration honors the documented trust
+ * boundary from `spec/v1/mcp-integration.md` and
+ * `SECURITY/threat-model-prompt-injection.md`:
+ *
+ *   1. The host can connect to an MCP server, list its tools, and call
+ *      `tools/call` (basic protocol fidelity).
+ *   2. Tool responses surface in the run's event log with the trust
+ *      boundary intact — payloads are clearly attributable to the MCP
+ *      server, never silently merged into trusted state.
+ *
+ * Two-level scenario:
+ *
+ *   - **Direct fake-server probe** (runs whenever the fake MCP server has been started):
+ *     hits the in-process fake MCP server directly with initialize +
+ *     tools/list + tools/call to verify its wire shape. Catches
+ *     regressions in our own test fixture.
+ *
+ *   - **Host-mediated roundtrip** (runs when host advertises an MCP
+ *     fixture or roundtrip capability): starts a workflow run, observes
+ *     events, asserts tool-call envelope visibility. Skips otherwise.
+ *
+ * Operator contract:
+ *   `OPENWOP_MCP_FAKE_SERVER=true` on the suite side; configure the host
+ *   to use the printed fake-server URL as one of its MCP servers.
+ *
+ * @see spec/v1/mcp-integration.md
+ * @see SECURITY/threat-model-prompt-injection.md
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { getMcpFakeServer } from '../lib/mcp-fake-server.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+/** Workflow id of the fixture used by the host-mediated roundtrip scenario. */
+const ROUNDTRIP_FIXTURE = 'conformance-mcp-tool-roundtrip';
+/**
+ * Sends one JSON-RPC 2.0 request to `${endpoint}/` and parses the reply.
+ *
+ * @param endpoint Base URL of the server; a trailing `/` is appended.
+ * @param method JSON-RPC method name.
+ * @param params JSON-RPC params, serialised as-is.
+ * @param id JSON-RPC request id.
+ * @returns The HTTP status and the parsed response body.
+ * @throws SyntaxError when the response body is not valid JSON.
+ */
+async function postJsonRpc(
+  endpoint: string,
+  method: string,
+  params: unknown,
+  id: number,
+): Promise<{ status: number; json: Record<string, unknown> }> {
+  const envelope = { jsonrpc: '2.0', id, method, params };
+  const response = await fetch(`${endpoint}/`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(envelope),
+  });
+  const rawBody = await response.text();
+  const parsed = JSON.parse(rawBody) as Record<string, unknown>;
+  return { status: response.status, json: parsed };
+}
+describe('mcp-tool-roundtrip: fake-server wire shape', () => {
+  // Self-check of the in-process fake MCP server: drives initialize,
+  // tools/list and tools/call directly and verifies both the replies and the
+  // server's own invocation log. Warns and returns when no fake server exists.
+  it('initialize + tools/list + tools/call echo round-trip cleanly', async () => {
+    const fake = getMcpFakeServer();
+    if (!fake) {
+      // eslint-disable-next-line no-console
+      console.warn(
+        '[mcp-tool-roundtrip] fake server not started; set OPENWOP_MCP_FAKE_SERVER=true',
+      );
+      return;
+    }
+    fake.reset();
+    // Step 1: initialize must answer 200 with a string protocolVersion.
+    const initReply = await postJsonRpc(fake.endpoint(), 'initialize', {}, 1);
+    expect(initReply.status).toBe(200);
+    const { protocolVersion } = (initReply.json.result ?? {}) as { protocolVersion?: string };
+    expect(typeof protocolVersion).toBe('string');
+    // Step 2: tools/list must advertise the `echo` tool.
+    const listReply = await postJsonRpc(fake.endpoint(), 'tools/list', {}, 2);
+    expect(listReply.status).toBe(200);
+    const { tools } = (listReply.json.result ?? {}) as {
+      tools?: ReadonlyArray<{ name?: string }>;
+    };
+    const advertisesEcho = tools?.some((tool) => tool.name === 'echo');
+    expect(advertisesEcho).toBe(true);
+    // Step 3: tools/call echo must return the text it was given.
+    const echoArgs = { name: 'echo', arguments: { text: 'hello-from-conformance' } };
+    const callReply = await postJsonRpc(fake.endpoint(), 'tools/call', echoArgs, 3);
+    expect(callReply.status).toBe(200);
+    const { content } = (callReply.json.result ?? {}) as {
+      content?: ReadonlyArray<{ type?: string; text?: string }>;
+    };
+    const firstPart = content?.[0];
+    expect(firstPart?.type).toBe('text');
+    expect(firstPart?.text).toBe('hello-from-conformance');
+    // The fake server must have logged exactly these three calls, in order.
+    const loggedMethods = fake.invocations().map(({ method }) => method);
+    expect(loggedMethods).toEqual(['initialize', 'tools/list', 'tools/call']);
+  });
+});
+describe('mcp-tool-roundtrip: host-mediated tool invocation', () => {
+  // Starts the roundtrip fixture on the host, waits for the run to finish,
+  // then checks (a) the fake MCP server recorded a `tools/call` and (b) the
+  // run's event log mentions the tool. Warns and returns when the fake
+  // server is absent or the fixture is not advertised.
+  it('host invokes the configured MCP server and surfaces the tool response in the event log', async () => {
+    const server = getMcpFakeServer();
+    if (!server) {
+      // eslint-disable-next-line no-console
+      console.warn('[mcp-tool-roundtrip] fake server not started; skipping host-mediated test');
+      return;
+    }
+    if (!isFixtureAdvertised(ROUNDTRIP_FIXTURE)) {
+      // eslint-disable-next-line no-console
+      console.warn(
+        `[mcp-tool-roundtrip] fixture ${ROUNDTRIP_FIXTURE} not advertised; skipping`,
+      );
+      return;
+    }
+    // Clear the invocation log so only calls made by this run are counted.
+    server.reset();
+    const create = await driver.post('/v1/runs', {
+      workflowId: ROUNDTRIP_FIXTURE,
+      inputs: { text: 'roundtrip-probe' },
+    });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    await pollUntilTerminal(runId, { timeoutMs: 30_000 });
+    const invocations = server.invocations();
+    const toolCalls = invocations.filter((i) => i.method === 'tools/call');
+    expect(toolCalls.length, driver.describe(
+      'mcp-integration.md §"Tool invocation"',
+      'host MUST invoke `tools/call` on the configured MCP server during the fixture run',
+    )).toBeGreaterThan(0);
+    // Trust-boundary assertion: the tool-call envelope MUST appear in the
+    // run's event log so observers can attribute its content to the
+    // MCP server (not to trusted user input). See threat-model-prompt-injection.md
+    // §"UNTRUSTED marker" — hosts MAY surface this via a dedicated event
+    // type (e.g., `agent.toolReturned`, `mcp.tool.called`) or a marked
+    // field on a node-completed payload. This scenario asserts SOME event
+    // mentions the tool name to confirm visibility.
+    const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
+    // Null-safe read: a missing or non-JSON body becomes an empty list, so
+    // the described assertion below fails instead of a bare TypeError.
+    const eventsBody = events.json as
+      | { events?: Array<{ type: string; payload?: unknown }> }
+      | null
+      | undefined;
+    const list = eventsBody?.events ?? [];
+    const haystack = JSON.stringify(list).toLowerCase();
+    expect(haystack.includes('echo'), driver.describe(
+      'mcp-integration.md + threat-model-prompt-injection.md §"UNTRUSTED marker"',
+      'host event log MUST surface the MCP tool invocation so observers can audit the trust boundary',
+    )).toBe(true);
+  });
+});

package/src/scenarios/multi-node-ordering.test.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * Multi-node ordering — exercises the `conformance-multi-node` fixture
+ * (3-node DAG: a → b → c, all noop) and asserts that node.completed
+ * events arrive in topological order via the `sequence` field on the
+ * canonical RunEvent shape.
+ *
+ * Uses `GET /v1/runs/{runId}/events/poll?lastSequence=0&timeout=1` to
+ * fetch the full event log after the run terminates. Long-poll
+ * `timeout=1` keeps the test fast — terminal runs return immediately
+ * because the server has no more events to wait for.
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+/** Fixture workflow exercised by this scenario. */
+const WORKFLOW_ID = 'conformance-multi-node';
+/** Evaluated once at module load; true when the host does not advertise the fixture. */
+const SKIP_NO_FIXTURE = !isFixtureAdvertised(WORKFLOW_ID);
+/** Fields of a run event that this scenario reads. */
+interface RunEvent {
+  readonly eventId: string;
+  readonly runId: string;
+  readonly nodeId?: string;
+  readonly type: string;
+  readonly sequence: number;
+}
+describe.skipIf(SKIP_NO_FIXTURE)('multi-node: conformance-multi-node fixture emits node.completed in topological order', () => {
+  // Runs the fixture to completion, pulls the event log via the poll
+  // endpoint, and checks the node ids of `node.completed` events, ordered by
+  // `sequence`, are exactly a, b, c.
+  it('a, b, c node.completed events arrive in DAG order by sequence', async () => {
+    const created = await driver.post('/v1/runs', { workflowId: WORKFLOW_ID });
+    expect(created.status).toBe(201);
+    const { runId } = created.json as { runId: string };
+    const finalState = await pollUntilTerminal(runId);
+    expect(finalState.status, driver.describe(
+      'fixtures.md conformance-multi-node §Terminal status',
+      'fixture MUST reach terminal `completed`',
+    )).toBe('completed');
+    const pollPath = `/v1/runs/${encodeURIComponent(runId)}/events/poll?lastSequence=0&timeout=1`;
+    const pollRes = await driver.get(pollPath);
+    expect(pollRes.status, driver.describe(
+      'rest-endpoints.md GET /v1/runs/{runId}/events/poll',
+      'event-poll MUST return 200 for known runs',
+    )).toBe(200);
+    const body = pollRes.json as { events?: RunEvent[] } | undefined;
+    const log = body?.events ?? [];
+    // Keep only completions, order them by sequence, then project to node ids.
+    const completions = log.filter((event) => event.type === 'node.completed');
+    completions.sort((earlier, later) => earlier.sequence - later.sequence);
+    const completedNodeIds = completions.map((event) => event.nodeId);
+    expect(completedNodeIds, driver.describe(
+      'fixtures.md conformance-multi-node §Topology',
+      'all three node.completed events (a, b, c) MUST be present',
+    )).toEqual(['a', 'b', 'c']);
+  });
+});

package/src/scenarios/multi-region-idempotency.test.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Track 13: multi-region idempotency capability shape (idempotency.md v1.1).
+ *
+ * Verifies that hosts advertising the multi-region idempotency annex
+ * surface a valid `capabilities.idempotency.crossRegion` value. The
+ * end-to-end partition behavior cannot be exercised black-box; this
+ * scenario validates the discovery-document shape so clients can rely
+ * on the capability for routing decisions.
+ *
+ * @see spec/v1/idempotency.md §"Multi-region idempotency"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/** Closed set of values accepted for `idempotency.crossRegion`. */
+const ALLOWED = new Set(['single-region', 'best-effort', 'strict']);
+/** Subset of the idempotency capability block that this scenario inspects. */
+interface IdempotencyCaps {
+  supported?: boolean;
+  layer1RetentionSeconds?: number;
+  layer2RetentionSeconds?: number;
+  crossRegion?: string;
+}
+describe('multi-region-idempotency: capability shape', () => {
+  // Validates the advertised `crossRegion` value against the closed enum and,
+  // when present, that the retention windows are positive. Warns and returns
+  // when the host advertises no `crossRegion`.
+  it('idempotency.crossRegion (when advertised) MUST be one of the closed enum', async () => {
+    const disco = await driver.get('/.well-known/openwop');
+    // A missing or non-object body is treated as "nothing advertised" rather
+    // than crashing on the property access below.
+    const body = (
+      typeof disco.json === 'object' && disco.json !== null ? disco.json : {}
+    ) as {
+      idempotency?: IdempotencyCaps;
+      capabilities?: { idempotency?: IdempotencyCaps };
+    };
+    // NOTE(review): mcp-discoverability.test.ts documents that the discovery
+    // body itself is the capabilities object (no `capabilities` envelope), so
+    // the top-level slot is checked first. The enveloped form is still
+    // accepted for hosts that nest it. Confirm against lib/profiles.ts.
+    const idem = body.idempotency ?? body.capabilities?.idempotency;
+    if (!idem || idem.crossRegion === undefined) {
+      // eslint-disable-next-line no-console
+      console.warn(
+        '[multi-region-idempotency] capabilities.idempotency.crossRegion not advertised; skipping',
+      );
+      return;
+    }
+    expect(ALLOWED.has(idem.crossRegion), driver.describe(
+      'idempotency.md §"Multi-region idempotency" §"Capability advertisement"',
+      'crossRegion MUST be one of {"single-region","best-effort","strict"}',
+    )).toBe(true);
+    if (idem.layer1RetentionSeconds !== undefined) {
+      expect(idem.layer1RetentionSeconds, driver.describe(
+        'idempotency.md §"Multi-region idempotency"',
+        'layer1RetentionSeconds (when advertised) MUST be greater than 0',
+      )).toBeGreaterThan(0);
+    }
+    if (idem.layer2RetentionSeconds !== undefined) {
+      expect(idem.layer2RetentionSeconds, driver.describe(
+        'idempotency.md §"Multi-region idempotency"',
+        'layer2RetentionSeconds (when advertised) MUST be greater than 0',
+      )).toBeGreaterThan(0);
+    }
+  });
+});

package/src/scenarios/orchestratorConservativePath.test.ts ADDED Viewed

@@ -0,0 +1,63 @@
+/**
+ * Multi-Agent Shift Phase 5 — CP-1 conservative-path orchestrator suspend.
+ *
+ * Verifies the CP-1 invariant: when a `core.orchestrator.supervisor`
+ * would emit a decision with `confidence < escalationThreshold`, the
+ * host MUST:
+ *   1. Hold the decision (do NOT emit runOrchestrator.decided).
+ *   2. Suspend via `node.suspended { reason: 'low-confidence' }`.
+ *   3. Transition run to `'waiting-approval'`.
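+ *   4. After human resume, emit ONE `runOrchestrator.decided` carrying
+ *      the operator-ratified decision plus the supervisor's agentId.
+ *
+ * This scenario asserts steps 1–3 only: it does not drive the human
+ * resume, so step 4 is not exercised here.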
+ *
+ * Capability-gated: skips when host doesn't advertise
+ * `capabilities.agents.orchestrator: true`. Fixture-gated: requires
+ * `conformance-orchestrator-low-confidence`.
+ *
+ * @see spec/v1/interrupt.md §`low-confidence`
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { isOrchestratorSupported } from '../lib/multi-agent-capabilities.js';
+/** Workflow fixture this scenario starts a run against. */
+const FIXTURE = 'conformance-orchestrator-low-confidence';
+/** True unless both the orchestrator capability check and the fixture check pass. */
+const SKIP = !(isOrchestratorSupported() && isFixtureAdvertised(FIXTURE));
+describe.skipIf(SKIP)('orchestratorConservativePath: CP-1 low-confidence suspend', () => {
+  // Starts a run of the fixture, polls until it settles, then inspects the
+  // event list as it stands while the run is suspended. The resume half is
+  // not driven by this test.
+  it('supervisor below threshold suspends with reason=low-confidence; ratified decision follows after resume', async () => {
+    const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
+    expect(created.status).toBe(201);
+    const { runId } = created.json as { runId: string };
+    const runUrl = `/v1/runs/${encodeURIComponent(runId)}`;
+    // Poll up to 50 times, 100 ms apart, stopping at the first settled status.
+    const settledStatuses = new Set(['waiting-approval', 'failed', 'completed']);
+    let status: string | undefined;
+    let attempt = 0;
+    while (attempt < 50) {
+      const snapshot = await driver.get(runUrl);
+      status = (snapshot.json as { status: string }).status;
+      if (settledStatuses.has(status)) break;
+      await new Promise((resolve) => setTimeout(resolve, 100));
+      attempt += 1;
+    }
+    expect(status).toBe('waiting-approval');
+    const eventsResponse = await driver.get(`${runUrl}/events`);
+    const eventsBody = eventsResponse.json as {
+      events?: Array<{ type: string; payload?: Record<string, unknown> }>;
+    };
+    const list = eventsBody.events ?? [];
+    // CP-1 step 1: the decision is held, so nothing has been decided yet.
+    const decidedCount = list.filter(({ type }) => type === 'runOrchestrator.decided').length;
+    expect(
+      decidedCount,
+      'CP-1: low-confidence holds the decision until human ratification',
+    ).toBe(0);
+    // CP-1 step 2: a node.suspended event carries reason=low-confidence.
+    const lowConfSuspend = list.find(
+      ({ type, payload }) => type === 'node.suspended' && payload?.reason === 'low-confidence',
+    );
+    expect(lowConfSuspend).toBeDefined();
+    const suspendedAgentId = lowConfSuspend?.payload?.agentId;
+    expect(typeof suspendedAgentId).toBe('string');
+  });
+});

package/src/scenarios/orchestratorDispatch.test.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * Multi-Agent Shift Phase 5 — orchestrator → dispatch → next-worker round-trip.
+ *
+ * Verifies that a workflow with `core.orchestrator.supervisor` →
+ * `core.dispatch` topology emits the canonical event sequence:
+ *   `node.started{supervisor}` → `runOrchestrator.decided{next-worker}`
+ *   → `node.completed{supervisor}` → `node.started{dispatch}` → child-run
+ *   lifecycle → `node.completed{dispatch}`.
+ *
+ * The supervisor's `runOrchestrator.decided` payload conforms to
+ * `schemas/run-orchestrator-decided-event.schema.json` + nested
+ * `schemas/orchestrator-decision.schema.json`.
+ *
+ * Capability-gated: skips unless the host advertises both
+ * `capabilities.agents.orchestrator: true` and `capabilities.agents.dispatch: true`.
+ * Fixture-gated: requires `conformance-orchestrator-dispatch`.
+ *
+ * @see schemas/orchestrator-decision.schema.json
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import {
+  isOrchestratorSupported,
+  isDispatchSupported,
+} from '../lib/multi-agent-capabilities.js';
+/** Workflow fixture this scenario starts a run against. */
+const FIXTURE = 'conformance-orchestrator-dispatch';
+/**
+ * True unless the orchestrator check, the dispatch check and the fixture
+ * check all pass. They are evaluated in that order and stop at the first failure.
+ */
+const SKIP = !(
+  isOrchestratorSupported() &&
+  isDispatchSupported() &&
+  isFixtureAdvertised(FIXTURE)
+);
+describe.skipIf(SKIP)('orchestratorDispatch: supervisor → dispatch → next-worker', () => {
+  // Runs the fixture to completion, then checks that the event list holds
+  // at least one `runOrchestrator.decided` event whose decision is
+  // `kind: 'next-worker'` with a non-empty `nextWorkerIds` array.
+  it('emits runOrchestrator.decided{next-worker} between supervisor + dispatch', async () => {
+    const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
+    expect(created.status).toBe(201);
+    const { runId } = created.json as { runId: string };
+    const finalState = await pollUntilTerminal(runId);
+    expect(finalState.status).toBe('completed');
+    const eventsResponse = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
+    const eventsBody = eventsResponse.json as {
+      events?: Array<{ type: string; payload?: Record<string, unknown> }>;
+    };
+    const list = eventsBody.events ?? [];
+    const decisions = list.filter(({ type }) => type === 'runOrchestrator.decided');
+    expect(decisions.length).toBeGreaterThan(0);
+    // At least one decision must be kind:'next-worker' (the dispatched-worker case).
+    const nextWorker = decisions.find((event) => {
+      const candidate = event.payload?.decision as { kind?: string } | undefined;
+      return candidate?.kind === 'next-worker';
+    });
+    expect(nextWorker, 'fixture emits at least one kind:next-worker decision').toBeDefined();
+    // The matching event necessarily has a payload: the predicate read `payload.decision.kind`.
+    const { agentId, decision: rawDecision } = nextWorker!.payload!;
+    expect(typeof agentId).toBe('string');
+    const decision = rawDecision as { kind: string; nextWorkerIds: string[] };
+    expect(decision.kind).toBe('next-worker');
+    const workerIds = decision.nextWorkerIds;
+    expect(Array.isArray(workerIds)).toBe(true);
+    expect(workerIds.length).toBeGreaterThanOrEqual(1);
+  });
+});