npm - @openwop/openwop-conformance - Versions diffs - 1.0.0 → 1.1.0 - Mend

@openwop/openwop-conformance 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80)

package/CHANGELOG.md +17 -0
package/README.md +31 -6
package/api/grpc/openwop.proto +251 -0
package/api/openapi.yaml +109 -3
package/coverage.md +48 -9
package/fixtures/conformance-configurable-schema.json +39 -0
package/fixtures/conformance-subworkflow-parent.json +1 -1
package/fixtures/conformance-wasm-pack-memory-cap-breach.json +23 -0
package/fixtures/openwop-smoke-byok-roundtrip.json +25 -0
package/fixtures.md +21 -0
package/package.json +3 -1
package/schemas/README.md +4 -0
package/schemas/audit-verify-result.schema.json +90 -0
package/schemas/capabilities.schema.json +293 -1
package/schemas/node-pack-manifest.schema.json +4 -4
package/schemas/pack-lockfile.schema.json +92 -0
package/schemas/registry-version-manifest.schema.json +145 -0
package/schemas/run-event-payloads.schema.json +2 -2
package/schemas/security-advisory.schema.json +109 -0
package/src/lib/a2a-fake-peer.ts +143 -56
package/src/lib/behavior-gate.ts +68 -0
package/src/lib/env.ts +10 -0
package/src/lib/grpc-framing.test.ts +96 -0
package/src/lib/grpc-framing.ts +76 -0
package/src/lib/oidc-issuer.test.ts +328 -0
package/src/lib/oidc-issuer.ts +241 -0
package/src/lib/otel-collector-grpc.test.ts +191 -0
package/src/lib/otel-collector.test.ts +303 -0
package/src/lib/otel-collector.ts +318 -14
package/src/lib/otlp-protobuf.test.ts +461 -0
package/src/lib/otlp-protobuf.ts +529 -0
package/src/scenarios/a2a-task-roundtrip.test.ts +147 -28
package/src/scenarios/agentConfidenceEscalation.test.ts +1 -0
package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +1 -0
package/src/scenarios/agentMemoryRedactionContract.test.ts +1 -0
package/src/scenarios/agentMemoryRoundTrip.test.ts +1 -0
package/src/scenarios/agentMemoryTtlExpiry.test.ts +1 -0
package/src/scenarios/agentMessageReducer.test.ts +1 -0
package/src/scenarios/agentMetadata.test.ts +1 -0
package/src/scenarios/agentPackExport.test.ts +1 -0
package/src/scenarios/agentPackInstall.test.ts +1 -0
package/src/scenarios/agentPackProvenance.test.ts +1 -0
package/src/scenarios/audit-log-integrity.test.ts +3 -6
package/src/scenarios/auth-api-key-rotation.test.ts +182 -0
package/src/scenarios/auth-mtls.test.ts +274 -0
package/src/scenarios/auth-oauth2-client-credentials.test.ts +259 -0
package/src/scenarios/auth-oidc-user-bearer.test.ts +361 -0
package/src/scenarios/bulk-cancel.test.ts +111 -0
package/src/scenarios/configurable-schema.test.ts +48 -0
package/src/scenarios/conversationCapabilityNegotiation.test.ts +1 -0
package/src/scenarios/conversationLifecycle.test.ts +1 -0
package/src/scenarios/conversationReplayDeterminism.test.ts +1 -0
package/src/scenarios/conversationVsLegacySuspend.test.ts +1 -0
package/src/scenarios/debug-bundle-truncation.test.ts +95 -0
package/src/scenarios/discovery.test.ts +183 -0
package/src/scenarios/http-client-ssrf.test.ts +71 -0
package/src/scenarios/idempotency.test.ts +6 -0
package/src/scenarios/idempotencyRetry.test.ts +3 -0
package/src/scenarios/mcp-tool-roundtrip.test.ts +198 -34
package/src/scenarios/mcp-toolcall-redaction.test.ts +66 -0
package/src/scenarios/metric-emission.test.ts +113 -0
package/src/scenarios/orchestratorConservativePath.test.ts +1 -0
package/src/scenarios/orchestratorDispatch.test.ts +1 -0
package/src/scenarios/orchestratorTermination.test.ts +1 -0
package/src/scenarios/otel-emission-grpc.test.ts +98 -0
package/src/scenarios/pause-resume.test.ts +119 -0
package/src/scenarios/production-backpressure.test.ts +342 -0
package/src/scenarios/production-retention-expiry.test.ts +164 -0
package/src/scenarios/registry-public.test.ts +131 -0
package/src/scenarios/replay-llm-cache-key.test.ts +35 -0
package/src/scenarios/replay-retention-expiry.test.ts +178 -0
package/src/scenarios/restart-during-run.test.ts +177 -0
package/src/scenarios/spec-corpus-validity.test.ts +54 -26
package/src/scenarios/staleClaim.test.ts +3 -0
package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +67 -10
package/src/scenarios/wasm-pack-memory-cap.test.ts +64 -9
package/src/scenarios/webhook-negative.test.ts +90 -0
package/src/scenarios/webhook-signed-delivery.test.ts +178 -0
package/src/setup.ts +25 -1
package/vitest.config.ts +5 -1

package/src/scenarios/discovery.test.ts CHANGED Viewed

@@ -8,6 +8,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { behaviorGate } from '../lib/behavior-gate.js';
 describe('discovery: /.well-known/openwop', () => {
   it('returns 200 with required Capabilities fields per capabilities.md §2', async () => {
@@ -145,3 +146,185 @@ describe('discovery: /v1/openapi.json', () => {
     )).toMatch(/^3\.[1-9]/);
   });
 });
+/**
+ * RFC 0011 §B: auth-scoped discovery subtest.
+ *
+ * Per `capabilities-change-detection.md` §"Scoped capability views":
+ * hosts that return a different payload when called authenticated
+ * vs. anonymous MUST advertise that surface via
+ * `capabilities.discovery.authScoped.supported: true`. The
+ * authenticated view MUST still satisfy `capabilities.schema.json`
+ * (required fields preserved) and MUST NOT expose capabilities
+ * outside the caller's authorization.
+ *
+ * The capability-shape and base-schema subtests run whenever the host
+ * advertises the profile. The authorization-oracle probe (assertion 5
+ * of §B) additionally requires `OPENWOP_TEST_UNAUTHORIZED_API_KEY`: an
+ * operator-supplied secondary key with strictly fewer capabilities
+ * than the primary. Without it the probe is skipped with a warning.
+ *
+ * @see RFCS/0011-auth-scoped-discovery.md §B
+ * @see spec/v1/capabilities-change-detection.md §"Scoped capability views"
+ */
+interface AuthScopedCaps {
+  supported?: boolean;
+  mode?: string;
+  endpointPath?: string;
+}
+interface DiscoveryCaps {
+  authScoped?: AuthScopedCaps;
+}
+const AUTH_SCOPED_PROFILE = 'openwop-discovery-auth-scoped';
+async function readDiscoveryCaps(): Promise<DiscoveryCaps | undefined> {
+  const disco = await driver.get('/.well-known/openwop', { authenticated: false });
+  return (disco.json as { capabilities?: { discovery?: DiscoveryCaps } }).capabilities
+    ?.discovery;
+}
+function isAuthScopedAdvertised(disc: DiscoveryCaps | undefined): boolean {
+  return disc?.authScoped?.supported === true;
+}
+describe('discovery: auth-scoped capability shape', () => {
+  it('host claiming auth-scoped discovery advertises required fields', async () => {
+    const disc = await readDiscoveryCaps();
+    if (!behaviorGate(AUTH_SCOPED_PROFILE, isAuthScopedAdvertised(disc))) {
+      return;
+    }
+    expect(disc?.authScoped?.supported, driver.describe(
+      'capabilities-change-detection.md §"Scoped capability views"',
+      'capabilities.discovery.authScoped.supported MUST be true when the profile is claimed',
+    )).toBe(true);
+    if (disc?.authScoped?.mode !== undefined) {
+      expect(
+        ['same-endpoint', 'extension-endpoint'].includes(disc.authScoped.mode),
+        driver.describe(
+          'capabilities.schema.json discovery.authScoped.mode',
+          'mode MUST be one of same-endpoint / extension-endpoint when advertised',
+        ),
+      ).toBe(true);
+    }
+    if (disc?.authScoped?.mode === 'extension-endpoint') {
+      expect(
+        typeof disc.authScoped.endpointPath === 'string' &&
+          disc.authScoped.endpointPath.startsWith('/'),
+        driver.describe(
+          'RFCS/0011-auth-scoped-discovery.md §A',
+          'extension-endpoint mode MUST advertise endpointPath as a leading-slash relative path',
+        ),
+      ).toBe(true);
+    }
+  });
+});
+describe('discovery: auth-scoped view satisfies base schema', () => {
+  it('authenticated discovery preserves required Capabilities fields', async () => {
+    const disc = await readDiscoveryCaps();
+    if (!behaviorGate(AUTH_SCOPED_PROFILE, isAuthScopedAdvertised(disc))) {
+      return;
+    }
+    const mode = disc?.authScoped?.mode ?? 'same-endpoint';
+    const path =
+      mode === 'extension-endpoint'
+        ? disc?.authScoped?.endpointPath ?? '/v1/capabilities'
+        : '/.well-known/openwop';
+    const res = await driver.get(path);
+    expect(res.status, driver.describe(
+      'capabilities-change-detection.md §"Scoped capability views"',
+      'authenticated discovery MUST return 200',
+    )).toBe(200);
+    const body = res.json as Record<string, unknown> | undefined;
+    expect(body, 'authenticated discovery body MUST be JSON').toBeDefined();
+    // Required fields per capabilities.md §3 preserved in the
+    // authenticated view (per spec annex: "MUST still satisfy the
+    // base capabilities.schema.json shape").
+    for (const required of [
+      'protocolVersion',
+      'supportedEnvelopes',
+      'schemaVersions',
+      'limits',
+    ]) {
+      expect(body?.[required], driver.describe(
+        'capabilities-change-detection.md §"Scoped capability views"',
+        `auth-scoped view MUST preserve required field "${required}" from capabilities.md §3`,
+      )).toBeDefined();
+    }
+  });
+});
+describe('discovery: auth-scoped is not an authorization oracle', () => {
+  it('unauthorized key MUST NOT reveal capabilities outside its authorization', async () => {
+    const disc = await readDiscoveryCaps();
+    if (!behaviorGate(AUTH_SCOPED_PROFILE, isAuthScopedAdvertised(disc))) {
+      return;
+    }
+    const unauthorizedKey = process.env.OPENWOP_TEST_UNAUTHORIZED_API_KEY;
+    if (!unauthorizedKey) {
+      // eslint-disable-next-line no-console
+      console.warn(
+        '[discovery: auth-scoped] OPENWOP_TEST_UNAUTHORIZED_API_KEY not supplied; skipping authorization-oracle probe',
+      );
+      return;
+    }
+    const mode = disc?.authScoped?.mode ?? 'same-endpoint';
+    const path =
+      mode === 'extension-endpoint'
+        ? disc?.authScoped?.endpointPath ?? '/v1/capabilities'
+        : '/.well-known/openwop';
+    // Primary key (env-default Authorization).
+    const primary = await driver.get(path);
+    // Unauthorized / lower-privilege key.
+    const unauthorized = await driver.get(path, {
+      authenticated: false,
+      headers: { Authorization: `Bearer ${unauthorizedKey}` },
+    });
+    if (unauthorized.status === 401 || unauthorized.status === 403) {
+      // Host rejected the unauthorized key outright — that's fine.
+      // The oracle probe is moot when the host refuses the bearer.
+      return;
+    }
+    expect(unauthorized.status).toBe(200);
+    const primaryCaps = Object.keys(
+      (primary.json as { capabilities?: Record<string, unknown> })?.capabilities ?? {},
+    );
+    const unauthorizedCaps = Object.keys(
+      (unauthorized.json as { capabilities?: Record<string, unknown> })?.capabilities ??
+        {},
+    );
+    // Spec annex line 69: "Hosts MUST NOT let scoped discovery become
+    // an authorization oracle. A caller should learn only about
+    // capabilities it is allowed to use." Operationalized as: the
+    // unauthorized view's top-level capability keys MUST be a subset
+    // of the primary view's keys, i.e. the lower-privilege caller sees
+    // no capability key that the primary caller does not.
+    const extras = unauthorizedCaps.filter((c) => !primaryCaps.includes(c));
+    expect(extras.length, driver.describe(
+      'capabilities-change-detection.md §"Scoped capability views"',
+      'unauthorized view MUST NOT expose capability keys absent from the primary (authorized) view',
+    )).toBe(0);
+  });
+});

package/src/scenarios/http-client-ssrf.test.ts ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * HTTP client SSRF-guard advertisement contract.
+ *
+ * Capability-gated: skips when the host does not advertise
+ * `capabilities.httpClient.supported = true`.
+ *
+ * Verifies that any host claiming an `httpClient` surface MUST advertise
+ * `ssrfGuard: true` and `maxResponseBodyBytes` — without these two,
+ * the host's "call any URL" node is a vector for both SSRF and DoS.
+ *
+ * The actual SSRF-rejection behavior is verified by the host's
+ * in-process test (`http-client.test.ts`). The conformance suite only
+ * asserts the advertisement shape — driving an SSRF rejection requires
+ * a deployment that doesn't set `OPENWOP_HTTP_ALLOW_PRIVATE=true`,
+ * which is the operator's choice, not the suite's.
+ *
+ * @see SECURITY/invariants.yaml id: http-client-ssrf-guard
+ * @see spec/v1/capabilities.md §`httpClient` (additive)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+async function isHttpClientSupported(): Promise<boolean> {
+  const disco = await driver.get('/.well-known/openwop');
+  const caps = (disco.json as { capabilities?: { httpClient?: { supported?: boolean } } })
+    .capabilities;
+  return caps?.httpClient?.supported === true;
+}
+describe('http-client-ssrf: capability advertisement contract', () => {
+  it('host advertising httpClient MUST declare ssrfGuard: true + maxResponseBodyBytes', async () => {
+    if (!(await isHttpClientSupported())) {
+      // eslint-disable-next-line no-console
+      console.warn('[http-client-ssrf] host does not advertise httpClient; skipping');
+      return;
+    }
+    const disco = await driver.get('/.well-known/openwop');
+    const cap = (disco.json as {
+      capabilities?: {
+        httpClient?: {
+          supported?: boolean;
+          ssrfGuard?: boolean;
+          maxResponseBodyBytes?: number;
+          methods?: unknown;
+        };
+      };
+    }).capabilities?.httpClient;
+    expect(cap?.supported, driver.describe(
+      'capabilities.md §httpClient',
+      'httpClient.supported MUST be a boolean',
+    )).toBe(true);
+    expect(cap?.ssrfGuard, driver.describe(
+      'SECURITY/threat-model-secret-leakage.md (SSRF probing analog)',
+      'httpClient.ssrfGuard MUST be true — a host that lets any tenant POST a workflow with arbitrary URLs without SSRF protection enables blind probing of deployer-internal services',
+    )).toBe(true);
+    expect(typeof cap?.maxResponseBodyBytes, driver.describe(
+      'capabilities.md §httpClient',
+      'httpClient.maxResponseBodyBytes MUST be a number — a host that streams unbounded response bodies into variables is a DoS vector',
+    )).toBe('number');
+    expect((cap?.maxResponseBodyBytes ?? 0) > 0).toBe(true);
+    expect(Array.isArray(cap?.methods), driver.describe(
+      'capabilities.md §httpClient',
+      'httpClient.methods MUST be an array of supported HTTP methods',
+    )).toBe(true);
+  });
+});

package/src/scenarios/idempotency.test.ts CHANGED Viewed

@@ -6,6 +6,12 @@
  * Uses the `conformance-idempotent` fixture. Server MUST have seeded
  * it. The fixture's `nonce` input has no side effect — it exists so
  * the conformance suite can vary the body without affecting behavior.
+ *
+ * @see spec/v1/idempotency.md §Layer 1
+ * @see spec/v1/rest-endpoints.md
+ * @see spec/v1/production-profile.md §"Retry and idempotency" (RFC 0009
+ *      — this scenario satisfies the basic-idempotency predicate when
+ *      the host advertises capabilities.production.supported: true)
  */
 import { describe, it, expect } from 'vitest';

package/src/scenarios/idempotencyRetry.test.ts CHANGED Viewed

@@ -18,6 +18,9 @@
  *
  * @see spec/v1/idempotency.md
  * @see spec/v1/scale-profiles.md §"Retry semantics"
+ * @see spec/v1/production-profile.md §"Retry and idempotency" (RFC 0009
+ *      — this scenario satisfies the 24h retention + 5-retry predicate
+ *      when the host advertises capabilities.production.supported: true)
  */
 import { describe, it, expect } from 'vitest';

package/src/scenarios/mcp-tool-roundtrip.test.ts CHANGED Viewed

@@ -13,21 +13,44 @@
  *
  * Two-level scenario:
  *
- *   - **Direct fake-server probe** (always runs when collector started):
- *     hits the in-process fake MCP server directly with initialize +
+ *   - **Direct probe** (always runs when an MCP endpoint is configured):
+ *     hits the configured MCP server directly with initialize +
  *     tools/list + tools/call to verify its wire shape. Catches
- *     regressions in our own test fixture.
+ *     regressions in our own test fixture; doubles as the shape check
+ *     against real reference servers when `OPENWOP_MCP_REAL_SERVER_URL`
+ *     points at one.
  *
  *   - **Host-mediated roundtrip** (runs when host advertises an MCP
  *     fixture or roundtrip capability): starts a workflow run, observes
  *     events, asserts tool-call envelope visibility. Skips otherwise.
  *
  * Operator contract:
- *   `OPENWOP_MCP_FAKE_SERVER=true` on the suite side; configure the host
- *   to use the printed fake-server URL as one of its MCP servers.
+ *   - `OPENWOP_MCP_FAKE_SERVER=true` — boots the in-process synthetic
+ *     server at suite init. The direct probe asserts the echo tool's
+ *     deterministic shape.
+ *   - `OPENWOP_MCP_REAL_SERVER_URL=<base-url>` — points the direct
+ *     probe at a real MCP server. Auto-detects the transport from the
+ *     server's `Content-Type` response header:
+ *       - `application/json` → single-JSON response, parsed as one
+ *         JSON-RPC frame.
+ *       - `text/event-stream` → streamable-http+SSE; the probe reads
+ *         SSE frames until it finds one whose `data:` payload matches
+ *         the JSON-RPC `id` we sent, then returns that frame.
+ *     The stdio transport (default for `modelcontextprotocol/servers`
+ *     reference servers) is still out of scope: those servers run as
+ *     a child process speaking JSON-RPC over stdin/stdout, so there is
+ *     no HTTP endpoint to point the env var at. Operators wanting
+ *     interop evidence against stdio servers run them under a
+ *     `mcp-bridge` HTTP adapter.
+ *     Assertions stay shape-only: tools/list returns ≥1 tool, a
+ *     tools/call returns valid MCP content (a `result.content` array,
+ *     possibly `isError: true` — both are spec-conformant).
+ *
+ *   When both env vars are set, the real-server URL wins (it's the more
+ *   meaningful evidence). When neither is set, the scenario soft-skips.
  *
  * @see spec/v1/mcp-integration.md
  * @see SECURITY/threat-model-prompt-injection.md
+ * @see docs/PROTOCOL-GAP-CLOSURE-PLAN.md Phase 3 T3.4
  */
 import { describe, it, expect } from 'vitest';
@@ -38,62 +61,203 @@ import { pollUntilTerminal } from '../lib/polling.js';
 const ROUNDTRIP_FIXTURE = 'conformance-mcp-tool-roundtrip';
+/**
+ * Read an SSE `text/event-stream` body until a frame's `data:` payload
+ * is a JSON-RPC response with `id === wantId`, then return that frame's
+ * parsed payload. Honors the MCP streamable-http transport's "single
+ * POST may return one OR many SSE frames; correlate by id" pattern.
+ */
+async function readSseUntilId(
+  res: Response,
+  wantId: number,
+  timeoutMs = 5_000,
+): Promise<Record<string, unknown>> {
+  if (!res.body) throw new Error('SSE response has no body');
+  const reader = res.body.getReader();
+  const decoder = new TextDecoder('utf-8');
+  let buffer = '';
+  const deadline = Date.now() + timeoutMs;
+  while (Date.now() < deadline) {
+    const { value, done } = await reader.read();
+    if (value) buffer += decoder.decode(value, { stream: true });
+    let sepIndex: number;
+    while ((sepIndex = buffer.indexOf('\n\n')) !== -1) {
+      const block = buffer.slice(0, sepIndex);
+      buffer = buffer.slice(sepIndex + 2);
+      let dataLines: string[] = [];
+      for (const line of block.split('\n')) {
+        // SSE permits multi-line data via repeated `data:` lines, joined by \n.
+        if (line.startsWith('data:')) dataLines.push(line.slice(5).replace(/^ /, ''));
+      }
+      if (dataLines.length === 0) continue;
+      try {
+        const parsed = JSON.parse(dataLines.join('\n')) as Record<string, unknown>;
+        if (parsed.id === wantId) {
+          // Drop the reader; the server may keep the stream open for
+          // unrelated notifications.
+          void reader.cancel().catch(() => undefined);
+          return parsed;
+        }
+      } catch {
+        // Skip malformed frames.
+      }
+    }
+    if (done) break;
+  }
+  throw new Error(`SSE stream closed before frame with id=${wantId} arrived`);
+}
 async function postJsonRpc(
   endpoint: string,
   method: string,
   params: unknown,
   id: number,
-): Promise<{ status: number; json: Record<string, unknown> }> {
-  const res = await fetch(`${endpoint}/`, {
+  sessionId?: string,
+): Promise<{ status: number; json: Record<string, unknown>; sessionId: string | null }> {
+  // POST to `endpoint` verbatim — the trailing-slash decision is the
+  // caller's. The probe accepts both response shapes per MCP's
+  // streamable-http spec: a single JSON body OR an SSE stream that
+  // emits one-or-many JSON-RPC frames. Transport is auto-detected
+  // from Content-Type.
+  //
+  // Session-id threading: real MCP servers built on the official SDK
+  // assign a session id at `initialize` and require it on every
+  // subsequent call via `mcp-session-id`. The in-process fake doesn't
+  // enforce that, but real impls do — so the probe always echoes back
+  // any session header it receives from initialize.
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+    // MCP streamable-http servers SHOULD return `application/json`
+    // by default but MAY upgrade to SSE; advertise both as
+    // acceptable.
+    Accept: 'application/json, text/event-stream',
+  };
+  if (sessionId) headers['mcp-session-id'] = sessionId;
+  const res = await fetch(endpoint, {
     method: 'POST',
-    headers: { 'Content-Type': 'application/json' },
+    headers,
     body: JSON.stringify({ jsonrpc: '2.0', id, method, params }),
   });
+  const returnedSessionId = res.headers.get('mcp-session-id');
+  const contentType = res.headers.get('content-type') ?? '';
+  if (contentType.includes('text/event-stream')) {
+    const json = await readSseUntilId(res, id);
+    return { status: res.status, json, sessionId: returnedSessionId };
+  }
   const text = await res.text();
-  return { status: res.status, json: JSON.parse(text) as Record<string, unknown> };
+  return {
+    status: res.status,
+    json: JSON.parse(text) as Record<string, unknown>,
+    sessionId: returnedSessionId,
+  };
 }
-describe('mcp-tool-roundtrip: fake-server wire shape', () => {
-  it('initialize + tools/list + tools/call echo round-trip cleanly', async () => {
-    const server = getMcpFakeServer();
-    if (!server) {
+/** Resolve the MCP endpoint to probe: real-server env wins; otherwise the in-process fake. */
+function probeEndpoint(): { url: string; isReal: boolean } | null {
+  const real = process.env.OPENWOP_MCP_REAL_SERVER_URL;
+  if (real && real.length > 0) return { url: real.replace(/\/$/, ''), isReal: true };
+  const fake = getMcpFakeServer();
+  if (fake) return { url: fake.endpoint(), isReal: false };
+  return null;
+}
+describe('mcp-tool-roundtrip: server wire shape', () => {
+  it('initialize + tools/list + tools/call round-trip per MCP JSON-RPC contract', async () => {
+    const probe = probeEndpoint();
+    if (!probe) {
       // eslint-disable-next-line no-console
       console.warn(
-        '[mcp-tool-roundtrip] fake server not started; set OPENWOP_MCP_FAKE_SERVER=true',
+        '[mcp-tool-roundtrip] no MCP endpoint configured; set OPENWOP_MCP_FAKE_SERVER=true ' +
+          'or OPENWOP_MCP_REAL_SERVER_URL=<base-url>',
       );
       return;
     }
-    server.reset();
+    if (!probe.isReal) getMcpFakeServer()!.reset();
-    const init = await postJsonRpc(server.endpoint(), 'initialize', {}, 1);
+    // Per MCP `initialize` spec, params MUST carry protocolVersion +
+    // capabilities + clientInfo. The in-process fake accepts empty
+    // params; real reference servers built on @modelcontextprotocol/sdk
+    // reject them with 400. Sending the canonical shape keeps the probe
+    // valid against both.
+    const init = await postJsonRpc(
+      probe.url,
+      'initialize',
+      {
+        protocolVersion: '2024-11-05',
+        capabilities: {},
+        clientInfo: { name: 'openwop-conformance-probe', version: '1.0.0' },
+      },
+      1,
+    );
     expect(init.status).toBe(200);
     const initResult = (init.json.result ?? {}) as { protocolVersion?: string };
     expect(typeof initResult.protocolVersion).toBe('string');
+    // Capture session id from initialize so real SDK-based servers can
+    // bind subsequent calls; servers that don't set the header yield
+    // null, which becomes undefined here so no session header is sent.
+    const sid = init.sessionId ?? undefined;
-    const list = await postJsonRpc(server.endpoint(), 'tools/list', {}, 2);
+    const list = await postJsonRpc(probe.url, 'tools/list', {}, 2, sid);
     expect(list.status).toBe(200);
     const listResult = (list.json.result ?? {}) as {
       tools?: ReadonlyArray<{ name?: string }>;
     };
-    expect(listResult.tools?.some((t) => t.name === 'echo')).toBe(true);
+    expect(Array.isArray(listResult.tools)).toBe(true);
+    expect((listResult.tools ?? []).length).toBeGreaterThan(0);
-    const call = await postJsonRpc(
-      server.endpoint(),
-      'tools/call',
-      { name: 'echo', arguments: { text: 'hello-from-conformance' } },
-      3,
-    );
-    expect(call.status).toBe(200);
-    const callResult = (call.json.result ?? {}) as {
-      content?: ReadonlyArray<{ type?: string; text?: string }>;
-    };
-    expect(callResult.content?.[0]?.type).toBe('text');
-    expect(callResult.content?.[0]?.text).toBe('hello-from-conformance');
+    if (probe.isReal) {
+      // Real-server interop evidence (Phase 3 T3.4). We can't assume a
+      // deterministic echo tool exists on every reference server, so the
+      // assertions stay shape-only:
+      //   - tools/list returns ≥1 tool ✓ (above)
+      //   - the first tool has a string name (its input schema is not inspected)
+      //   - tools/call against that tool returns valid MCP content (array
+      //     of {type, ...}). A failed call (e.g., bad arguments) still
+      //     returns 200 with an `isError: true` content marker — both
+      //     paths are spec-conformant; we assert SOME response.
+      const first = listResult.tools?.[0];
+      expect(typeof first?.name).toBe('string');
+      const callRes = await postJsonRpc(
+        probe.url,
+        'tools/call',
+        { name: first!.name, arguments: {} },
+        3,
+        sid,
+      );
+      expect(callRes.status).toBe(200);
+      const callResult = (callRes.json.result ?? {}) as {
+        content?: ReadonlyArray<{ type?: string }>;
+        isError?: boolean;
+      };
+      // Either valid content[] OR isError-marked content[] is acceptable.
+      expect(Array.isArray(callResult.content)).toBe(true);
+      // eslint-disable-next-line no-console
+      console.warn(
+        `[mcp-tool-roundtrip] real-server interop OK against ${probe.url} ` +
+          `(tool=${first?.name}, isError=${callResult.isError === true})`,
+      );
+    } else {
+      // Fake-server path: deterministic echo tool, assert verbatim.
+      expect(listResult.tools?.some((t) => t.name === 'echo')).toBe(true);
-    // Invocation log captured.
-    const invocations = server.invocations();
-    const methods = invocations.map((i) => i.method);
-    expect(methods).toEqual(['initialize', 'tools/list', 'tools/call']);
+      const call = await postJsonRpc(
+        probe.url,
+        'tools/call',
+        { name: 'echo', arguments: { text: 'hello-from-conformance' } },
+        3,
+      );
+      expect(call.status).toBe(200);
+      const callResult = (call.json.result ?? {}) as {
+        content?: ReadonlyArray<{ type?: string; text?: string }>;
+      };
+      expect(callResult.content?.[0]?.type).toBe('text');
+      expect(callResult.content?.[0]?.text).toBe('hello-from-conformance');
+      const fake = getMcpFakeServer()!;
+      const methods = fake.invocations().map((i) => i.method);
+      expect(methods).toEqual(['initialize', 'tools/list', 'tools/call']);
+    }
   });
 });

package/src/scenarios/mcp-toolcall-redaction.test.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * MCP-1 invariant: tool-call arguments + result content NEVER appear
+ * on emitted event payloads.
+ *
+ * Capability-gated: skips when the host does not advertise
+ * `capabilities.mcpClient.supported = true`.
+ *
+ * The test does NOT actually invoke an MCP tool (that requires the
+ * host to be wired to a real MCP server, which is deployment-specific
+ * and outside the conformance suite's environmental contract). What
+ * it verifies is the SHAPE of the host's mcpClient advertisement +
+ * the trust-boundary marker. The redaction invariant is then verified
+ * end-to-end by the host's own in-process test (`mcp-client.test.ts`)
+ * which DOES drive a fake MCP server and asserts no raw args/results
+ * appear on the sanitized summary.
+ *
+ * @see SECURITY/invariants.yaml id: mcp-toolcall-payload-redaction
+ * @see spec/v1/host-capabilities.md §host.mcp
+ * @see SECURITY/threat-model-prompt-injection.md §"UNTRUSTED marker"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+async function isMcpClientSupported(): Promise<boolean> {
+  const disco = await driver.get('/.well-known/openwop');
+  const caps = (disco.json as { capabilities?: { mcpClient?: { supported?: boolean } } })
+    .capabilities;
+  return caps?.mcpClient?.supported === true;
+}
+describe('mcp-toolcall-redaction: capability advertisement contract', () => {
+  it('host advertising mcpClient MUST declare trustBoundary: "untrusted"', async () => {
+    if (!(await isMcpClientSupported())) {
+      // eslint-disable-next-line no-console
+      console.warn('[mcp-toolcall-redaction] host does not advertise mcpClient; skipping');
+      return;
+    }
+    const disco = await driver.get('/.well-known/openwop');
+    const cap = (disco.json as {
+      capabilities?: {
+        mcpClient?: { supported?: boolean; transports?: unknown; trustBoundary?: string };
+      };
+    }).capabilities?.mcpClient;
+    expect(cap?.supported, driver.describe(
+      'host-capabilities.md §host.mcp',
+      'mcpClient.supported MUST be a boolean',
+    )).toBe(true);
+    expect(Array.isArray(cap?.transports), driver.describe(
+      'host-capabilities.md §host.mcp',
+      'mcpClient.transports MUST be an array of transport identifiers',
+    )).toBe(true);
+    // threat-model-prompt-injection.md §"UNTRUSTED marker": MCP tool
+    // output is by spec untrusted (it can carry adversarial content).
+    // Hosts advertising mcpClient MUST encode the boundary in the
+    // capability so downstream consumers (LLM nodes) treat the
+    // content accordingly.
+    expect(cap?.trustBoundary, driver.describe(
+      'SECURITY/threat-model-prompt-injection.md §"UNTRUSTED marker"',
+      'mcpClient.trustBoundary MUST be "untrusted" — downstream LLM nodes treat tool content as user data',
+    )).toBe('untrusted');
+  });
+});