@openwop/openwop-conformance 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +17 -0
  2. package/README.md +31 -6
  3. package/api/grpc/openwop.proto +251 -0
  4. package/api/openapi.yaml +109 -3
  5. package/coverage.md +48 -9
  6. package/fixtures/conformance-configurable-schema.json +39 -0
  7. package/fixtures/conformance-subworkflow-parent.json +1 -1
  8. package/fixtures/conformance-wasm-pack-memory-cap-breach.json +23 -0
  9. package/fixtures/openwop-smoke-byok-roundtrip.json +25 -0
  10. package/fixtures.md +21 -0
  11. package/package.json +3 -1
  12. package/schemas/README.md +4 -0
  13. package/schemas/audit-verify-result.schema.json +90 -0
  14. package/schemas/capabilities.schema.json +293 -1
  15. package/schemas/node-pack-manifest.schema.json +4 -4
  16. package/schemas/pack-lockfile.schema.json +92 -0
  17. package/schemas/registry-version-manifest.schema.json +145 -0
  18. package/schemas/run-event-payloads.schema.json +2 -2
  19. package/schemas/security-advisory.schema.json +109 -0
  20. package/src/lib/a2a-fake-peer.ts +143 -56
  21. package/src/lib/behavior-gate.ts +68 -0
  22. package/src/lib/env.ts +10 -0
  23. package/src/lib/grpc-framing.test.ts +96 -0
  24. package/src/lib/grpc-framing.ts +76 -0
  25. package/src/lib/oidc-issuer.test.ts +328 -0
  26. package/src/lib/oidc-issuer.ts +241 -0
  27. package/src/lib/otel-collector-grpc.test.ts +191 -0
  28. package/src/lib/otel-collector.test.ts +303 -0
  29. package/src/lib/otel-collector.ts +318 -14
  30. package/src/lib/otlp-protobuf.test.ts +461 -0
  31. package/src/lib/otlp-protobuf.ts +529 -0
  32. package/src/scenarios/a2a-task-roundtrip.test.ts +147 -28
  33. package/src/scenarios/agentConfidenceEscalation.test.ts +1 -0
  34. package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +1 -0
  35. package/src/scenarios/agentMemoryRedactionContract.test.ts +1 -0
  36. package/src/scenarios/agentMemoryRoundTrip.test.ts +1 -0
  37. package/src/scenarios/agentMemoryTtlExpiry.test.ts +1 -0
  38. package/src/scenarios/agentMessageReducer.test.ts +1 -0
  39. package/src/scenarios/agentMetadata.test.ts +1 -0
  40. package/src/scenarios/agentPackExport.test.ts +1 -0
  41. package/src/scenarios/agentPackInstall.test.ts +1 -0
  42. package/src/scenarios/agentPackProvenance.test.ts +1 -0
  43. package/src/scenarios/audit-log-integrity.test.ts +3 -6
  44. package/src/scenarios/auth-api-key-rotation.test.ts +182 -0
  45. package/src/scenarios/auth-mtls.test.ts +274 -0
  46. package/src/scenarios/auth-oauth2-client-credentials.test.ts +259 -0
  47. package/src/scenarios/auth-oidc-user-bearer.test.ts +361 -0
  48. package/src/scenarios/bulk-cancel.test.ts +111 -0
  49. package/src/scenarios/configurable-schema.test.ts +48 -0
  50. package/src/scenarios/conversationCapabilityNegotiation.test.ts +1 -0
  51. package/src/scenarios/conversationLifecycle.test.ts +1 -0
  52. package/src/scenarios/conversationReplayDeterminism.test.ts +1 -0
  53. package/src/scenarios/conversationVsLegacySuspend.test.ts +1 -0
  54. package/src/scenarios/debug-bundle-truncation.test.ts +95 -0
  55. package/src/scenarios/discovery.test.ts +183 -0
  56. package/src/scenarios/http-client-ssrf.test.ts +71 -0
  57. package/src/scenarios/idempotency.test.ts +6 -0
  58. package/src/scenarios/idempotencyRetry.test.ts +3 -0
  59. package/src/scenarios/mcp-tool-roundtrip.test.ts +198 -34
  60. package/src/scenarios/mcp-toolcall-redaction.test.ts +66 -0
  61. package/src/scenarios/metric-emission.test.ts +113 -0
  62. package/src/scenarios/orchestratorConservativePath.test.ts +1 -0
  63. package/src/scenarios/orchestratorDispatch.test.ts +1 -0
  64. package/src/scenarios/orchestratorTermination.test.ts +1 -0
  65. package/src/scenarios/otel-emission-grpc.test.ts +98 -0
  66. package/src/scenarios/pause-resume.test.ts +119 -0
  67. package/src/scenarios/production-backpressure.test.ts +342 -0
  68. package/src/scenarios/production-retention-expiry.test.ts +164 -0
  69. package/src/scenarios/registry-public.test.ts +131 -0
  70. package/src/scenarios/replay-llm-cache-key.test.ts +35 -0
  71. package/src/scenarios/replay-retention-expiry.test.ts +178 -0
  72. package/src/scenarios/restart-during-run.test.ts +177 -0
  73. package/src/scenarios/spec-corpus-validity.test.ts +54 -26
  74. package/src/scenarios/staleClaim.test.ts +3 -0
  75. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +67 -10
  76. package/src/scenarios/wasm-pack-memory-cap.test.ts +64 -9
  77. package/src/scenarios/webhook-negative.test.ts +90 -0
  78. package/src/scenarios/webhook-signed-delivery.test.ts +178 -0
  79. package/src/setup.ts +25 -1
  80. package/vitest.config.ts +5 -1
@@ -8,6 +8,7 @@
8
8
 
9
9
  import { describe, it, expect } from 'vitest';
10
10
  import { driver } from '../lib/driver.js';
11
+ import { behaviorGate } from '../lib/behavior-gate.js';
11
12
 
12
13
  describe('discovery: /.well-known/openwop', () => {
13
14
  it('returns 200 with required Capabilities fields per capabilities.md §2', async () => {
@@ -145,3 +146,185 @@ describe('discovery: /v1/openapi.json', () => {
145
146
  )).toMatch(/^3\.[1-9]/);
146
147
  });
147
148
  });
149
+
150
+ /**
151
+ * RFC 0011 §B: auth-scoped discovery subtest.
152
+ *
153
+ * Per `capabilities-change-detection.md` §"Scoped capability views":
154
+ * hosts that return a different payload when called authenticated
155
+ * vs. anonymous MUST advertise that surface via
156
+ * `capabilities.discovery.authScoped.supported: true`. The
157
+ * authenticated view MUST still satisfy `capabilities.schema.json`
158
+ * (required fields preserved) and MUST NOT expose capabilities
159
+ * outside the caller's authorization.
160
+ *
161
+ * Capability shape runs unconditionally when the profile is advertised.
162
+ * The authorization-oracle probe (assertion 5 of §B) is gated on
163
+ * `OPENWOP_TEST_UNAUTHORIZED_API_KEY` because it requires an
164
+ * operator-supplied secondary key with strictly-fewer capabilities
165
+ * than the primary.
166
+ *
167
+ * @see RFCS/0011-auth-scoped-discovery.md §B
168
+ * @see spec/v1/capabilities-change-detection.md §"Scoped capability views"
169
+ */
170
+
171
+ interface AuthScopedCaps {
172
+ supported?: boolean;
173
+ mode?: string;
174
+ endpointPath?: string;
175
+ }
176
+
177
+ interface DiscoveryCaps {
178
+ authScoped?: AuthScopedCaps;
179
+ }
180
+
181
+ const AUTH_SCOPED_PROFILE = 'openwop-discovery-auth-scoped';
182
+
183
/**
 * Fetch the discovery document without credentials and return its
 * `capabilities.discovery` section, or `undefined` when the host does
 * not publish one.
 */
async function readDiscoveryCaps(): Promise<DiscoveryCaps | undefined> {
  const { json } = await driver.get('/.well-known/openwop', { authenticated: false });
  const doc = json as { capabilities?: { discovery?: DiscoveryCaps } };
  return doc.capabilities?.discovery;
}
188
+
189
/**
 * True only when the discovery section explicitly sets
 * `authScoped.supported` to the boolean `true`; a missing section or
 * any other value counts as "not advertised".
 */
function isAuthScopedAdvertised(disc: DiscoveryCaps | undefined): boolean {
  const supportedFlag = disc?.authScoped?.supported;
  return supportedFlag === true;
}
192
+
193
+ describe('discovery: auth-scoped capability shape', () => {
194
+ it('host claiming auth-scoped discovery advertises required fields', async () => {
195
+ const disc = await readDiscoveryCaps();
196
+
197
+ if (!behaviorGate(AUTH_SCOPED_PROFILE, isAuthScopedAdvertised(disc))) {
198
+ return;
199
+ }
200
+
201
+ expect(disc?.authScoped?.supported, driver.describe(
202
+ 'capabilities-change-detection.md §"Scoped capability views"',
203
+ 'capabilities.discovery.authScoped.supported MUST be true when the profile is claimed',
204
+ )).toBe(true);
205
+
206
+ if (disc?.authScoped?.mode !== undefined) {
207
+ expect(
208
+ ['same-endpoint', 'extension-endpoint'].includes(disc.authScoped.mode),
209
+ driver.describe(
210
+ 'capabilities.schema.json discovery.authScoped.mode',
211
+ 'mode MUST be one of same-endpoint / extension-endpoint when advertised',
212
+ ),
213
+ ).toBe(true);
214
+ }
215
+
216
+ if (disc?.authScoped?.mode === 'extension-endpoint') {
217
+ expect(
218
+ typeof disc.authScoped.endpointPath === 'string' &&
219
+ disc.authScoped.endpointPath.startsWith('/'),
220
+ driver.describe(
221
+ 'RFCS/0011-auth-scoped-discovery.md §A',
222
+ 'extension-endpoint mode MUST advertise endpointPath as a leading-slash relative path',
223
+ ),
224
+ ).toBe(true);
225
+ }
226
+ });
227
+ });
228
+
229
+ describe('discovery: auth-scoped view satisfies base schema', () => {
230
+ it('authenticated discovery preserves required Capabilities fields', async () => {
231
+ const disc = await readDiscoveryCaps();
232
+
233
+ if (!behaviorGate(AUTH_SCOPED_PROFILE, isAuthScopedAdvertised(disc))) {
234
+ return;
235
+ }
236
+
237
+ const mode = disc?.authScoped?.mode ?? 'same-endpoint';
238
+ const path =
239
+ mode === 'extension-endpoint'
240
+ ? disc?.authScoped?.endpointPath ?? '/v1/capabilities'
241
+ : '/.well-known/openwop';
242
+
243
+ const res = await driver.get(path);
244
+
245
+ expect(res.status, driver.describe(
246
+ 'capabilities-change-detection.md §"Scoped capability views"',
247
+ 'authenticated discovery MUST return 200',
248
+ )).toBe(200);
249
+
250
+ const body = res.json as Record<string, unknown> | undefined;
251
+ expect(body, 'authenticated discovery body MUST be JSON').toBeDefined();
252
+
253
+ // Required fields per capabilities.md §3 preserved in the
254
+ // authenticated view (per spec annex: "MUST still satisfy the
255
+ // base capabilities.schema.json shape").
256
+ for (const required of [
257
+ 'protocolVersion',
258
+ 'supportedEnvelopes',
259
+ 'schemaVersions',
260
+ 'limits',
261
+ ]) {
262
+ expect(body?.[required], driver.describe(
263
+ 'capabilities-change-detection.md §"Scoped capability views"',
264
+ `auth-scoped view MUST preserve required field "${required}" from capabilities.md §3`,
265
+ )).toBeDefined();
266
+ }
267
+ });
268
+ });
269
+
270
+ describe('discovery: auth-scoped is not an authorization oracle', () => {
271
+ it('unauthorized key MUST NOT reveal capabilities outside its authorization', async () => {
272
+ const disc = await readDiscoveryCaps();
273
+
274
+ if (!behaviorGate(AUTH_SCOPED_PROFILE, isAuthScopedAdvertised(disc))) {
275
+ return;
276
+ }
277
+
278
+ const unauthorizedKey = process.env.OPENWOP_TEST_UNAUTHORIZED_API_KEY;
279
+ if (!unauthorizedKey) {
280
+ // eslint-disable-next-line no-console
281
+ console.warn(
282
+ '[discovery: auth-scoped] OPENWOP_TEST_UNAUTHORIZED_API_KEY not supplied; skipping authorization-oracle probe',
283
+ );
284
+ return;
285
+ }
286
+
287
+ const mode = disc?.authScoped?.mode ?? 'same-endpoint';
288
+ const path =
289
+ mode === 'extension-endpoint'
290
+ ? disc?.authScoped?.endpointPath ?? '/v1/capabilities'
291
+ : '/.well-known/openwop';
292
+
293
+ // Primary key (env-default Authorization).
294
+ const primary = await driver.get(path);
295
+
296
+ // Unauthorized / lower-privilege key.
297
+ const unauthorized = await driver.get(path, {
298
+ authenticated: false,
299
+ headers: { Authorization: `Bearer ${unauthorizedKey}` },
300
+ });
301
+
302
+ if (unauthorized.status === 401 || unauthorized.status === 403) {
303
+ // Host rejected the unauthorized key outright — that's fine.
304
+ // The oracle probe is moot when the host refuses the bearer.
305
+ return;
306
+ }
307
+ expect(unauthorized.status).toBe(200);
308
+
309
+ const primaryCaps = Object.keys(
310
+ (primary.json as { capabilities?: Record<string, unknown> })?.capabilities ?? {},
311
+ );
312
+ const unauthorizedCaps = Object.keys(
313
+ (unauthorized.json as { capabilities?: Record<string, unknown> })?.capabilities ??
314
+ {},
315
+ );
316
+
317
+ // Spec annex line 69: "Hosts MUST NOT let scoped discovery become
318
+ // an authorization oracle. A caller should learn only about
319
+ // capabilities it is allowed to use." Operationalized as: the
320
+ // unauthorized view's capability keys MUST be a subset of the
321
+ // primary view's keys (no capabilities the unauthorized caller
322
+ // can use that the primary cannot).
323
+ const extras = unauthorizedCaps.filter((c) => !primaryCaps.includes(c));
324
+ expect(extras.length, driver.describe(
325
+ 'capabilities-change-detection.md §"Scoped capability views"',
326
+ 'unauthorized view MUST NOT expose capability keys absent from the primary (authorized) view',
327
+ )).toBe(0);
328
+ });
329
+ });
330
+
@@ -0,0 +1,71 @@
1
+ /**
2
+ * HTTP client SSRF-guard advertisement contract.
3
+ *
4
+ * Capability-gated: skips when the host does not advertise
5
+ * `capabilities.httpClient.supported = true`.
6
+ *
7
+ * Verifies that any host claiming an `httpClient` surface MUST advertise
8
+ * `ssrfGuard: true` and `maxResponseBodyBytes` — without these two,
9
+ * the host's "call any URL" node is a vector for both SSRF and DoS.
10
+ *
11
+ * The actual SSRF-rejection behavior is verified by the host's
12
+ * in-process test (`http-client.test.ts`). The conformance suite only
13
+ * asserts the advertisement shape — driving an SSRF rejection requires
14
+ * a deployment that doesn't set `OPENWOP_HTTP_ALLOW_PRIVATE=true`,
15
+ * which is the operator's choice, not the suite's.
16
+ *
17
+ * @see SECURITY/invariants.yaml id: http-client-ssrf-guard
18
+ * @see spec/v1/capabilities.md §`httpClient` (additive)
19
+ */
20
+
21
+ import { describe, it, expect } from 'vitest';
22
+ import { driver } from '../lib/driver.js';
23
+
24
/**
 * Report whether the host's discovery document sets
 * `capabilities.httpClient.supported` to the boolean `true`.
 */
async function isHttpClientSupported(): Promise<boolean> {
  const { json } = await driver.get('/.well-known/openwop');
  const doc = json as { capabilities?: { httpClient?: { supported?: boolean } } };
  return doc.capabilities?.httpClient?.supported === true;
}
30
+
31
describe('http-client-ssrf: capability advertisement contract', () => {
  // Advertisement-shape check only: the scenario reads the discovery
  // document and never drives an outbound HTTP call through the host.
  it('host advertising httpClient MUST declare ssrfGuard: true + maxResponseBodyBytes', async () => {
    if (!(await isHttpClientSupported())) {
      // eslint-disable-next-line no-console
      console.warn('[http-client-ssrf] host does not advertise httpClient; skipping');
      return;
    }
    const disco = await driver.get('/.well-known/openwop');
    const cap = (disco.json as {
      capabilities?: {
        httpClient?: {
          supported?: boolean;
          ssrfGuard?: boolean;
          maxResponseBodyBytes?: number;
          methods?: unknown;
        };
      };
    }).capabilities?.httpClient;

    // The assertion requires `true`, so the failure message says so
    // (it previously claimed only "MUST be a boolean").
    expect(cap?.supported, driver.describe(
      'capabilities.md §httpClient',
      'httpClient.supported MUST be true when the httpClient surface is advertised',
    )).toBe(true);

    expect(cap?.ssrfGuard, driver.describe(
      'SECURITY/threat-model-secret-leakage.md (SSRF probing analog)',
      'httpClient.ssrfGuard MUST be true — a host that lets any tenant POST a workflow with arbitrary URLs without SSRF protection enables blind probing of deployer-internal services',
    )).toBe(true);

    expect(typeof cap?.maxResponseBodyBytes, driver.describe(
      'capabilities.md §httpClient',
      'httpClient.maxResponseBodyBytes MUST be a number — a host that streams unbounded response bodies into variables is a DoS vector',
    )).toBe('number');
    // A failure here now reports which field and which rule was violated
    // instead of a bare "expected false to be true".
    expect((cap?.maxResponseBodyBytes ?? 0) > 0, driver.describe(
      'capabilities.md §httpClient',
      'httpClient.maxResponseBodyBytes MUST be a positive number',
    )).toBe(true);

    expect(Array.isArray(cap?.methods), driver.describe(
      'capabilities.md §httpClient',
      'httpClient.methods MUST be an array of supported HTTP methods',
    )).toBe(true);
  });
});
+ });
@@ -6,6 +6,12 @@
6
6
  * Uses the `conformance-idempotent` fixture. Server MUST have seeded
7
7
  * it. The fixture's `nonce` input has no side effect — it exists so
8
8
  * the conformance suite can vary the body without affecting behavior.
9
+ *
10
+ * @see spec/v1/idempotency.md §Layer 1
11
+ * @see spec/v1/rest-endpoints.md
12
+ * @see spec/v1/production-profile.md §"Retry and idempotency" (RFC 0009
13
+ * — this scenario satisfies the basic-idempotency predicate when
14
+ * the host advertises capabilities.production.supported: true)
9
15
  */
10
16
 
11
17
  import { describe, it, expect } from 'vitest';
@@ -18,6 +18,9 @@
18
18
  *
19
19
  * @see spec/v1/idempotency.md
20
20
  * @see spec/v1/scale-profiles.md §"Retry semantics"
21
+ * @see spec/v1/production-profile.md §"Retry and idempotency" (RFC 0009
22
+ * — this scenario satisfies the 24h retention + 5-retry predicate
23
+ * when the host advertises capabilities.production.supported: true)
21
24
  */
22
25
 
23
26
  import { describe, it, expect } from 'vitest';
@@ -13,21 +13,44 @@
13
13
  *
14
14
  * Two-level scenario:
15
15
  *
16
- * - **Direct fake-server probe** (always runs when collector started):
17
- * hits the in-process fake MCP server directly with initialize +
16
+ * - **Direct probe** (always runs when an MCP endpoint is configured):
17
+ * hits the configured MCP server directly with initialize +
18
18
  * tools/list + tools/call to verify its wire shape. Catches
19
- * regressions in our own test fixture.
19
+ * regressions in our own test fixture; doubles as the shape check
20
+ * against real reference servers when `OPENWOP_MCP_REAL_SERVER_URL`
21
+ * points at one.
20
22
  *
21
23
  * - **Host-mediated roundtrip** (runs when host advertises an MCP
22
24
  * fixture or roundtrip capability): starts a workflow run, observes
23
25
  * events, asserts tool-call envelope visibility. Skips otherwise.
24
26
  *
25
27
  * Operator contract:
26
- * `OPENWOP_MCP_FAKE_SERVER=true` on the suite side; configure the host
27
- * to use the printed fake-server URL as one of its MCP servers.
28
+ * - `OPENWOP_MCP_FAKE_SERVER=true` boots the in-process synthetic
29
+ * server at suite init. The direct probe asserts the echo tool's
30
+ * deterministic shape.
31
+ * - `OPENWOP_MCP_REAL_SERVER_URL=<base-url>` — points the direct
32
+ * probe at a real MCP server. Auto-detects the transport from the
33
+ * server's `Content-Type` response header:
34
+ * - `application/json` → single-JSON response, parsed as one
35
+ * JSON-RPC frame.
36
+ * - `text/event-stream` → streamable-http+SSE; the probe reads
37
+ * SSE frames until it finds one whose `data:` payload matches
38
+ * the JSON-RPC `id` we sent, then returns that frame.
39
+ * The stdio transport (default for `modelcontextprotocol/servers`
40
+ * reference servers) is still out of scope — those run as a child
41
+ * process speaking JSON-RPC over stdin/stdout, no HTTP endpoint to
42
+ * point env vars at. Operators wanting interop evidence against
43
+ * stdio servers run them under a `mcp-bridge` HTTP adapter.
44
+ * Assertions stay shape-only: tools/list returns ≥1 tool, a
45
+ * tools/call returns valid MCP content (a `result.content` array,
46
+ * possibly `isError: true` — both are spec-conformant).
47
+ *
48
+ * When both env vars are set, the real-server URL wins (it's the more
49
+ * meaningful evidence). When neither is set, the scenario soft-skips.
28
50
  *
29
51
  * @see spec/v1/mcp-integration.md
30
52
  * @see SECURITY/threat-model-prompt-injection.md
53
+ * @see docs/PROTOCOL-GAP-CLOSURE-PLAN.md Phase 3 T3.4
31
54
  */
32
55
 
33
56
  import { describe, it, expect } from 'vitest';
@@ -38,62 +61,203 @@ import { pollUntilTerminal } from '../lib/polling.js';
38
61
 
39
62
  const ROUNDTRIP_FIXTURE = 'conformance-mcp-tool-roundtrip';
40
63
 
64
/**
 * Read an SSE `text/event-stream` body until a frame's `data:` payload
 * is a JSON-RPC response with `id === wantId`, then return that frame's
 * parsed payload. Honors the MCP streamable-http transport's "single
 * POST may return one OR many SSE frames; correlate by id" pattern.
 *
 * Frames may be delimited by LF or CRLF blank lines. Frames without a
 * `data:` field, with non-JSON data, or with a different `id` are skipped.
 *
 * @param res - Response whose body is the SSE stream to consume.
 * @param wantId - JSON-RPC `id` of the response frame to wait for.
 * @param timeoutMs - Overall time budget; enforced even while a read is
 *   pending, so a server that holds the stream open cannot stall the caller.
 * @returns The parsed JSON payload of the first matching frame.
 * @throws Error when the response has no body, the stream ends before a
 *   matching frame arrives, or the time budget elapses.
 */
async function readSseUntilId(
  res: Response,
  wantId: number,
  timeoutMs = 5_000,
): Promise<Record<string, unknown>> {
  if (!res.body) throw new Error('SSE response has no body');
  const reader = res.body.getReader();
  const decoder = new TextDecoder('utf-8');
  const deadline = Date.now() + timeoutMs;
  // A blank line ends an event; accept LF and CRLF line endings.
  const frameSeparator = /\r?\n\r?\n/;
  let buffer = '';

  const timeoutError = (): Error =>
    new Error(`SSE stream timed out after ${timeoutMs}ms waiting for frame with id=${wantId}`);

  // Race each read against the remaining budget so a silent, still-open
  // stream cannot block past the deadline.
  const readBeforeDeadline = async () => {
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) throw timeoutError();
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expired = new Promise<never>((_resolve, reject) => {
      timer = setTimeout(() => reject(timeoutError()), remainingMs);
    });
    try {
      return await Promise.race([reader.read(), expired]);
    } finally {
      clearTimeout(timer);
    }
  };

  try {
    for (;;) {
      const { value, done } = await readBeforeDeadline();
      if (value) buffer += decoder.decode(value, { stream: true });

      let match: RegExpExecArray | null;
      while ((match = frameSeparator.exec(buffer)) !== null) {
        const block = buffer.slice(0, match.index);
        buffer = buffer.slice(match.index + match[0].length);

        // SSE permits multi-line data via repeated `data:` lines, joined by \n.
        const dataLines = block
          .split(/\r?\n/)
          .filter((line) => line.startsWith('data:'))
          .map((line) => line.slice(5).replace(/^ /, ''));
        if (dataLines.length === 0) continue;

        let parsed: unknown;
        try {
          parsed = JSON.parse(dataLines.join('\n'));
        } catch {
          // Skip malformed frames.
          continue;
        }
        if (
          typeof parsed === 'object' &&
          parsed !== null &&
          (parsed as Record<string, unknown>).id === wantId
        ) {
          return parsed as Record<string, unknown>;
        }
      }
      if (done) break;
    }
    throw new Error(`SSE stream closed before frame with id=${wantId} arrived`);
  } finally {
    // Drop the reader on every exit path: the server may keep the stream
    // open for unrelated notifications, and a timed-out read must not
    // keep the connection alive.
    void reader.cancel().catch(() => undefined);
  }
}
110
+
41
111
  async function postJsonRpc(
42
112
  endpoint: string,
43
113
  method: string,
44
114
  params: unknown,
45
115
  id: number,
46
- ): Promise<{ status: number; json: Record<string, unknown> }> {
47
- const res = await fetch(`${endpoint}/`, {
116
+ sessionId?: string,
117
+ ): Promise<{ status: number; json: Record<string, unknown>; sessionId: string | null }> {
118
+ // POST to `endpoint` verbatim — the trailing-slash decision is the
119
+ // caller's. The probe accepts both response shapes per MCP's
120
+ // streamable-http spec: a single JSON body OR an SSE stream that
121
+ // emits one-or-many JSON-RPC frames. Transport is auto-detected
122
+ // from Content-Type.
123
+ //
124
+ // Session-id threading: real MCP servers built on the official SDK
125
+ // assign a session id at `initialize` and require it on every
126
+ // subsequent call via `mcp-session-id`. The in-process fake doesn't
127
+ // enforce that, but real impls do — so the probe always echoes back
128
+ // any session header it receives from initialize.
129
+ const headers: Record<string, string> = {
130
+ 'Content-Type': 'application/json',
131
+ // MCP streamable-http servers SHOULD return `application/json`
132
+ // by default but MAY upgrade to SSE; advertise both as
133
+ // acceptable.
134
+ Accept: 'application/json, text/event-stream',
135
+ };
136
+ if (sessionId) headers['mcp-session-id'] = sessionId;
137
+ const res = await fetch(endpoint, {
48
138
  method: 'POST',
49
- headers: { 'Content-Type': 'application/json' },
139
+ headers,
50
140
  body: JSON.stringify({ jsonrpc: '2.0', id, method, params }),
51
141
  });
142
+ const returnedSessionId = res.headers.get('mcp-session-id');
143
+ const contentType = res.headers.get('content-type') ?? '';
144
+ if (contentType.includes('text/event-stream')) {
145
+ const json = await readSseUntilId(res, id);
146
+ return { status: res.status, json, sessionId: returnedSessionId };
147
+ }
52
148
  const text = await res.text();
53
- return { status: res.status, json: JSON.parse(text) as Record<string, unknown> };
149
+ return {
150
+ status: res.status,
151
+ json: JSON.parse(text) as Record<string, unknown>,
152
+ sessionId: returnedSessionId,
153
+ };
54
154
  }
55
155
 
56
- describe('mcp-tool-roundtrip: fake-server wire shape', () => {
57
- it('initialize + tools/list + tools/call echo round-trip cleanly', async () => {
58
- const server = getMcpFakeServer();
59
- if (!server) {
156
/**
 * Pick the MCP endpoint for the direct probe.
 *
 * A non-empty `OPENWOP_MCP_REAL_SERVER_URL` takes precedence (with a
 * single trailing slash removed); otherwise the in-process fake server
 * is used when one is running. Returns `null` when neither is available.
 */
function probeEndpoint(): { url: string; isReal: boolean } | null {
  const realUrl = process.env.OPENWOP_MCP_REAL_SERVER_URL;
  if (realUrl) {
    return { url: realUrl.replace(/\/$/, ''), isReal: true };
  }
  const fakeServer = getMcpFakeServer();
  return fakeServer ? { url: fakeServer.endpoint(), isReal: false } : null;
}
164
+
165
+ describe('mcp-tool-roundtrip: server wire shape', () => {
166
+ it('initialize + tools/list + tools/call round-trip per MCP JSON-RPC contract', async () => {
167
+ const probe = probeEndpoint();
168
+ if (!probe) {
60
169
  // eslint-disable-next-line no-console
61
170
  console.warn(
62
- '[mcp-tool-roundtrip] fake server not started; set OPENWOP_MCP_FAKE_SERVER=true',
171
+ '[mcp-tool-roundtrip] no MCP endpoint configured; set OPENWOP_MCP_FAKE_SERVER=true ' +
172
+ 'or OPENWOP_MCP_REAL_SERVER_URL=<base-url>',
63
173
  );
64
174
  return;
65
175
  }
66
- server.reset();
176
+ if (!probe.isReal) getMcpFakeServer()!.reset();
67
177
 
68
- const init = await postJsonRpc(server.endpoint(), 'initialize', {}, 1);
178
+ // Per MCP `initialize` spec, params MUST carry protocolVersion +
179
+ // capabilities + clientInfo. The in-process fake accepts empty
180
+ // params; real reference servers built on @modelcontextprotocol/sdk
181
+ // reject them with 400. Sending the canonical shape keeps the probe
182
+ // valid against both.
183
+ const init = await postJsonRpc(
184
+ probe.url,
185
+ 'initialize',
186
+ {
187
+ protocolVersion: '2024-11-05',
188
+ capabilities: {},
189
+ clientInfo: { name: 'openwop-conformance-probe', version: '1.0.0' },
190
+ },
191
+ 1,
192
+ );
69
193
  expect(init.status).toBe(200);
70
194
  const initResult = (init.json.result ?? {}) as { protocolVersion?: string };
71
195
  expect(typeof initResult.protocolVersion).toBe('string');
196
+ // Capture session id from initialize so real SDK-based servers can
197
+ // bind subsequent calls; fakes that don't set the header pass null
198
+ // through and the calls still succeed.
199
+ const sid = init.sessionId ?? undefined;
72
200
 
73
- const list = await postJsonRpc(server.endpoint(), 'tools/list', {}, 2);
201
+ const list = await postJsonRpc(probe.url, 'tools/list', {}, 2, sid);
74
202
  expect(list.status).toBe(200);
75
203
  const listResult = (list.json.result ?? {}) as {
76
204
  tools?: ReadonlyArray<{ name?: string }>;
77
205
  };
78
- expect(listResult.tools?.some((t) => t.name === 'echo')).toBe(true);
206
+ expect(Array.isArray(listResult.tools)).toBe(true);
207
+ expect((listResult.tools ?? []).length).toBeGreaterThan(0);
79
208
 
80
- const call = await postJsonRpc(
81
- server.endpoint(),
82
- 'tools/call',
83
- { name: 'echo', arguments: { text: 'hello-from-conformance' } },
84
- 3,
85
- );
86
- expect(call.status).toBe(200);
87
- const callResult = (call.json.result ?? {}) as {
88
- content?: ReadonlyArray<{ type?: string; text?: string }>;
89
- };
90
- expect(callResult.content?.[0]?.type).toBe('text');
91
- expect(callResult.content?.[0]?.text).toBe('hello-from-conformance');
209
+ if (probe.isReal) {
210
+ // Real-server interop evidence (Phase 3 T3.4). We can't assume a
211
+ // deterministic echo tool exists on every reference server, so the
212
+ // assertions stay shape-only:
213
+ // - tools/list returns ≥1 tool ✓ (above)
214
+ // - the first tool has a name + an input-schema-compatible shape
215
+ // - tools/call against that tool returns valid MCP content (array
216
+ // of {type, ...}). A failed call (e.g., bad arguments) still
217
+ // returns 200 with an `isError: true` content marker — both
218
+ // paths are spec-conformant; we assert SOME response.
219
+ const first = listResult.tools?.[0];
220
+ expect(typeof first?.name).toBe('string');
221
+ const callRes = await postJsonRpc(
222
+ probe.url,
223
+ 'tools/call',
224
+ { name: first!.name, arguments: {} },
225
+ 3,
226
+ sid,
227
+ );
228
+ expect(callRes.status).toBe(200);
229
+ const callResult = (callRes.json.result ?? {}) as {
230
+ content?: ReadonlyArray<{ type?: string }>;
231
+ isError?: boolean;
232
+ };
233
+ // Either valid content[] OR isError-marked content[] is acceptable.
234
+ expect(Array.isArray(callResult.content)).toBe(true);
235
+ // eslint-disable-next-line no-console
236
+ console.warn(
237
+ `[mcp-tool-roundtrip] real-server interop OK against ${probe.url} ` +
238
+ `(tool=${first?.name}, isError=${callResult.isError === true})`,
239
+ );
240
+ } else {
241
+ // Fake-server path: deterministic echo tool, assert verbatim.
242
+ expect(listResult.tools?.some((t) => t.name === 'echo')).toBe(true);
92
243
 
93
- // Invocation log captured.
94
- const invocations = server.invocations();
95
- const methods = invocations.map((i) => i.method);
96
- expect(methods).toEqual(['initialize', 'tools/list', 'tools/call']);
244
+ const call = await postJsonRpc(
245
+ probe.url,
246
+ 'tools/call',
247
+ { name: 'echo', arguments: { text: 'hello-from-conformance' } },
248
+ 3,
249
+ );
250
+ expect(call.status).toBe(200);
251
+ const callResult = (call.json.result ?? {}) as {
252
+ content?: ReadonlyArray<{ type?: string; text?: string }>;
253
+ };
254
+ expect(callResult.content?.[0]?.type).toBe('text');
255
+ expect(callResult.content?.[0]?.text).toBe('hello-from-conformance');
256
+
257
+ const fake = getMcpFakeServer()!;
258
+ const methods = fake.invocations().map((i) => i.method);
259
+ expect(methods).toEqual(['initialize', 'tools/list', 'tools/call']);
260
+ }
97
261
  });
98
262
  });
99
263
 
@@ -0,0 +1,66 @@
1
+ /**
2
+ * MCP-1 invariant: tool-call arguments + result content NEVER appear
3
+ * on emitted event payloads.
4
+ *
5
+ * Capability-gated: skips when the host does not advertise
6
+ * `capabilities.mcpClient.supported = true`.
7
+ *
8
+ * The test does NOT actually invoke an MCP tool (that requires the
9
+ * host to be wired to a real MCP server, which is deployment-specific
10
+ * and outside the conformance suite's environmental contract). What
11
+ * it verifies is the SHAPE of the host's mcpClient advertisement +
12
+ * the trust-boundary marker. The redaction invariant is then verified
13
+ * end-to-end by the host's own in-process test (`mcp-client.test.ts`)
14
+ * which DOES drive a fake MCP server and asserts no raw args/results
15
+ * appear on the sanitized summary.
16
+ *
17
+ * @see SECURITY/invariants.yaml id: mcp-toolcall-payload-redaction
18
+ * @see spec/v1/host-capabilities.md §host.mcp
19
+ * @see SECURITY/threat-model-prompt-injection.md §"UNTRUSTED marker"
20
+ */
21
+
22
+ import { describe, it, expect } from 'vitest';
23
+ import { driver } from '../lib/driver.js';
24
+
25
/**
 * Report whether the host's discovery document sets
 * `capabilities.mcpClient.supported` to the boolean `true`.
 */
async function isMcpClientSupported(): Promise<boolean> {
  const { json } = await driver.get('/.well-known/openwop');
  const doc = json as { capabilities?: { mcpClient?: { supported?: boolean } } };
  return doc.capabilities?.mcpClient?.supported === true;
}
31
+
32
describe('mcp-toolcall-redaction: capability advertisement contract', () => {
  // Advertisement-shape check only: the scenario reads the discovery
  // document and does not invoke an MCP tool.
  it('host advertising mcpClient MUST declare trustBoundary: "untrusted"', async () => {
    if (!(await isMcpClientSupported())) {
      // eslint-disable-next-line no-console
      console.warn('[mcp-toolcall-redaction] host does not advertise mcpClient; skipping');
      return;
    }
    const disco = await driver.get('/.well-known/openwop');
    const cap = (disco.json as {
      capabilities?: {
        mcpClient?: { supported?: boolean; transports?: unknown; trustBoundary?: string };
      };
    }).capabilities?.mcpClient;

    // The assertion requires `true`, so the failure message says so
    // (it previously claimed only "MUST be a boolean").
    expect(cap?.supported, driver.describe(
      'host-capabilities.md §host.mcp',
      'mcpClient.supported MUST be true when the mcpClient surface is advertised',
    )).toBe(true);

    expect(Array.isArray(cap?.transports), driver.describe(
      'host-capabilities.md §host.mcp',
      'mcpClient.transports MUST be an array of transport identifiers',
    )).toBe(true);

    // threat-model-prompt-injection.md §"UNTRUSTED marker": MCP tool
    // output is by spec untrusted (it can carry adversarial content).
    // Hosts advertising mcpClient MUST encode the boundary in the
    // capability so downstream consumers (LLM nodes) treat the
    // content accordingly.
    expect(cap?.trustBoundary, driver.describe(
      'SECURITY/threat-model-prompt-injection.md §"UNTRUSTED marker"',
      'mcpClient.trustBoundary MUST be "untrusted" — downstream LLM nodes treat tool content as user data',
    )).toBe('untrusted');
  });
});
+ });