@openwop/openwop-conformance 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +241 -0
  3. package/api/asyncapi.yaml +481 -0
  4. package/api/openapi.yaml +830 -0
  5. package/api/redocly.yaml +8 -0
  6. package/coverage.md +80 -0
  7. package/dist/cli.js +161 -0
  8. package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
  9. package/fixtures/conformance-agent-identity.json +27 -0
  10. package/fixtures/conformance-agent-low-confidence.json +29 -0
  11. package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
  12. package/fixtures/conformance-agent-memory-redaction.json +32 -0
  13. package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
  14. package/fixtures/conformance-agent-memory-ttl.json +31 -0
  15. package/fixtures/conformance-agent-pack-export.json +26 -0
  16. package/fixtures/conformance-agent-pack-install.json +26 -0
  17. package/fixtures/conformance-agent-pack-provenance.json +31 -0
  18. package/fixtures/conformance-agent-reasoning.json +29 -0
  19. package/fixtures/conformance-approval.json +27 -0
  20. package/fixtures/conformance-cancellable.json +33 -0
  21. package/fixtures/conformance-cap-breach.json +27 -0
  22. package/fixtures/conformance-capability-missing.json +23 -0
  23. package/fixtures/conformance-channel-ttl.json +60 -0
  24. package/fixtures/conformance-clarification.json +30 -0
  25. package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
  26. package/fixtures/conformance-conversation-lifecycle.json +32 -0
  27. package/fixtures/conformance-conversation-replay.json +33 -0
  28. package/fixtures/conformance-conversation-vs-clarification.json +26 -0
  29. package/fixtures/conformance-delay.json +33 -0
  30. package/fixtures/conformance-dispatch-loop.json +38 -0
  31. package/fixtures/conformance-failure.json +23 -0
  32. package/fixtures/conformance-idempotent.json +30 -0
  33. package/fixtures/conformance-identity.json +32 -0
  34. package/fixtures/conformance-interrupt-auth-required.json +28 -0
  35. package/fixtures/conformance-interrupt-external-event.json +33 -0
  36. package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
  37. package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
  38. package/fixtures/conformance-interrupt-quorum.json +30 -0
  39. package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
  40. package/fixtures/conformance-message-reducer.json +31 -0
  41. package/fixtures/conformance-multi-node.json +21 -0
  42. package/fixtures/conformance-noop.json +23 -0
  43. package/fixtures/conformance-orchestrator-dispatch.json +47 -0
  44. package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
  45. package/fixtures/conformance-orchestrator-terminate.json +44 -0
  46. package/fixtures/conformance-stream-text.json +26 -0
  47. package/fixtures/conformance-subworkflow-child.json +21 -0
  48. package/fixtures/conformance-subworkflow-parent.json +49 -0
  49. package/fixtures/conformance-version-fold.json +23 -0
  50. package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
  51. package/fixtures/pack-manifests/pack-private-example.json +26 -0
  52. package/fixtures.md +404 -0
  53. package/package.json +48 -0
  54. package/schemas/README.md +75 -0
  55. package/schemas/agent-manifest.schema.json +107 -0
  56. package/schemas/agent-ref.schema.json +53 -0
  57. package/schemas/capabilities.schema.json +287 -0
  58. package/schemas/channel-written-payload.schema.json +55 -0
  59. package/schemas/conversation-event.schema.json +120 -0
  60. package/schemas/conversation-turn.schema.json +72 -0
  61. package/schemas/debug-bundle.schema.json +196 -0
  62. package/schemas/dispatch-config.schema.json +46 -0
  63. package/schemas/error-envelope.schema.json +25 -0
  64. package/schemas/memory-entry.schema.json +36 -0
  65. package/schemas/memory-list-options.schema.json +21 -0
  66. package/schemas/node-pack-manifest.schema.json +235 -0
  67. package/schemas/orchestrator-decision.schema.json +60 -0
  68. package/schemas/run-event-payloads.schema.json +663 -0
  69. package/schemas/run-event.schema.json +116 -0
  70. package/schemas/run-options.schema.json +81 -0
  71. package/schemas/run-orchestrator-decided-event.schema.json +20 -0
  72. package/schemas/run-snapshot.schema.json +121 -0
  73. package/schemas/suspend-request.schema.json +182 -0
  74. package/schemas/workflow-definition.schema.json +430 -0
  75. package/src/cli.ts +187 -0
  76. package/src/lib/a2a-fake-peer.ts +233 -0
  77. package/src/lib/canaries.ts +186 -0
  78. package/src/lib/driver.ts +96 -0
  79. package/src/lib/env.ts +49 -0
  80. package/src/lib/fixtures.ts +93 -0
  81. package/src/lib/mcp-fake-server.ts +185 -0
  82. package/src/lib/multi-agent-capabilities.ts +155 -0
  83. package/src/lib/multiProcess.ts +141 -0
  84. package/src/lib/otel-collector.ts +312 -0
  85. package/src/lib/paths.ts +198 -0
  86. package/src/lib/polling.ts +81 -0
  87. package/src/lib/profiles.ts +258 -0
  88. package/src/lib/sse.ts +172 -0
  89. package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
  90. package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
  91. package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
  92. package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
  93. package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
  94. package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
  95. package/src/scenarios/agentMessageReducer.test.ts +57 -0
  96. package/src/scenarios/agentMetadata.test.ts +56 -0
  97. package/src/scenarios/agentPackExport.test.ts +45 -0
  98. package/src/scenarios/agentPackInstall.test.ts +50 -0
  99. package/src/scenarios/agentPackProvenance.test.ts +53 -0
  100. package/src/scenarios/agentReasoningEvents.test.ts +72 -0
  101. package/src/scenarios/append-ordering.test.ts +91 -0
  102. package/src/scenarios/approval-payload.test.ts +120 -0
  103. package/src/scenarios/audit-log-integrity.test.ts +106 -0
  104. package/src/scenarios/auth.test.ts +55 -0
  105. package/src/scenarios/byok-roundtrip.test.ts +166 -0
  106. package/src/scenarios/cancellation.test.ts +68 -0
  107. package/src/scenarios/cap-breach.test.ts +149 -0
  108. package/src/scenarios/channel-ttl.test.ts +70 -0
  109. package/src/scenarios/configurable-schema.test.ts +76 -0
  110. package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
  111. package/src/scenarios/conversationLifecycle.test.ts +64 -0
  112. package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
  113. package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
  114. package/src/scenarios/cost-attribution.test.ts +207 -0
  115. package/src/scenarios/debugBundle.test.ts +222 -0
  116. package/src/scenarios/discovery.test.ts +147 -0
  117. package/src/scenarios/dispatchLoop.test.ts +52 -0
  118. package/src/scenarios/errors.test.ts +144 -0
  119. package/src/scenarios/eventOrdering.test.ts +144 -0
  120. package/src/scenarios/failure-path.test.ts +46 -0
  121. package/src/scenarios/fixtures-gating.test.ts +137 -0
  122. package/src/scenarios/fixtures-valid.test.ts +140 -0
  123. package/src/scenarios/highConcurrency.test.ts +263 -0
  124. package/src/scenarios/idempotency.test.ts +83 -0
  125. package/src/scenarios/idempotencyRetry.test.ts +130 -0
  126. package/src/scenarios/identity-passthrough.test.ts +54 -0
  127. package/src/scenarios/interrupt-approval.test.ts +97 -0
  128. package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
  129. package/src/scenarios/interrupt-clarification.test.ts +45 -0
  130. package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
  131. package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
  132. package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
  133. package/src/scenarios/interruptRace.test.ts +176 -0
  134. package/src/scenarios/maliciousManifest.test.ts +154 -0
  135. package/src/scenarios/mcp-discoverability.test.ts +129 -0
  136. package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
  137. package/src/scenarios/multi-node-ordering.test.ts +60 -0
  138. package/src/scenarios/multi-region-idempotency.test.ts +52 -0
  139. package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
  140. package/src/scenarios/orchestratorDispatch.test.ts +66 -0
  141. package/src/scenarios/orchestratorTermination.test.ts +54 -0
  142. package/src/scenarios/otel-emission.test.ts +113 -0
  143. package/src/scenarios/otel-trace-propagation.test.ts +90 -0
  144. package/src/scenarios/pack-registry-publish.test.ts +93 -0
  145. package/src/scenarios/pack-registry.test.ts +328 -0
  146. package/src/scenarios/pause-resume.test.ts +109 -0
  147. package/src/scenarios/policies.test.ts +162 -0
  148. package/src/scenarios/profileDerivation.test.ts +335 -0
  149. package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
  150. package/src/scenarios/rate-limit-envelope.test.ts +97 -0
  151. package/src/scenarios/redaction.test.ts +254 -0
  152. package/src/scenarios/redactionAdversarial.test.ts +162 -0
  153. package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
  154. package/src/scenarios/replay-fork.test.ts +216 -0
  155. package/src/scenarios/replayDeterminism.test.ts +171 -0
  156. package/src/scenarios/route-coverage.test.ts +129 -0
  157. package/src/scenarios/runs-lifecycle.test.ts +65 -0
  158. package/src/scenarios/runtime-capabilities.test.ts +118 -0
  159. package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
  160. package/src/scenarios/staleClaim.test.ts +223 -0
  161. package/src/scenarios/stream-modes-buffer.test.ts +148 -0
  162. package/src/scenarios/stream-modes-mixed.test.ts +149 -0
  163. package/src/scenarios/stream-modes.test.ts +139 -0
  164. package/src/scenarios/streamReconnect.test.ts +162 -0
  165. package/src/scenarios/subworkflow.test.ts +126 -0
  166. package/src/scenarios/version-negotiation.test.ts +157 -0
  167. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
  168. package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
  169. package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
  170. package/src/scenarios/wasm-pack-load.test.ts +75 -0
  171. package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
  172. package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
  173. package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
  174. package/src/setup.ts +173 -0
  175. package/vitest.config.ts +17 -0
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Provider-policy enforcement scenarios — extends `policies.test.ts`
3
+ * (which covers discovery-shape only) with denial-error-shape contracts
4
+ * for hosts that advertise enforcement.
5
+ *
6
+ * Why discovery-shape vs full enforcement:
7
+ *
8
+ * Real enforcement requires a configured policy document AND a
9
+ * working AI provider invocation, AND admin write access to set the
10
+ * policy under test. None of those are black-box reproducible. The
11
+ * conformance suite gates on the wire shape of denial responses +
12
+ * SECURITY/invariants.yaml entries.
13
+ *
14
+ * Profile gating:
15
+ *
16
+ * - Hosts that don't advertise `aiProviders.policies` are skip-equivalent
17
+ * (no policy enforcement to verify).
18
+ * - Hosts that advertise it MUST honor the documented denial reason
19
+ * enum + the closed mode set per spec/v1/capabilities.md
20
+ * §"`aiProviders.policies`".
21
+ *
22
+ * Cross-references SECURITY/threat-model-provider-policy.md invariants
23
+ * `provider-policy-pre-dispatch` · `provider-policy-disabled-hard` ·
24
+ * `provider-policy-restricted-glob` · `provider-policy-restricted-fail-closed`.
25
+ *
26
+ * @see spec/v1/capabilities.md §"`aiProviders.policies`"
27
+ * @see SECURITY/threat-model-provider-policy.md
28
+ * @see SECURITY/invariants.yaml — provider-policy-* entries
29
+ */
30
+
31
+ import { describe, it, expect } from 'vitest';
32
+ import { driver } from '../lib/driver.js';
33
+
/**
 * The closed set of policy modes this suite accepts in
 * `aiProviders.policies.modes`. Declared `as const` so the entries keep
 * their literal types.
 */
const CANONICAL_MODES = ['disabled', 'optional', 'required', 'restricted'] as const;

/** Documented denial-reason enum from spec/v1/capabilities.md. */
const DOCUMENTED_DENIAL_REASONS = [
  'provider_disabled',
  'byok_required',
  'byok_required_but_unresolved',
  'model_not_allowed',
] as const;

/**
 * Untrusted view of the `aiProviders.policies` advertisement. Every field
 * is `unknown` because the scenarios below are what validate its shape.
 */
interface PoliciesShape {
  errorCode?: unknown;
  modes?: unknown;
  scopes?: unknown;
}
49
+
/**
 * Fetches the unauthenticated discovery document and extracts the
 * `aiProviders.policies` advertisement.
 *
 * @returns The advertised policies object, or `null` when the discovery
 *   request does not answer 200, the parsed body is absent, or the host
 *   does not advertise `aiProviders.policies`. Callers in this file treat
 *   `null` as "nothing to verify" and return early.
 */
async function fetchPolicies(): Promise<PoliciesShape | null> {
  const res = await driver.get('/.well-known/openwop', { authenticated: false });
  if (res.status !== 200) return null;

  // NOTE(review): `res.json` is presumably unset when the body is empty or
  // not JSON — confirm against lib/driver. Optional chaining keeps that
  // case on the `null` (skip) path instead of throwing a TypeError.
  const body = res.json as
    | { aiProviders?: { policies?: PoliciesShape } }
    | null
    | undefined;
  return body?.aiProviders?.policies ?? null;
}
56
+
describe('provider-policy-enforcement: closed mode set per spec/v1/capabilities.md §`aiProviders.policies`', () => {
  it('every advertised mode is one of the four canonical values', async () => {
    const advertised = await fetchPolicies();
    // No advertisement, or no `modes` array: nothing to verify.
    if (advertised === null) return;
    const advertisedModes = advertised.modes;
    if (!Array.isArray(advertisedModes)) return;

    // Widen the literal tuple so `includes` accepts an arbitrary string.
    const canonical: readonly string[] = CANONICAL_MODES;

    for (const mode of advertisedModes) {
      expect(typeof mode, driver.describe(
        'capabilities.md §"`aiProviders.policies`"',
        'each entry in policies.modes MUST be a string',
      )).toBe('string');

      const isCanonical = canonical.includes(mode as string);
      expect(isCanonical, driver.describe(
        'capabilities.md §"`aiProviders.policies`"',
        `mode "${String(mode)}" is not in the closed canonical set [${CANONICAL_MODES.join(', ')}]`,
      )).toBe(true);
    }
  });

  it('hosts that support `restricted` MUST also support `optional` (default no-restriction case)', async () => {
    const advertised = await fetchPolicies();
    if (advertised === null || !Array.isArray(advertised.modes)) return;

    const advertisedModes = advertised.modes as string[];
    // The rule only binds hosts that advertise `restricted`.
    if (!advertisedModes.includes('restricted')) return;

    const hasOptional = advertisedModes.includes('optional');
    expect(hasOptional, driver.describe(
      'spec/v1/profiles.md §`openwop-provider-policy`',
      'a host advertising `restricted` MUST also advertise `optional` so workflows without policy hit the default permissive case',
    )).toBe(true);
  });

  it('errorCode is a non-empty string when present', async () => {
    const advertised = await fetchPolicies();
    if (advertised === null) return;

    const { errorCode } = advertised;
    if (errorCode === undefined) return;

    expect(typeof errorCode, driver.describe(
      'capabilities.md §"`aiProviders.policies`"',
      'aiProviders.policies.errorCode MUST be a string when present',
    )).toBe('string');

    // Reached only when the type assertion above passed.
    const codeLength = (errorCode as string).length;
    expect(codeLength, driver.describe(
      'capabilities.md §"`aiProviders.policies`"',
      'aiProviders.policies.errorCode MUST be non-empty when present',
    )).toBeGreaterThan(0);
  });
});
101
+
describe('provider-policy-enforcement: scope advertisement', () => {
  it('scopes contains only non-empty strings when present', async () => {
    const advertised = await fetchPolicies();
    // A missing advertisement and a missing/non-array `scopes` both mean
    // there is nothing to verify.
    const advertisedScopes = advertised?.scopes;
    if (!Array.isArray(advertisedScopes)) return;

    for (const entry of advertisedScopes) {
      const isNonEmptyString = typeof entry === 'string' && entry.length > 0;
      expect(isNonEmptyString, driver.describe(
        'capabilities.md §"`aiProviders.policies`"',
        'each entry in policies.scopes MUST be a non-empty string',
      )).toBe(true);
    }
  });
});
116
+
describe('provider-policy-enforcement: documented denial reasons enumeration', () => {
  it('lists are non-empty (sanity check on documentation drift)', () => {
    // Self-test on this file's own constants; no request is made. If
    // CANONICAL_MODES stops having four entries, or
    // DOCUMENTED_DENIAL_REASONS becomes empty, the file was probably
    // truncated or edited without updating the documented sets.
    const modeCount = CANONICAL_MODES.length;
    const denialReasonCount = DOCUMENTED_DENIAL_REASONS.length;

    expect(modeCount, driver.describe(
      'spec/v1/capabilities.md §"`aiProviders.policies`"',
      'closed mode set MUST be the four canonical values',
    )).toBe(4);

    expect(denialReasonCount, driver.describe(
      'openwop/openwop@0bebfb0 — denial-reason enum alignment',
      'documented denial-reason set is non-empty',
    )).toBeGreaterThan(0);
  });
});
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Track 13: normative 429 envelope shape (rest-endpoints.md v1.1).
3
+ *
4
+ * Verifies that any 429 response produced by the host conforms to the
5
+ * canonical envelope shape and supplemental `details` keys.
6
+ *
7
+ * Verifies:
8
+ * 1. `error === 'rate_limited'`.
9
+ * 2. `Retry-After` header present and integer-seconds.
10
+ * 3. When `details.retryAfterMs` is present, it is consistent with
11
+ * `Retry-After`.
12
+ * 4. `details.scope` is one of {"tenant", "route", "global", "key"}.
13
+ * 5. No top-level keys outside `{error, message, details}`.
14
+ *
15
+ * Soft-skip behavior: hosts that don't surface 429 during the test
16
+ * window (low traffic) emit a warning rather than fail. To exercise this
17
+ * scenario the conformance harness MAY set OPENWOP_FORCE_RATE_LIMIT=true
18
+ * which signals the host to fabricate a 429 against a test-only key.
19
+ *
20
+ * @see spec/v1/rest-endpoints.md §"429 Too Many Requests envelope"
21
+ */
22
+
23
+ import { describe, it, expect } from 'vitest';
24
+ import { driver } from '../lib/driver.js';
25
+
/** Values accepted for `details.scope` in a 429 envelope. */
const ALLOWED_SCOPES = new Set(['tenant', 'route', 'global', 'key']);

/**
 * True when OPENWOP_FORCE_RATE_LIMIT is exactly 'true'. In this file it
 * only enlarges the request burst from 50 to 200.
 */
const FORCE = process.env.OPENWOP_FORCE_RATE_LIMIT === 'true';

/**
 * Maximum allowed gap, in seconds, between `details.retryAfterMs / 1000`
 * and the `Retry-After` header.
 *
 * NOTE(review): the failure message used to quote "±1s" while the
 * assertion allowed 1.5s. The enforced bound is kept and the message now
 * reports it — confirm which bound the spec intends.
 */
const RETRY_AFTER_TOLERANCE_SEC = 1.5;

describe('rate-limit-envelope: 429 conforms to canonical shape', () => {
  it('when a 429 is observed, the response body satisfies the v1.1 contract', async () => {
    // Drive a burst to provoke a 429 against a benign endpoint. If the host
    // is generous this will not trip — that is acceptable; the test is
    // observational and skips its assertions when no 429 occurs.
    const burstSize = FORCE ? 200 : 50;
    let last: Awaited<ReturnType<typeof driver.get>> | null = null;
    for (let i = 0; i < burstSize; i++) {
      last = await driver.get('/.well-known/openwop');
      if (last.status === 429) break;
    }

    if (!last || last.status !== 429) {
      // eslint-disable-next-line no-console
      console.warn(
        '[rate-limit-envelope] no 429 observed within burst; skipping shape assertions',
      );
      return;
    }

    // ── Retry-After header: present and integer seconds ──────────────
    const retryAfter = last.headers.get('retry-after');
    expect(retryAfter, driver.describe(
      'rest-endpoints.md §"429 Too Many Requests envelope"',
      '429 response MUST set Retry-After header',
    )).not.toBeNull();
    // This file's header documents Retry-After as integer seconds, so an
    // HTTP-date or fractional value is rejected here.
    expect(/^\d+$/.test((retryAfter ?? '').trim()), driver.describe(
      'rest-endpoints.md §"429 Too Many Requests envelope"',
      'Retry-After header MUST be a non-negative integer number of seconds',
    )).toBe(true);

    // ── Body must be a JSON object before its fields are inspected ────
    // Without this guard a non-JSON 429 fails with an opaque TypeError
    // rather than a contract violation.
    const rawBody: unknown = last.json;
    expect(
      typeof rawBody === 'object' && rawBody !== null && !Array.isArray(rawBody),
      driver.describe(
        'rest-endpoints.md §"429 Too Many Requests envelope"',
        '429 body MUST be a JSON object',
      ),
    ).toBe(true);

    const body = rawBody as {
      error?: string;
      message?: string;
      details?: { retryAfterMs?: number; scope?: string; limit?: number; observedRate?: number };
      [k: string]: unknown;
    };

    expect(body.error, driver.describe(
      'rest-endpoints.md §"429 Too Many Requests envelope"',
      "429 body MUST carry error === 'rate_limited'",
    )).toBe('rate_limited');
    expect(typeof body.message, driver.describe(
      'rest-endpoints.md §"429 Too Many Requests envelope"',
      '429 body MUST carry a string message',
    )).toBe('string');

    // additionalProperties: false on error-envelope.schema.json
    const topKeys = Object.keys(body).filter(
      (k) => !['error', 'message', 'details'].includes(k),
    );
    expect(topKeys, driver.describe(
      'error-envelope.schema.json additionalProperties: false',
      '429 envelope MUST NOT carry top-level keys outside {error, message, details}',
    )).toEqual([]);

    if (body.details?.scope !== undefined) {
      expect(ALLOWED_SCOPES.has(body.details.scope), driver.describe(
        'rest-endpoints.md §"429 Too Many Requests envelope"',
        "details.scope MUST be one of {'tenant','route','global','key'} when present",
      )).toBe(true);
    }

    // Cross-check only when both values are usable: retryAfterMs is
    // present and the header parses to a positive number.
    const retryAfterSec = Number(retryAfter);
    if (
      body.details?.retryAfterMs !== undefined &&
      !Number.isNaN(retryAfterSec) &&
      retryAfterSec > 0
    ) {
      const diff = Math.abs(body.details.retryAfterMs / 1000 - retryAfterSec);
      expect(diff, driver.describe(
        'rest-endpoints.md §"429 Too Many Requests envelope"',
        `details.retryAfterMs MUST be consistent with Retry-After header (within ${RETRY_AFTER_TOLERANCE_SEC}s)`,
      )).toBeLessThanOrEqual(RETRY_AFTER_TOLERANCE_SEC);
    }
  });
});
@@ -0,0 +1,254 @@
1
+ /**
2
+ * Redaction conformance scenarios — `capabilities.md` §"Secrets" + NFR-7.
3
+ *
4
+ * These are vendor-neutral assertions that any OpenWOP-compliant server
5
+ * MUST NOT leak secret material in observable surfaces. The scenarios
6
+ * gate cleanly on the host's advertised capabilities:
7
+ *
8
+ * - **Discovery shape contract** runs against every host. It verifies
9
+ * `secrets` and `aiProviders` advertisements are well-formed
10
+ * regardless of whether the host supports BYOK.
11
+ *
12
+ * - **Bearer-token redaction** runs against every host. The 401
13
+ * response when an invalid Bearer token is supplied MUST NOT
14
+ * echo the token back. This is universal — applies even to hosts
15
+ * that don't advertise `secrets.supported: true`.
16
+ *
17
+ * - **credentialRef echo control** runs ONLY when the host advertises
18
+ * `secrets.supported: true`. Per `capabilities.md` §"aiProviders":
19
+ * `RunOptions.configurable.ai.credentialRef` is opaque + host-
20
+ * resolved; servers MUST NOT include the value in any RunEvent,
21
+ * log line, span attribute, error message, or export. The scenario
22
+ * plants a canary as `credentialRef` on a noop run and asserts
23
+ * the canary doesn't appear in any event payload.
24
+ *
25
+ * **Why these scenarios live here, not just in-tree:**
26
+ *
27
+ * Spec rule NFR-7 is normative: "any code path that emits a `RunEvent`
28
+ * / OTel span / log line / error / export MUST NOT contain raw key
29
+ * material." The reference implementation has its own in-process
30
+ * canary harness (which can mock + intercept logger output). But other
31
+ * OpenWOP-compliant servers — including non-reference implementations — need to
32
+ * verify the same invariant black-box, against their HTTP surface.
33
+ * That's what these scenarios cover.
34
+ *
35
+ * **Limitations:**
36
+ *
37
+ * The conformance suite only sees what the HTTP surface emits — it
38
+ * can't read a host's stdout / Cloud Logging / OTel collector.
39
+ * Hosts MUST run their own internal redaction tests (mocking the
40
+ * logger / tracer / etc.) to cover those surfaces. These scenarios
41
+ * cover only the response-body + run-event-stream surfaces, which are
42
+ * the cross-implementation interop contract.
43
+ *
44
+ * @see capabilities.md §"Secrets" + §"aiProviders"
45
+ * @see lib/canaries.ts — canary fixtures + detector
46
+ */
47
+
48
+ import { describe, it, expect } from 'vitest';
49
+ import { driver } from '../lib/driver.js';
50
+ import {
51
+ CANARIES,
52
+ CANARY_MARKER,
53
+ assertNoCanaryLeak,
54
+ captureToText,
55
+ getCanary,
56
+ } from '../lib/canaries.js';
57
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
58
+
/** Workflow id of the no-op conformance fixture used by the run-based scenarios. */
const NOOP_WORKFLOW_ID = 'conformance-noop';

/**
 * True when `isFixtureAdvertised` reports the no-op fixture as absent;
 * passed to `describe.skipIf` to skip the scenarios that need it.
 */
const SKIP_NO_NOOP = !isFixtureAdvertised(NOOP_WORKFLOW_ID);
61
+
62
+ // ─── Discovery shape contract (always runs) ───────────────────────────
63
+
describe('redaction: /.well-known/openwop secrets+aiProviders shape contract', () => {
  it('secrets is well-formed regardless of supported value', async () => {
    const res = await driver.get('/.well-known/openwop', { authenticated: false });
    expect(res.status).toBe(200);

    const body = res.json as { secrets?: unknown } | undefined;
    const secrets = body?.secrets;

    if (secrets === undefined) {
      // Optional v1 field — hosts MAY omit. Spec-allowed; nothing to assert.
      return;
    }

    // Fail with a contract message rather than a TypeError when the host
    // sends `secrets: null` or a primitive.
    expect(typeof secrets === 'object' && secrets !== null, driver.describe(
      'capabilities.md §"Secrets"',
      'secrets MUST be an object when present',
    )).toBe(true);

    // Per capabilities.schema.json: `secrets.supported` is REQUIRED
    // when secrets is present.
    const s = secrets as {
      supported?: unknown;
      scopes?: unknown;
      resolution?: unknown;
    };
    expect(typeof s.supported, driver.describe(
      'capabilities.md §"Secrets"',
      'secrets.supported MUST be a boolean',
    )).toBe('boolean');

    // When `supported === true`, scopes MUST be a non-empty array
    // (a host claiming secrets must declare at least one scope) AND
    // resolution MUST be 'host-managed' (only allowed value in v1.x).
    if (s.supported === true) {
      expect(Array.isArray(s.scopes), driver.describe(
        'capabilities.md §"Secrets"',
        'when secrets.supported is true, scopes MUST be a string[]',
      )).toBe(true);
      const scopes = s.scopes as unknown[];
      expect(scopes.length, driver.describe(
        'capabilities.md §"Secrets"',
        'when secrets.supported is true, scopes MUST be non-empty',
      )).toBeGreaterThanOrEqual(1);
      for (const scope of scopes) {
        expect(['tenant', 'user', 'run'], driver.describe(
          'capabilities.md §"Secrets"',
          `secrets.scopes entry "${String(scope)}" MUST be one of tenant, user, run`,
        )).toContain(scope);
      }
      expect(s.resolution, driver.describe(
        'capabilities.md §"Secrets"',
        'resolution MUST be "host-managed" in v1.x',
      )).toBe('host-managed');
    }
  });

  it('aiProviders is well-formed when present (byok ⊆ supported)', async () => {
    const res = await driver.get('/.well-known/openwop', { authenticated: false });
    // Same precondition as the secrets test above: without it, an error
    // response with no `aiProviders` key would pass silently.
    expect(res.status).toBe(200);

    const body = res.json as { aiProviders?: unknown } | undefined;
    const ap = body?.aiProviders as
      | { supported?: unknown; byok?: unknown }
      | undefined;

    if (ap === undefined) return; // Optional v1 field — hosts MAY omit.

    // The subset check needs `supported`; when it is omitted nothing is
    // asserted about `byok`, which matches the previous behavior.
    if (ap.supported === undefined) return;

    expect(Array.isArray(ap.supported), driver.describe(
      'capabilities.md §"aiProviders"',
      'aiProviders.supported MUST be an array when present',
    )).toBe(true);
    const supportedSet = new Set(ap.supported as string[]);

    if (ap.byok === undefined) return;

    expect(Array.isArray(ap.byok), driver.describe(
      'capabilities.md §"aiProviders"',
      'aiProviders.byok MUST be an array when present',
    )).toBe(true);
    for (const id of ap.byok as string[]) {
      expect(supportedSet.has(id), driver.describe(
        'capabilities.md §"aiProviders"',
        `byok entry "${id}" MUST appear in aiProviders.supported`,
      )).toBe(true);
    }
  });
});
139
+
140
+ // ─── Bearer-token redaction (always runs) ─────────────────────────────
141
+
describe('redaction: 401 response MUST NOT echo invalid Bearer token (NFR-7)', () => {
  // [label, value] rows for the two canaries exercised as bearer tokens.
  const bearerCanaryRows = CANARIES
    .filter((c) => c.label === 'jwt-bearer' || c.label === 'byok-credential-ref')
    .map((c) => [c.label, c.value]);

  it.each(bearerCanaryRows)(
    'canary "%s" planted in Authorization header is not echoed in 401 body',
    async (_label, canaryValue) => {
      // Hit the canonical authenticated route with a bearer the server
      // cannot validate. Per NFR-7 error envelopes are observable
      // surfaces too, so the response must not echo the canary.
      const runRequest = { workflowId: NOOP_WORKFLOW_ID, tenantId: 'conformance-tenant' };
      const requestOptions = {
        authenticated: false,
        headers: { Authorization: `Bearer ${canaryValue}` },
      };
      const res = await driver.post('/v1/runs', runRequest, requestOptions);

      // The status is deliberately not asserted — hosts have different
      // auth models (Firebase JWT, API-key, etc.). The one assertion is
      // that no canary appears in the response.
      const responseText = captureToText(res.json) + '\n' + res.text;
      assertNoCanaryLeak(responseText, 'invalid-bearer-401-body');
    },
  );

  it('the marker substring alone never appears in a 401 body (universal)', async () => {
    const runRequest = { workflowId: NOOP_WORKFLOW_ID, tenantId: 'conformance-tenant' };
    const requestOptions = {
      authenticated: false,
      headers: { Authorization: `Bearer ${CANARY_MARKER}-direct-marker` },
    };
    const res = await driver.post('/v1/runs', runRequest, requestOptions);

    const responseText = captureToText(res.json) + '\n' + res.text;
    expect(responseText).not.toContain(CANARY_MARKER);
  });
});
180
+
181
+ // ─── credentialRef echo control (gated on secrets.supported) ──────────
182
+
describe.skipIf(SKIP_NO_NOOP)('redaction: credentialRef value MUST NOT appear in event payloads (gated on secrets.supported)', () => {
  // NOTE(review): despite its title, this test also carries the real
  // assertion path for hosts that DO advertise secrets.supported. The
  // title is kept unchanged so existing reports stay comparable.
  it('skips when host does NOT advertise secrets.supported', async () => {
    const cap = await driver.get('/.well-known/openwop', { authenticated: false });
    const supported =
      (cap.json as { secrets?: { supported?: boolean } } | undefined)?.secrets
        ?.supported ?? false;

    if (supported !== true) {
      // Spec-allowed — this scenario only applies to hosts that opt
      // into BYOK. Pass trivially.
      expect(supported).not.toBe(true);
      return;
    }

    // Real assertion path: plant a canary as credentialRef on a noop
    // run, then poll its events and assert no event payload contains the
    // canary. The credentialRef is allowed to round-trip via
    // `RunSnapshot.configurable` (per run-options.md §configurable echo)
    // — but per capabilities.md §"aiProviders" it MUST NOT appear in any
    // RunEvent payload.
    const c = getCanary('byok-credential-ref');
    const create = await driver.post('/v1/runs', {
      workflowId: NOOP_WORKFLOW_ID,
      tenantId: 'conformance-tenant',
      configurable: { ai: { credentialRef: c.value } },
    });
    if (create.status !== 201) {
      // Auth-required hosts may 401 here without an API key. The
      // conformance suite is expected to provide OPENWOP_API_KEY for the
      // full scenario; if the key is missing or invalid, the suite's
      // earlier auth scenarios already catch that. Bail without
      // asserting — this scenario is opt-in.
      return;
    }

    // Validate runId before building a URL from it; otherwise a malformed
    // 201 body would poll `/v1/runs/undefined/...`.
    const runId = (create.json as { runId?: unknown } | undefined)?.runId;
    expect(typeof runId, 'POST /v1/runs 201 response MUST carry a string runId').toBe('string');
    const encodedRunId = encodeURIComponent(runId as string);

    // Poll with a timeout rather than using SSE to keep this scenario
    // transport-agnostic (some hosts might gate SSE behind a feature
    // flag). Bounded to five polls.
    const maxPolls = 5;
    const allEvents: string[] = [];
    let lastSeq = 0;
    let isComplete = false;
    for (let attempt = 0; attempt < maxPolls && !isComplete; attempt++) {
      const poll = await driver.get(
        `/v1/runs/${encodedRunId}/events/poll?lastSequence=${lastSeq}&timeout=5`,
      );
      const pollBody = poll.json as
        | { events?: unknown[]; isComplete?: boolean }
        | undefined;
      const events = pollBody?.events ?? [];
      isComplete = pollBody?.isComplete === true;
      for (const ev of events) {
        allEvents.push(captureToText(ev));
        const seq = (ev as { sequence?: number }).sequence;
        if (typeof seq === 'number' && seq > lastSeq) lastSeq = seq;
      }
      if (events.length === 0 && !isComplete) {
        // No new events — small backoff before re-poll.
        await new Promise((r) => setTimeout(r, 500));
      }
    }

    // Vacuity guard: with zero captured events (for example, every poll
    // failed) the canary check below would pass without inspecting
    // anything.
    expect(
      allEvents.length,
      'no run events were captured for the canary run; the leak check would pass vacuously',
    ).toBeGreaterThan(0);

    // Canonical assertion: no canary value may appear in any captured
    // event payload. credentialRef may round-trip via
    // snapshot.configurable but MUST NOT touch any event.
    const allEventsText = allEvents.join('\n');
    assertNoCanaryLeak(allEventsText, 'credentialRef-event-stream');
  });
});
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Adversarial redaction scenarios — extends `redaction.test.ts` with
3
+ * canary-injection through every untrusted input surface.
4
+ *
5
+ * `redaction.test.ts` covers the structural contract (`/.well-known/openwop`
6
+ * shape, bearer-token redaction, basic credentialRef control). This
7
+ * file pushes further: under hostile inputs, the host's redaction
8
+ * pipeline MUST hold across every observable surface.
9
+ *
10
+ * Surfaces covered:
11
+ *
12
+ * 1. **Workflow inputs** — submit a run whose inputs contain canary
13
+ * values; assert no event payload echoes the canary verbatim.
14
+ * (Every host gates on this — even hosts that don't claim
15
+ * `openwop-secrets` mustn't echo arbitrary input values into events
16
+ * verbatim where the field shape suggests credential.)
17
+ *
18
+ * 2. **Run-creation request body in 4xx error responses** — submit
19
+ * a syntactically-valid POST whose `workflowId` references a
20
+ * non-existent workflow; the host returns 400 or 404. The error
21
+ * message MUST NOT echo the entire input object verbatim, since
22
+ * that path could leak embedded canaries.
23
+ *
24
+ * 3. **Long bearer-shaped strings in inputs** — long base64-shape
25
+ * runs (entropy proxy for token shape) MUST NOT round-trip into
26
+ * event payloads when they appear in non-credential field names.
27
+ * This is a defense-in-depth check; hosts MAY echo non-credential
28
+ * input verbatim, but a leak in this surface is a higher-severity
29
+ * defect.
30
+ *
31
+ * Profile gating:
32
+ *
33
+ * - Surfaces 1 + 2 run against every conforming host.
34
+ * - Surface 3 is `@advisory-redaction` — recommended but not gating.
35
+ *
36
+ * Cross-references SECURITY/threat-model-secret-leakage.md invariants
37
+ * `secret-leakage-eventlog-payload`, `secret-leakage-error-envelope`,
38
+ * `secret-leakage-stream-chunk`.
39
+ *
40
+ * @see SECURITY/threat-model-secret-leakage.md
41
+ * @see SECURITY/invariants.yaml — secret-leakage-* entries
42
+ * @see spec/v1/capabilities.md §"Secrets" + NFR-7
43
+ */
44
+
45
+ import { describe, it, expect } from 'vitest';
46
+ import { driver } from '../lib/driver.js';
47
+ import { CANARIES, CANARY_MARKER, getCanary } from '../lib/canaries.js';
48
+ import { pollUntilTerminal } from '../lib/polling.js';
49
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
50
+
51
+ const NOOP_WORKFLOW_ID = 'conformance-noop'; // workflowId submitted by the run-creating scenarios in this file
52
+ const SKIP_NO_NOOP = !isFixtureAdvertised(NOOP_WORKFLOW_ID); // negation of isFixtureAdvertised() for the noop id; passed to describe.skipIf so run-creating suites are skipped when it is true
53
+
54
/**
 * Surface 1 — workflow inputs.
 *
 * Creates a noop run whose inputs carry an OpenAI-shaped canary, waits for
 * the run to reach a terminal state, then checks that neither the canary
 * value nor the shared canary marker shows up in the event poll response.
 * The whole suite is skipped when SKIP_NO_NOOP is true.
 */
describe.skipIf(SKIP_NO_NOOP)('redaction-adversarial: workflow input canaries', () => {
  it('OpenAI-shaped canary in arbitrary input field MUST NOT appear verbatim in any event-stream payload', async () => {
    const { value: openAiCanary } = getCanary('openai');

    // The field name deliberately does NOT signal "credential": the host has
    // no semantic reason to mask it, but equally no reason to copy arbitrary
    // input values verbatim into RunEvent payloads. conformance-noop declares
    // NO variables, so the host SHOULD NOT propagate inputs into events.
    const inputs = { userSuppliedField: openAiCanary };
    const createRes = await driver.post('/v1/runs', {
      workflowId: NOOP_WORKFLOW_ID,
      inputs,
    });
    expect(
      createRes.status,
      driver.describe('rest-endpoints.md', 'noop run create returns 201'),
    ).toBe(201);

    const { runId } = createRes.json as { runId: string };
    await pollUntilTerminal(runId);

    const pollRes = await driver.get(
      `/v1/runs/${encodeURIComponent(runId)}/events/poll`,
    );

    // A non-200 means the host doesn't expose polling (SSE is the
    // alternative); that is treated as skip-equivalent, so the leak
    // assertions only run on a 200.
    if (pollRes.status === 200) {
      const pollText = pollRes.text;

      const leakedCanary = pollText.includes(openAiCanary);
      expect(
        leakedCanary,
        driver.describe(
          'capabilities.md §"Secrets" + NFR-7',
          'canary value submitted as workflow input MUST NOT appear verbatim in event poll response',
        ),
      ).toBe(false);

      const leakedMarker = pollText.includes(CANARY_MARKER);
      expect(
        leakedMarker,
        driver.describe(
          'NFR-7',
          'canary marker substring MUST NOT appear in event poll response',
        ),
      ).toBe(false);
    }
  });
});
93
+
94
/**
 * Surface 2 — 4xx error envelopes.
 *
 * Posts a run-creation request whose `workflowId` embeds a JWT-shaped canary
 * and names a workflow that should not exist, then checks that the error
 * response body does not contain the canary verbatim.
 */
describe('redaction-adversarial: error-envelope canaries', () => {
  it('JWT-shaped canary in workflowId of a 404-failing request MUST NOT echo verbatim in error response', async () => {
    const jwtCanary = getCanary('jwt-bearer').value;
    const bogusWorkflowId = `nonexistent-${jwtCanary}`;

    const rejection = await driver.post('/v1/runs', {
      workflowId: bogusWorkflowId,
      inputs: {},
    });

    const { status } = rejection;
    const rejectedAsExpected = status === 400 || status === 404;
    expect(
      rejectedAsExpected,
      driver.describe('rest-endpoints.md', 'unknown workflowId returns 400 or 404'),
    ).toBe(true);

    // Echoing the workflowId in an error message is a legitimate UX choice.
    // What matters for redaction is the JWT-style canary shape surfacing
    // verbatim: per SECURITY/invariants.yaml `secret-leakage-error-envelope`,
    // the host SHOULD sanitize entropy-shaped substrings even when it echoes
    // the workflowId.
    const envelopeText = rejection.text;
    const canaryEchoed = envelopeText.includes(jwtCanary);
    expect(
      canaryEchoed,
      driver.describe(
        'SECURITY/invariants.yaml secret-leakage-error-envelope',
        'JWT-shaped canary submitted in workflowId SHOULD NOT echo verbatim in 4xx error envelope',
      ),
    ).toBe(false);
  });
});
119
+
120
/**
 * Surface 3 — bearer-shaped strings embedded in non-credential inputs.
 *
 * Creates a noop run whose `descriptiveText` input embeds an
 * Anthropic-shaped canary inside ordinary prose, waits for the run to reach
 * a terminal state, and checks that the event poll response does not contain
 * the canary verbatim. The whole suite is skipped when SKIP_NO_NOOP is true.
 *
 * NOTE(review): the file header labels this surface `@advisory-redaction`
 * (recommended, not gating), yet the assertion below is a hard failure —
 * confirm which is intended.
 */
describe.skipIf(SKIP_NO_NOOP)('redaction-adversarial: bearer-shaped strings round-trip', () => {
  it('Anthropic-shaped canary embedded in non-credential input MUST NOT appear verbatim in events', async () => {
    // Plant a canary distinct from the first scenario to avoid
    // collisions if the host caches inputs across runs.
    const canary = getCanary('anthropic').value;

    const create = await driver.post('/v1/runs', {
      workflowId: NOOP_WORKFLOW_ID,
      inputs: {
        descriptiveText: `Some description containing ${canary} embedded`,
      },
    });
    // Attach the same spec-referenced failure message the sibling noop
    // scenario uses, so a rejected run-create is reported with context
    // rather than as a bare "expected 201".
    expect(
      create.status,
      driver.describe('rest-endpoints.md', 'noop run create returns 201'),
    ).toBe(201);
    const runId = (create.json as { runId: string }).runId;

    await pollUntilTerminal(runId);

    const events = await driver.get(
      `/v1/runs/${encodeURIComponent(runId)}/events/poll`,
    );
    // A non-200 poll response is treated as skip-equivalent (no assertion).
    if (events.status !== 200) return;

    expect(
      events.text.includes(canary),
      driver.describe(
        'capabilities.md §"Secrets" + NFR-7',
        'Anthropic-shaped canary embedded in arbitrary input MUST NOT appear verbatim in events',
      ),
    ).toBe(false);
  });
});
148
+
149
/**
 * Harness self-test for the lib/canaries.ts contract.
 *
 * Walks CANARIES and asserts that each entry's `value` contains
 * CANARY_MARKER, naming the offending entry's `label` in the failure
 * message.
 */
describe('redaction-adversarial: every canary fixture carries the marker substring', () => {
  it('every canary in CANARIES has the marker substring', () => {
    // Every canary value MUST include CANARY_MARKER so the leak detector can
    // find it unambiguously. A failure here means the canary harness itself
    // is broken — every other adversarial scenario relies on this property.
    for (const { value, label } of CANARIES) {
      const carriesMarker = value.includes(CANARY_MARKER);
      const failureMessage = driver.describe(
        'lib/canaries.ts',
        `every canary fixture MUST contain CANARY_MARKER (offender label: ${label})`,
      );
      expect(carriesMarker, failureMessage).toBe(true);
    }
  });
});