@openwop/openwop-conformance 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +241 -0
- package/api/asyncapi.yaml +481 -0
- package/api/openapi.yaml +830 -0
- package/api/redocly.yaml +8 -0
- package/coverage.md +80 -0
- package/dist/cli.js +161 -0
- package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
- package/fixtures/conformance-agent-identity.json +27 -0
- package/fixtures/conformance-agent-low-confidence.json +29 -0
- package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
- package/fixtures/conformance-agent-memory-redaction.json +32 -0
- package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
- package/fixtures/conformance-agent-memory-ttl.json +31 -0
- package/fixtures/conformance-agent-pack-export.json +26 -0
- package/fixtures/conformance-agent-pack-install.json +26 -0
- package/fixtures/conformance-agent-pack-provenance.json +31 -0
- package/fixtures/conformance-agent-reasoning.json +29 -0
- package/fixtures/conformance-approval.json +27 -0
- package/fixtures/conformance-cancellable.json +33 -0
- package/fixtures/conformance-cap-breach.json +27 -0
- package/fixtures/conformance-capability-missing.json +23 -0
- package/fixtures/conformance-channel-ttl.json +60 -0
- package/fixtures/conformance-clarification.json +30 -0
- package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
- package/fixtures/conformance-conversation-lifecycle.json +32 -0
- package/fixtures/conformance-conversation-replay.json +33 -0
- package/fixtures/conformance-conversation-vs-clarification.json +26 -0
- package/fixtures/conformance-delay.json +33 -0
- package/fixtures/conformance-dispatch-loop.json +38 -0
- package/fixtures/conformance-failure.json +23 -0
- package/fixtures/conformance-idempotent.json +30 -0
- package/fixtures/conformance-identity.json +32 -0
- package/fixtures/conformance-interrupt-auth-required.json +28 -0
- package/fixtures/conformance-interrupt-external-event.json +33 -0
- package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
- package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
- package/fixtures/conformance-interrupt-quorum.json +30 -0
- package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
- package/fixtures/conformance-message-reducer.json +31 -0
- package/fixtures/conformance-multi-node.json +21 -0
- package/fixtures/conformance-noop.json +23 -0
- package/fixtures/conformance-orchestrator-dispatch.json +47 -0
- package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
- package/fixtures/conformance-orchestrator-terminate.json +44 -0
- package/fixtures/conformance-stream-text.json +26 -0
- package/fixtures/conformance-subworkflow-child.json +21 -0
- package/fixtures/conformance-subworkflow-parent.json +49 -0
- package/fixtures/conformance-version-fold.json +23 -0
- package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
- package/fixtures/pack-manifests/pack-private-example.json +26 -0
- package/fixtures.md +404 -0
- package/package.json +48 -0
- package/schemas/README.md +75 -0
- package/schemas/agent-manifest.schema.json +107 -0
- package/schemas/agent-ref.schema.json +53 -0
- package/schemas/capabilities.schema.json +287 -0
- package/schemas/channel-written-payload.schema.json +55 -0
- package/schemas/conversation-event.schema.json +120 -0
- package/schemas/conversation-turn.schema.json +72 -0
- package/schemas/debug-bundle.schema.json +196 -0
- package/schemas/dispatch-config.schema.json +46 -0
- package/schemas/error-envelope.schema.json +25 -0
- package/schemas/memory-entry.schema.json +36 -0
- package/schemas/memory-list-options.schema.json +21 -0
- package/schemas/node-pack-manifest.schema.json +235 -0
- package/schemas/orchestrator-decision.schema.json +60 -0
- package/schemas/run-event-payloads.schema.json +663 -0
- package/schemas/run-event.schema.json +116 -0
- package/schemas/run-options.schema.json +81 -0
- package/schemas/run-orchestrator-decided-event.schema.json +20 -0
- package/schemas/run-snapshot.schema.json +121 -0
- package/schemas/suspend-request.schema.json +182 -0
- package/schemas/workflow-definition.schema.json +430 -0
- package/src/cli.ts +187 -0
- package/src/lib/a2a-fake-peer.ts +233 -0
- package/src/lib/canaries.ts +186 -0
- package/src/lib/driver.ts +96 -0
- package/src/lib/env.ts +49 -0
- package/src/lib/fixtures.ts +93 -0
- package/src/lib/mcp-fake-server.ts +185 -0
- package/src/lib/multi-agent-capabilities.ts +155 -0
- package/src/lib/multiProcess.ts +141 -0
- package/src/lib/otel-collector.ts +312 -0
- package/src/lib/paths.ts +198 -0
- package/src/lib/polling.ts +81 -0
- package/src/lib/profiles.ts +258 -0
- package/src/lib/sse.ts +172 -0
- package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
- package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
- package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
- package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
- package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
- package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
- package/src/scenarios/agentMessageReducer.test.ts +57 -0
- package/src/scenarios/agentMetadata.test.ts +56 -0
- package/src/scenarios/agentPackExport.test.ts +45 -0
- package/src/scenarios/agentPackInstall.test.ts +50 -0
- package/src/scenarios/agentPackProvenance.test.ts +53 -0
- package/src/scenarios/agentReasoningEvents.test.ts +72 -0
- package/src/scenarios/append-ordering.test.ts +91 -0
- package/src/scenarios/approval-payload.test.ts +120 -0
- package/src/scenarios/audit-log-integrity.test.ts +106 -0
- package/src/scenarios/auth.test.ts +55 -0
- package/src/scenarios/byok-roundtrip.test.ts +166 -0
- package/src/scenarios/cancellation.test.ts +68 -0
- package/src/scenarios/cap-breach.test.ts +149 -0
- package/src/scenarios/channel-ttl.test.ts +70 -0
- package/src/scenarios/configurable-schema.test.ts +76 -0
- package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
- package/src/scenarios/conversationLifecycle.test.ts +64 -0
- package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
- package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
- package/src/scenarios/cost-attribution.test.ts +207 -0
- package/src/scenarios/debugBundle.test.ts +222 -0
- package/src/scenarios/discovery.test.ts +147 -0
- package/src/scenarios/dispatchLoop.test.ts +52 -0
- package/src/scenarios/errors.test.ts +144 -0
- package/src/scenarios/eventOrdering.test.ts +144 -0
- package/src/scenarios/failure-path.test.ts +46 -0
- package/src/scenarios/fixtures-gating.test.ts +137 -0
- package/src/scenarios/fixtures-valid.test.ts +140 -0
- package/src/scenarios/highConcurrency.test.ts +263 -0
- package/src/scenarios/idempotency.test.ts +83 -0
- package/src/scenarios/idempotencyRetry.test.ts +130 -0
- package/src/scenarios/identity-passthrough.test.ts +54 -0
- package/src/scenarios/interrupt-approval.test.ts +97 -0
- package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
- package/src/scenarios/interrupt-clarification.test.ts +45 -0
- package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
- package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
- package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
- package/src/scenarios/interruptRace.test.ts +176 -0
- package/src/scenarios/maliciousManifest.test.ts +154 -0
- package/src/scenarios/mcp-discoverability.test.ts +129 -0
- package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
- package/src/scenarios/multi-node-ordering.test.ts +60 -0
- package/src/scenarios/multi-region-idempotency.test.ts +52 -0
- package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
- package/src/scenarios/orchestratorDispatch.test.ts +66 -0
- package/src/scenarios/orchestratorTermination.test.ts +54 -0
- package/src/scenarios/otel-emission.test.ts +113 -0
- package/src/scenarios/otel-trace-propagation.test.ts +90 -0
- package/src/scenarios/pack-registry-publish.test.ts +93 -0
- package/src/scenarios/pack-registry.test.ts +328 -0
- package/src/scenarios/pause-resume.test.ts +109 -0
- package/src/scenarios/policies.test.ts +162 -0
- package/src/scenarios/profileDerivation.test.ts +335 -0
- package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
- package/src/scenarios/rate-limit-envelope.test.ts +97 -0
- package/src/scenarios/redaction.test.ts +254 -0
- package/src/scenarios/redactionAdversarial.test.ts +162 -0
- package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
- package/src/scenarios/replay-fork.test.ts +216 -0
- package/src/scenarios/replayDeterminism.test.ts +171 -0
- package/src/scenarios/route-coverage.test.ts +129 -0
- package/src/scenarios/runs-lifecycle.test.ts +65 -0
- package/src/scenarios/runtime-capabilities.test.ts +118 -0
- package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
- package/src/scenarios/staleClaim.test.ts +223 -0
- package/src/scenarios/stream-modes-buffer.test.ts +148 -0
- package/src/scenarios/stream-modes-mixed.test.ts +149 -0
- package/src/scenarios/stream-modes.test.ts +139 -0
- package/src/scenarios/streamReconnect.test.ts +162 -0
- package/src/scenarios/subworkflow.test.ts +126 -0
- package/src/scenarios/version-negotiation.test.ts +157 -0
- package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
- package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
- package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
- package/src/scenarios/wasm-pack-load.test.ts +75 -0
- package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
- package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
- package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
- package/src/setup.ts +173 -0
- package/vitest.config.ts +17 -0
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider-policy enforcement scenarios — extends `policies.test.ts`
|
|
3
|
+
* (which covers discovery-shape only) with denial-error-shape contracts
|
|
4
|
+
* for hosts that advertise enforcement.
|
|
5
|
+
*
|
|
6
|
+
* Why discovery-shape vs full enforcement:
|
|
7
|
+
*
|
|
8
|
+
* Real enforcement requires a configured policy document AND a
|
|
9
|
+
* working AI provider invocation, AND admin write access to set the
|
|
10
|
+
* policy under test. None of those are black-box reproducible. The
|
|
11
|
+
* conformance suite gates on the wire shape of denial responses +
|
|
12
|
+
* SECURITY/invariants.yaml entries.
|
|
13
|
+
*
|
|
14
|
+
* Profile gating:
|
|
15
|
+
*
|
|
16
|
+
* - Hosts that don't advertise `aiProviders.policies` are skip-equivalent
|
|
17
|
+
* (no policy enforcement to verify).
|
|
18
|
+
* - Hosts that advertise it MUST honor the documented denial reason
|
|
19
|
+
* enum + the closed mode set per spec/v1/capabilities.md
|
|
20
|
+
* §"`aiProviders.policies`".
|
|
21
|
+
*
|
|
22
|
+
* Cross-references SECURITY/threat-model-provider-policy.md invariants
|
|
23
|
+
* `provider-policy-pre-dispatch` · `provider-policy-disabled-hard` ·
|
|
24
|
+
* `provider-policy-restricted-glob` · `provider-policy-restricted-fail-closed`.
|
|
25
|
+
*
|
|
26
|
+
* @see spec/v1/capabilities.md §"`aiProviders.policies`"
|
|
27
|
+
* @see SECURITY/threat-model-provider-policy.md
|
|
28
|
+
* @see SECURITY/invariants.yaml — provider-policy-* entries
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { describe, it, expect } from 'vitest';
|
|
32
|
+
import { driver } from '../lib/driver.js';
|
|
33
|
+
|
|
34
|
+
// Closed set of policy modes a host may advertise in `aiProviders.policies.modes`.
// `as const` keeps the literal tuple type so `.length` and the entries stay exact.
const CANONICAL_MODES = ['disabled', 'optional', 'required', 'restricted'] as const;
|
|
35
|
+
|
|
36
|
+
/**
 * Documented denial-reason enum from spec/v1/capabilities.md.
 *
 * Declared `as const` so the entries keep their literal types and the
 * tuple length is known to the compiler.
 */
const DOCUMENTED_DENIAL_REASONS = [
  'provider_disabled',
  'byok_required',
  'byok_required_but_unresolved',
  'model_not_allowed',
] as const;
|
|
43
|
+
|
|
44
|
+
/**
 * Loosely-typed view of the `aiProviders.policies` object read from the
 * discovery document. Every field is `unknown` on purpose: the scenarios
 * narrow and validate each one at runtime instead of trusting the host.
 */
interface PoliciesShape {
  /** Advertised policy modes; validated by the scenarios as an array of strings. */
  modes?: unknown;
  /** Advertised policy scopes; validated by the scenarios as an array of non-empty strings. */
  scopes?: unknown;
  /** Advertised denial error code; validated by the scenarios as a non-empty string. */
  errorCode?: unknown;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Read the `aiProviders.policies` advertisement from the discovery document.
 *
 * Issues an unauthenticated GET against `/.well-known/openwop`.
 *
 * @returns The advertised policies object, or `null` when the response
 *   status is not 200, the response carries no JSON body, or the body has
 *   no `aiProviders.policies` entry. Callers treat `null` as "nothing to
 *   verify" and return early.
 */
async function fetchPolicies(): Promise<PoliciesShape | null> {
  const res = await driver.get('/.well-known/openwop', { authenticated: false });
  if (res.status !== 200) return null;

  // Guard against a missing JSON body: without the optional chain on `body`
  // a 200 response with no parsed JSON would throw a TypeError here instead
  // of taking the same soft-skip path as "policies not advertised".
  const body = res.json as
    | { aiProviders?: { policies?: PoliciesShape } }
    | null
    | undefined;
  return body?.aiProviders?.policies ?? null;
}
|
|
56
|
+
|
|
57
|
+
describe('provider-policy-enforcement: closed mode set per spec/v1/capabilities.md §`aiProviders.policies`', () => {
  // Spec anchor cited by the capabilities.md assertions in this suite.
  const CAPABILITIES_REF = 'capabilities.md §"`aiProviders.policies`"';

  // Each advertised mode must be a string drawn from CANONICAL_MODES.
  it('every advertised mode is one of the four canonical values', async () => {
    const advertised = await fetchPolicies();
    // Nothing to verify when policies are not advertised or `modes` is not an array.
    if (advertised === null) return;
    const advertisedModes = advertised.modes;
    if (!Array.isArray(advertisedModes)) return;

    const canonical: readonly string[] = CANONICAL_MODES;
    for (const entry of advertisedModes) {
      expect(
        typeof entry,
        driver.describe(CAPABILITIES_REF, 'each entry in policies.modes MUST be a string'),
      ).toBe('string');

      const isCanonical = canonical.includes(entry as string);
      expect(
        isCanonical,
        driver.describe(
          CAPABILITIES_REF,
          `mode "${String(entry)}" is not in the closed canonical set [${CANONICAL_MODES.join(', ')}]`,
        ),
      ).toBe(true);
    }
  });

  // Advertising `restricted` implies advertising `optional` as well.
  it('hosts that support `restricted` MUST also support `optional` (default no-restriction case)', async () => {
    const advertised = await fetchPolicies();
    const modeList = advertised?.modes;
    // Covers both "no policies advertised" and "modes is not an array".
    if (!Array.isArray(modeList)) return;
    // Only hosts that advertise `restricted` are subject to this rule.
    if (!modeList.includes('restricted')) return;

    const advertisesOptional = modeList.includes('optional');
    expect(
      advertisesOptional,
      driver.describe(
        'spec/v1/profiles.md §`openwop-provider-policy`',
        'a host advertising `restricted` MUST also advertise `optional` so workflows without policy hit the default permissive case',
      ),
    ).toBe(true);
  });

  // `errorCode` is optional, but when present it must be a non-empty string.
  it('errorCode is a non-empty string when present', async () => {
    const advertised = await fetchPolicies();
    const errorCode = advertised?.errorCode;
    // Covers both "no policies advertised" and "errorCode omitted".
    if (errorCode === undefined) return;

    expect(
      typeof errorCode,
      driver.describe(CAPABILITIES_REF, 'aiProviders.policies.errorCode MUST be a string when present'),
    ).toBe('string');
    expect(
      (errorCode as string).length,
      driver.describe(CAPABILITIES_REF, 'aiProviders.policies.errorCode MUST be non-empty when present'),
    ).toBeGreaterThan(0);
  });
});
|
|
101
|
+
|
|
102
|
+
describe('provider-policy-enforcement: scope advertisement', () => {
  // When `scopes` is advertised as an array, every entry must be a non-empty string.
  it('scopes contains only non-empty strings when present', async () => {
    const advertised = await fetchPolicies();
    const scopeList = advertised?.scopes;
    // Covers both "no policies advertised" and "scopes missing or not an array".
    if (!Array.isArray(scopeList)) return;

    for (const entry of scopeList) {
      const isNonEmptyString = typeof entry === 'string' && entry.length > 0;
      expect(
        isNonEmptyString,
        driver.describe(
          'capabilities.md §"`aiProviders.policies`"',
          'each entry in policies.scopes MUST be a non-empty string',
        ),
      ).toBe(true);
    }
  });
});
|
|
116
|
+
|
|
117
|
+
describe('provider-policy-enforcement: documented denial reasons enumeration', () => {
  it('lists are non-empty (sanity check on documentation drift)', () => {
    // Self-test of this file's own constants, no host traffic involved.
    // Guards against the constants being emptied or truncated: the mode
    // set must hold exactly four entries and the denial-reason set must
    // hold at least one.
    const modeCount = CANONICAL_MODES.length;
    expect(
      modeCount,
      driver.describe(
        'spec/v1/capabilities.md §"`aiProviders.policies`"',
        'closed mode set MUST be the four canonical values',
      ),
    ).toBe(4);

    const reasonCount = DOCUMENTED_DENIAL_REASONS.length;
    expect(
      reasonCount,
      driver.describe(
        'openwop/openwop@0bebfb0 — denial-reason enum alignment',
        'documented denial-reason set is non-empty',
      ),
    ).toBeGreaterThan(0);
  });
});
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Track 13: normative 429 envelope shape (rest-endpoints.md v1.1).
|
|
3
|
+
*
|
|
4
|
+
* Verifies that any 429 response produced by the host conforms to the
|
|
5
|
+
* canonical envelope shape and supplemental `details` keys.
|
|
6
|
+
*
|
|
7
|
+
* Verifies:
|
|
8
|
+
* 1. `error === 'rate_limited'`.
|
|
9
|
+
* 2. `Retry-After` header present and integer-seconds.
|
|
10
|
+
* 3. When `details.retryAfterMs` is present, it is consistent with
|
|
11
|
+
* `Retry-After`.
|
|
12
|
+
* 4. `details.scope` is one of {"tenant", "route", "global", "key"}.
|
|
13
|
+
* 5. No top-level keys outside `{error, message, details}`.
|
|
14
|
+
*
|
|
15
|
+
* Soft-skip behavior: hosts that don't surface 429 during the test
|
|
16
|
+
* window (low traffic) emit a warning rather than fail. To exercise this
|
|
17
|
+
* scenario the conformance harness MAY set OPENWOP_FORCE_RATE_LIMIT=true
|
|
18
|
+
* which signals the host to fabricate a 429 against a test-only key.
|
|
19
|
+
*
|
|
20
|
+
* @see spec/v1/rest-endpoints.md §"429 Too Many Requests envelope"
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { describe, it, expect } from 'vitest';
|
|
24
|
+
import { driver } from '../lib/driver.js';
|
|
25
|
+
|
|
26
|
+
// Values accepted for `details.scope` in a 429 envelope; checked only when the host sends the key.
const ALLOWED_SCOPES = new Set(['tenant', 'route', 'global', 'key']);
|
|
27
|
+
// True only when OPENWOP_FORCE_RATE_LIMIT is exactly the string 'true'; the scenario uses it to pick the larger probe burst.
const FORCE = process.env.OPENWOP_FORCE_RATE_LIMIT === 'true';
|
|
28
|
+
|
|
29
|
+
describe('rate-limit-envelope: 429 conforms to canonical shape', () => {
  /**
   * Observational scenario: fires a sequential burst of GETs at the
   * discovery endpoint and, if any of them answers 429, validates that
   * response against the envelope contract. When no 429 shows up the
   * scenario logs a warning and passes without asserting anything.
   */
  it('when a 429 is observed, the response body satisfies the v1.1 contract', async () => {
    const ENVELOPE_REF = 'rest-endpoints.md §"429 Too Many Requests envelope"';
    // Larger burst when the harness asked for a forced rate limit.
    const burstSize = FORCE ? 200 : 50;

    // Drive the burst; stop at the first 429 so `last` holds that response.
    let last: Awaited<ReturnType<typeof driver.get>> | null = null;
    for (let i = 0; i < burstSize; i++) {
      last = await driver.get('/.well-known/openwop');
      if (last.status === 429) break;
    }

    if (last === null || last.status !== 429) {
      // eslint-disable-next-line no-console
      console.warn(
        '[rate-limit-envelope] no 429 observed within burst; skipping shape assertions',
      );
      return;
    }

    // --- Retry-After header -------------------------------------------------
    const retryAfter = last.headers.get('retry-after');
    expect(retryAfter, driver.describe(
      ENVELOPE_REF,
      '429 response MUST set Retry-After header',
    )).not.toBeNull();
    // Presence alone is not enough: the header must be integer seconds.
    // `String(null)` is 'null', which fails the digits-only pattern.
    expect(/^\d+$/.test(String(retryAfter)), driver.describe(
      ENVELOPE_REF,
      'Retry-After header MUST be a non-negative integer number of seconds',
    )).toBe(true);

    // --- Body envelope ------------------------------------------------------
    // Fail with a cited assertion (not a TypeError) when the body is not a
    // JSON object.
    const rawBody: unknown = last.json;
    expect(typeof rawBody === 'object' && rawBody !== null, driver.describe(
      ENVELOPE_REF,
      '429 response MUST carry a JSON object body',
    )).toBe(true);

    const body = rawBody as {
      error?: string;
      message?: string;
      details?: { retryAfterMs?: number; scope?: string; limit?: number; observedRate?: number };
      [k: string]: unknown;
    };

    expect(body.error, driver.describe(
      ENVELOPE_REF,
      "429 body MUST carry error === 'rate_limited'",
    )).toBe('rate_limited');
    expect(typeof body.message).toBe('string');

    // additionalProperties: false on error-envelope.schema.json
    const topKeys = Object.keys(body).filter(
      (k) => !['error', 'message', 'details'].includes(k),
    );
    expect(topKeys, driver.describe(
      'error-envelope.schema.json additionalProperties: false',
      '429 envelope MUST NOT carry top-level keys outside {error, message, details}',
    )).toEqual([]);

    // --- Supplemental `details` keys (each checked only when present) -------
    if (body.details?.scope !== undefined) {
      expect(ALLOWED_SCOPES.has(body.details.scope), driver.describe(
        ENVELOPE_REF,
        "details.scope MUST be one of {'tenant','route','global','key'} when present",
      )).toBe(true);
    }

    const retryAfterSec = Number(retryAfter);
    if (
      body.details?.retryAfterMs !== undefined &&
      !Number.isNaN(retryAfterSec) &&
      retryAfterSec > 0
    ) {
      const diff = Math.abs(body.details.retryAfterMs / 1000 - retryAfterSec);
      // NOTE(review): the message says ±1s but the bound is 1.5 — presumably
      // slack for rounding the header to whole seconds; confirm against the spec.
      expect(diff, driver.describe(
        ENVELOPE_REF,
        'details.retryAfterMs MUST be consistent with Retry-After header (±1s)',
      )).toBeLessThanOrEqual(1.5);
    }
  });
});
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Redaction conformance scenarios — `capabilities.md` §"Secrets" + NFR-7.
|
|
3
|
+
*
|
|
4
|
+
* These are vendor-neutral assertions that any OpenWOP-compliant server
|
|
5
|
+
* doesn't leak secret material in observable surfaces. The scenarios
|
|
6
|
+
* gate cleanly on the host's advertised capabilities:
|
|
7
|
+
*
|
|
8
|
+
* - **Discovery shape contract** runs against every host. It verifies
|
|
9
|
+
* `secrets` and `aiProviders` advertisements are well-formed
|
|
10
|
+
* regardless of whether the host supports BYOK.
|
|
11
|
+
*
|
|
12
|
+
* - **Bearer-token redaction** runs against every host. The 401
|
|
13
|
+
* response when an invalid Bearer token is supplied MUST NOT
|
|
14
|
+
* echo the token back. This is universal — applies even to hosts
|
|
15
|
+
* that don't advertise `secrets.supported: true`.
|
|
16
|
+
*
|
|
17
|
+
* - **credentialRef echo control** runs ONLY when the host advertises
|
|
18
|
+
* `secrets.supported: true`. Per `capabilities.md` §"aiProviders":
|
|
19
|
+
* `RunOptions.configurable.ai.credentialRef` is opaque + host-
|
|
20
|
+
* resolved; servers MUST NOT include the value in any RunEvent,
|
|
21
|
+
* log line, span attribute, error message, or export. The scenario
|
|
22
|
+
* plants a canary as `credentialRef` on a noop run and asserts
|
|
23
|
+
* the canary doesn't appear in any event payload.
|
|
24
|
+
*
|
|
25
|
+
* **Why these scenarios live here, not just in-tree:**
|
|
26
|
+
*
|
|
27
|
+
* Spec rule NFR-7 is normative: "any code path that emits a `RunEvent`
|
|
28
|
+
* / OTel span / log line / error / export MUST NOT contain raw key
|
|
29
|
+
* material." The reference implementation has its own in-process
|
|
30
|
+
* canary harness (which can mock + intercept logger output). But other
|
|
31
|
+
* OpenWOP-compliant servers — including non-reference implementations — need to
|
|
32
|
+
* verify the same invariant black-box, against their HTTP surface.
|
|
33
|
+
* That's what these scenarios cover.
|
|
34
|
+
*
|
|
35
|
+
* **Limitations:**
|
|
36
|
+
*
|
|
37
|
+
* The conformance suite only sees what the HTTP surface emits — it
|
|
38
|
+
* can't read a host's stdout / Cloud Logging / OTel collector.
|
|
39
|
+
* Hosts MUST run their own internal redaction tests (mocking the
|
|
40
|
+
* logger / tracer / etc.) to cover those surfaces. These scenarios
|
|
41
|
+
* cover only the response-body + run-event-stream surfaces, which are
|
|
42
|
+
* the cross-implementation interop contract.
|
|
43
|
+
*
|
|
44
|
+
* @see capabilities.md §"Secrets" + §"aiProviders"
|
|
45
|
+
* @see lib/canaries.ts — canary fixtures + detector
|
|
46
|
+
*/
|
|
47
|
+
|
|
48
|
+
import { describe, it, expect } from 'vitest';
|
|
49
|
+
import { driver } from '../lib/driver.js';
|
|
50
|
+
import {
|
|
51
|
+
CANARIES,
|
|
52
|
+
CANARY_MARKER,
|
|
53
|
+
assertNoCanaryLeak,
|
|
54
|
+
captureToText,
|
|
55
|
+
getCanary,
|
|
56
|
+
} from '../lib/canaries.js';
|
|
57
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
58
|
+
|
|
59
|
+
// Workflow id sent as `workflowId` in the POST /v1/runs requests made by these scenarios.
const NOOP_WORKFLOW_ID = 'conformance-noop';
|
|
60
|
+
// True when `isFixtureAdvertised` reports the noop fixture as not advertised; passed to `describe.skipIf` below.
const SKIP_NO_NOOP = !isFixtureAdvertised(NOOP_WORKFLOW_ID);
|
|
61
|
+
|
|
62
|
+
// ─── Discovery shape contract (always runs) ───────────────────────────
|
|
63
|
+
|
|
64
|
+
describe('redaction: /.well-known/openwop secrets+aiProviders shape contract', () => {
  // Validates the optional `secrets` advertisement of the discovery document.
  it('secrets is well-formed regardless of supported value', async () => {
    const discovery = await driver.get('/.well-known/openwop', { authenticated: false });
    expect(discovery.status).toBe(200);

    const payload = discovery.json as { secrets?: unknown } | undefined;
    const secrets = payload?.secrets;

    // Optional v1 field — hosts MAY omit. Spec-allowed; nothing to assert.
    if (secrets === undefined) return;

    // Per capabilities.schema.json: `secrets.supported` is REQUIRED
    // when secrets is present.
    const advert = secrets as {
      supported?: unknown;
      scopes?: unknown;
      resolution?: unknown;
    };
    expect(
      typeof advert.supported,
      driver.describe('capabilities.md §"Secrets"', 'secrets.supported MUST be a boolean'),
    ).toBe('boolean');

    // The remaining rules apply only to hosts that claim secrets support.
    if (advert.supported !== true) return;

    // A host claiming secrets must declare at least one scope...
    expect(
      Array.isArray(advert.scopes),
      driver.describe(
        'capabilities.md §"Secrets"',
        'when secrets.supported is true, scopes MUST be a string[]',
      ),
    ).toBe(true);
    const declaredScopes = advert.scopes as string[];
    expect(
      declaredScopes.length,
      driver.describe(
        'capabilities.md §"Secrets"',
        'when secrets.supported is true, scopes MUST be non-empty',
      ),
    ).toBeGreaterThanOrEqual(1);
    for (const declared of declaredScopes) {
      expect(['tenant', 'user', 'run']).toContain(declared);
    }

    // ...and resolution MUST be 'host-managed' (only allowed value in v1.x).
    expect(
      advert.resolution,
      driver.describe('capabilities.md §"Secrets"', 'resolution MUST be "host-managed" in v1.x'),
    ).toBe('host-managed');
  });

  // Validates that every BYOK provider id is also listed as a supported provider.
  it('aiProviders is well-formed when present (byok ⊆ supported)', async () => {
    const discovery = await driver.get('/.well-known/openwop', { authenticated: false });
    const payload = discovery.json as { aiProviders?: unknown } | undefined;
    const providers = payload?.aiProviders as
      | { supported?: unknown; byok?: unknown }
      | undefined;

    // Optional v1 field — hosts MAY omit.
    if (providers === undefined) return;
    // Without a `supported` list there is nothing to compare `byok` against.
    if (providers.supported === undefined) return;

    expect(Array.isArray(providers.supported)).toBe(true);
    const supportedIds = new Set(providers.supported as string[]);

    if (providers.byok === undefined) return;

    expect(Array.isArray(providers.byok)).toBe(true);
    for (const providerId of providers.byok as string[]) {
      expect(
        supportedIds.has(providerId),
        driver.describe(
          'capabilities.md §"aiProviders"',
          `byok entry "${providerId}" MUST appear in aiProviders.supported`,
        ),
      ).toBe(true);
    }
  });
});
|
|
139
|
+
|
|
140
|
+
// ─── Bearer-token redaction (always runs) ─────────────────────────────
|
|
141
|
+
|
|
142
|
+
describe('redaction: 401 response MUST NOT echo invalid Bearer token (NFR-7)', () => {
  // Fresh request body per call so no object is shared between tests.
  const buildRunRequest = () => ({
    workflowId: NOOP_WORKFLOW_ID,
    tenantId: 'conformance-tenant',
  });

  // One table row per canary that is planted as a bearer token: [label, value].
  const bearerCanaryRows = CANARIES
    .filter((canary) => canary.label === 'jwt-bearer' || canary.label === 'byok-credential-ref')
    .map((canary) => [canary.label, canary.value]);

  it.each(bearerCanaryRows)(
    'canary "%s" planted in Authorization header is not echoed in 401 body',
    async (_label, canaryValue) => {
      // Call the run-creation route with a synthetic bearer the server
      // cannot validate. Per NFR-7 error envelopes are observable surfaces
      // too, so the response must not echo the canary back.
      const requestOptions = {
        authenticated: false,
        headers: { Authorization: `Bearer ${canaryValue}` },
      };
      const response = await driver.post('/v1/runs', buildRunRequest(), requestOptions);

      // No status assertion on purpose — hosts differ in auth model
      // (Firebase JWT, API-key, etc.). The single check is: no canary
      // anywhere in the parsed body or the raw text.
      const parsedBodyText = captureToText(response.json);
      const responseText = parsedBodyText + '\n' + response.text;
      assertNoCanaryLeak(responseText, 'invalid-bearer-401-body');
    },
  );

  it('the marker substring alone never appears in a 401 body (universal)', async () => {
    const requestOptions = {
      authenticated: false,
      headers: { Authorization: `Bearer ${CANARY_MARKER}-direct-marker` },
    };
    const response = await driver.post('/v1/runs', buildRunRequest(), requestOptions);

    const parsedBodyText = captureToText(response.json);
    const responseText = parsedBodyText + '\n' + response.text;
    expect(responseText).not.toContain(CANARY_MARKER);
  });
});
|
|
180
|
+
|
|
181
|
+
// ─── credentialRef echo control (gated on secrets.supported) ──────────
|
|
182
|
+
|
|
183
|
+
describe.skipIf(SKIP_NO_NOOP)('redaction: credentialRef value MUST NOT appear in event payloads (gated on secrets.supported)', () => {
  /**
   * Two paths in one test:
   *  - host does not advertise `secrets.supported === true`: pass trivially;
   *  - host does: create a noop run carrying a canary `credentialRef`, poll
   *    the run's events, and assert the canary appears in none of them.
   */
  it('skips when host does NOT advertise secrets.supported', async () => {
    const cap = await driver.get('/.well-known/openwop', { authenticated: false });
    const supported =
      (cap.json as { secrets?: { supported?: boolean } } | undefined)?.secrets
        ?.supported ?? false;

    if (supported !== true) {
      // Spec-allowed — this scenario only applies to hosts that opt
      // into BYOK. Pass trivially.
      expect(supported).not.toBe(true);
      return;
    }

    // Real assertion path: plant a canary as credentialRef on a noop
    // run, then poll its events and assert no event payload contains the
    // canary. The credentialRef is allowed to round-trip via
    // `RunSnapshot.configurable` (per run-options.md §configurable echo) —
    // but per capabilities.md §"aiProviders" it MUST NOT appear in any
    // RunEvent payload.
    const c = getCanary('byok-credential-ref');
    const create = await driver.post('/v1/runs', {
      workflowId: NOOP_WORKFLOW_ID,
      tenantId: 'conformance-tenant',
      configurable: { ai: { credentialRef: c.value } },
    });
    if (create.status !== 201) {
      // Auth-required hosts may 401 here without an API key. The
      // conformance suite is expected to provide OPENWOP_API_KEY for the
      // full scenario; if the key is missing or invalid, the suite's
      // earlier auth scenarios already catch that. Bail with a non-
      // assertion — this scenario is opt-in.
      return;
    }
    const runId = (create.json as { runId: string }).runId;

    // Poll rather than stream over SSE to keep this scenario
    // transport-agnostic (some hosts might gate SSE behind a feature flag).
    // The loop is bounded: it ends on `isComplete` or after MAX_POLLS polls.
    const MAX_POLLS = 5;
    let lastSeq = 0;
    let isComplete = false;
    const allEvents: string[] = [];
    for (let iterations = 0; !isComplete && iterations < MAX_POLLS; iterations++) {
      const poll = await driver.get(
        `/v1/runs/${encodeURIComponent(runId)}/events/poll?lastSequence=${lastSeq}&timeout=5`,
      );
      const pollBody = poll.json as
        | { events?: unknown[]; isComplete?: boolean }
        | undefined;
      const events = pollBody?.events ?? [];
      isComplete = pollBody?.isComplete === true;
      for (const ev of events) {
        allEvents.push(captureToText(ev));
        // Advance the cursor to the highest sequence number seen so far.
        const seq = (ev as { sequence?: number }).sequence;
        if (typeof seq === 'number' && seq > lastSeq) lastSeq = seq;
      }
      if (events.length === 0 && !isComplete) {
        // No new events — small backoff before re-poll.
        await new Promise((r) => setTimeout(r, 500));
      }
    }

    if (allEvents.length === 0) {
      // With nothing captured the leak check below cannot fail; say so
      // instead of passing silently.
      // eslint-disable-next-line no-console
      console.warn(
        '[redaction] no run events captured within poll budget; credentialRef event-stream check is vacuous',
      );
    }

    // Canonical assertion: across ALL captured event payloads, no
    // canary value MAY appear. credentialRef may round-trip via
    // snapshot.configurable but MUST NOT touch any event.
    const allEventsText = allEvents.join('\n');
    assertNoCanaryLeak(allEventsText, 'credentialRef-event-stream');
  });
});
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adversarial redaction scenarios — extends `redaction.test.ts` with
|
|
3
|
+
* canary-injection through every untrusted input surface.
|
|
4
|
+
*
|
|
5
|
+
* `redaction.test.ts` covers the structural contract (`/.well-known/openwop`
|
|
6
|
+
* shape, bearer-token redaction, basic credentialRef control). This
|
|
7
|
+
* file pushes further: under hostile inputs, the host's redaction
|
|
8
|
+
* pipeline MUST hold across every observable surface.
|
|
9
|
+
*
|
|
10
|
+
* Surfaces covered:
|
|
11
|
+
*
|
|
12
|
+
* 1. **Workflow inputs** — submit a run whose inputs contain canary
|
|
13
|
+
* values; assert no event payload echoes the canary verbatim.
|
|
14
|
+
* (Every host gates on this — even hosts that don't claim
|
|
15
|
+
* `openwop-secrets` mustn't echo arbitrary input values into events
|
|
16
|
+
* verbatim where the field shape suggests credential.)
|
|
17
|
+
*
|
|
18
|
+
* 2. **Run-creation request body in 4xx error responses** — submit
|
|
19
|
+
* a syntactically-valid POST whose `workflowId` references a
|
|
20
|
+
* non-existent workflow; the host returns 404. The error message
|
|
21
|
+
* MUST NOT echo the entire input object verbatim, since that path
|
|
22
|
+
* could leak embedded canaries.
|
|
23
|
+
*
|
|
24
|
+
* 3. **Long bearer-shaped strings in inputs** — long base64-shape
|
|
25
|
+
* runs (entropy proxy for token shape) MUST NOT round-trip into
|
|
26
|
+
* event payloads when they appear in non-credential field names.
|
|
27
|
+
* This is a defense-in-depth check; hosts MAY echo non-credential
|
|
28
|
+
* input verbatim, but a leak in this surface is a higher-severity
|
|
29
|
+
* defect.
|
|
30
|
+
*
|
|
31
|
+
* Profile gating:
|
|
32
|
+
*
|
|
33
|
+
* - Surfaces 1 + 2 run against every conforming host.
|
|
34
|
+
* - Surface 3 is `@advisory-redaction` — recommended but not gating.
|
|
35
|
+
*
|
|
36
|
+
* Cross-references SECURITY/threat-model-secret-leakage.md invariants
|
|
37
|
+
* `secret-leakage-eventlog-payload`, `secret-leakage-error-envelope`,
|
|
38
|
+
* `secret-leakage-stream-chunk`.
|
|
39
|
+
*
|
|
40
|
+
* @see SECURITY/threat-model-secret-leakage.md
|
|
41
|
+
* @see SECURITY/invariants.yaml — secret-leakage-* entries
|
|
42
|
+
* @see spec/v1/capabilities.md §"Secrets" + NFR-7
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
import { describe, it, expect } from 'vitest';
|
|
46
|
+
import { driver } from '../lib/driver.js';
|
|
47
|
+
import { CANARIES, CANARY_MARKER, getCanary } from '../lib/canaries.js';
|
|
48
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
49
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
50
|
+
|
|
51
|
+
// Workflow id of the fixture that every scenario in this file submits runs against.
const NOOP_WORKFLOW_ID = 'conformance-noop';
// True when `isFixtureAdvertised` reports the noop fixture as not advertised;
// fed to `describe.skipIf` so the run-based scenarios are skipped in that case.
const SKIP_NO_NOOP = !isFixtureAdvertised(NOOP_WORKFLOW_ID);
|
|
53
|
+
|
|
54
|
+
describe.skipIf(SKIP_NO_NOOP)('redaction-adversarial: workflow input canaries', () => {
  /**
   * Surface 1: submits a noop run whose inputs carry an OpenAI-shaped canary,
   * waits for the run to reach a terminal state, then checks that neither the
   * canary value nor the shared marker substring appears in the raw text of
   * the event poll response.
   */
  it('OpenAI-shaped canary in arbitrary input field MUST NOT appear verbatim in any event-stream payload', async () => {
    const { value: planted } = getCanary('openai');

    const created = await driver.post('/v1/runs', {
      workflowId: NOOP_WORKFLOW_ID,
      inputs: {
        // Deliberately neutral field name: nothing here tells the host the
        // value is a credential, so it has no semantic cue to mask it. The
        // noop workflow declares no variables, so the host equally has no
        // reason to copy the value verbatim into RunEvent payloads.
        userSuppliedField: planted,
      },
    });
    expect(
      created.status,
      driver.describe('rest-endpoints.md', 'noop run create returns 201'),
    ).toBe(201);
    const { runId } = created.json as { runId: string };

    await pollUntilTerminal(runId);

    const pollPath = `/v1/runs/${encodeURIComponent(runId)}/events/poll`;
    const polled = await driver.get(pollPath);
    if (polled.status !== 200) {
      // Polling is not exposed by this host (SSE would be the alternative);
      // treat as skip-equivalent.
      return;
    }

    const body = polled.text;

    const valueLeaked = body.includes(planted);
    expect(valueLeaked, driver.describe(
      'capabilities.md §"Secrets" + NFR-7',
      'canary value submitted as workflow input MUST NOT appear verbatim in event poll response',
    )).toBe(false);

    const markerLeaked = body.includes(CANARY_MARKER);
    expect(markerLeaked, driver.describe(
      'NFR-7',
      'canary marker substring MUST NOT appear in event poll response',
    )).toBe(false);
  });
});
|
|
93
|
+
|
|
94
|
+
describe('redaction-adversarial: error-envelope canaries', () => {
  /**
   * Surface 2: posts a run for a workflow id that embeds a JWT-shaped canary
   * and therefore cannot exist, expects a 400 or 404, and checks that the raw
   * error response text does not contain the canary.
   */
  it('JWT-shaped canary in workflowId of a 404-failing request MUST NOT echo verbatim in error response', async () => {
    const jwtCanary = getCanary('jwt-bearer').value;

    const rejected = await driver.post('/v1/runs', {
      workflowId: `nonexistent-${jwtCanary}`,
      inputs: {},
    });

    const rejectedAsExpected = rejected.status === 400 || rejected.status === 404;
    expect(
      rejectedAsExpected,
      driver.describe('rest-endpoints.md', 'unknown workflowId returns 400 or 404'),
    ).toBe(true);

    // Echoing the workflowId back in an error message is a legitimate UX
    // choice. What this checks is that the JWT-style canary itself is not
    // reproduced verbatim: per SECURITY/invariants.yaml
    // `secret-leakage-error-envelope`, entropy-shaped substrings should be
    // sanitized even when the id is echoed.
    // NOTE(review): the test title says MUST NOT while the assertion message
    // says SHOULD NOT — confirm which normative level is intended.
    const canaryEchoed = rejected.text.includes(jwtCanary);
    expect(canaryEchoed, driver.describe(
      'SECURITY/invariants.yaml secret-leakage-error-envelope',
      'JWT-shaped canary submitted in workflowId SHOULD NOT echo verbatim in 4xx error envelope',
    )).toBe(false);
  });
});
|
|
119
|
+
|
|
120
|
+
describe.skipIf(SKIP_NO_NOOP)('redaction-adversarial: bearer-shaped strings round-trip', () => {
  /**
   * Surface 3: submits a noop run whose free-text input embeds an
   * Anthropic-shaped canary inside a longer sentence, waits for the run to
   * reach a terminal state, then checks that the raw text of the event poll
   * response does not contain the canary.
   */
  it('Anthropic-shaped canary embedded in non-credential input MUST NOT appear verbatim in events', async () => {
    // Use a different canary from the workflow-input scenario so a host that
    // caches inputs across runs cannot cause cross-test collisions.
    const canary = getCanary('anthropic').value;

    const create = await driver.post('/v1/runs', {
      workflowId: NOOP_WORKFLOW_ID,
      inputs: {
        descriptiveText: `Some description containing ${canary} embedded`,
      },
    });
    // Carry the same spec-referenced failure message as the workflow-input
    // scenario so a non-201 here is attributable in the report rather than
    // surfacing as a bare "expected 201".
    expect(
      create.status,
      driver.describe('rest-endpoints.md', 'noop run create returns 201'),
    ).toBe(201);
    const runId = (create.json as { runId: string }).runId;

    await pollUntilTerminal(runId);

    const events = await driver.get(
      `/v1/runs/${encodeURIComponent(runId)}/events/poll`,
    );
    if (events.status !== 200) {
      // A non-200 poll response leaves nothing to scan; return without
      // asserting, exactly as the workflow-input scenario does.
      return;
    }

    expect(events.text.includes(canary), driver.describe(
      'capabilities.md §"Secrets" + NFR-7',
      'Anthropic-shaped canary embedded in arbitrary input MUST NOT appear verbatim in events',
    )).toBe(false);
  });
});
|
|
148
|
+
|
|
149
|
+
describe('redaction-adversarial: every canary fixture carries the marker substring', () => {
  /**
   * Harness self-test for lib/canaries.ts: each entry of `CANARIES` must
   * contain `CANARY_MARKER` in its value. The other adversarial scenarios
   * rely on this so the leak detector can recognise a canary unambiguously;
   * a failure here means the harness itself is broken.
   */
  it('every canary in CANARIES has the marker substring', () => {
    for (const { value, label } of CANARIES) {
      const carriesMarker = value.includes(CANARY_MARKER);
      expect(carriesMarker, driver.describe(
        'lib/canaries.ts',
        `every canary fixture MUST contain CANARY_MARKER (offender label: ${label})`,
      )).toBe(true);
    }
  });
});
|