@openwop/openwop-conformance 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +2 -2
- package/api/asyncapi.yaml +25 -4
- package/api/openapi.yaml +371 -0
- package/coverage.md +31 -4
- package/fixtures/conformance-phase4-nondet-tool.json +53 -0
- package/fixtures/conformance-phase4-replay-divergence.json +40 -0
- package/fixtures.md +5 -3
- package/package.json +1 -1
- package/schemas/README.md +4 -0
- package/schemas/annotation-create.schema.json +37 -0
- package/schemas/annotation.schema.json +56 -0
- package/schemas/capabilities.schema.json +191 -3
- package/schemas/credential-reference.schema.json +21 -0
- package/schemas/node-pack-manifest.schema.json +112 -1
- package/schemas/run-diff-response.schema.json +64 -0
- package/schemas/run-event-payloads.schema.json +104 -2
- package/schemas/run-event.schema.json +8 -1
- package/schemas/run-snapshot.schema.json +11 -0
- package/src/lib/behavior-gate.ts +51 -0
- package/src/lib/driver.ts +13 -1
- package/src/lib/feedback.ts +31 -0
- package/src/lib/saml-idp.ts +179 -0
- package/src/scenarios/approval-gate-events.test.ts +61 -0
- package/src/scenarios/approval-gate-flow.test.ts +68 -0
- package/src/scenarios/auth-saml-profile.test.ts +119 -0
- package/src/scenarios/auth-scim-profile.test.ts +65 -0
- package/src/scenarios/authorization-fail-closed.test.ts +80 -0
- package/src/scenarios/authorization-roles-shape.test.ts +83 -0
- package/src/scenarios/connector-manifest-validity.test.ts +142 -0
- package/src/scenarios/credential-payload-redaction.test.ts +93 -0
- package/src/scenarios/credentials-capability-shape.test.ts +90 -0
- package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
- package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
- package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
- package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
- package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
- package/src/scenarios/experimental-tier-shape.test.ts +192 -0
- package/src/scenarios/feedback-capability-shape.test.ts +35 -0
- package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
- package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
- package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
- package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
- package/src/scenarios/feedback-record-and-list.test.ts +32 -0
- package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
- package/src/scenarios/identity-owner-shape.test.ts +64 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +13 -12
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
- package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
- package/src/scenarios/oauth-capability-shape.test.ts +97 -0
- package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
- package/src/scenarios/pack-registry-isolation.test.ts +108 -0
- package/src/scenarios/pack-registry-publish.test.ts +1 -1
- package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
- package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
- package/src/scenarios/redaction.test.ts +4 -1
- package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
- package/src/scenarios/run-diff.test.ts +143 -0
- package/src/scenarios/sandbox-capability-gate-respected.test.ts +7 -1
- package/src/scenarios/sandbox-memory-cap.test.ts +7 -5
- package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
- package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +7 -1
- package/src/scenarios/sandbox-no-host-env-leak.test.ts +5 -1
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +9 -1
- package/src/scenarios/sandbox-no-host-process-escape.test.ts +5 -1
- package/src/scenarios/sandbox-no-network-escape.test.ts +5 -1
- package/src/scenarios/sandbox-timeout-cap.test.ts +7 -5
- package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
- package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
- package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
- package/src/scenarios/spec-corpus-validity.test.ts +6 -3
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* secret-leakage-otel-attribute — SECURITY invariant verification via RFC 0034 seam.
|
|
3
|
+
*
|
|
4
|
+
* Verifies the two `SECURITY/invariants.yaml` rows
|
|
5
|
+
* - `secret-leakage-otel-attribute` (reference-impl → protocol per RFC 0034)
|
|
6
|
+
* - `secret-leakage-debug-bundle-otel` (reference-impl → protocol per RFC 0034)
|
|
7
|
+
*
|
|
8
|
+
* The host has a BYOK plumbing path that resolves a `credentialRef` and
|
|
9
|
+
* passes the plaintext value into a NodeModule (the `openwop-smoke-byok-
|
|
10
|
+
* roundtrip` fixture does exactly this). Two separate exfiltration risks
|
|
11
|
+
* exist on the way back out:
|
|
12
|
+
*
|
|
13
|
+
* 1. OTel span attributes — host instrumentation MAY accidentally
|
|
14
|
+
* stamp the resolved plaintext onto a span attribute (`openwop.*`
|
|
15
|
+
* or vendor-namespaced). RFC 0034 §B's `GET /v1/host/sample/test/
|
|
16
|
+
* otel/spans?runId=<id>` seam exposes the full span buffer so
|
|
17
|
+
* conformance can mechanically prove no leak.
|
|
18
|
+
*
|
|
19
|
+
* 2. Debug-bundle exports — the optional `debug-bundle` capability
|
|
20
|
+
* exposes a portable JSON snapshot of a run's diagnostic state.
|
|
21
|
+
* RFC 0034 §B's `POST /v1/host/sample/test/debug-bundle/export`
|
|
22
|
+
* seam returns the bundle so conformance can prove the canary
|
|
23
|
+
* doesn't appear there either.
|
|
24
|
+
*
|
|
25
|
+
* Distinct from `envelope-reasoning-secret-redaction.test.ts`, which
|
|
26
|
+
* narrows to the envelope-acceptor's redaction of `reasoning` field
|
|
27
|
+
* canaries. This file verifies the broader executor-side claim: that
|
|
28
|
+
* NO span attribute or debug-bundle field contains the resolved
|
|
29
|
+
* plaintext anywhere in the run's instrumentation surface.
|
|
30
|
+
*
|
|
31
|
+
* Detection method: the host pre-provisions a canary secret under
|
|
32
|
+
* `openwop-conformance-canary-secret` (any non-empty value). The
|
|
33
|
+
* conformance scenario reads the same canary value via
|
|
34
|
+
* `OPENWOP_CANARY_SECRET_VALUE` env (host operator + conformance runner
|
|
35
|
+
* agree on it). The scenario then runs the `openwop-smoke-byok-
|
|
36
|
+
* roundtrip` fixture, scrapes the OTel + debug-bundle seams, and
|
|
37
|
+
* asserts the canary plaintext never appears in either surface.
|
|
38
|
+
*
|
|
39
|
+
* Gating:
|
|
40
|
+
* - `capabilities.secrets.supported: true` — host has BYOK plumbing.
|
|
41
|
+
* - `capabilities.observability.testSeams.otelScrape: true` (RFC 0034)
|
|
42
|
+
* - `capabilities.observability.testSeams.debugBundleExport: true` (RFC 0034)
|
|
43
|
+
* - Fixture `openwop-smoke-byok-roundtrip` advertised + the host has
|
|
44
|
+
* pre-provisioned the canary secret.
|
|
45
|
+
*
|
|
46
|
+
* Soft-skips on any unmet gate. Hard-fails on canary leak.
|
|
47
|
+
*
|
|
48
|
+
* @see RFCS/0034-otel-collector-test-seam.md §B
|
|
49
|
+
* @see SECURITY/invariants.yaml secret-leakage-otel-attribute
|
|
50
|
+
* @see SECURITY/invariants.yaml secret-leakage-debug-bundle-otel
|
|
51
|
+
* @see SECURITY/threat-model-secret-leakage.md
|
|
52
|
+
*/
|
|
53
|
+
|
|
54
|
+
import { describe, it, expect } from 'vitest';
|
|
55
|
+
import { driver } from '../lib/driver.js';
|
|
56
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
57
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
58
|
+
|
|
59
|
+
// Identifier of the BYOK round-trip fixture workflow this file runs.
const BYOK_WORKFLOW_ID = 'openwop-smoke-byok-roundtrip';

// Canary plaintext shared out-of-band between the host operator and the
// conformance runner; undefined when the runner has not been given one.
const CANARY_VALUE = process.env.OPENWOP_CANARY_SECRET_VALUE;

// Suite-level skip switches: no base URL configured, or the BYOK fixture
// is not advertised.
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
const FIXTURE_SKIP = !isFixtureAdvertised(BYOK_WORKFLOW_ID);
|
|
63
|
+
|
|
64
|
+
/**
 * Subset of the `/.well-known/openwop` discovery document read by this file.
 * Leaf values are typed `unknown` so the scenarios check them at runtime
 * instead of trusting the host's advertisement.
 */
interface DiscoveryDoc {
  capabilities?: {
    /** Secrets capability; scenarios require `supported === true`. */
    secrets?: { supported?: unknown };
    observability?: {
      /** RFC 0034 test-seam advertisements. */
      testSeams?: {
        /** Gates the OTel span scrape scenario. */
        otelScrape?: unknown;
        /** Gates the debug-bundle export scenario. */
        debugBundleExport?: unknown;
      };
    };
  };
}
|
|
75
|
+
|
|
76
|
+
/**
 * Fetches the host's discovery document from `/.well-known/openwop`.
 *
 * @returns The response JSON (cast to `DiscoveryDoc`) on a 200 response;
 *          `null` on any other status or if the request throws. Callers
 *          treat `null` as "capability not advertised" and soft-skip.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
  } catch {
    // Deliberate best-effort: an unreachable discovery endpoint means the
    // gated scenarios skip rather than fail.
    return null;
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Starts a run of the BYOK round-trip fixture via `POST /v1/runs`.
 *
 * @returns The new run's `runId`, or `null` when the host answers 404 or 422
 *          (callers soft-skip in that case). Any other status than 201 fails
 *          the calling test through the `expect` below.
 */
async function startByokRun(): Promise<string | null> {
  const response = await driver.post('/v1/runs', { workflowId: BYOK_WORKFLOW_ID });
  const { status } = response;

  const runRejected = status === 404 || status === 422;
  if (runRejected) {
    return null;
  }

  expect(status).toBe(201);
  const { runId } = response.json as { runId: string };
  return runId;
}
|
|
92
|
+
|
|
93
|
+
/**
 * OTel span scrape: runs the BYOK fixture, reads the span buffer through the
 * RFC 0034 §B seam and fails if the canary plaintext appears anywhere in it.
 * Soft-skips when the canary value, the secrets capability, the `otelScrape`
 * seam or the fixture run is unavailable.
 */
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-otel-attribute: OTel span scrape (RFC 0034 §B)',
  () => {
    it('NO OTel span attribute MUST contain the BYOK canary plaintext for a run that resolved it', async (ctx) => {
      // Without a canary value there is nothing to search for.
      if (!CANARY_VALUE) {
        ctx.skip();
        return;
      }
      const d = await readDiscovery();
      const secretsOk = d?.capabilities?.secrets?.supported === true;
      const seamOk = d?.capabilities?.observability?.testSeams?.otelScrape === true;
      if (!secretsOk || !seamOk) {
        ctx.skip();
        return;
      }

      const runId = await startByokRun();
      if (runId === null) {
        ctx.skip();
        return;
      }
      const terminal = await pollUntilTerminal(runId);
      expect(
        terminal.status,
        driver.describe(
          'auth.md §"Secret resolution"',
          'BYOK fixture run MUST terminate `completed` when the canary is provisioned',
        ),
      ).toBe('completed');

      const spansRes = await driver.get(
        `/v1/host/sample/test/otel/spans?runId=${encodeURIComponent(runId)}`,
      );
      expect(
        spansRes.status,
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §B',
          'host advertising capabilities.observability.testSeams.otelScrape MUST serve GET /v1/host/sample/test/otel/spans (200) — 404 is non-conformant',
        ),
      ).toBe(200);

      // Do NOT default a missing `spans` to []: that would make the shape
      // assertion below pass vacuously and let a body with no span buffer
      // sail through the leak check. Optional chaining also keeps an empty
      // or null body from surfacing as an unrelated TypeError.
      const body = spansRes.json as { spans?: unknown } | null | undefined;
      const spans = body?.spans;
      expect(
        Array.isArray(spans),
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §B',
          'response body MUST have a `spans` array',
        ),
      ).toBe(true);

      // The host's instrumentation produces at least one span per run
      // (the engine emits `openwop.run.lifecycle` + per-node spans). If
      // the buffer is empty, the host isn't actually instrumenting — that
      // would be a separate non-conformance, but for this assertion we
      // only care about leak, not coverage.
      const serialized = JSON.stringify(spans);

      // JSON.stringify escapes quotes, backslashes and control characters,
      // so a canary containing any of them would never match in its raw
      // form. Search for the JSON-escaped spelling as well.
      const escapedCanary = JSON.stringify(CANARY_VALUE).slice(1, -1);
      const leaked = serialized.includes(CANARY_VALUE) || serialized.includes(escapedCanary);

      expect(
        leaked,
        driver.describe(
          'SECURITY/invariants.yaml secret-leakage-otel-attribute',
          'no OTel span attribute may contain the BYOK canary plaintext — host instrumentation MUST redact or omit secret material before any span emission. Per `agent-memory.md` §SR-1, the canonical redaction marker is `[REDACTED:<secretId>]`.',
        ),
      ).toBe(false);
    });
  },
);
|
|
161
|
+
|
|
162
|
+
/**
 * Debug-bundle scrape: runs the BYOK fixture, exports the debug bundle
 * through the RFC 0034 §B seam and fails if the canary plaintext appears
 * anywhere in it. Soft-skips when the canary value, the secrets capability,
 * the `debugBundleExport` seam or the fixture run is unavailable.
 */
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-debug-bundle-otel: debug-bundle export scrape (RFC 0034 §B)',
  () => {
    it('NO debug-bundle field MUST contain the BYOK canary plaintext for a run that resolved it', async (ctx) => {
      // Without a canary value there is nothing to search for.
      if (!CANARY_VALUE) {
        ctx.skip();
        return;
      }
      const d = await readDiscovery();
      const secretsOk = d?.capabilities?.secrets?.supported === true;
      const seamOk = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
      if (!secretsOk || !seamOk) {
        ctx.skip();
        return;
      }

      const runId = await startByokRun();
      if (runId === null) {
        ctx.skip();
        return;
      }
      const terminal = await pollUntilTerminal(runId);
      // Same described failure message as the OTel span scenario, so a
      // non-`completed` run reports the violated requirement.
      expect(
        terminal.status,
        driver.describe(
          'auth.md §"Secret resolution"',
          'BYOK fixture run MUST terminate `completed` when the canary is provisioned',
        ),
      ).toBe('completed');

      const bundleRes = await driver.post('/v1/host/sample/test/debug-bundle/export', { runId });
      expect(
        bundleRes.status,
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §B',
          'host advertising capabilities.observability.testSeams.debugBundleExport MUST serve POST /v1/host/sample/test/debug-bundle/export (200) — 404 is non-conformant',
        ),
      ).toBe(200);

      const serialized = JSON.stringify(bundleRes.json ?? {});

      // JSON.stringify escapes quotes, backslashes and control characters,
      // so a canary containing any of them would never match in its raw
      // form. Search for the JSON-escaped spelling as well.
      const escapedCanary = JSON.stringify(CANARY_VALUE).slice(1, -1);
      const leaked = serialized.includes(CANARY_VALUE) || serialized.includes(escapedCanary);

      expect(
        leaked,
        driver.describe(
          'SECURITY/invariants.yaml secret-leakage-debug-bundle-otel',
          'no debug-bundle field may contain the BYOK canary plaintext — debug-bundle export MUST redact or omit secret material. Per `debug-bundle.md` §"Redaction", the canonical marker is `[REDACTED:<secretId>]`.',
        ),
      ).toBe(false);
    });
  },
);
|
|
206
|
+
|
|
207
|
+
/**
 * Advertisement-shape probe: when the host advertises secrets support and
 * exposes `observability.testSeams`, each seam flag that is present must be
 * a boolean. Skips when secrets are unsupported or no seams are advertised.
 */
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-otel-attribute: advertisement-shape probe (RFC 0034 §A)',
  () => {
    it('when secrets.supported is true, observability.testSeams advertisements MUST be boolean if present', async (ctx) => {
      const discovery = await readDiscovery();

      const secretsSupported = discovery?.capabilities?.secrets?.supported === true;
      if (!secretsSupported) {
        ctx.skip();
        return;
      }

      const seams = discovery?.capabilities?.observability?.testSeams;
      if (seams === undefined) {
        // Host is honest about not exposing the seams — Drift #17 path.
        ctx.skip();
        return;
      }

      // Both flags share the same rule, checked in the same order as the
      // fields are declared: otelScrape first, then debugBundleExport.
      for (const flag of ['otelScrape', 'debugBundleExport'] as const) {
        const advertised = flag in seams && seams[flag] !== undefined;
        if (!advertised) {
          continue;
        }
        expect(
          typeof seams[flag],
          driver.describe(
            'RFCS/0034-otel-collector-test-seam.md §A',
            `capabilities.observability.testSeams.${flag} MUST be boolean when present`,
          ),
        ).toBe('boolean');
      }
    });
  },
);
|
|
@@ -1016,7 +1016,10 @@ describe('spec-corpus: AsyncAPI 3.1 spec is structurally valid', () => {
|
|
|
1016
1016
|
const messageNames = extractAsyncApiMessageNames(raw);
|
|
1017
1017
|
const runEventSchema = readJson(join(SCHEMAS_DIR, 'run-event.schema.json'));
|
|
1018
1018
|
const runEventTypes = new Set(findRunEventTypeEnum(runEventSchema));
|
|
1019
|
-
|
|
1019
|
+
// `run.annotated` (RFC 0056) is a live SSE notification carrying an
|
|
1020
|
+
// Annotation — NOT a RunEventDoc and deliberately NOT in the RunEventType
|
|
1021
|
+
// enum (annotations are a side-resource, excluded from fork/replay).
|
|
1022
|
+
const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any', 'run.annotated']);
|
|
1020
1023
|
|
|
1021
1024
|
expect(messageNames.length, 'AsyncAPI MUST declare named SSE messages').toBeGreaterThan(0);
|
|
1022
1025
|
|
|
@@ -1105,7 +1108,7 @@ describe.skipIf(V1_DIR === null)('spec-corpus: prose docs carry a Status: legend
|
|
|
1105
1108
|
});
|
|
1106
1109
|
|
|
1107
1110
|
for (const file of proseFiles) {
|
|
1108
|
-
it(`${file} declares a Status: tag (STUB / DRAFT / OUTLINE / FINAL)`, () => {
|
|
1111
|
+
it(`${file} declares a Status: tag (STUB / DRAFT / OUTLINE / FINAL | Stable / Stabilizing / Draft / Experimental)`, () => {
|
|
1109
1112
|
// V1_DIR is non-null here — proseFiles is empty when V1_DIR is null
|
|
1110
1113
|
// so this loop body never runs in the published-tarball layout.
|
|
1111
1114
|
const content = readFileSync(join(V1_DIR as string, file), 'utf8');
|
|
@@ -1113,7 +1116,7 @@ describe.skipIf(V1_DIR === null)('spec-corpus: prose docs carry a Status: legend
|
|
|
1113
1116
|
expect(
|
|
1114
1117
|
content,
|
|
1115
1118
|
`${file} must include a "Status:" legend tag near its header`,
|
|
1116
|
-
).toMatch(/\*\*Status:\s*(STUB|DRAFT|OUTLINE|FINAL)\b/);
|
|
1119
|
+
).toMatch(/\*\*Status:\s*(STUB|DRAFT|OUTLINE|FINAL|Stable|Stabilizing|Draft|Experimental)\b/);
|
|
1117
1120
|
});
|
|
1118
1121
|
}
|
|
1119
1122
|
});
|