@openwop/openwop-conformance 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +25 -4
  4. package/api/openapi.yaml +371 -0
  5. package/coverage.md +31 -4
  6. package/fixtures/conformance-phase4-nondet-tool.json +53 -0
  7. package/fixtures/conformance-phase4-replay-divergence.json +40 -0
  8. package/fixtures.md +5 -3
  9. package/package.json +1 -1
  10. package/schemas/README.md +4 -0
  11. package/schemas/annotation-create.schema.json +37 -0
  12. package/schemas/annotation.schema.json +56 -0
  13. package/schemas/capabilities.schema.json +191 -3
  14. package/schemas/credential-reference.schema.json +21 -0
  15. package/schemas/node-pack-manifest.schema.json +112 -1
  16. package/schemas/run-diff-response.schema.json +64 -0
  17. package/schemas/run-event-payloads.schema.json +104 -2
  18. package/schemas/run-event.schema.json +8 -1
  19. package/schemas/run-snapshot.schema.json +11 -0
  20. package/src/lib/behavior-gate.ts +51 -0
  21. package/src/lib/driver.ts +13 -1
  22. package/src/lib/feedback.ts +31 -0
  23. package/src/lib/saml-idp.ts +179 -0
  24. package/src/scenarios/approval-gate-events.test.ts +61 -0
  25. package/src/scenarios/approval-gate-flow.test.ts +68 -0
  26. package/src/scenarios/auth-saml-profile.test.ts +119 -0
  27. package/src/scenarios/auth-scim-profile.test.ts +65 -0
  28. package/src/scenarios/authorization-fail-closed.test.ts +80 -0
  29. package/src/scenarios/authorization-roles-shape.test.ts +83 -0
  30. package/src/scenarios/connector-manifest-validity.test.ts +142 -0
  31. package/src/scenarios/credential-payload-redaction.test.ts +93 -0
  32. package/src/scenarios/credentials-capability-shape.test.ts +90 -0
  33. package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
  34. package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
  35. package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
  36. package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
  37. package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
  38. package/src/scenarios/experimental-tier-shape.test.ts +192 -0
  39. package/src/scenarios/feedback-capability-shape.test.ts +35 -0
  40. package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
  41. package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
  42. package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
  43. package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
  44. package/src/scenarios/feedback-record-and-list.test.ts +32 -0
  45. package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
  46. package/src/scenarios/identity-owner-shape.test.ts +64 -0
  47. package/src/scenarios/multi-agent-confidence-escalation.test.ts +13 -12
  48. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
  49. package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
  50. package/src/scenarios/oauth-capability-shape.test.ts +97 -0
  51. package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
  52. package/src/scenarios/pack-registry-isolation.test.ts +108 -0
  53. package/src/scenarios/pack-registry-publish.test.ts +1 -1
  54. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
  55. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
  56. package/src/scenarios/redaction.test.ts +4 -1
  57. package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
  58. package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
  59. package/src/scenarios/run-diff.test.ts +143 -0
  60. package/src/scenarios/sandbox-capability-gate-respected.test.ts +7 -1
  61. package/src/scenarios/sandbox-memory-cap.test.ts +7 -5
  62. package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
  63. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +7 -1
  64. package/src/scenarios/sandbox-no-host-env-leak.test.ts +5 -1
  65. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +9 -1
  66. package/src/scenarios/sandbox-no-host-process-escape.test.ts +5 -1
  67. package/src/scenarios/sandbox-no-network-escape.test.ts +5 -1
  68. package/src/scenarios/sandbox-timeout-cap.test.ts +7 -5
  69. package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
  70. package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
  71. package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
  72. package/src/scenarios/spec-corpus-validity.test.ts +6 -3
@@ -0,0 +1,241 @@
1
+ /**
2
+ * secret-leakage-otel-attribute — SECURITY invariant verification via RFC 0034 seam.
3
+ *
4
+ * Verifies the two `SECURITY/invariants.yaml` rows
5
+ * - `secret-leakage-otel-attribute` (reference-impl → protocol per RFC 0034)
6
+ * - `secret-leakage-debug-bundle-otel` (reference-impl → protocol per RFC 0034)
7
+ *
8
+ * The host has a BYOK plumbing path that resolves a `credentialRef` and
9
+ * passes the plaintext value into a NodeModule (the `openwop-smoke-byok-
10
+ * roundtrip` fixture does exactly this). Two separate exfiltration risks
11
+ * exist on the way back out:
12
+ *
13
+ * 1. OTel span attributes — host instrumentation MAY accidentally
14
+ * stamp the resolved plaintext onto a span attribute (`openwop.*`
15
+ * or vendor-namespaced). RFC 0034 §B's `GET /v1/host/sample/test/
16
+ * otel/spans?runId=<id>` seam exposes the full span buffer so
17
+ * conformance can mechanically prove no leak.
18
+ *
19
+ * 2. Debug-bundle exports — the optional `debug-bundle` capability
20
+ * exposes a portable JSON snapshot of a run's diagnostic state.
21
+ * RFC 0034 §B's `POST /v1/host/sample/test/debug-bundle/export`
22
+ * seam returns the bundle so conformance can prove the canary
23
+ * doesn't appear there either.
24
+ *
25
+ * Distinct from `envelope-reasoning-secret-redaction.test.ts`, which
26
+ * narrows to the envelope-acceptor's redaction of `reasoning` field
27
+ * canaries. This file verifies the broader executor-side claim: that
28
+ * NO span attribute or debug-bundle field contains the resolved
29
+ * plaintext anywhere in the run's instrumentation surface.
30
+ *
31
+ * Detection method: the host pre-provisions a canary secret under
32
+ * `openwop-conformance-canary-secret` (any non-empty value). The
33
+ * conformance scenario reads the same canary value via
34
+ * `OPENWOP_CANARY_SECRET_VALUE` env (host operator + conformance runner
35
+ * agree on it). The scenario then runs the `openwop-smoke-byok-
36
+ * roundtrip` fixture, scrapes the OTel + debug-bundle seams, and
37
+ * asserts the canary plaintext never appears in either surface.
38
+ *
39
+ * Gating:
40
+ * - `capabilities.secrets.supported: true` — host has BYOK plumbing.
41
+ * - `capabilities.observability.testSeams.otelScrape: true` (RFC 0034)
42
+ * - `capabilities.observability.testSeams.debugBundleExport: true` (RFC 0034)
43
+ * - Fixture `openwop-smoke-byok-roundtrip` advertised + the host has
44
+ * pre-provisioned the canary secret.
45
+ *
46
+ * Soft-skips on any unmet gate. Hard-fails on canary leak.
47
+ *
48
+ * @see RFCS/0034-otel-collector-test-seam.md §B
49
+ * @see SECURITY/invariants.yaml secret-leakage-otel-attribute
50
+ * @see SECURITY/invariants.yaml secret-leakage-debug-bundle-otel
51
+ * @see SECURITY/threat-model-secret-leakage.md
52
+ */
53
+
54
+ import { describe, it, expect } from 'vitest';
55
+ import { driver } from '../lib/driver.js';
56
+ import { pollUntilTerminal } from '../lib/polling.js';
57
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
58
+
59
// Workflow id of the BYOK round-trip fixture that every scenario in this file runs.
const BYOK_WORKFLOW_ID = 'openwop-smoke-byok-roundtrip';

// Canary plaintext shared between host operator and conformance runner.
// `undefined` when the env var is unset; each test soft-skips in that case.
const CANARY_VALUE = process.env.OPENWOP_CANARY_SECRET_VALUE;

// Suite-level skip switches: no host base URL configured, or the fixture is
// not reported as advertised by `isFixtureAdvertised`.
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
const FIXTURE_SKIP = !isFixtureAdvertised(BYOK_WORKFLOW_ID);
63
+
64
/**
 * Test-seam advertisement flags read by this scenario. Typed `unknown`
 * because the values arrive from the host unvalidated; the shape probe
 * in this file checks that they are booleans.
 */
interface TestSeamFlags {
  otelScrape?: unknown;
  debugBundleExport?: unknown;
}

/**
 * The slice of the `/.well-known/openwop` discovery document this file
 * consumes. Every level is optional so a sparse document can be walked
 * with optional chaining.
 */
interface DiscoveryDoc {
  capabilities?: {
    secrets?: { supported?: unknown };
    observability?: { testSeams?: TestSeamFlags };
  };
}
75
+
76
/**
 * Fetches the host discovery document from `/.well-known/openwop`.
 *
 * @returns the response body cast to `DiscoveryDoc` when the host answers
 *   200; `null` for any other status or when the request throws, so that
 *   callers can soft-skip instead of failing.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const { status, json } = await driver.get('/.well-known/openwop');
    return status === 200 ? (json as DiscoveryDoc) : null;
  } catch {
    // Deliberate best-effort: an unreachable discovery endpoint means "skip".
    return null;
  }
}
85
+
86
/**
 * Starts a run of the BYOK round-trip fixture via `POST /v1/runs`.
 *
 * @returns the new run id, or `null` when the host answers 404 or 422
 *   (callers treat `null` as a soft-skip).
 *
 * Fails the calling test (via `expect`) when the status is anything other
 * than 201, or when the 201 body does not carry a non-empty string `runId`.
 */
async function startByokRun(): Promise<string | null> {
  const create = await driver.post('/v1/runs', { workflowId: BYOK_WORKFLOW_ID });
  if (create.status === 404 || create.status === 422) return null;
  expect(create.status).toBe(201);

  // Validate the id before handing it back: without this check a body that
  // lacks `runId` would return `undefined`, slip past the callers'
  // `=== null` guard, and end up polling a bogus run.
  const runId = (create.json as { runId?: unknown } | null)?.runId;
  expect(
    typeof runId === 'string' && runId.length > 0,
    'POST /v1/runs 201 response body MUST carry a non-empty string `runId`',
  ).toBe(true);
  return runId as string;
}
92
+
93
// Runs the BYOK fixture, scrapes the host's OTel span buffer through the
// RFC 0034 §B seam, and asserts the canary plaintext is absent from it.
// Soft-skips when the canary env var, the `secrets` capability, the
// `otelScrape` seam advertisement, or the fixture itself is unavailable.
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-otel-attribute: OTel span scrape (RFC 0034 §B)',
  () => {
    it('NO OTel span attribute MUST contain the BYOK canary plaintext for a run that resolved it', async (ctx) => {
      if (!CANARY_VALUE) {
        ctx.skip();
        return;
      }
      const d = await readDiscovery();
      const secretsOk = d?.capabilities?.secrets?.supported === true;
      const seamOk = d?.capabilities?.observability?.testSeams?.otelScrape === true;
      if (!secretsOk || !seamOk) {
        ctx.skip();
        return;
      }

      const runId = await startByokRun();
      if (runId === null) {
        ctx.skip();
        return;
      }
      const terminal = await pollUntilTerminal(runId);
      expect(
        terminal.status,
        driver.describe(
          'auth.md §"Secret resolution"',
          'BYOK fixture run MUST terminate `completed` when the canary is provisioned',
        ),
      ).toBe('completed');

      const spansRes = await driver.get(
        `/v1/host/sample/test/otel/spans?runId=${encodeURIComponent(runId)}`,
      );
      expect(
        spansRes.status,
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §B',
          'host advertising capabilities.observability.testSeams.otelScrape MUST serve GET /v1/host/sample/test/otel/spans (200) — 404 is non-conformant',
        ),
      ).toBe(200);

      // No `?? []` default here: a missing or non-array `spans` must FAIL the
      // shape assertion rather than pass it and leave the leak scan looking
      // at an empty array. A null body is tolerated by the optional chain
      // so it surfaces as this spec-cited failure, not a TypeError.
      const body = spansRes.json as { spans?: unknown } | null;
      const spans = body?.spans;
      expect(
        Array.isArray(spans),
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §B',
          'response body MUST have a `spans` array',
        ),
      ).toBe(true);

      // The host's instrumentation produces at least one span per run
      // (the engine emits `openwop.run.lifecycle` + per-node spans). If
      // the buffer is empty, the host isn't actually instrumenting — that
      // would be a separate non-conformance, but for this assertion we
      // only care about leak, not coverage.
      //
      // Scan the whole response body, not just `spans`, so plaintext in a
      // sibling field of the seam response is caught as well.
      const serialized = JSON.stringify(body);

      // JSON.stringify escapes `"`, `\` and control characters, so a canary
      // containing any of them appears in `serialized` only in escaped form.
      // Search for that form too, otherwise such a leak goes undetected.
      const escapedCanary = JSON.stringify(CANARY_VALUE).slice(1, -1);

      expect(
        serialized.includes(CANARY_VALUE) || serialized.includes(escapedCanary),
        driver.describe(
          'SECURITY/invariants.yaml secret-leakage-otel-attribute',
          'no OTel span attribute may contain the BYOK canary plaintext — host instrumentation MUST redact or omit secret material before any span emission. Per `agent-memory.md` §SR-1, the canonical redaction marker is `[REDACTED:<secretId>]`.',
        ),
      ).toBe(false);
    });
  },
);
161
+
162
// Runs the BYOK fixture, exports its debug bundle through the RFC 0034 §B
// seam, and asserts the canary plaintext is absent from the bundle.
// Soft-skips when the canary env var, the `secrets` capability, the
// `debugBundleExport` seam advertisement, or the fixture is unavailable.
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-debug-bundle-otel: debug-bundle export scrape (RFC 0034 §B)',
  () => {
    it('NO debug-bundle field MUST contain the BYOK canary plaintext for a run that resolved it', async (ctx) => {
      if (!CANARY_VALUE) {
        ctx.skip();
        return;
      }
      const d = await readDiscovery();
      const secretsOk = d?.capabilities?.secrets?.supported === true;
      const seamOk = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
      if (!secretsOk || !seamOk) {
        ctx.skip();
        return;
      }

      const runId = await startByokRun();
      if (runId === null) {
        ctx.skip();
        return;
      }
      const terminal = await pollUntilTerminal(runId);
      // Same spec-cited message as the OTel span scrape scenario, so a
      // failed fixture run is reported identically in both scenarios.
      expect(
        terminal.status,
        driver.describe(
          'auth.md §"Secret resolution"',
          'BYOK fixture run MUST terminate `completed` when the canary is provisioned',
        ),
      ).toBe('completed');

      const bundleRes = await driver.post('/v1/host/sample/test/debug-bundle/export', { runId });
      expect(
        bundleRes.status,
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §B',
          'host advertising capabilities.observability.testSeams.debugBundleExport MUST serve POST /v1/host/sample/test/debug-bundle/export (200) — 404 is non-conformant',
        ),
      ).toBe(200);

      const serialized = JSON.stringify(bundleRes.json ?? {});

      // JSON.stringify escapes `"`, `\` and control characters, so a canary
      // containing any of them appears in `serialized` only in escaped form.
      // Search for that form too, otherwise such a leak goes undetected.
      const escapedCanary = JSON.stringify(CANARY_VALUE).slice(1, -1);

      expect(
        serialized.includes(CANARY_VALUE) || serialized.includes(escapedCanary),
        driver.describe(
          'SECURITY/invariants.yaml secret-leakage-debug-bundle-otel',
          'no debug-bundle field may contain the BYOK canary plaintext — debug-bundle export MUST redact or omit secret material. Per `debug-bundle.md` §"Redaction", the canonical marker is `[REDACTED:<secretId>]`.',
        ),
      ).toBe(false);
    });
  },
);
206
+
207
// Shape probe for the RFC 0034 §A advertisements: when the host supports
// secrets and publishes `observability.testSeams`, each seam flag that is
// present must be a boolean. Issues no run; reads the discovery doc only.
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-otel-attribute: advertisement-shape probe (RFC 0034 §A)',
  () => {
    it('when secrets.supported is true, observability.testSeams advertisements MUST be boolean if present', async (ctx) => {
      const d = await readDiscovery();
      if (d?.capabilities?.secrets?.supported !== true) {
        ctx.skip();
        return;
      }
      // Widened to `unknown`: the value comes straight from host JSON and
      // may be `null` or a non-object despite the declared type.
      const seams: unknown = d?.capabilities?.observability?.testSeams;
      if (seams === undefined) {
        ctx.skip(); // host honest about not exposing the seams — Drift #17 path
        return;
      }

      // Guard before reading members: a `null` or primitive `testSeams`
      // should fail with a spec-cited message, not a raw TypeError.
      expect(
        typeof seams === 'object' && seams !== null,
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §A',
          'capabilities.observability.testSeams MUST be an object when present',
        ),
      ).toBe(true);

      const { otelScrape, debugBundleExport } = seams as {
        otelScrape?: unknown;
        debugBundleExport?: unknown;
      };

      // An absent key and an explicit `undefined` both read as `undefined`,
      // so a single comparison covers "if present".
      if (otelScrape !== undefined) {
        expect(
          typeof otelScrape,
          driver.describe(
            'RFCS/0034-otel-collector-test-seam.md §A',
            'capabilities.observability.testSeams.otelScrape MUST be boolean when present',
          ),
        ).toBe('boolean');
      }
      if (debugBundleExport !== undefined) {
        expect(
          typeof debugBundleExport,
          driver.describe(
            'RFCS/0034-otel-collector-test-seam.md §A',
            'capabilities.observability.testSeams.debugBundleExport MUST be boolean when present',
          ),
        ).toBe('boolean');
      }
    });
  },
);
@@ -1016,7 +1016,10 @@ describe('spec-corpus: AsyncAPI 3.1 spec is structurally valid', () => {
1016
1016
  const messageNames = extractAsyncApiMessageNames(raw);
1017
1017
  const runEventSchema = readJson(join(SCHEMAS_DIR, 'run-event.schema.json'));
1018
1018
  const runEventTypes = new Set(findRunEventTypeEnum(runEventSchema));
1019
- const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any']);
1019
+ // `run.annotated` (RFC 0056) is a live SSE notification carrying an
1020
+ // Annotation — NOT a RunEventDoc and deliberately NOT in the RunEventType
1021
+ // enum (annotations are a side-resource, excluded from fork/replay).
1022
+ const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any', 'run.annotated']);
1020
1023
 
1021
1024
  expect(messageNames.length, 'AsyncAPI MUST declare named SSE messages').toBeGreaterThan(0);
1022
1025
 
@@ -1105,7 +1108,7 @@ describe.skipIf(V1_DIR === null)('spec-corpus: prose docs carry a Status: legend
1105
1108
  });
1106
1109
 
1107
1110
  for (const file of proseFiles) {
1108
- it(`${file} declares a Status: tag (STUB / DRAFT / OUTLINE / FINAL)`, () => {
1111
+ it(`${file} declares a Status: tag (STUB / DRAFT / OUTLINE / FINAL | Stable / Stabilizing / Draft / Experimental)`, () => {
1109
1112
  // V1_DIR is non-null here — proseFiles is empty when V1_DIR is null
1110
1113
  // so this loop body never runs in the published-tarball layout.
1111
1114
  const content = readFileSync(join(V1_DIR as string, file), 'utf8');
@@ -1113,7 +1116,7 @@ describe.skipIf(V1_DIR === null)('spec-corpus: prose docs carry a Status: legend
1113
1116
  expect(
1114
1117
  content,
1115
1118
  `${file} must include a "Status:" legend tag near its header`,
1116
- ).toMatch(/\*\*Status:\s*(STUB|DRAFT|OUTLINE|FINAL)\b/);
1119
+ ).toMatch(/\*\*Status:\s*(STUB|DRAFT|OUTLINE|FINAL|Stable|Stabilizing|Draft|Experimental)\b/);
1117
1120
  });
1118
1121
  }
1119
1122
  });