@openwop/openwop-conformance 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +60 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +8 -3
  4. package/api/openapi.yaml +305 -0
  5. package/coverage.md +35 -10
  6. package/fixtures/conformance-phase4-nondet-tool.json +53 -0
  7. package/fixtures/conformance-phase4-replay-divergence.json +40 -0
  8. package/fixtures.md +5 -3
  9. package/package.json +1 -1
  10. package/schemas/README.md +2 -0
  11. package/schemas/capabilities.schema.json +176 -3
  12. package/schemas/credential-reference.schema.json +21 -0
  13. package/schemas/node-pack-manifest.schema.json +112 -1
  14. package/schemas/run-diff-response.schema.json +64 -0
  15. package/schemas/run-event-payloads.schema.json +104 -2
  16. package/schemas/run-event.schema.json +8 -1
  17. package/schemas/run-snapshot.schema.json +11 -0
  18. package/src/lib/behavior-gate.ts +51 -0
  19. package/src/lib/driver.ts +13 -1
  20. package/src/lib/saml-idp.ts +179 -0
  21. package/src/scenarios/approval-gate-events.test.ts +61 -0
  22. package/src/scenarios/approval-gate-flow.test.ts +68 -0
  23. package/src/scenarios/auth-saml-profile.test.ts +119 -0
  24. package/src/scenarios/auth-scim-profile.test.ts +65 -0
  25. package/src/scenarios/authorization-fail-closed.test.ts +80 -0
  26. package/src/scenarios/authorization-roles-shape.test.ts +83 -0
  27. package/src/scenarios/connector-manifest-validity.test.ts +142 -0
  28. package/src/scenarios/credential-payload-redaction.test.ts +93 -0
  29. package/src/scenarios/credentials-capability-shape.test.ts +90 -0
  30. package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
  31. package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
  32. package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
  33. package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
  34. package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
  35. package/src/scenarios/experimental-tier-shape.test.ts +192 -0
  36. package/src/scenarios/identity-owner-shape.test.ts +64 -0
  37. package/src/scenarios/multi-agent-confidence-escalation.test.ts +59 -21
  38. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
  39. package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
  40. package/src/scenarios/oauth-capability-shape.test.ts +97 -0
  41. package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
  42. package/src/scenarios/pack-registry-isolation.test.ts +108 -0
  43. package/src/scenarios/pack-registry-publish.test.ts +1 -1
  44. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
  45. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
  46. package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
  47. package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
  48. package/src/scenarios/run-diff.test.ts +143 -0
  49. package/src/scenarios/sandbox-capability-gate-respected.test.ts +15 -13
  50. package/src/scenarios/sandbox-memory-cap.test.ts +7 -8
  51. package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
  52. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +14 -13
  53. package/src/scenarios/sandbox-no-host-env-leak.test.ts +14 -21
  54. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +20 -15
  55. package/src/scenarios/sandbox-no-host-process-escape.test.ts +18 -13
  56. package/src/scenarios/sandbox-no-network-escape.test.ts +14 -31
  57. package/src/scenarios/sandbox-timeout-cap.test.ts +7 -8
  58. package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
  59. package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
  60. package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
  61. package/src/scenarios/spec-corpus-validity.test.ts +2 -2
@@ -0,0 +1,66 @@
1
+ /**
2
+ * scheduling-cron-fires-once — RFC 0052 §B behavioral verification.
3
+ *
4
+ * Status: DRAFT. RFC 0052 (scheduling & time-based triggers) is `Draft`.
5
+ *
6
+ * Capability-gated: skips when the host does not advertise
7
+ * `capabilities.scheduling.supported = true`.
8
+ *
9
+ * What this scenario asserts (via the optional
10
+ * `POST /v1/host/sample/scheduling/tick` test seam, which advances a
11
+ * deterministic clock and reports the runs a cron schedule produced):
12
+ * 1. Once-per-tick — a single cron tick produces exactly one run; no
13
+ * duplicate concurrent firing (RFC 0052 §B.2).
14
+ * 2. Missed-tick policy — a host-down-across-a-tick window applies the
15
+ * advertised policy (fire-once-on-recovery OR skip), never a backlog
16
+ * flood (RFC 0052 §B.4).
17
+ *
18
+ * Hosts without the seam soft-skip the behavioral probes (404). Horizon
19
+ * rejection (`schedule_horizon_exceeded`) is covered by the shape +
20
+ * error-code contract; behavioral horizon assertion is part of the deferred
21
+ * delayed-execution scenario.
22
+ *
23
+ * @see RFCS/0052-scheduling-and-time-based-triggers.md
24
+ * @see spec/v1/host-capabilities.md §host.scheduling
25
+ */
26
+
27
+ import { describe, it, expect } from 'vitest';
28
+ import { driver } from '../lib/driver.js';
29
+
30
/**
 * Minimal view of the `/.well-known/openwop` discovery document. Only the
 * `capabilities.scheduling` flags read by this scenario are modelled; every
 * level is optional.
 */
interface DiscoveryDoc {
  capabilities?: {
    scheduling?: {
      supported?: boolean;
      cron?: boolean;
    };
  };
}
33
+
34
/**
 * Fetches the discovery document and extracts its scheduling capability block.
 *
 * @returns the `capabilities.scheduling` object, or `null` when the response
 *   body or any level on the way to that field is absent.
 */
async function readScheduling(): Promise<{ supported?: boolean; cron?: boolean } | null> {
  const response = await driver.get('/.well-known/openwop');
  const discovery = response.json as DiscoveryDoc | undefined;
  const scheduling = discovery?.capabilities?.scheduling;
  return scheduling ?? null;
}
38
+
39
/**
 * Behavioral probes for cron scheduling, driven through the optional
 * `POST /v1/host/sample/scheduling/tick` test seam.
 *
 * Both tests are gated twice:
 *   1. the host must advertise `scheduling.supported` and `scheduling.cron === true`;
 *   2. the seam must be wired (a 404 means it is not).
 * An unmet gate is reported via `ctx.skip()` so the result shows up as
 * "skipped" instead of a vacuous "passed".
 */
describe('scheduling-cron-fires-once: once-per-tick + missed-tick (RFC 0052 §B)', () => {
  it('a single cron tick produces exactly one run', async (ctx) => {
    const sched = await readScheduling();
    if (!sched?.supported || sched.cron !== true) {
      ctx.skip(); // capability-gated
      return;
    }

    const res = await driver.post('/v1/host/sample/scheduling/tick', { scenario: 'single-tick' });
    if (res.status === 404) {
      ctx.skip(); // seam unwired — soft-skip
      return;
    }

    const body = res.json as { runsFired?: number } | undefined;
    expect(
      body?.runsFired,
      driver.describe('RFC 0052 §B.2', 'a single cron tick MUST fire exactly one run (no duplicate concurrent firing)'),
    ).toBe(1);
  });

  it('a missed-tick window does not produce a backlog flood', async (ctx) => {
    const sched = await readScheduling();
    if (!sched?.supported || sched.cron !== true) {
      ctx.skip(); // capability-gated
      return;
    }

    const res = await driver.post('/v1/host/sample/scheduling/tick', { scenario: 'missed-window', missedTicks: 5 });
    if (res.status === 404) {
      ctx.skip(); // seam unwired — soft-skip
      return;
    }

    const body = res.json as { runsFired?: number } | undefined;
    const fired = body?.runsFired;
    // The only valid outcomes are 0 (skip policy) and 1 (fire-once-on-recovery).
    // A bare `<= 1` bound would also accept negative or fractional counts.
    const withinPolicy = fired === 0 || fired === 1;
    expect(
      withinPolicy,
      driver.describe(
        'RFC 0052 §B.4',
        `a missed-tick window MUST apply the advertised policy (fire-once-on-recovery or skip), never N backlogged runs; got runsFired=${body?.runsFired}`,
      ),
    ).toBe(true);
  });
});
@@ -0,0 +1,241 @@
1
+ /**
2
+ * secret-leakage-otel-attribute — SECURITY invariant verification via RFC 0034 seam.
3
+ *
4
+ * Verifies the two `SECURITY/invariants.yaml` rows
5
+ * - `secret-leakage-otel-attribute` (reference-impl → protocol per RFC 0034)
6
+ * - `secret-leakage-debug-bundle-otel` (reference-impl → protocol per RFC 0034)
7
+ *
8
+ * The host has a BYOK plumbing path that resolves a `credentialRef` and
9
+ * passes the plaintext value into a NodeModule (the `openwop-smoke-byok-
10
+ * roundtrip` fixture does exactly this). Two separate exfiltration risks
11
+ * exist on the way back out:
12
+ *
13
+ * 1. OTel span attributes — host instrumentation MAY accidentally
14
+ * stamp the resolved plaintext onto a span attribute (`openwop.*`
15
+ * or vendor-namespaced). RFC 0034 §B's `GET /v1/host/sample/test/
16
+ * otel/spans?runId=<id>` seam exposes the full span buffer so
17
+ * conformance can mechanically prove no leak.
18
+ *
19
+ * 2. Debug-bundle exports — the optional `debug-bundle` capability
20
+ * exposes a portable JSON snapshot of a run's diagnostic state.
21
+ * RFC 0034 §B's `POST /v1/host/sample/test/debug-bundle/export`
22
+ * seam returns the bundle so conformance can prove the canary
23
+ * doesn't appear there either.
24
+ *
25
+ * Distinct from `envelope-reasoning-secret-redaction.test.ts`, which
26
+ * narrows to the envelope-acceptor's redaction of `reasoning` field
27
+ * canaries. This file verifies the broader executor-side claim: that
28
+ * NO span attribute or debug-bundle field contains the resolved
29
+ * plaintext anywhere in the run's instrumentation surface.
30
+ *
31
+ * Detection method: the host pre-provisions a canary secret under
32
+ * `openwop-conformance-canary-secret` (any non-empty value). The
33
+ * conformance scenario reads the same canary value via
34
+ * `OPENWOP_CANARY_SECRET_VALUE` env (host operator + conformance runner
35
+ * agree on it). The scenario then runs the `openwop-smoke-byok-
36
+ * roundtrip` fixture, scrapes the OTel + debug-bundle seams, and
37
+ * asserts the canary plaintext never appears in either surface.
38
+ *
39
+ * Gating:
40
+ * - `capabilities.secrets.supported: true` — host has BYOK plumbing.
41
+ * - `capabilities.observability.testSeams.otelScrape: true` (RFC 0034)
42
+ * - `capabilities.observability.testSeams.debugBundleExport: true` (RFC 0034)
43
+ * - Fixture `openwop-smoke-byok-roundtrip` advertised + the host has
44
+ * pre-provisioned the canary secret.
45
+ *
46
+ * Soft-skips on any unmet gate. Hard-fails on canary leak.
47
+ *
48
+ * @see RFCS/0034-otel-collector-test-seam.md §B
49
+ * @see SECURITY/invariants.yaml secret-leakage-otel-attribute
50
+ * @see SECURITY/invariants.yaml secret-leakage-debug-bundle-otel
51
+ * @see SECURITY/threat-model-secret-leakage.md
52
+ */
53
+
54
+ import { describe, it, expect } from 'vitest';
55
+ import { driver } from '../lib/driver.js';
56
+ import { pollUntilTerminal } from '../lib/polling.js';
57
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
58
+
59
// Suite-level gates. All of these are evaluated once, at module load.

/** Workflow id of the BYOK round-trip fixture this scenario runs. */
const BYOK_WORKFLOW_ID = 'openwop-smoke-byok-roundtrip';

/**
 * Canary plaintext the host operator and the conformance runner agree on.
 * Left undefined (or empty) when the runner was not given one; the leak tests
 * skip in that case.
 */
const CANARY_VALUE = process.env.OPENWOP_CANARY_SECRET_VALUE;

/** True when no host base URL is configured. */
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;

/** True when `isFixtureAdvertised` reports the BYOK fixture as not advertised. */
const FIXTURE_SKIP = !isFixtureAdvertised(BYOK_WORKFLOW_ID);
63
+
64
/**
 * Subset of the `/.well-known/openwop` discovery document consulted by this
 * scenario. Leaf flags are typed `unknown`: callers either compare them
 * against `true` or inspect their runtime type with `typeof`.
 */
interface DiscoveryDoc {
  capabilities?: {
    secrets?: { supported?: unknown };
    observability?: {
      testSeams?: { otelScrape?: unknown; debugBundleExport?: unknown };
    };
  };
}
75
+
76
/**
 * Best-effort read of the host discovery document.
 *
 * @returns the parsed document on HTTP 200; `null` on any other status or
 *   when the request (or reading the response) throws.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
  } catch {
    // Deliberately swallowed: callers treat `null` as "gate unmet" and skip.
    return null;
  }
}
85
+
86
/**
 * Starts a run of the BYOK round-trip fixture.
 *
 * @returns the new run's id, or `null` when the host answers 404 or 422
 *   (callers soft-skip in that case).
 * @throws (assertion failure) when the status is anything other than
 *   201/404/422, or when a 201 response carries no non-empty string `runId`.
 */
async function startByokRun(): Promise<string | null> {
  const create = await driver.post('/v1/runs', { workflowId: BYOK_WORKFLOW_ID });
  if (create.status === 404 || create.status === 422) return null;
  expect(create.status).toBe(201);

  // Validate the id here so a malformed 201 body fails with a clear message
  // instead of surfacing later as a poll against an undefined run id.
  const runId = (create.json as { runId?: unknown } | null | undefined)?.runId;
  expect(
    typeof runId === 'string' && runId.length > 0,
    'POST /v1/runs answered 201 but the body has no non-empty string `runId`',
  ).toBe(true);
  return runId as string;
}
92
+
93
/**
 * Runs the BYOK fixture, scrapes the OTel span buffer through the RFC 0034
 * test seam, and asserts the canary plaintext appears nowhere in it.
 *
 * Soft-skips when the canary value is not configured, when the host does not
 * advertise `secrets.supported` and `testSeams.otelScrape`, or when the
 * fixture run cannot be started.
 */
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-otel-attribute: OTel span scrape (RFC 0034 §B)',
  () => {
    it('NO OTel span attribute MUST contain the BYOK canary plaintext for a run that resolved it', async (ctx) => {
      if (!CANARY_VALUE) {
        ctx.skip();
        return;
      }
      const canary: string = CANARY_VALUE;

      const d = await readDiscovery();
      const secretsOk = d?.capabilities?.secrets?.supported === true;
      const seamOk = d?.capabilities?.observability?.testSeams?.otelScrape === true;
      if (!secretsOk || !seamOk) {
        ctx.skip();
        return;
      }

      const runId = await startByokRun();
      if (runId === null) {
        ctx.skip();
        return;
      }
      const terminal = await pollUntilTerminal(runId);
      expect(
        terminal.status,
        driver.describe(
          'auth.md §"Secret resolution"',
          'BYOK fixture run MUST terminate `completed` when the canary is provisioned',
        ),
      ).toBe('completed');

      const spansRes = await driver.get(
        `/v1/host/sample/test/otel/spans?runId=${encodeURIComponent(runId)}`,
      );
      expect(
        spansRes.status,
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §B',
          'host advertising capabilities.observability.testSeams.otelScrape MUST serve GET /v1/host/sample/test/otel/spans (200) — 404 is non-conformant',
        ),
      ).toBe(200);

      // Do not default a missing `spans` to `[]`: that would make the shape
      // assertion below impossible to fail and the leak check vacuous.
      const spans = (spansRes.json as { spans?: unknown } | null | undefined)?.spans;
      expect(
        Array.isArray(spans),
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §B',
          'response body MUST have a `spans` array',
        ),
      ).toBe(true);

      // An empty buffer is tolerated here: missing instrumentation would be a
      // separate non-conformance, and this assertion is about leaks only.
      const serialized = JSON.stringify(spans);

      // JSON.stringify escapes quotes, backslashes and control characters, so
      // a canary containing any of them never appears verbatim in
      // `serialized`. Search for the JSON-escaped form as well as the raw one.
      const escapedCanary = JSON.stringify(canary).slice(1, -1);
      const leaked = serialized.includes(canary) || serialized.includes(escapedCanary);

      expect(
        leaked,
        driver.describe(
          'SECURITY/invariants.yaml secret-leakage-otel-attribute',
          'no OTel span attribute may contain the BYOK canary plaintext — host instrumentation MUST redact or omit secret material before any span emission. Per `agent-memory.md` §SR-1, the canonical redaction marker is `[REDACTED:<secretId>]`.',
        ),
      ).toBe(false);
    });
  },
);
161
+
162
/**
 * Runs the BYOK fixture, exports its debug bundle through the RFC 0034 test
 * seam, and asserts the canary plaintext appears nowhere in the bundle.
 *
 * Soft-skips when the canary value is not configured, when the host does not
 * advertise `secrets.supported` and `testSeams.debugBundleExport`, or when
 * the fixture run cannot be started.
 */
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-debug-bundle-otel: debug-bundle export scrape (RFC 0034 §B)',
  () => {
    it('NO debug-bundle field MUST contain the BYOK canary plaintext for a run that resolved it', async (ctx) => {
      if (!CANARY_VALUE) {
        ctx.skip();
        return;
      }
      const canary: string = CANARY_VALUE;

      const d = await readDiscovery();
      const secretsOk = d?.capabilities?.secrets?.supported === true;
      const seamOk = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
      if (!secretsOk || !seamOk) {
        ctx.skip();
        return;
      }

      const runId = await startByokRun();
      if (runId === null) {
        ctx.skip();
        return;
      }
      const terminal = await pollUntilTerminal(runId);
      // Same diagnostic as the span-scrape scenario, so a failed run is
      // attributed to secret resolution rather than reported bare.
      expect(
        terminal.status,
        driver.describe(
          'auth.md §"Secret resolution"',
          'BYOK fixture run MUST terminate `completed` when the canary is provisioned',
        ),
      ).toBe('completed');

      const bundleRes = await driver.post('/v1/host/sample/test/debug-bundle/export', { runId });
      expect(
        bundleRes.status,
        driver.describe(
          'RFCS/0034-otel-collector-test-seam.md §B',
          'host advertising capabilities.observability.testSeams.debugBundleExport MUST serve POST /v1/host/sample/test/debug-bundle/export (200) — 404 is non-conformant',
        ),
      ).toBe(200);

      const serialized = JSON.stringify(bundleRes.json ?? {});

      // JSON.stringify escapes quotes, backslashes and control characters, so
      // a canary containing any of them never appears verbatim in
      // `serialized`. Search for the JSON-escaped form as well as the raw one.
      const escapedCanary = JSON.stringify(canary).slice(1, -1);
      const leaked = serialized.includes(canary) || serialized.includes(escapedCanary);

      expect(
        leaked,
        driver.describe(
          'SECURITY/invariants.yaml secret-leakage-debug-bundle-otel',
          'no debug-bundle field may contain the BYOK canary plaintext — debug-bundle export MUST redact or omit secret material. Per `debug-bundle.md` §"Redaction", the canonical marker is `[REDACTED:<secretId>]`.',
        ),
      ).toBe(false);
    });
  },
);
206
+
207
/**
 * Shape probe for the RFC 0034 test-seam advertisements: each seam flag that
 * is present (and not `undefined`) must have runtime type `boolean`.
 * Skips when `secrets.supported` is not `true` or when `testSeams` is absent.
 */
describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
  'secret-leakage-otel-attribute: advertisement-shape probe (RFC 0034 §A)',
  () => {
    it('when secrets.supported is true, observability.testSeams advertisements MUST be boolean if present', async (ctx) => {
      const discovery = await readDiscovery();
      const secretsSupported = discovery?.capabilities?.secrets?.supported === true;
      if (!secretsSupported) {
        ctx.skip();
        return;
      }

      const seams = discovery?.capabilities?.observability?.testSeams;
      if (seams === undefined) {
        ctx.skip(); // host honest about not exposing the seams — Drift #17 path
        return;
      }

      // Checked in this fixed order; the first failing flag aborts the test.
      const seamKeys = ['otelScrape', 'debugBundleExport'] as const;
      for (const key of seamKeys) {
        const advertised = key in seams && seams[key] !== undefined;
        if (!advertised) continue;
        expect(
          typeof seams[key],
          driver.describe(
            'RFCS/0034-otel-collector-test-seam.md §A',
            `capabilities.observability.testSeams.${key} MUST be boolean when present`,
          ),
        ).toBe('boolean');
      }
    });
  },
);
@@ -1105,7 +1105,7 @@ describe.skipIf(V1_DIR === null)('spec-corpus: prose docs carry a Status: legend
1105
1105
  });
1106
1106
 
1107
1107
  for (const file of proseFiles) {
1108
- it(`${file} declares a Status: tag (STUB / DRAFT / OUTLINE / FINAL)`, () => {
1108
+ it(`${file} declares a Status: tag (STUB / DRAFT / OUTLINE / FINAL | Stable / Stabilizing / Draft / Experimental)`, () => {
1109
1109
  // V1_DIR is non-null here — proseFiles is empty when V1_DIR is null
1110
1110
  // so this loop body never runs in the published-tarball layout.
1111
1111
  const content = readFileSync(join(V1_DIR as string, file), 'utf8');
@@ -1113,7 +1113,7 @@ describe.skipIf(V1_DIR === null)('spec-corpus: prose docs carry a Status: legend
1113
1113
  expect(
1114
1114
  content,
1115
1115
  `${file} must include a "Status:" legend tag near its header`,
1116
- ).toMatch(/\*\*Status:\s*(STUB|DRAFT|OUTLINE|FINAL)\b/);
1116
+ ).toMatch(/\*\*Status:\s*(STUB|DRAFT|OUTLINE|FINAL|Stable|Stabilizing|Draft|Experimental)\b/);
1117
1117
  });
1118
1118
  }
1119
1119
  });