npm - @openwop/openwop-conformance - Versions diffs - 1.4.0 → 1.6.0 - Mend

@openwop/openwop-conformance 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

package/CHANGELOG.md +60 -0
package/README.md +2 -2
package/api/asyncapi.yaml +8 -3
package/api/openapi.yaml +305 -0
package/coverage.md +35 -10
package/fixtures/conformance-phase4-nondet-tool.json +53 -0
package/fixtures/conformance-phase4-replay-divergence.json +40 -0
package/fixtures.md +5 -3
package/package.json +1 -1
package/schemas/README.md +2 -0
package/schemas/capabilities.schema.json +176 -3
package/schemas/credential-reference.schema.json +21 -0
package/schemas/node-pack-manifest.schema.json +112 -1
package/schemas/run-diff-response.schema.json +64 -0
package/schemas/run-event-payloads.schema.json +104 -2
package/schemas/run-event.schema.json +8 -1
package/schemas/run-snapshot.schema.json +11 -0
package/src/lib/behavior-gate.ts +51 -0
package/src/lib/driver.ts +13 -1
package/src/lib/saml-idp.ts +179 -0
package/src/scenarios/approval-gate-events.test.ts +61 -0
package/src/scenarios/approval-gate-flow.test.ts +68 -0
package/src/scenarios/auth-saml-profile.test.ts +119 -0
package/src/scenarios/auth-scim-profile.test.ts +65 -0
package/src/scenarios/authorization-fail-closed.test.ts +80 -0
package/src/scenarios/authorization-roles-shape.test.ts +83 -0
package/src/scenarios/connector-manifest-validity.test.ts +142 -0
package/src/scenarios/credential-payload-redaction.test.ts +93 -0
package/src/scenarios/credentials-capability-shape.test.ts +90 -0
package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
package/src/scenarios/experimental-tier-shape.test.ts +192 -0
package/src/scenarios/identity-owner-shape.test.ts +64 -0
package/src/scenarios/multi-agent-confidence-escalation.test.ts +59 -21
package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
package/src/scenarios/oauth-capability-shape.test.ts +97 -0
package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
package/src/scenarios/pack-registry-isolation.test.ts +108 -0
package/src/scenarios/pack-registry-publish.test.ts +1 -1
package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
package/src/scenarios/run-diff.test.ts +143 -0
package/src/scenarios/sandbox-capability-gate-respected.test.ts +15 -13
package/src/scenarios/sandbox-memory-cap.test.ts +7 -8
package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +14 -13
package/src/scenarios/sandbox-no-host-env-leak.test.ts +14 -21
package/src/scenarios/sandbox-no-host-fs-escape.test.ts +20 -15
package/src/scenarios/sandbox-no-host-process-escape.test.ts +18 -13
package/src/scenarios/sandbox-no-network-escape.test.ts +14 -31
package/src/scenarios/sandbox-timeout-cap.test.ts +7 -8
package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
package/src/scenarios/spec-corpus-validity.test.ts +2 -2

package/src/scenarios/scheduling-cron-fires-once.test.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * scheduling-cron-fires-once — RFC 0052 §B behavioral verification.
+ *
+ * Status: DRAFT. RFC 0052 (scheduling & time-based triggers) is `Draft`.
+ *
+ * Capability-gated: skips when the host does not advertise
+ * `capabilities.scheduling.supported = true`.
+ *
+ * What this scenario asserts (via the optional
+ * `POST /v1/host/sample/scheduling/tick` test seam, which advances a
+ * deterministic clock and reports the runs a cron schedule produced):
+ *   1. Once-per-tick — a single cron tick produces exactly one run; no
+ *      duplicate concurrent firing (RFC 0052 §B.2).
+ *   2. Missed-tick policy — a host-down-across-a-tick window applies the
+ *      advertised policy (fire-once-on-recovery OR skip), never a backlog
+ *      flood (RFC 0052 §B.4).
+ *
+ * Hosts without the seam soft-skip the behavioral probes (404). Horizon
+ * rejection (`schedule_horizon_exceeded`) is covered by the shape +
+ * error-code contract; behavioral horizon assertion is part of the deferred
+ * delayed-execution scenario.
+ *
+ * @see RFCS/0052-scheduling-and-time-based-triggers.md
+ * @see spec/v1/host-capabilities.md §host.scheduling
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/**
+ * The slice of the `/.well-known/openwop` discovery document this scenario
+ * reads. Every level is optional, so callers must null-check on the way down.
+ */
+interface DiscoveryDoc {
+  capabilities?: {
+    scheduling?: {
+      supported?: boolean;
+      cron?: boolean;
+    };
+  };
+}
+/**
+ * Fetches the discovery document and extracts `capabilities.scheduling`.
+ *
+ * @returns the scheduling advertisement, or `null` when the response has no
+ *   parsed body or any level of the path is missing.
+ */
+async function readScheduling(): Promise<{ supported?: boolean; cron?: boolean } | null> {
+  const discovery = await driver.get('/.well-known/openwop');
+  const doc = discovery.json as DiscoveryDoc | undefined;
+  const scheduling = doc?.capabilities?.scheduling;
+  return scheduling ?? null;
+}
+// Both probes share one shape: return early unless the host advertises
+// scheduling with cron support, call the tick seam, return early on 404
+// (seam not wired), then assert on the `runsFired` count in the response.
+describe('scheduling-cron-fires-once: once-per-tick + missed-tick (RFC 0052 §B)', () => {
+  it('a single cron tick produces exactly one run', async () => {
+    const sched = await readScheduling();
+    if (!sched?.supported || sched.cron !== true) return; // capability-gated
+    const res = await driver.post('/v1/host/sample/scheduling/tick', { scenario: 'single-tick' });
+    if (res.status === 404) return; // seam unwired — soft-skip
+    const body = res.json as { runsFired?: number } | undefined;
+    expect(
+      body?.runsFired,
+      driver.describe('RFC 0052 §B.2', 'a single cron tick MUST fire exactly one run (no duplicate concurrent firing)'),
+    ).toBe(1);
+  });
+  it('a missed-tick window does not produce a backlog flood', async () => {
+    const sched = await readScheduling();
+    if (!sched?.supported || sched.cron !== true) return; // capability-gated
+    const res = await driver.post('/v1/host/sample/scheduling/tick', { scenario: 'missed-window', missedTicks: 5 });
+    if (res.status === 404) return; // seam unwired — soft-skip
+    const body = res.json as { runsFired?: number } | undefined;
+    // Only the two policy outcomes are valid: 0 (skip) or 1 (fire once on
+    // recovery). A bare `<= 1` bound would also accept negative or fractional
+    // counts, which are not run counts at all.
+    const fired = body?.runsFired;
+    expect(
+      fired === 0 || fired === 1,
+      driver.describe(
+        'RFC 0052 §B.4',
+        `a missed-tick window MUST apply the advertised policy (fire-once-on-recovery or skip), never N backlogged runs; got runsFired=${body?.runsFired}`,
+      ),
+    ).toBe(true);
+  });
+});

package/src/scenarios/secret-leakage-otel-attribute.test.ts ADDED Viewed

@@ -0,0 +1,241 @@
+/**
+ * secret-leakage-otel-attribute — SECURITY invariant verification via RFC 0034 seam.
+ *
+ * Verifies the two `SECURITY/invariants.yaml` rows
+ *   - `secret-leakage-otel-attribute` (reference-impl → protocol per RFC 0034)
+ *   - `secret-leakage-debug-bundle-otel` (reference-impl → protocol per RFC 0034)
+ *
+ * The host has a BYOK plumbing path that resolves a `credentialRef` and
+ * passes the plaintext value into a NodeModule (the `openwop-smoke-byok-
+ * roundtrip` fixture does exactly this). Two separate exfiltration risks
+ * exist on the way back out:
+ *
+ *   1. OTel span attributes — host instrumentation MAY accidentally
+ *      stamp the resolved plaintext onto a span attribute (`openwop.*`
+ *      or vendor-namespaced). RFC 0034 §B's `GET /v1/host/sample/test/
+ *      otel/spans?runId=<id>` seam exposes the full span buffer so
+ *      conformance can mechanically prove no leak.
+ *
+ *   2. Debug-bundle exports — the optional `debug-bundle` capability
+ *      exposes a portable JSON snapshot of a run's diagnostic state.
+ *      RFC 0034 §B's `POST /v1/host/sample/test/debug-bundle/export`
+ *      seam returns the bundle so conformance can prove the canary
+ *      doesn't appear there either.
+ *
+ * Distinct from `envelope-reasoning-secret-redaction.test.ts`, which
+ * narrows to the envelope-acceptor's redaction of `reasoning` field
+ * canaries. This file verifies the broader executor-side claim: that
+ * NO span attribute or debug-bundle field contains the resolved
+ * plaintext anywhere in the run's instrumentation surface.
+ *
+ * Detection method: the host pre-provisions a canary secret under
+ * `openwop-conformance-canary-secret` (any non-empty value). The
+ * conformance scenario reads the same canary value via
+ * `OPENWOP_CANARY_SECRET_VALUE` env (host operator + conformance runner
+ * agree on it). The scenario then runs the `openwop-smoke-byok-
+ * roundtrip` fixture, scrapes the OTel + debug-bundle seams, and
+ * asserts the canary plaintext never appears in either surface.
+ *
+ * Gating:
+ *   - `capabilities.secrets.supported: true` — host has BYOK plumbing.
+ *   - `capabilities.observability.testSeams.otelScrape: true` (RFC 0034)
+ *   - `capabilities.observability.testSeams.debugBundleExport: true` (RFC 0034)
+ *   - Fixture `openwop-smoke-byok-roundtrip` advertised + the host has
+ *     pre-provisioned the canary secret.
+ *
+ * Soft-skips on any unmet gate. Hard-fails on canary leak.
+ *
+ * @see RFCS/0034-otel-collector-test-seam.md §B
+ * @see SECURITY/invariants.yaml secret-leakage-otel-attribute
+ * @see SECURITY/invariants.yaml secret-leakage-debug-bundle-otel
+ * @see SECURITY/threat-model-secret-leakage.md
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+// Workflow id of the BYOK round-trip fixture that the leak probes run.
+const BYOK_WORKFLOW_ID = 'openwop-smoke-byok-roundtrip';
+// Canary plaintext shared with the host operator; `undefined` when the
+// environment variable is not set.
+const CANARY_VALUE = process.env.OPENWOP_CANARY_SECRET_VALUE;
+// True when no base URL is configured (variable unset or empty).
+const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
+// True when the BYOK fixture is not advertised.
+const FIXTURE_SKIP = !isFixtureAdvertised(BYOK_WORKFLOW_ID);
+/**
+ * The slice of the discovery document this file inspects. The flag fields are
+ * left as `unknown`: the probes below either compare them with `=== true` or
+ * check their runtime `typeof`, so no boolean type is assumed up front.
+ */
+interface DiscoveryDoc {
+  capabilities?: {
+    secrets?: { supported?: unknown };
+    observability?: {
+      testSeams?: { otelScrape?: unknown; debugBundleExport?: unknown };
+    };
+  };
+}
+/**
+ * Best-effort fetch of the `/.well-known/openwop` discovery document.
+ *
+ * @returns the parsed document, or `null` when the request throws, the status
+ *   is not 200, or the response carries no parsed JSON body.
+ */
+async function readDiscovery(): Promise<DiscoveryDoc | null> {
+  try {
+    const res = await driver.get('/.well-known/openwop');
+    if (res.status !== 200) return null;
+    // A 200 without a parsed body would otherwise return `undefined`, which
+    // the declared `DiscoveryDoc | null` return type does not allow.
+    return (res.json as DiscoveryDoc | undefined) ?? null;
+  } catch {
+    // Deliberate best-effort: any transport failure is treated as "no
+    // discovery document", which the callers turn into a skip.
+    return null;
+  }
+}
+/**
+ * Starts a run of the BYOK round-trip fixture.
+ *
+ * @returns the new run's id, or `null` when the host answers 404 or 422
+ *   (callers treat that as a skip). Any other non-201 status, or a 201 whose
+ *   body lacks a usable `runId`, fails the calling test.
+ */
+async function startByokRun(): Promise<string | null> {
+  const create = await driver.post('/v1/runs', { workflowId: BYOK_WORKFLOW_ID });
+  if (create.status === 404 || create.status === 422) return null;
+  expect(create.status).toBe(201);
+  // Validate the id rather than casting it: a missing body or `runId` would
+  // otherwise surface as an opaque TypeError or reach the poller as `undefined`.
+  const runId = (create.json as { runId?: unknown } | undefined)?.runId;
+  expect(
+    typeof runId === 'string' && runId.length > 0,
+    'POST /v1/runs answered 201 but the body carries no non-empty string `runId`',
+  ).toBe(true);
+  return runId as string;
+}
+// Runs the BYOK fixture to completion, scrapes the span buffer through the
+// OTel test seam, and fails if the canary plaintext appears anywhere in it.
+// Skips when the canary is unset, a capability gate is unmet, or the run
+// cannot be started.
+describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
+  'secret-leakage-otel-attribute: OTel span scrape (RFC 0034 §B)',
+  () => {
+    it('NO OTel span attribute MUST contain the BYOK canary plaintext for a run that resolved it', async (ctx) => {
+      if (!CANARY_VALUE) {
+        ctx.skip();
+        return;
+      }
+      const d = await readDiscovery();
+      const secretsOk = d?.capabilities?.secrets?.supported === true;
+      const seamOk = d?.capabilities?.observability?.testSeams?.otelScrape === true;
+      if (!secretsOk || !seamOk) {
+        ctx.skip();
+        return;
+      }
+      const runId = await startByokRun();
+      if (runId === null) {
+        ctx.skip();
+        return;
+      }
+      const terminal = await pollUntilTerminal(runId);
+      expect(
+        terminal.status,
+        driver.describe(
+          'auth.md §"Secret resolution"',
+          'BYOK fixture run MUST terminate `completed` when the canary is provisioned',
+        ),
+      ).toBe('completed');
+      const spansRes = await driver.get(
+        `/v1/host/sample/test/otel/spans?runId=${encodeURIComponent(runId)}`,
+      );
+      expect(
+        spansRes.status,
+        driver.describe(
+          'RFCS/0034-otel-collector-test-seam.md §B',
+          'host advertising capabilities.observability.testSeams.otelScrape MUST serve GET /v1/host/sample/test/otel/spans (200) — 404 is non-conformant',
+        ),
+      ).toBe(200);
+      // Read `spans` without a default: falling back to `[]` would let a body
+      // with no `spans` field satisfy the array assertion below. The optional
+      // chain also covers a response with no parsed JSON body.
+      const spans = (spansRes.json as { spans?: unknown } | undefined)?.spans;
+      expect(
+        Array.isArray(spans),
+        driver.describe(
+          'RFCS/0034-otel-collector-test-seam.md §B',
+          'response body MUST have a `spans` array',
+        ),
+      ).toBe(true);
+      // The host's instrumentation produces at least one span per run
+      // (the engine emits `openwop.run.lifecycle` + per-node spans). If
+      // the buffer is empty, the host isn't actually instrumenting — that
+      // would be a separate non-conformance, but for this assertion we
+      // only care about leak, not coverage.
+      const serialized = JSON.stringify(spans);
+      // `serialized` is JSON text, so a canary containing quotes, backslashes
+      // or control characters appears there in escaped form. Search for that
+      // form as well, or such a canary could leak undetected.
+      const escapedCanary = JSON.stringify(CANARY_VALUE).slice(1, -1);
+      expect(
+        serialized.includes(CANARY_VALUE) || serialized.includes(escapedCanary),
+        driver.describe(
+          'SECURITY/invariants.yaml secret-leakage-otel-attribute',
+          'no OTel span attribute may contain the BYOK canary plaintext — host instrumentation MUST redact or omit secret material before any span emission. Per `agent-memory.md` §SR-1, the canonical redaction marker is `[REDACTED:<secretId>]`.',
+        ),
+      ).toBe(false);
+    });
+  },
+);
+// Runs the BYOK fixture to completion, exports its debug bundle through the
+// test seam, and fails if the canary plaintext appears anywhere in it. Skips
+// when the canary is unset, a capability gate is unmet, or the run cannot be
+// started.
+describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
+  'secret-leakage-debug-bundle-otel: debug-bundle export scrape (RFC 0034 §B)',
+  () => {
+    it('NO debug-bundle field MUST contain the BYOK canary plaintext for a run that resolved it', async (ctx) => {
+      if (!CANARY_VALUE) {
+        ctx.skip();
+        return;
+      }
+      const d = await readDiscovery();
+      const secretsOk = d?.capabilities?.secrets?.supported === true;
+      const seamOk = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
+      if (!secretsOk || !seamOk) {
+        ctx.skip();
+        return;
+      }
+      const runId = await startByokRun();
+      if (runId === null) {
+        ctx.skip();
+        return;
+      }
+      const terminal = await pollUntilTerminal(runId);
+      // Same precondition and message as the span-scrape probe, so a failed
+      // run reports which requirement was violated.
+      expect(
+        terminal.status,
+        driver.describe(
+          'auth.md §"Secret resolution"',
+          'BYOK fixture run MUST terminate `completed` when the canary is provisioned',
+        ),
+      ).toBe('completed');
+      const bundleRes = await driver.post('/v1/host/sample/test/debug-bundle/export', { runId });
+      expect(
+        bundleRes.status,
+        driver.describe(
+          'RFCS/0034-otel-collector-test-seam.md §B',
+          'host advertising capabilities.observability.testSeams.debugBundleExport MUST serve POST /v1/host/sample/test/debug-bundle/export (200) — 404 is non-conformant',
+        ),
+      ).toBe(200);
+      const serialized = JSON.stringify(bundleRes.json ?? {});
+      // `serialized` is JSON text, so a canary containing quotes, backslashes
+      // or control characters appears there in escaped form. Search for that
+      // form as well, or such a canary could leak undetected.
+      const escapedCanary = JSON.stringify(CANARY_VALUE).slice(1, -1);
+      expect(
+        serialized.includes(CANARY_VALUE) || serialized.includes(escapedCanary),
+        driver.describe(
+          'SECURITY/invariants.yaml secret-leakage-debug-bundle-otel',
+          'no debug-bundle field may contain the BYOK canary plaintext — debug-bundle export MUST redact or omit secret material. Per `debug-bundle.md` §"Redaction", the canonical marker is `[REDACTED:<secretId>]`.',
+        ),
+      ).toBe(false);
+    });
+  },
+);
+// Shape-only probe: when the host advertises `secrets.supported: true` and
+// exposes `observability.testSeams`, each seam flag that is present must be a
+// boolean. No run is started here.
+describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
+  'secret-leakage-otel-attribute: advertisement-shape probe (RFC 0034 §A)',
+  () => {
+    it('when secrets.supported is true, observability.testSeams advertisements MUST be boolean if present', async (ctx) => {
+      const d = await readDiscovery();
+      if (d?.capabilities?.secrets?.supported !== true) {
+        ctx.skip();
+        return;
+      }
+      const seams = d?.capabilities?.observability?.testSeams;
+      if (seams === undefined) {
+        ctx.skip(); // host honest about not exposing the seams — Drift #17 path
+        return;
+      }
+      // The discovery document is host-supplied JSON, so `testSeams` may be
+      // `null` or a scalar despite the TypeScript typing. Fail with a readable
+      // message instead of a raw TypeError from the property reads below.
+      expect(
+        typeof seams === 'object' && seams !== null,
+        driver.describe(
+          'RFCS/0034-otel-collector-test-seam.md §A',
+          'capabilities.observability.testSeams MUST be an object when present',
+        ),
+      ).toBe(true);
+      if (seams.otelScrape !== undefined) {
+        expect(
+          typeof seams.otelScrape,
+          driver.describe(
+            'RFCS/0034-otel-collector-test-seam.md §A',
+            'capabilities.observability.testSeams.otelScrape MUST be boolean when present',
+          ),
+        ).toBe('boolean');
+      }
+      if (seams.debugBundleExport !== undefined) {
+        expect(
+          typeof seams.debugBundleExport,
+          driver.describe(
+            'RFCS/0034-otel-collector-test-seam.md §A',
+            'capabilities.observability.testSeams.debugBundleExport MUST be boolean when present',
+          ),
+        ).toBe('boolean');
+      }
+    });
+  },
+);

package/src/scenarios/spec-corpus-validity.test.ts CHANGED Viewed

@@ -1105,7 +1105,7 @@ describe.skipIf(V1_DIR === null)('spec-corpus: prose docs carry a Status: legend
   });
   for (const file of proseFiles) {
-    it(`${file} declares a Status: tag (STUB / DRAFT / OUTLINE / FINAL)`, () => {
+    it(`${file} declares a Status: tag (STUB / DRAFT / OUTLINE / FINAL | Stable / Stabilizing / Draft / Experimental)`, () => {
       // V1_DIR is non-null here — proseFiles is empty when V1_DIR is null
       // so this loop body never runs in the published-tarball layout.
       const content = readFileSync(join(V1_DIR as string, file), 'utf8');
@@ -1113,7 +1113,7 @@ describe.skipIf(V1_DIR === null)('spec-corpus: prose docs carry a Status: legend
       expect(
         content,
         `${file} must include a "Status:" legend tag near its header`,
-      ).toMatch(/\*\*Status:\s*(STUB|DRAFT|OUTLINE|FINAL)\b/);
+      ).toMatch(/\*\*Status:\s*(STUB|DRAFT|OUTLINE|FINAL|Stable|Stabilizing|Draft|Experimental)\b/);
     });
   }
 });