npm - @openwop/openwop-conformance - Versions diffs - 1.13.0 → 1.15.0 - Mend

@openwop/openwop-conformance 1.13.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/CHANGELOG.md +21 -0
package/README.md +2 -2
package/api/openapi.yaml +60 -0
package/coverage.md +15 -4
package/fixtures/wasm-sandbox/isolation-global.wasm +0 -0
package/fixtures/wasm-sandbox/isolation-global.wat +6 -0
package/fixtures/wasm-sandbox/misbehaving-capability-gate.wasm +0 -0
package/fixtures/wasm-sandbox/misbehaving-capability-gate.wat +4 -0
package/fixtures/wasm-sandbox/misbehaving-env.wasm +0 -0
package/fixtures/wasm-sandbox/misbehaving-env.wat +4 -0
package/fixtures/wasm-sandbox/misbehaving-fs.wasm +0 -0
package/fixtures/wasm-sandbox/misbehaving-fs.wat +4 -0
package/fixtures/wasm-sandbox/misbehaving-memory.wasm +0 -0
package/fixtures/wasm-sandbox/misbehaving-memory.wat +5 -0
package/fixtures/wasm-sandbox/misbehaving-network.wasm +0 -0
package/fixtures/wasm-sandbox/misbehaving-network.wat +4 -0
package/fixtures/wasm-sandbox/misbehaving-process.wasm +0 -0
package/fixtures/wasm-sandbox/misbehaving-process.wat +4 -0
package/fixtures/wasm-sandbox/misbehaving-timeout.wasm +0 -0
package/fixtures/wasm-sandbox/misbehaving-timeout.wat +4 -0
package/fixtures/wasm-sandbox/well-behaved-echo.wasm +0 -0
package/fixtures/wasm-sandbox/well-behaved-echo.wat +2 -0
package/fixtures/wasm-sandbox/well-behaved-host-fetch.wasm +0 -0
package/fixtures/wasm-sandbox/well-behaved-host-fetch.wat +3 -0
package/package.json +1 -1
package/src/lib/discovery-capabilities.ts +18 -19
package/src/lib/egressPolicy.ts +76 -0
package/src/lib/otel-collector.ts +72 -0
package/src/lib/profiles.ts +15 -0
package/src/lib/sandbox-timeout-worker.mjs +31 -0
package/src/lib/toolCatalog.ts +81 -0
package/src/lib/wasm-sandbox-probe.ts +168 -0
package/src/scenarios/core-standard-profile.test.ts +75 -0
package/src/scenarios/egress-audience-binding.test.ts +81 -0
package/src/scenarios/egress-decision-content-free.test.ts +57 -0
package/src/scenarios/memory-degraded-projection.test.ts +121 -0
package/src/scenarios/multi-agent-confidence-escalation.test.ts +12 -7
package/src/scenarios/otel-collector-canary-inspection.test.ts +211 -0
package/src/scenarios/prompt-resolution-chain-event.test.ts +113 -0
package/src/scenarios/replay-observable-sequence-determinism.test.ts +192 -75
package/src/scenarios/sandbox-wasm-isolation.test.ts +98 -0
package/src/scenarios/sandbox-wasm-timeout.test.ts +40 -0
package/src/scenarios/secret-leakage-otel-attribute.test.ts +52 -0
package/src/scenarios/tool-catalog-projection.test.ts +120 -0
package/src/scenarios/tool-session-lifecycle.test.ts +105 -0
package/src/scenarios/workspace-cross-tenant-isolation-blackbox.test.ts +89 -0

package/src/scenarios/egress-audience-binding.test.ts ADDED Viewed

@@ -0,0 +1,81 @@
+/**
+ * Credential-audience-bound egress (RFC 0079 §C) — behavioral KEYSTONE.
+ *
+ * Gated on `httpClient.egressPolicy.supported` (root-first per RFC 0073).
+ * Soft-skips when unadvertised (default) / hard-fails under
+ * `OPENWOP_REQUIRE_BEHAVIOR=true`. The always-on wire-shape coverage lives in
+ * `egress-provenance-shape.test.ts`; this asserts host BEHAVIOR — the §C
+ * confused-deputy MUST that backs the `egress-credential-audience-bound`
+ * SECURITY invariant:
+ *
+ *   1. OUT-OF-AUDIENCE — a host-issued credential bound to audience A, used for
+ *      an egress to destination B (B ∉ A), MUST be `denied` or `downgraded`
+ *      with `reason: "out-of-audience"`, and the credential MUST NOT be attached
+ *      to the egress (`credentialAttached !== true`).
+ *   2. PROVENANCE-UNEVALUABLE — an egress whose credential provenance cannot be
+ *      evaluated MUST be `denied` with `reason: "provenance-unevaluable"`
+ *      (fail-closed, not fail-open).
+ *
+ * The decision is driven through the OPTIONAL host-sample egress seam
+ * (`POST /v1/host/sample/egress/decide`) — soft-skip on 404/405. The decision
+ * reason is a CLOSED enum so a host cannot spill a blocked URL/host into a
+ * free-form string (SR-1, asserted in `egress-decision-content-free.test.ts`).
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/host-capabilities.md (§"Credential provenance + egress policy")
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0079-credential-provenance-and-egress-policy.md
+ *   - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (egress-credential-audience-bound)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { behaviorGate } from '../lib/behavior-gate.js';
+import { readEgressPolicyCap, driveEgress, EGRESS_DECISIONS, EGRESS_REASONS } from '../lib/egressPolicy.js';
+describe('egress-audience-binding (RFC 0079 §C)', () => { // behavioral suite: a single test that drives two egress scenarios in sequence through driveEgress
+  it('denies/downgrades an out-of-audience egress without attaching the credential, and fails closed on unevaluable provenance', async () => {
+    const cap = await readEgressPolicyCap();
+    if (!behaviorGate('openwop-egress-audience-binding', cap?.supported === true)) return; // when the gate returns false the test ends here without asserting anything
+    // ---- Leg 1: out-of-audience — deny|downgrade + credential NOT attached --
+    const oob = await driveEgress({ scenario: 'out-of-audience' });
+    if (oob === null) return; // egress seam absent — soft-skip the whole behavior (leg 2 is not attempted either)
+    expect(
+      oob.decision === 'denied' || oob.decision === 'downgraded',
+      driver.describe('host-capabilities.md §"Credential provenance + egress policy"', 'an out-of-audience egress MUST be denied or downgraded'),
+    ).toBe(true);
+    expect( // the decision must additionally be a string drawn from EGRESS_DECISIONS
+      typeof oob.decision === 'string' && EGRESS_DECISIONS.includes(oob.decision),
+      driver.describe('run-event-payloads.schema.json#egressDecided', 'decision MUST be in the closed enum'),
+    ).toBe(true);
+    expect( // exact-match on the reason; NOTE(review): leg 1 never checks reason against EGRESS_REASONS the way leg 2 does — presumably redundant given this equality, confirm
+      oob.reason === 'out-of-audience',
+      driver.describe('RFC 0079 §C', 'an out-of-audience denial MUST carry reason "out-of-audience"'),
+    ).toBe(true);
+    expect( // `!== true` means an absent or false credentialAttached both pass
+      oob.credentialAttached !== true,
+      driver.describe('SECURITY/invariants.yaml egress-credential-audience-bound', 'the host MUST NOT attach a credential whose audience excludes the destination (confused-deputy)'),
+    ).toBe(true);
+    // ---- Leg 2: provenance-unevaluable — fail closed (deny) ----------------
+    const uneval = await driveEgress({ scenario: 'provenance-unevaluable' });
+    if (uneval !== null) { // a null result skips every leg-2 assertion silently; the leg-1 assertions above have already run by this point
+      expect( // unlike leg 1, 'downgraded' is NOT accepted here — only 'denied'
+        uneval.decision === 'denied',
+        driver.describe('RFC 0079 §C', 'an egress with unevaluable provenance MUST fail closed (denied)'),
+      ).toBe(true);
+      expect(
+        uneval.reason === 'provenance-unevaluable',
+        driver.describe('RFC 0079 §C', 'a provenance-unevaluable denial MUST carry reason "provenance-unevaluable"'),
+      ).toBe(true);
+      expect( // the reason must also be a string drawn from EGRESS_REASONS
+        typeof uneval.reason === 'string' && EGRESS_REASONS.includes(uneval.reason),
+        driver.describe('run-event-payloads.schema.json#egressDecided', 'reason MUST be in the closed enum'),
+      ).toBe(true);
+      expect(
+        uneval.credentialAttached !== true,
+        driver.describe('SECURITY/invariants.yaml egress-credential-audience-bound', 'a fail-closed egress MUST NOT attach the credential'),
+      ).toBe(true);
+    }
+  });
+});

package/src/scenarios/egress-decision-content-free.test.ts ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * Egress-decision secret non-leak (RFC 0079 §F / SR-1) — behavioral.
+ *
+ * Gated on `httpClient.egressPolicy.supported` (root-first per RFC 0073).
+ * Soft-skips when unadvertised (default) / hard-fails under
+ * `OPENWOP_REQUIRE_BEHAVIOR=true`. Backs the `egress-decision-no-secret-leak`
+ * guarantee: an `egress.decided` payload is metadata-only — it MUST NOT carry
+ * the credential value, nor spill the blocked URL/host/header/body into a
+ * free-form field, and its `reason` MUST be drawn from the CLOSED vocabulary
+ * (so a host cannot smuggle a blocked destination into the reason string).
+ *
+ * Drives the host-sample seam with a `canary` credential whose value is a known
+ * sentinel and asserts the sentinel never surfaces in the decision
+ * (`canaryLeaked !== true`) and that the payload carries none of the forbidden
+ * content keys. Soft-skips on 404/405.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/host-capabilities.md (§"Credential provenance + egress policy")
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0079-credential-provenance-and-egress-policy.md
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { behaviorGate } from '../lib/behavior-gate.js';
+import { readEgressPolicyCap, driveEgress, EGRESS_REASONS, EGRESS_CONTENT_FORBIDDEN } from '../lib/egressPolicy.js';
+describe('egress-decision-content-free (RFC 0079 §F / SR-1)', () => { // behavioral suite: one test driving the 'canary' egress scenario and inspecting the returned decision
+  it('never leaks the credential value or the blocked destination into the egress.decided payload', async () => {
+    const cap = await readEgressPolicyCap();
+    if (!behaviorGate('openwop-egress-decision-content-free', cap?.supported === true)) return; // when the gate returns false the test ends here without asserting anything
+    const res = await driveEgress({ scenario: 'canary' });
+    if (res === null) return; // seam absent — soft-skip
+    // The canary sentinel MUST NOT appear anywhere observable. This test does not scan for the sentinel itself; it relies on the `canaryLeaked` flag carried on the result.
+    expect(
+      res.canaryLeaked !== true,
+      driver.describe('RFC 0079 §F (SR-1)', 'the credential value (canary) MUST NOT leak into any observable surface'),
+    ).toBe(true);
+    // No forbidden content keys on the decision payload. The `in` check looks at the keys of `res` itself, not at nested objects.
+    for (const forbidden of EGRESS_CONTENT_FORBIDDEN) {
+      expect(
+        !(forbidden in res),
+        driver.describe('RFC 0079 §F (SR-1)', `egress.decided MUST be content-free (no ${forbidden})`),
+      ).toBe(true);
+    }
+    // The reason stays in the closed vocabulary — no free-form destination spill. An absent reason is accepted and skips this check.
+    if (res.reason !== undefined) {
+      expect(
+        typeof res.reason === 'string' && EGRESS_REASONS.includes(res.reason),
+        driver.describe('run-event-payloads.schema.json#egressDecided', 'reason MUST be in the closed enum (no free-form spill)'),
+      ).toBe(true);
+    }
+  });
+});

package/src/scenarios/memory-degraded-projection.test.ts ADDED Viewed

@@ -0,0 +1,121 @@
+/**
+ * Memory-capability degraded projection (RFC 0080 §C) — behavioral.
+ *
+ * Gated on `capabilities.agents.manifestRuntime` + `capabilities.memory`
+ * (root-first per RFC 0073). Soft-skips when either is unadvertised (default) /
+ * hard-fails under `OPENWOP_REQUIRE_BEHAVIOR=true`. The always-on wire-shape
+ * coverage lives in `memory-capability-model-shape.test.ts` (the schema fields +
+ * the closed dimension enum); this asserts host BEHAVIOR on the NORMATIVE
+ * `GET /v1/agents` inventory:
+ *
+ *   §C iff-contract — for EVERY inventory entry, when the host cannot satisfy an
+ *   agent's requested `memoryShape` it MUST stamp `memoryDegraded: true` together
+ *   with a NON-EMPTY `degradedMemoryDimensions[]` whose members are the RFC 0080
+ *   §A dimension names (the CLOSED enum, NOT the `memoryShape` keys) and are
+ *   unique; a non-degraded entry MUST carry `memoryDegraded` absent or `false`
+ *   and MUST NOT carry a non-empty `degradedMemoryDimensions`.
+ *
+ *   Non-vacuity — the inventory MUST be non-empty (the cap is advertised + the
+ *   endpoint serves). When `OPENWOP_DEGRADED_AGENT_ID` names an agent the host
+ *   knows is degraded (an agent whose `memoryShape` exceeds host capability —
+ *   e.g. one requesting `longTerm` on a host without long-term durability), the
+ *   degraded branch is asserted NON-VACUOUSLY against that agent.
+ *
+ * Black-box on the normative path — no POST seam.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/agent-memory.md (§"Memory capability model")
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0080-agent-memory-capability-reconciliation.md
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { behaviorGate } from '../lib/behavior-gate.js';
+import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
+import { readManifestRuntimeCap, listManifestAgents } from '../lib/agentRuntime.js';
+/** The CLOSED RFC 0080 §A dimension vocabulary (agent-inventory-response.schema.json
+ *  `degradedMemoryDimensions` enum). NOT the `memoryShape` keys.
+ *  Each member of a degraded entry's `degradedMemoryDimensions` is checked against this
+ *  list with `includes`, so any name not spelled exactly as below fails the scenario. */
+const DIMENSIONS = [
+  'read',
+  'write',
+  'search',
+  'long-term-durability',
+  'compaction',
+  'attribution',
+  'replay-snapshot',
+  'retention',
+];
+interface InventoryEntry { // one element of the inventory's `agents` array; the memory fields are `unknown` and narrowed at the point of use
+  agentId?: string; // interpolated into assertion messages and matched against OPENWOP_DEGRADED_AGENT_ID
+  memoryDegraded?: unknown; // only the literal `true` selects the degraded branch of the scenario
+  degradedMemoryDimensions?: unknown; // narrowed with Array.isArray before its members are inspected
+  [k: string]: unknown; // any other inventory field is tolerated and not inspected by this scenario
+}
+describe('memory-degraded-projection (RFC 0080 §C)', () => { // behavioral suite: one test that walks the agent inventory and checks the §C iff-contract on every entry
+  it('stamps memoryDegraded + a closed-enum degradedMemoryDimensions on degraded agents and nothing on the rest', async () => {
+    const mr = await readManifestRuntimeCap();
+    const memory = await readCapabilityFamily<Record<string, unknown>>('memory');
+    const advertised = mr?.supported === true && !!memory && memory.supported === true; // BOTH capabilities must report supported:true
+    if (!behaviorGate('openwop-memory-degraded', advertised)) return; // when the gate returns false the test ends here without asserting anything
+    const inv = await listManifestAgents();
+    if (inv === null) return; // host advertises the cap but doesn't serve /v1/agents — soft-skip
+    const agents = (inv.agents ?? []) as InventoryEntry[]; // a missing `agents` field is treated as an empty inventory and fails the non-vacuity check below
+    // Non-vacuity: an advertising + serving host MUST expose its inventory.
+    expect(
+      agents.length >= 1,
+      driver.describe('agent-memory.md §"Memory capability model"', 'GET /v1/agents MUST return the installed manifest agents'),
+    ).toBe(true);
+    // §C iff-contract on EVERY entry.
+    for (const a of agents) {
+      const degraded = a.memoryDegraded === true; // strict: only the boolean literal counts as degraded
+      const dims = a.degradedMemoryDimensions;
+      if (degraded) {
+        expect(
+          Array.isArray(dims) && dims.length >= 1,
+          driver.describe('RFC 0080 §C', `memoryDegraded:true MUST carry a non-empty degradedMemoryDimensions (agent ${a.agentId})`),
+        ).toBe(true);
+        if (Array.isArray(dims)) {
+          for (const d of dims) {
+            expect( // each member must be a string from the closed DIMENSIONS vocabulary
+              typeof d === 'string' && DIMENSIONS.includes(d),
+              driver.describe('agent-inventory-response.schema.json', `degradedMemoryDimensions members MUST be RFC 0080 §A dimension names (got ${String(d)})`),
+            ).toBe(true);
+          }
+          expect( // a Set collapses duplicates, so a size mismatch means a repeated dimension
+            new Set(dims as string[]).size === dims.length,
+            driver.describe('RFC 0080 §C', 'degradedMemoryDimensions MUST be unique'),
+          ).toBe(true);
+        }
+      } else {
+        // Not degraded ⇒ memoryDegraded is absent or exactly `false` (a non-boolean such as "true", 1 or null no longer slips through as "not degraded") AND no non-empty dimension list (absent or empty both pass).
+        expect(
+          (a.memoryDegraded === undefined || a.memoryDegraded === false) && (dims === undefined || (Array.isArray(dims) && dims.length === 0)),
+          driver.describe('RFC 0080 §C', `a non-degraded entry MUST carry memoryDegraded absent or false and MUST NOT carry a non-empty degradedMemoryDimensions (agent ${a.agentId})`),
+        ).toBe(true);
+      }
+    }
+    // Non-vacuous degraded branch when the host names a known-degraded agent.
+    const degradedId = process.env.OPENWOP_DEGRADED_AGENT_ID;
+    if (degradedId) { // unset or empty ⇒ this extra check is skipped
+      const target = agents.find((a) => a.agentId === degradedId);
+      expect(
+        target !== undefined,
+        driver.describe('RFC 0080 §C', `OPENWOP_DEGRADED_AGENT_ID=${degradedId} MUST appear in the inventory`),
+      ).toBe(true);
+      if (target) {
+        expect(
+          target.memoryDegraded === true && Array.isArray(target.degradedMemoryDimensions) && target.degradedMemoryDimensions.length >= 1,
+          driver.describe('RFC 0080 §C', 'the named degraded agent MUST project memoryDegraded:true + a non-empty degradedMemoryDimensions'),
+        ).toBe(true);
+      }
+    }
+  });
+});

package/src/scenarios/multi-agent-confidence-escalation.test.ts CHANGED Viewed

@@ -49,7 +49,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
 import { isFixtureAdvertised } from '../lib/fixtures.js';
-import { pollUntilTerminal } from '../lib/polling.js';
+import { pollUntil } from '../lib/polling.js';
 import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -111,12 +111,17 @@ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral
     expect(create.status).toBe(201);
     const runId = (create.json as { runId: string }).runId;
-    const terminal = await pollUntilTerminal(runId);
-    // RFC 0039 escalation suspends the parent — NOT a terminal `completed`.
-    // The conformance pollUntilTerminal returns when the run reaches any
-    // settled status. RFC 0039 §A gives hosts a choice: clarify-kind
-    // escalation (→ waiting-clarification) OR escalate-kind approval
-    // (→ waiting-approval).
+    // RFC 0039 confidence escalation SUSPENDS the parent (a `waiting-*` status)
+    // — it is NOT a terminal `completed`/`failed`/`cancelled`. So poll until the
+    // run either suspends or settles; polling only for terminal statuses
+    // (`pollUntilTerminal`, whose set is {completed,failed,cancelled}) would time
+    // out before the suspension is ever observed — the cause of the prior flake.
+    const terminal = await pollUntil(runId, (s) => {
+      const st = s.status as string;
+      return st.startsWith('waiting-') || st === 'completed' || st === 'failed' || st === 'cancelled';
+    });
+    // RFC 0039 §A gives hosts a choice: clarify-kind escalation
+    // (→ waiting-clarification) OR escalate-kind approval (→ waiting-approval).
     //
     // RFC 0044 routing: when the host advertises
     // `capabilities.multiAgent.executionModel.confidenceEscalationInterruptKind`

package/src/scenarios/otel-collector-canary-inspection.test.ts ADDED Viewed

@@ -0,0 +1,211 @@
+/**
+ * otel-collector-canary-inspection — always-on proof that the conformance
+ * OTel collector inspects real OTLP span attributes for secret leakage.
+ *
+ * Context: `secret-leakage-otel-attribute.test.ts` proves a host doesn't
+ * leak a BYOK canary on its `GET /v1/host/sample/test/otel/spans` scrape
+ * seam. But the scrape seam reports what the host *says* it emitted; a
+ * host could redact there yet still ship the plaintext over the wire via
+ * its real OTLP exporter. `docs/KNOWN-LIMITS.md` tracked exactly this gap:
+ *
+ *   "The conformance OTel collector seam doesn't yet inspect span
+ *    attributes; a host could pass conformance while leaking BYOK
+ *    material on telemetry exports."
+ *
+ * `OtelCollector.findCanaryLeakage()` (added with this scenario) closes
+ * the harness half: it scans every captured span's name, attributes, and
+ * resource attributes — plus metric data-point attributes — for the
+ * canary, returning the offending surface. This file is the always-on,
+ * server-free proof that the inspector is NON-VACUOUS: it stands up a
+ * real collector, POSTs synthetic OTLP/HTTP-JSON payloads through the
+ * collector's actual ingest path, and asserts the inspector (a) catches a
+ * canary embedded in a span attribute / resource attribute / span name /
+ * metric attribute, and (b) reports zero hits on a redacted payload.
+ *
+ * Pairing this with the live, capability-gated collector scrape in
+ * `secret-leakage-otel-attribute.test.ts` means the invariant is proven
+ * end-to-end the moment a host exports OTLP to the collector — the
+ * inspector itself is no longer the missing piece.
+ *
+ * Runs unconditionally (no host, no network): it exercises the collector
+ * library directly, so it executes in CI on every run.
+ *
+ * @see conformance/src/lib/otel-collector.ts findCanaryLeakage
+ * @see SECURITY/invariants.yaml secret-leakage-otel-attribute
+ * @see docs/KNOWN-LIMITS.md "Behavior tests too coarse to fully prove an invariant"
+ */
+import { describe, it, expect, afterEach } from 'vitest';
+import { OtelCollector } from '../lib/otel-collector.js';
+const CANARY = 'sk-canary-DO-NOT-LEAK-0f3a9c'; // sentinel secret planted in synthetic payloads and passed to findCanaryLeakage
+const REDACTED = '[REDACTED:openwop-conformance-canary-secret]'; // stand-in for a redacted value; does not contain the CANARY string, so it must produce zero hits
+/**
+ * Build an OTLP/HTTP-JSON traces export carrying the given span+resource attrs.
+ *
+ * The result holds exactly one `resourceSpans` entry with one scope (`openwop`) and one
+ * span; the trace/span ids and the start/end timestamps are fixed placeholder values.
+ * Every attribute value is encoded as an OTLP `stringValue`.
+ *
+ * @param opts.spanName      name placed on the single span
+ * @param opts.spanAttrs     key/value pairs emitted as the span's `attributes`
+ * @param opts.resourceAttrs key/value pairs emitted as the resource's `attributes`
+ * @returns the payload object (not yet serialized)
+ */
+function tracesPayload(opts: {
+  spanName: string;
+  spanAttrs: Record<string, string>;
+  resourceAttrs: Record<string, string>;
+}): unknown {
+  const toAttrs = (m: Record<string, string>) => // map -> list of { key, value: { stringValue } } attribute records
+    Object.entries(m).map(([key, value]) => ({ key, value: { stringValue: value } }));
+  return {
+    resourceSpans: [
+      {
+        resource: { attributes: toAttrs(opts.resourceAttrs) },
+        scopeSpans: [
+          {
+            scope: { name: 'openwop' },
+            spans: [
+              {
+                traceId: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
+                spanId: 'bbbbbbbbbbbbbbbb',
+                name: opts.spanName,
+                startTimeUnixNano: '1',
+                endTimeUnixNano: '2',
+                attributes: toAttrs(opts.spanAttrs),
+              },
+            ],
+          },
+        ],
+      },
+    ],
+  };
+}
+/**
+ * Build an OTLP/HTTP-JSON metrics export with one sum data point carrying attrs.
+ *
+ * The result holds one `resourceMetrics` entry (with no `resource` object), one scope
+ * (`openwop`) and one metric whose `sum` has a single data point with `asInt: '1'`.
+ * Every attribute value is encoded as an OTLP `stringValue`.
+ *
+ * @param metricName name placed on the single metric
+ * @param attrs      key/value pairs emitted as the data point's `attributes`
+ * @returns the payload object (not yet serialized)
+ */
+function metricsPayload(metricName: string, attrs: Record<string, string>): unknown {
+  return {
+    resourceMetrics: [
+      {
+        scopeMetrics: [
+          {
+            scope: { name: 'openwop' },
+            metrics: [
+              {
+                name: metricName,
+                sum: {
+                  dataPoints: [
+                    {
+                      asInt: '1',
+                      attributes: Object.entries(attrs).map(([key, value]) => ({ // same { key, value: { stringValue } } encoding as the traces builder
+                        key,
+                        value: { stringValue: value },
+                      })),
+                    },
+                  ],
+                },
+              },
+            ],
+          },
+        ],
+      },
+    ],
+  };
+}
+describe('otel-collector-canary-inspection: collector inspects real OTLP exports', () => { // each test starts its own OtelCollector, POSTs synthetic OTLP JSON to it over HTTP, then queries findCanaryLeakage
+  let collector: OtelCollector | null = null; // set by each test; cleared by afterEach
+  afterEach(async () => { // stop whichever collector the test created, even when the test body threw
+    if (collector) {
+      await collector.stop();
+      collector = null;
+    }
+  });
+  async function postTraces(payload: unknown): Promise<void> { // POST a traces payload to the collector's /v1/traces and require a status below 300; callers must have assigned `collector` first (non-null assertion)
+    const res = await fetch(`${collector!.endpoint()}/v1/traces`, {
+      method: 'POST',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify(payload),
+    });
+    expect(res.status).toBeLessThan(300);
+  }
+  async function postMetrics(payload: unknown): Promise<void> { // same as postTraces, but against /v1/metrics
+    const res = await fetch(`${collector!.endpoint()}/v1/metrics`, {
+      method: 'POST',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify(payload),
+    });
+    expect(res.status).toBeLessThan(300);
+  }
+  it('catches a canary embedded in a span attribute value', async () => {
+    collector = new OtelCollector();
+    await collector.start();
+    await postTraces(
+      tracesPayload({
+        spanName: 'openwop.node.execute',
+        spanAttrs: { 'openwop.node.id': 'n1', 'http.request.header.authorization': `Bearer ${CANARY}` }, // canary embedded inside a longer value, so the hit is a substring match
+        resourceAttrs: { 'service.name': 'host' },
+      }),
+    );
+    const leaks = collector.findCanaryLeakage(CANARY);
+    expect(leaks.length).toBeGreaterThan(0);
+    const attrLeak = leaks.find((l) => l.surface === 'span.attribute');
+    expect(attrLeak).toBeDefined();
+    expect(attrLeak!.key).toBe('http.request.header.authorization'); // the hit must name the offending attribute key
+    expect(attrLeak!.value).toContain(CANARY);
+  });
+  it('catches a canary in a resource attribute and in a span name', async () => {
+    collector = new OtelCollector();
+    await collector.start();
+    await postTraces(
+      tracesPayload({
+        spanName: `openwop.run ${CANARY}`,
+        spanAttrs: { 'openwop.run.id': 'r1' }, // span attributes are clean here; only the name and the resource carry the canary
+        resourceAttrs: { 'service.name': 'host', 'deployment.token': CANARY },
+      }),
+    );
+    const leaks = collector.findCanaryLeakage(CANARY);
+    const surfaces = new Set(leaks.map((l) => l.surface));
+    expect(surfaces.has('span.name')).toBe(true);
+    expect(surfaces.has('span.resourceAttribute')).toBe(true);
+  });
+  it('catches a canary in a metric data-point attribute', async () => {
+    collector = new OtelCollector();
+    await collector.start();
+    await postMetrics(metricsPayload('openwop.node.duration', { 'secret.echo': CANARY }));
+    const leaks = collector.findCanaryLeakage(CANARY);
+    const metricLeak = leaks.find((l) => l.surface === 'metric.attribute');
+    expect(metricLeak).toBeDefined();
+    expect(metricLeak!.emitterName).toBe('openwop.node.duration'); // for a metric hit, emitterName is expected to be the metric's name
+  });
+  it('reports ZERO hits when the host redacts the canary before export (positive control)', async () => { // NOTE(review): a case that expects zero hits on clean input is conventionally called a *negative* control — confirm the intended label
+    collector = new OtelCollector();
+    await collector.start();
+    await postTraces(
+      tracesPayload({
+        spanName: 'openwop.node.execute',
+        spanAttrs: { 'openwop.node.id': 'n1', 'http.request.header.authorization': `Bearer ${REDACTED}` },
+        resourceAttrs: { 'service.name': 'host', 'deployment.token': REDACTED },
+      }),
+    );
+    await postMetrics(metricsPayload('openwop.node.duration', { 'secret.echo': REDACTED }));
+    expect(collector.findCanaryLeakage(CANARY)).toEqual([]); // same keys as the leaking cases, redacted values ⇒ no hit on any surface
+  });
+  it('an empty or whitespace canary never produces a (vacuous) hit', async () => {
+    collector = new OtelCollector();
+    await collector.start();
+    await postTraces(
+      tracesPayload({
+        spanName: 'openwop.node.execute',
+        spanAttrs: { 'a': 'b' },
+        resourceAttrs: { 'service.name': 'host' },
+      }),
+    );
+    expect(collector.findCanaryLeakage('')).toEqual([]); // an empty needle would otherwise be a substring of every captured string
+    expect(collector.findCanaryLeakage('   ')).toEqual([]);
+  });
+});

package/src/scenarios/prompt-resolution-chain-event.test.ts ADDED Viewed

@@ -0,0 +1,113 @@
+/**
+ * prompt-resolution-chain-event — RFC 0029 layer precedence on the PRODUCTION wire.
+ *
+ * The black-box, production-path counterpart to the three seam-driven
+ * `prompt-resolution-chain-{node-wins,agent-intrinsic,fallback-cascade}.test.ts`
+ * scenarios. Instead of the synchronous `POST /v1/host/sample/prompt/resolve`
+ * seam, this creates a real run from a prompt-exercising fixture, reads the
+ * run's DURABLE event log via the NORMATIVE `GET /v1/runs/{runId}/events/poll`
+ * endpoint, and asserts the `agent.promptResolved` event carries the full
+ * layer-by-layer precedence record (`spec/v1/prompts.md` §"Resolution chain") —
+ * no `/v1/host/sample/*` seam.
+ *
+ * The `agentPromptResolved` payload (`schemas/run-event-payloads.schema.json`)
+ * already REQUIRES `chain[]` with one `applied: true` entry + the full-traversal
+ * MUST, so the wire is already capable of conveying precedence without the seam.
+ * This is the "replace seam-gated proofs with black-box production-path
+ * conformance" step (independent-audit acceptance-bar item 3) for RFC 0029: once
+ * a host emits `agent.promptResolved`, prompt-chain precedence is proven on the
+ * production wire and the surface graduates INTO the `openwop-core-standard`
+ * floor (RFC 0088 §D Lever-2 → floor).
+ *
+ * Gating: soft-skips unless `capabilities.prompts.supported` AND the host
+ * actually emits `agent.promptResolved` for the run (emission is staged per
+ * RFC 0029 / RFC 0021 — a host advertising prompts might not yet emit the event).
+ *
+ * @see RFCS/0029-prompt-override-hierarchy.md
+ * @see spec/v1/prompts.md §"Resolution chain (normative)"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+const PROMPT_FIXTURE_ID = 'conformance-prompt-end-to-end'; // workflowId posted to /v1/runs by the test below
+const VALID_LAYERS = new Set([ // layer names accepted in a chain entry's `layer` field (seven in total)
+  'run-configurable', 'node', 'agent-intrinsic', 'agent-overrides',
+  'agent-library-default', 'workflow-defaults', 'host-defaults',
+]);
+interface ChainEntry { layer?: unknown; source?: unknown; applied?: unknown } // one attempted layer; every field is `unknown` and checked at runtime
+interface PromptResolvedPayload { chain?: ChainEntry[]; resolved?: unknown } // the parts of the agent.promptResolved payload this scenario reads
+interface RawEvent { type?: string; payload?: PromptResolvedPayload } // minimal view of an element of the events-poll `events` array
+async function promptsSupported(): Promise<boolean> { // resolves true only when the discovery document's `prompts` capability family reports `supported: true`
+  const res = await driver.get('/.well-known/openwop');
+  return capabilityFamily(res.json as Record<string, unknown> | undefined, 'prompts')?.supported === true; // a null/undefined family lookup yields false via the optional chain
+}
+describe('prompt-resolution-chain-event (black-box): agent.promptResolved carries the precedence record (RFC 0029)', () => { // one test: create a run, wait for it to settle, read its event log, validate every agent.promptResolved payload
+  it('the production agent.promptResolved event records the full four-layer resolution chain', async () => { // NOTE(review): the title says "four-layer" while VALID_LAYERS lists seven layer names — confirm the wording
+    if (!(await promptsSupported())) return; // capability not advertised — skip
+    const create = await driver.post('/v1/runs', { workflowId: PROMPT_FIXTURE_ID });
+    if (create.status !== 201) {
+      // Fixture not seeded / run not accepted — not a prompt-chain failure.
+      // eslint-disable-next-line no-console
+      console.warn(`[prompt-resolution-chain-event] POST /v1/runs for ${PROMPT_FIXTURE_ID} returned ${create.status}; skipping the production-path assertion`);
+      return;
+    }
+    const runId = (create.json as { runId?: string }).runId;
+    if (!runId) return; // a 201 without a runId leaves nothing to poll — ends the test silently
+    await pollUntilTerminal(runId);
+    const poll = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events/poll`); // NOTE(review): the poll response status is not inspected; a failed poll yields an empty list and takes the soft-skip path below
+    const events = (poll.json as { events?: RawEvent[] } | undefined)?.events ?? [];
+    const resolved = events.filter((e) => e.type === 'agent.promptResolved');
+    if (resolved.length === 0) {
+      // Host advertises prompts but does not yet emit agent.promptResolved
+      // (RFC 0029 emission is staged) — soft-skip the behavioral assertion.
+      // eslint-disable-next-line no-console
+      console.warn('[prompt-resolution-chain-event] host emitted no agent.promptResolved event; skipping (RFC 0029 emission staged)');
+      return;
+    }
+    for (const ev of resolved) {
+      const chain = ev.payload?.chain;
+      expect(
+        Array.isArray(chain) && chain.length > 0,
+        driver.describe('prompts.md §Resolution chain', 'agent.promptResolved MUST carry a non-empty chain[] of attempted layers'),
+      ).toBe(true);
+      const entries = chain as ChainEntry[]; // safe: the assertion above throws unless chain is a non-empty array
+      // Every entry is a well-formed layer record (the full-traversal shape).
+      for (const e of entries) {
+        expect(
+          typeof e.layer === 'string' && VALID_LAYERS.has(e.layer),
+          driver.describe('prompts.md §Resolution chain', `each chain entry MUST name a valid layer, got ${String(e.layer)}`),
+        ).toBe(true);
+        expect(typeof e.applied, driver.describe('prompts.md §Resolution chain', 'each chain entry MUST carry a boolean `applied`')).toBe('boolean');
+      }
+      // Exactly one layer wins (or none, when resolved is null).
+      const applied = entries.filter((e) => e.applied === true);
+      expect(
+        applied.length <= 1,
+        driver.describe('prompts.md §Resolution chain', 'AT MOST one chain entry MAY be applied: true (the winning layer)'),
+      ).toBe(true);
+      // resolved mirrors the applied entry's source (RFC 0029 §B). Compared structurally: both values are decoded JSON, so a non-primitive source could never be reference-identical to resolved.
+      if (applied.length === 1) {
+        expect(
+          ev.payload?.resolved,
+          driver.describe('run-event-payloads.schema.json agentPromptResolved', '`resolved` MUST mirror the applied chain entry\'s `source`'),
+        ).toEqual(applied[0]?.source);
+      } else { // no applied layer: an absent `resolved` is tolerated alongside an explicit null
+        expect(
+          ev.payload?.resolved === null || ev.payload?.resolved === undefined,
+          driver.describe('run-event-payloads.schema.json agentPromptResolved', 'with no applied layer, `resolved` MUST be null'),
+        ).toBe(true);
+      }
+    }
+  });
+});