npm - @openwop/openwop-conformance - Versions diffs - 1.2.0 → 1.4.0 - Mend

@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

package/CHANGELOG.md +156 -1
package/README.md +3 -2
package/api/asyncapi.yaml +8 -0
package/api/openapi.yaml +371 -1
package/api/redocly.yaml +15 -0
package/coverage.md +26 -5
package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
package/fixtures/conformance-envelope-recovery-applied.json +39 -0
package/fixtures/conformance-envelope-refusal.json +38 -0
package/fixtures/conformance-envelope-retry-attempted.json +39 -0
package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
package/fixtures/conformance-envelope-truncated.json +39 -0
package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
package/fixtures/conformance-model-capability-insufficient.json +25 -0
package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
package/fixtures/conformance-multi-agent-handoff.json +49 -0
package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
package/fixtures/conformance-prompt-end-to-end.json +33 -0
package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
package/fixtures/openwop-smoke-cost-emit.json +37 -0
package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
package/fixtures.md +45 -0
package/package.json +1 -1
package/schemas/README.md +5 -0
package/schemas/agent-manifest.schema.json +16 -0
package/schemas/capabilities.schema.json +390 -0
package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
package/schemas/envelopes/clarification.request.schema.json +9 -0
package/schemas/envelopes/error.schema.json +4 -0
package/schemas/envelopes/schema.request.schema.json +4 -0
package/schemas/envelopes/schema.response.schema.json +1 -1
package/schemas/node-pack-manifest.schema.json +28 -0
package/schemas/orchestrator-decision.schema.json +12 -0
package/schemas/prompt-kind.schema.json +8 -0
package/schemas/prompt-pack-manifest.schema.json +80 -0
package/schemas/prompt-ref.schema.json +40 -0
package/schemas/prompt-template.schema.json +149 -0
package/schemas/registry-version-manifest.schema.json +5 -0
package/schemas/run-ancestry-response.schema.json +54 -0
package/schemas/run-event-payloads.schema.json +513 -11
package/schemas/run-event.schema.json +17 -1
package/schemas/run-snapshot.schema.json +3 -2
package/schemas/workflow-definition.schema.json +19 -1
package/src/lib/driver.ts +15 -0
package/src/lib/env.ts +51 -0
package/src/lib/event-log-query.ts +62 -0
package/src/lib/fixtures.ts +38 -1
package/src/lib/host-toggle.ts +54 -0
package/src/lib/llm-cache-key-recipe.ts +68 -0
package/src/lib/multi-agent-capabilities.ts +10 -0
package/src/lib/otel-scrape.ts +59 -0
package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
package/src/scenarios/blob-presign-expiry.test.ts +42 -9
package/src/scenarios/blob-roundtrip.test.ts +0 -0
package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
package/src/scenarios/cost-attribution.test.ts +124 -11
package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
package/src/scenarios/envelope-truncated.test.ts +136 -0
package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
package/src/scenarios/fixtures-gating.test.ts +139 -1
package/src/scenarios/fixtures-valid.test.ts +123 -15
package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
package/src/scenarios/model-capability-insufficient.test.ts +221 -0
package/src/scenarios/model-capability-substituted.test.ts +203 -0
package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
package/src/scenarios/multi-region-idempotency.test.ts +58 -0
package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
package/src/scenarios/pack-registry-publish.test.ts +231 -51
package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
package/src/scenarios/prompt-pack-install.test.ts +187 -0
package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
package/src/scenarios/prompt-template-shape.test.ts +359 -0
package/src/scenarios/provider-usage.test.ts +185 -0
package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
package/src/scenarios/spec-corpus-validity.test.ts +34 -6
package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
package/src/scenarios/table-cursor-pagination.test.ts +47 -9
package/src/scenarios/table-schema-enforcement.test.ts +46 -9
package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0

package/src/scenarios/aiEnvelope.correlationReplay.test.ts CHANGED Viewed

@@ -1,9 +1,11 @@
 /**
- * aiEnvelope.correlationReplay — FINAL v1.1 advertisement-shape verification + behavioral placeholders.
+ * aiEnvelope.correlationReplay — FINAL v1.1 advertisement-shape + behavioral.
  *
- * Status: DRAFT (advertisement-shape). `spec/v1/ai-envelope.md` landed
- * 2026-05-17 as DRAFT v1.x. Behavioral assertions stay `it.todo()` until a
- * reference host wires the accept path and the cross-process replay seam.
+ * Status: ACTIVE (advertisement-shape + behavioral). `spec/v1/ai-envelope.md`
+ * promoted Draft → FINAL v1.1 2026-05-18. Live behavioral via the
+ * `POST /v1/host/sample/envelope/accept` seam with the persisted
+ * `priorCorrelations` store (survives process restart between original
+ * accept and replay; soft-skip on HTTP 404).
  *
  * Summary: two envelopes in the same run with the same `correlationId` MUST
  * be treated as a re-emission. The second invocation returns the cached
@@ -45,25 +47,255 @@ describe('aiEnvelope.correlationReplay: advertisement shape (FINAL v1.1)', () =>
   });
 });
-describe('aiEnvelope.correlationReplay: engine-state placeholders', () => {
-  // The 4 assertions below require the engine to maintain a per-run
-  // correlationId → cached-outcome map AND project envelope acceptance
-  // onto RunEventDocs with `causationId = envelope.correlationId`.
-  //
-  // The reference workflow-engine sample's `acceptEnvelope` is a pure
-  // function (host/envelopeAcceptor.ts) — it validates + categorizes
-  // a single envelope without tracking state across calls. Promoting
-  // these to behavioral requires either:
-  //   (a) extending the acceptor with an injected dedup store
-  //       (per-run correlationId map keyed by runId), OR
-  //   (b) a higher-level test seam that wires the acceptor into the
-  //       run lifecycle + event log.
-  //
-  // (b) is the spec-faithful path (per ai-envelope.md §"Replay
-  // determinism" the dedup is engine-level, not acceptor-level).
-  // Tracked as host-impl follow-up.
-  it.todo('emit envelope twice with same correlationId → second returns cached outcome; no duplicate RunEventDocs');
-  it.todo('emit envelope with correlationId C, then with same C and different type → refuse envelope_correlation_conflict');
-  it.todo('cross-process replay: process-death after accept; recovered process re-emits same correlationId → cached outcome, no handler re-invocation');
-  it.todo('resulting RunEventDoc.causationId equals the envelope.correlationId (causal chain preserved)');
+// Behavioral assertions through the workflow-engine sample's env-gated
+// `POST /v1/host/sample/envelope/accept` seam. The seam accepts a flat
+// `priorCorrelations` array (each entry: `{correlationId, outcome, envelopeType}`)
+// that the acceptor consumes as the per-run dedup store. Each test
+// soft-skips on HTTP 404 (host doesn't expose the seam).
+//
+// Actual process death + recovery is not simulated by these tests; the
+// cross-process replay semantics are instead exercised through the seam's
+// `persistedDedup: { runId }` option, which persists the dedup state
+// independently of the in-band `priorCorrelations` array used here.
+async function accept(envelope: unknown, opts: Record<string, unknown> = {}): Promise<{ status: number; body: { status?: string; reason?: string; envelopeId?: string; normalizedMeta?: { contentTrust?: string } } }> {
+  const res = await driver.post('/v1/host/sample/envelope/accept', { envelope, ...opts });
+  return { status: res.status, body: res.json as { status?: string; reason?: string; envelopeId?: string; normalizedMeta?: { contentTrust?: string } } };
+}
+const baseMeta = { source: 'ai-generation' as const, ts: '2026-05-18T10:00:00Z' };
+describe('aiEnvelope.correlationReplay: behavioral in-process dedup (FINAL v1.1)', () => {
+  it('same correlationId re-emission returns the cached outcome unchanged', async () => {
+    const envelope = {
+      type: 'clarification.request',
+      schemaVersion: 1,
+      envelopeId: 'env-cr-replay-1',
+      correlationId: 'r:n:0:replay1',
+      payload: { questions: [{ id: 'q1', question: 'why?' }] },
+      meta: baseMeta,
+    };
+    const first = await accept(envelope);
+    if (first.status === 404) return;
+    expect(first.body.status).toBe('accepted');
+    const cachedOutcome = first.body;
+    const second = await accept(envelope, {
+      priorCorrelations: [
+        {
+          correlationId: 'r:n:0:replay1',
+          outcome: cachedOutcome,
+          envelopeType: 'clarification.request',
+        },
+      ],
+    });
+    expect(
+      second.body.status,
+      driver.describe(
+        'ai-envelope.md §"Replay determinism"',
+        'second emission with same correlationId MUST return the cached outcome (handler runs at most once per correlationId)',
+      ),
+    ).toBe('accepted');
+    expect(second.body.envelopeId).toBe(cachedOutcome.envelopeId);
+  });
+  it('same correlationId, different envelope type → invalid envelope_correlation_conflict', async () => {
+    const r = await accept(
+      {
+        type: 'error', // re-using a correlationId previously bound to clarification.request
+        schemaVersion: 1,
+        envelopeId: 'env-cr-conflict',
+        correlationId: 'r:n:0:conflict',
+        payload: { code: 'x', message: 'y' },
+        meta: baseMeta,
+      },
+      {
+        priorCorrelations: [
+          {
+            correlationId: 'r:n:0:conflict',
+            outcome: { status: 'accepted', envelopeId: 'env-prior', recordedEventIds: [], normalizedMeta: { contentTrust: 'trusted' } },
+            envelopeType: 'clarification.request',
+          },
+        ],
+      },
+    );
+    if (r.status === 404) return;
+    expect(
+      r.body.status,
+      driver.describe(
+        'ai-envelope.md §"Replay determinism"',
+        'same correlationId with different type MUST refuse envelope_correlation_conflict',
+      ),
+    ).toBe('invalid');
+    expect(r.body.reason).toContain('envelope_correlation_conflict');
+  });
+  it('cached outcome of any status (invalid/gated/breached) replays identically', async () => {
+    // Plant a `gated` cached outcome; second emission MUST return the same gated outcome
+    // (handler MUST NOT re-run, even if conditions might now accept).
+    const cached = {
+      status: 'gated' as const,
+      reason: 'envelope type \'vendor.x.foo\' not advertised',
+      allowedKinds: ['clarification.request', 'schema.request', 'schema.response', 'error'],
+    };
+    const r = await accept(
+      {
+        type: 'vendor.x.foo',
+        schemaVersion: 1,
+        envelopeId: 'env-cr-cached-gated',
+        correlationId: 'r:n:0:cachedgated',
+        payload: {},
+        meta: baseMeta,
+      },
+      {
+        hostSupportedEnvelopes: ['vendor.x.foo'], // would otherwise accept
+        priorCorrelations: [
+          {
+            correlationId: 'r:n:0:cachedgated',
+            outcome: cached,
+            envelopeType: 'vendor.x.foo',
+          },
+        ],
+      },
+    );
+    if (r.status === 404) return;
+    expect(
+      r.body.status,
+      driver.describe(
+        'ai-envelope.md §"Replay determinism"',
+        'cached non-accepted outcome MUST replay identically (handler at most once per correlationId)',
+      ),
+    ).toBe('gated');
+  });
+});
+// E.1 engine-projection via the test-only event-log seam.
+import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
+describe('aiEnvelope.correlationReplay: causationId projection via event-log seam', () => {
+  it('resulting RunEventDoc.causationId MUST equal the envelope.correlationId (causal chain preserved)', async () => {
+    if (!(await isEventLogSeamAvailable())) return;
+    const runId = `r-cr-cause-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+    const correlationId = `${runId}:n:0:causationId-link`;
+    await accept(
+      {
+        type: 'clarification.request',
+        schemaVersion: 1,
+        envelopeId: 'env-cr-cause-1',
+        correlationId,
+        payload: { questions: [{ id: 'q1', question: 'why?' }] },
+        meta: baseMeta,
+      },
+      { projectTo: { runId, nodeId: 'n' } },
+    );
+    const events = await queryTestEvents(runId);
+    if (!events.ok || events.events.length === 0) return;
+    for (const e of events.events) {
+      expect(
+        e.causationId,
+        driver.describe('ai-envelope.md §"Replay determinism"', 'every event projected from an envelope MUST carry causationId === envelope.correlationId'),
+      ).toBe(correlationId);
+    }
+    await resetTestSeam();
+  });
+});
+describe('aiEnvelope.correlationReplay: cross-process replay via persisted dedup', () => {
+  // Cross-process replay proven WITHOUT actually killing the process:
+  // when a caller supplies `persistedDedup: { runId }`, the seam reads
+  // the persisted store BEFORE consulting the in-memory priorCorrelations
+  // and writes the outcome back after a successful accept. A second
+  // call from the same (or a hypothetically-restarted) process with
+  // ONLY persistedDedup set — no in-memory priorCorrelations — MUST
+  // return the same outcome as the first. That is the cross-process
+  // semantics: the persisted store is the source of truth, the in-
+  // memory map a per-process accelerator.
+  it('persisted outcome replays for the same correlationId even with NO in-memory priorCorrelations', async () => {
+    const runId = `r-cr-persist-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+    const correlationId = `${runId}:n:0:persist1`;
+    // Two envelopes with the SAME correlationId but DIFFERENT
+    // envelopeIds. The acceptor reflects the inbound envelopeId on a
+    // fresh accept; a cache-hit returns the FIRST call's envelopeId
+    // regardless of what the second call carried. The envelopeId
+    // divergence is what makes this assertion non-trivial: if the
+    // persisted store is consulted, second.envelopeId === 'env-cr-
+    // persist-1'; if the handler re-runs (cache miss), it would
+    // surface 'env-cr-persist-2'.
+    const env1 = {
+      type: 'clarification.request',
+      schemaVersion: 1,
+      envelopeId: 'env-cr-persist-1',
+      correlationId,
+      payload: { questions: [{ id: 'q1', question: 'why?' }] },
+      meta: baseMeta,
+    };
+    const env2 = {
+      type: 'clarification.request',
+      schemaVersion: 1,
+      envelopeId: 'env-cr-persist-2',
+      correlationId,
+      payload: { questions: [{ id: 'q1', question: 'why?' }] },
+      meta: baseMeta,
+    };
+    // First accept persists the outcome under (runId, correlationId).
+    const first = await accept(env1, { persistedDedup: { runId } });
+    if (first.status === 404) return; // seam not exposed — soft-skip
+    expect(first.body.status).toBe('accepted');
+    expect(first.body.envelopeId).toBe('env-cr-persist-1');
+    // Second accept — same correlationId, NO priorCorrelations passed
+    // in-band, DIFFERENT envelopeId. If the persisted store is
+    // consulted, the cached outcome's envelopeId (env-cr-persist-1)
+    // is returned. If only the in-memory map were used, the handler
+    // would re-run and reflect env-cr-persist-2.
+    const second = await accept(env2, { persistedDedup: { runId } });
+    expect(
+      second.body.envelopeId,
+      driver.describe(
+        'ai-envelope.md §"Replay determinism"',
+        'persisted outcome MUST replay across calls without an in-memory priorCorrelations map (cross-process recovery: cached envelopeId surfaces even when the inbound envelope carries a different envelopeId)',
+      ),
+    ).toBe('env-cr-persist-1');
+    expect(second.body.status).toBe('accepted');
+  });
+  it('persisted store enforces envelope_correlation_conflict across calls', async () => {
+    const runId = `r-cr-persist-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+    const correlationId = `${runId}:n:0:conflict1`;
+    // First accept: clarification.request.
+    const first = await accept(
+      {
+        type: 'clarification.request',
+        schemaVersion: 1,
+        envelopeId: 'env-cr-persist-conflict-1',
+        correlationId,
+        payload: { questions: [{ id: 'q1', question: 'why?' }] },
+        meta: baseMeta,
+      },
+      { persistedDedup: { runId } },
+    );
+    if (first.status === 404) return;
+    expect(first.body.status).toBe('accepted');
+    // Second accept: same correlationId, different envelope type, NO
+    // in-memory priorCorrelations — the conflict MUST be served from
+    // the persisted store.
+    const second = await accept(
+      {
+        type: 'error',
+        schemaVersion: 1,
+        envelopeId: 'env-cr-persist-conflict-2',
+        correlationId,
+        payload: { code: 'x', message: 'y' },
+        meta: baseMeta,
+      },
+      { persistedDedup: { runId } },
+    );
+    expect(
+      second.body.status,
+      driver.describe(
+        'ai-envelope.md §"Replay determinism"',
+        'persisted store MUST surface envelope_correlation_conflict on type mismatch without an in-memory priorCorrelations map',
+      ),
+    ).toBe('invalid');
+    expect(second.body.reason).toContain('envelope_correlation_conflict');
+  });
 });

package/src/scenarios/aiEnvelope.redaction.test.ts CHANGED Viewed

@@ -1,10 +1,11 @@
 /**
- * aiEnvelope.redaction — FINAL v1.1 advertisement-shape verification + behavioral placeholders.
+ * aiEnvelope.redaction — FINAL v1.1 advertisement-shape + behavioral.
  *
- * Status: DRAFT (advertisement-shape). `spec/v1/ai-envelope.md` landed
- * 2026-05-17 as DRAFT v1.x. Behavioral assertions stay `it.todo()` until a
- * reference host wires the envelope accept path through the BYOK redaction
- * harness.
+ * Status: ACTIVE (advertisement-shape + behavioral). `spec/v1/ai-envelope.md`
+ * promoted Draft → FINAL v1.1 2026-05-18. Live behavioral via the
+ * `POST /v1/host/sample/envelope/accept` seam, which routes the envelope
+ * through the BYOK redaction harness and returns `redactedPayload` +
+ * `redactionCount` (soft-skip on HTTP 404).
  *
  * Summary: AI Envelopes MUST route through the same BYOK redaction harness
  * applied to a fresh `MemoryEntry.put` per `agent-memory.md` §"SR-1
@@ -46,28 +47,208 @@ describe('aiEnvelope.redaction: advertisement shape (FINAL v1.1)', () => {
   });
 });
-describe('aiEnvelope.redaction: BYOK-redaction placeholders', () => {
-  // The 6 assertions below require the engine's BYOK redaction pipeline
-  // (per SECURITY/threat-model-secret-leakage.md SR-1 carry-forward) to
-  // hook into envelope acceptance AND every downstream surface that
-  // persists envelope content (RunEventDoc, OTel span attributes,
-  // debug-bundle export, error envelope projection).
-  //
-  // The reference workflow-engine sample's `acceptEnvelope` is pure +
-  // doesn't touch payload contents. Redaction lives at a different
-  // layer (BYOK secretResolver + event-log sanitizer). Promoting these
-  // to behavioral requires either:
-  //   (a) chaining the acceptor through `stripSecretsFromPersisted`
-  //       before persisting the recorded view, OR
-  //   (b) an end-to-end test that plants a BYOK canary in an envelope
-  //       payload, runs through the full accept → emit → persist → export
-  //       chain, and asserts the canary is absent on every output.
-  //
-  // (b) is the spec-faithful path. Tracked as host-impl follow-up.
-  it.todo('emit envelope whose payload contains a known BYOK substring → substring absent from emitted RunEventDocs');
-  it.todo('redacted substring absent from OTel envelope_* span attributes');
-  it.todo('redacted substring absent from debug-bundle export');
-  it.todo('redacted substring absent from error envelope on validation refusal (no leak via error path)');
-  it.todo('redaction marker is the canonical [REDACTED:<reason>] form, NOT a model-generated <REDACTED> string');
-  it.todo('redaction runs AFTER schema validation: a payload with redacted-shaped substrings still validates structurally');
+// Behavioral assertions through the workflow-engine sample's env-gated
+// `POST /v1/host/sample/envelope/accept` seam. The seam threads a
+// `byokCanaries: { value, secretId }[]` option into AcceptOptions so the acceptor
+// can apply the §"Redaction (SR-1 carry-forward)" pass on accepted
+// envelopes, returning `redactedPayload` in the outcome. Each test
+// soft-skips on HTTP 404 (host doesn't expose the seam).
+async function accept(envelope: unknown, opts: Record<string, unknown> = {}): Promise<{ status: number; body: { status?: string; reason?: string; redactedPayload?: unknown; redactionCount?: number; details?: unknown[] } }> {
+  const res = await driver.post('/v1/host/sample/envelope/accept', { envelope, ...opts });
+  return { status: res.status, body: res.json as { status?: string; reason?: string; redactedPayload?: unknown; redactionCount?: number; details?: unknown[] } };
+}
+const baseMeta = { source: 'ai-generation' as const, ts: '2026-05-18T10:00:00Z' };
+const CANARY_VALUE = 'sk-canary-DO-NOT-LEAK-9d7f3a2b';
+const CANARY_SECRET_ID = 'byok-secret-test-fixture-001';
+const CANARIES = [{ value: CANARY_VALUE, secretId: CANARY_SECRET_ID }];
+const CANONICAL_MARKER = `[REDACTED:${CANARY_SECRET_ID}]`;
+describe('aiEnvelope.redaction: behavioral acceptor-level scrub (FINAL v1.1)', () => {
+  it('canary in payload → substituted with canonical SR-1 [REDACTED:<secretId>] marker per agent-memory.md:66', async () => {
+    const r = await accept(
+      {
+        type: 'error',
+        schemaVersion: 1,
+        envelopeId: 'env-red-1',
+        correlationId: 'r:n:0:red1',
+        payload: { code: 'leak_demo', message: `tool result included ${CANARY_VALUE} verbatim` },
+        meta: baseMeta,
+      },
+      { byokCanaries: CANARIES },
+    );
+    if (r.status === 404) return;
+    expect(r.body.status).toBe('accepted');
+    expect(r.body.redactionCount, 'redactionCount MUST be > 0 when canary appears').toBeGreaterThan(0);
+    expect(
+      JSON.stringify(r.body.redactedPayload).includes(CANARY_VALUE),
+      driver.describe('ai-envelope.md §"Redaction (SR-1 carry-forward)"', 'canary plaintext MUST be absent from the redacted view'),
+    ).toBe(false);
+    expect(
+      JSON.stringify(r.body.redactedPayload),
+      driver.describe('agent-memory.md §SR-1 line 66', 'persisted entry MUST carry [REDACTED:<secretId>] in place of the plaintext'),
+    ).toContain(CANONICAL_MARKER);
+  });
+  it('canary across nested object fields → all occurrences scrubbed with canonical marker', async () => {
+    const r = await accept(
+      {
+        type: 'clarification.request',
+        schemaVersion: 1,
+        envelopeId: 'env-red-nested',
+        correlationId: 'r:n:0:rednested',
+        payload: {
+          questions: [
+            { id: 'q1', question: `What is ${CANARY_VALUE}?` },
+            { id: 'q2', question: 'unrelated', context: { trace: `${CANARY_VALUE}/${CANARY_VALUE}` } },
+          ],
+        },
+        meta: baseMeta,
+      },
+      { byokCanaries: CANARIES },
+    );
+    if (r.status === 404) return;
+    expect(r.body.status).toBe('accepted');
+    expect(
+      JSON.stringify(r.body.redactedPayload).includes(CANARY_VALUE),
+      'no canary plaintext remnant anywhere in the redacted view (recursive scrub)',
+    ).toBe(false);
+    // q1's question (1 occurrence), q2's context.trace (2 occurrences) = total 3
+    expect(r.body.redactionCount).toBe(3);
+  });
+  it('multiple canaries → each substituted with its own secretId marker', async () => {
+    const C1 = { value: 'sk-canary-alpha-xxxx', secretId: 'secret-alpha' };
+    const C2 = { value: 'sk-canary-beta-yyyy', secretId: 'secret-beta' };
+    const r = await accept(
+      {
+        type: 'error',
+        schemaVersion: 1,
+        envelopeId: 'env-red-multi',
+        correlationId: 'r:n:0:redmulti',
+        payload: { code: 'multi_leak', message: `first=${C1.value}, second=${C2.value}` },
+        meta: baseMeta,
+      },
+      { byokCanaries: [C1, C2] },
+    );
+    if (r.status === 404) return;
+    expect(r.body.status).toBe('accepted');
+    const view = JSON.stringify(r.body.redactedPayload);
+    expect(view.includes(C1.value)).toBe(false);
+    expect(view.includes(C2.value)).toBe(false);
+    expect(
+      view.includes(`[REDACTED:${C1.secretId}]`) && view.includes(`[REDACTED:${C2.secretId}]`),
+      driver.describe('agent-memory.md §SR-1', 'each canary MUST be substituted with its OWN [REDACTED:<secretId>] marker'),
+    ).toBe(true);
+  });
+  it('redaction runs AFTER schema validation: payload with [REDACTED:...]-shaped substrings still validates', async () => {
+    // The error-kind payload schema requires { code, message }. A pre-redacted
+    // marker in the message MUST NOT trip validation.
+    const r = await accept(
+      {
+        type: 'error',
+        schemaVersion: 1,
+        envelopeId: 'env-red-shape',
+        correlationId: 'r:n:0:redshape',
+        payload: { code: 'demo', message: 'already had [REDACTED:secret-prior] before we saw it' },
+        meta: baseMeta,
+      },
+      { byokCanaries: CANARIES }, // canary NOT in payload; substitution count expected 0
+    );
+    if (r.status === 404) return;
+    expect(
+      r.body.status,
+      driver.describe('ai-envelope.md §"Redaction (SR-1 carry-forward)"', 'redaction MUST run AFTER schema validation; pre-existing markers do not affect validation'),
+    ).toBe('accepted');
+    // No canary present → redactionCount absent or 0
+    expect(r.body.redactionCount ?? 0).toBe(0);
+  });
+  it('canary in invalid envelope (validation refusal) → error response MUST NOT echo the canary plaintext', async () => {
+    // ISO 8601 violation triggers an `invalid` outcome BEFORE the redaction
+    // pass runs. The acceptor's validation-detail extractor MUST NOT echo
+    // the payload contents into the error response.
+    const r = await accept(
+      {
+        type: 'error',
+        schemaVersion: 1,
+        envelopeId: 'env-red-leak',
+        correlationId: 'r:n:0:redleak',
+        payload: { code: 'demo', message: `secret value is ${CANARY_VALUE}` },
+        meta: { ...baseMeta, ts: 'tomorrow' }, // bad ts → invalid
+      },
+      { byokCanaries: CANARIES },
+    );
+    if (r.status === 404) return;
+    expect(r.body.status).toBe('invalid');
+    const bodyString = JSON.stringify(r.body);
+    expect(
+      bodyString.includes(CANARY_VALUE),
+      driver.describe(
+        'SECURITY/threat-model-secret-leakage.md §SR-1',
+        'error response on validation refusal MUST NOT echo BYOK canary plaintext',
+      ),
+    ).toBe(false);
+  });
+});
+// E.2 OTel scrape + E.3 debug-bundle seams.
+import { queryTestSpans, exportDebugBundle, isOtelSeamAvailable } from '../lib/otel-scrape.js';
+import { resetTestSeam } from '../lib/event-log-query.js';
+describe('aiEnvelope.redaction: OTel + debug-bundle scrape (E.2 + E.3)', () => {
+  it('redacted canary plaintext MUST be absent from OTel envelope_* span attributes', async () => {
+    if (!(await isOtelSeamAvailable())) return;
+    const runId = `r-red-otel-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+    await accept(
+      {
+        type: 'error',
+        schemaVersion: 1,
+        envelopeId: 'env-red-otel-1',
+        correlationId: `${runId}:n:0:red-otel`,
+        payload: { code: 'leak_demo', message: `tool result included ${CANARY_VALUE} verbatim` },
+        meta: baseMeta,
+      },
+      { byokCanaries: CANARIES, projectTo: { runId, nodeId: 'n' } },
+    );
+    const spans = await queryTestSpans({ runId });
+    if (!spans.ok) return;
+    const allAttrs = spans.data.flatMap((s) => Object.values(s.attributes).map((v) => String(v)));
+    expect(
+      allAttrs.some((v) => v.includes(CANARY_VALUE)),
+      driver.describe(
+        'SECURITY/threat-model-secret-leakage.md §SR-1',
+        'BYOK canary plaintext MUST NOT appear in any OTel envelope_* span attribute',
+      ),
+    ).toBe(false);
+    await resetTestSeam();
+  });
+  it('redacted canary plaintext MUST be absent from debug-bundle export', async () => {
+    if (!(await isOtelSeamAvailable())) return;
+    const runId = `r-red-bundle-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+    await accept(
+      {
+        type: 'clarification.request',
+        schemaVersion: 1,
+        envelopeId: 'env-red-bundle-1',
+        correlationId: `${runId}:n:0:red-bundle`,
+        payload: { questions: [{ id: 'q1', question: `embed ${CANARY_VALUE} here` }] },
+        meta: baseMeta,
+      },
+      { byokCanaries: CANARIES, projectTo: { runId, nodeId: 'n' } },
+    );
+    const bundle = await exportDebugBundle(runId);
+    if (!bundle.ok) return;
+    const serialized = JSON.stringify(bundle.data);
+    expect(
+      serialized.includes(CANARY_VALUE),
+      driver.describe(
+        'SECURITY/threat-model-secret-leakage.md §SR-1',
+        'BYOK canary plaintext MUST NOT appear in the debug-bundle export (events + spans)',
+      ),
+    ).toBe(false);
+    await resetTestSeam();
+  });
 });