npm - @openwop/openwop-conformance - Versions diffs - 1.6.0 → 1.10.0 - Mend

@openwop/openwop-conformance 1.6.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169)

package/src/scenarios/distillation-stable-archive.test.ts ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * distillation-stable-archive — RFC 0062 §B(4). The distilled archive is an
+ * immutable, addressable artifact: the same source set + budget MUST yield a
+ * byte-stable archive checksum (reproducible + auditable).
+ *
+ * Gated on `capabilities.memory.distillation.supported` + the host memory-
+ * distillation seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0062-scheduled-memory-distillation.md §B
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readDistillationCap, invokeDistill } from '../lib/distillation.js';
+// Scenario: issues the same distillation request twice and compares the two
+// reported archive checksums. Returns early (soft-skip, test passes) when the
+// capability is not reported as supported or when invokeDistill yields null.
+describe('distillation-stable-archive (RFC 0062 §B)', () => {
+  it('identical sources + budget produce an identical archive checksum', async () => {
+    // Gate: only hosts whose distillation capability has supported === true are exercised.
+    if ((await readDistillationCap())?.supported !== true) return;
+    // A single request object is reused for both runs so the inputs are identical.
+    const req = {
+      memoryRef: 'conformance-distill',
+      tokenBudget: 8000,
+      sources: ['s1', 's2', 's3'],
+    };
+    const a = await invokeDistill(req);
+    if (a === null) return; // seam absent — soft-skip
+    const b = await invokeDistill(req);
+    if (b === null) return;
+    // First run must report a non-empty string checksum; this also keeps the
+    // equality check below from passing when both checksums are undefined.
+    expect(
+      typeof a.body.archiveChecksum === 'string' && (a.body.archiveChecksum as string).length > 0,
+      driver.describe('RFC 0062 §B', 'a distillation run MUST produce a non-empty archive checksum'),
+    ).toBe(true);
+    // Second run's checksum must equal the first run's checksum exactly.
+    expect(
+      b.body.archiveChecksum,
+      driver.describe('RFC 0062 §B', 'the same source set + budget MUST yield a byte-stable archive'),
+    ).toBe(a.body.archiveChecksum);
+  });
+});

package/src/scenarios/distillation-token-budget.test.ts ADDED Viewed

@@ -0,0 +1,45 @@
+/**
+ * distillation-token-budget — RFC 0062 §B. A distillation run stays within its
+ * token budget (`memory.compacted.distillation.tokensUsed ≤ tokenBudget`); an
+ * un-meetable budget fails with `token_budget_exceeded` and writes no partial
+ * archive (atomic).
+ *
+ * Gated on `capabilities.memory.distillation.supported` + the host memory-
+ * distillation seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0062-scheduled-memory-distillation.md §B
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readDistillationCap, invokeDistill } from '../lib/distillation.js';
+// Scenario: one distillation call with a budget of 8000 followed by one with a
+// budget of 1. The first must report tokensUsed <= tokenBudget; the second must
+// fail with `token_budget_exceeded` and report no archive checksum. Returns
+// early (soft-skip) when the capability is unsupported or invokeDistill yields null.
+describe('distillation-token-budget (RFC 0062 §B)', () => {
+  it('within budget tokensUsed ≤ tokenBudget; an un-meetable budget fails atomically', async () => {
+    if ((await readDistillationCap())?.supported !== true) return;
+    const ok = await invokeDistill({ memoryRef: 'conformance-distill', tokenBudget: 8000 });
+    if (ok === null) return; // seam absent — soft-skip
+    // Falls back to an empty object when the event or its distillation block is
+    // missing, so the typeof assertion below fails with the MUST message.
+    const dist = ok.body.event?.distillation ?? {};
+    expect(
+      typeof dist.tokenBudget === 'number' && typeof dist.tokensUsed === 'number',
+      driver.describe('RFC 0062 §B', 'memory.compacted MUST carry distillation.tokenBudget + tokensUsed on a budgeted run'),
+    ).toBe(true);
+    expect(
+      (dist.tokensUsed as number) <= (dist.tokenBudget as number),
+      driver.describe('RFC 0062 §B', 'a successful distillation MUST consume ≤ its tokenBudget'),
+    ).toBe(true);
+    // A budget too small to distill the corpus MUST fail closed, no partial archive.
+    const tooSmall = await invokeDistill({ memoryRef: 'conformance-distill', tokenBudget: 1 });
+    if (tooSmall === null) return;
+    // Both conditions are required: an error-class status AND the specific error code.
+    expect(
+      tooSmall.status >= 400 && tooSmall.body.error === 'token_budget_exceeded',
+      driver.describe('RFC 0062 §B', 'an un-meetable budget MUST fail with token_budget_exceeded'),
+    ).toBe(true);
+    // Atomicity is checked via the response body: no archiveChecksum may be present.
+    expect(
+      tooSmall.body.archiveChecksum,
+      driver.describe('RFC 0062 §B', 'a token_budget_exceeded run MUST write no partial archive (atomic)'),
+    ).toBeUndefined();
+  });
+});

package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts CHANGED Viewed

@@ -31,6 +31,7 @@ import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
 import { pollUntilTerminal } from '../lib/polling.js';
 import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
 const NODE_ID = 'structured-call';
@@ -91,7 +92,7 @@ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: advert
   it('capabilities.envelopes.reliability.completion (when present) conforms to RFC 0033 §E', async () => {
     const d = await readDiscovery();
     if (d === null) return;
-    const completion = d.capabilities?.envelopes?.reliability?.completion;
+    const completion = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability?.completion;
     if (completion === undefined) return;
     expect(
       typeof completion.distinguishesTruncation,
@@ -114,7 +115,7 @@ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: trunca
   it('truncation: emits envelope.truncated + envelope.retry.attempted with reason: "truncation"', async () => {
     if (!isFixtureAdvertised(TRUNCATED_FIXTURE)) return;
     const d = await readDiscovery();
-    if (d?.capabilities?.envelopes?.reliability?.completion?.distinguishesTruncation !== true) return;
+    if (capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability?.completion?.distinguishesTruncation !== true) return;
     const seed = await programMock([
       { stopReason: 'max_tokens', content: '{"partial' },
       { stopReason: 'end_turn', content: '{"valid":true}' },
@@ -139,7 +140,7 @@ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: trunca
   it('truncation: retry budget strictly greater than initial (RFC 0033 §B truncationBudgetMultiplier)', async () => {
     if (!isFixtureAdvertised(TRUNCATED_FIXTURE)) return;
     const d = await readDiscovery();
-    if (d?.capabilities?.envelopes?.reliability?.completion?.distinguishesTruncation !== true) return;
+    if (capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability?.completion?.distinguishesTruncation !== true) return;
     const seed = await programMock([
       { stopReason: 'max_tokens', content: '{"partial' },
       { stopReason: 'end_turn', content: '{"valid":true}' },

package/src/scenarios/envelope-reasoning-secret-redaction.test.ts CHANGED Viewed

@@ -35,6 +35,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -97,8 +98,8 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: advertisement s
   it('hosts advertising envelope reasoning + BYOK honor SR-1 carry-forward for the reasoning field', async () => {
     const d = await readDiscovery();
     if (d === null) return;
-    const reasoning = d.capabilities?.envelopes?.reasoning?.supported;
-    const secrets = d.capabilities?.secrets?.supported;
+    const reasoning = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reasoning?.supported;
+    const secrets = capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported;
     if (reasoning !== true || secrets !== true) return; // soft-skip when either is absent
     // The contract is invariant-based, not capability-flag-based — the
     // advertisement-shape check here just confirms both surfaces are claimed.
@@ -257,7 +258,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: downstream-proj
     // RFC 0034 §B: gate on capabilities.observability.testSeams.otelScrape.
     // Hosts that don't advertise it soft-skip; hosts that DO advertise MUST serve a valid response.
     const d = await readDiscovery();
-    const otelScrapeAdvertised = d?.capabilities?.observability?.testSeams?.otelScrape === true;
+    const otelScrapeAdvertised = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.otelScrape === true;
     if (!otelScrapeAdvertised) return; // soft-skip — host honest about not implementing per RFC 0034 §A
     const r = await acceptForRun(
@@ -291,7 +292,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: downstream-proj
   it("debug-bundle export MUST NOT include plaintext `secret:`-prefixed substrings from envelope.reasoning", async () => {
     // RFC 0034 §B: gate on capabilities.observability.testSeams.debugBundleExport.
     const d = await readDiscovery();
-    const debugBundleAdvertised = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
+    const debugBundleAdvertised = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.debugBundleExport === true;
     if (!debugBundleAdvertised) return; // soft-skip — host honest about not implementing per RFC 0034 §A
     const r = await acceptForRun(

package/src/scenarios/envelope-reasoning-shape.test.ts CHANGED Viewed

@@ -32,6 +32,7 @@ import { readFileSync } from 'node:fs';
 import { join } from 'node:path';
 import { driver } from '../lib/driver.js';
 import { SCHEMAS_DIR } from '../lib/paths.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -163,7 +164,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-shape: capabilities.envelopes adv
   it('capabilities.envelopes.reasoning (when present) conforms to RFC 0030 §C', async () => {
     const d = await readDiscovery();
     if (d === null) return;
-    const reasoning = d.capabilities?.envelopes?.reasoning;
+    const reasoning = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reasoning;
     if (reasoning === undefined) return; // optional block; host MAY omit
     expect(
       typeof reasoning.supported,
@@ -180,7 +181,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-shape: capabilities.envelopes adv
   it('capabilities.envelopes.tierOneSubsetCompliance (when present) conforms to RFC 0030 §B', async () => {
     const d = await readDiscovery();
     if (d === null) return;
-    const compliance = d.capabilities?.envelopes?.tierOneSubsetCompliance;
+    const compliance = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.tierOneSubsetCompliance;
     if (compliance === undefined) return; // optional; host MAY omit
     expect(
       ['strict', 'warn', 'off'],

package/src/scenarios/envelope-refusal-shape.test.ts CHANGED Viewed

@@ -64,7 +64,7 @@ describe.skipIf(HTTP_SKIP)('envelope-refusal-shape: seam emission (RFC 0032 §B.
   it('accepts a well-formed `envelope.refusal` payload + writes it to the test event log', async () => {
     const d = await readDiscovery();
     if (d === null) return;
-    const reliability = d.capabilities?.envelopes?.reliability;
+    const reliability = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability;
     if (!reliability || reliability.supported !== true) return;
     if (!Array.isArray(reliability.events) || !(reliability.events as unknown[]).includes('envelope.refusal')) return;
@@ -154,7 +154,7 @@ describe.skipIf(HTTP_SKIP)('envelope-refusal-shape: advertisement contract (RFC
   it('capabilities.envelopes.reliability (when supported: true with non-empty events[]) MUST list both MUST-tier events', async () => {
     const d = await readDiscovery();
     if (d === null) return;
-    const reliability = d.capabilities?.envelopes?.reliability;
+    const reliability = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability;
     if (!reliability || reliability.supported !== true) return;
     // Hosts running the legacy undifferentiated retry loop advertise
     // `events: []` (per the OPENWOP_ENVELOPE_RELIABILITY_END_TO_END=false
@@ -190,6 +190,7 @@ describe.skipIf(HTTP_SKIP)('envelope-refusal-shape: advertisement contract (RFC
 import { pollUntilTerminal } from '../lib/polling.js';
 import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const E2E_FIXTURE = 'conformance-envelope-refusal';
 const E2E_NODE_ID = 'structured-call';

package/src/scenarios/envelope-rendering-hint.test.ts ADDED Viewed

@@ -0,0 +1,95 @@
+/**
+ * envelope-rendering-hint — RFC 0055 §B `meta.rendering` shape conformance.
+ *
+ * Server-free schema assertions that the optional rendering hint is exactly
+ * that — optional and additive:
+ *   1. An envelope WITH a well-formed `meta.rendering` validates.
+ *   2. An envelope WITHOUT `meta.rendering` still validates (proves the
+ *      property is optional — existing envelopes are unaffected).
+ *   3. An unknown `display` value is rejected by the closed enum (the
+ *      vocabulary is fixed; consumers fall back, producers don't invent).
+ *   4. An unknown property under `rendering` is rejected
+ *      (additionalProperties:false on the hint object).
+ *
+ * Always runs (pure on-disk Ajv2020 validation).
+ *
+ * @see RFCS/0055-multimodal-envelope-variants-and-rendering-hints.md §B
+ * @see spec/v1/ai-envelope.md §"Rendering hints"
+ * @see schemas/ai-envelope.schema.json ($defs.EnvelopeMeta.rendering)
+ */
+import { describe, it, expect } from 'vitest';
+import Ajv2020 from 'ajv/dist/2020.js';
+import addFormats from 'ajv-formats';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+/**
+ * Compile a validator for the on-disk `ai-envelope.schema.json`.
+ *
+ * The schema is read synchronously from `SCHEMAS_DIR`, parsed as JSON, and
+ * compiled with an Ajv2020 instance configured with `strict: false` and
+ * `allErrors: true`, after `addFormats` has been applied to that instance.
+ *
+ * @returns the compiled Ajv validate function for the envelope schema.
+ */
+function compileEnvelope(): ReturnType<Ajv2020['compile']> {
+  const schemaPath = join(SCHEMAS_DIR, 'ai-envelope.schema.json');
+  const envelopeSchema = JSON.parse(readFileSync(schemaPath, 'utf8')) as Record<string, unknown>;
+  const validator = new Ajv2020({ strict: false, allErrors: true });
+  addFormats(validator);
+  return validator.compile(envelopeSchema);
+}
+// Shared envelope fixture with NO `meta.rendering`. It is validated as-is by the
+// "optional" test, and spread (together with its `meta`) by the other tests,
+// which add a `rendering` object on top.
+const baseEnvelope = {
+  type: 'error',
+  schemaVersion: 1,
+  envelopeId: 'env-rendering-1',
+  correlationId: 'run-1:node-2:turn-0:abc123',
+  payload: { code: 'x', message: 'y' },
+  meta: { source: 'ai-generation' as const, ts: '2026-05-25T10:00:00Z' },
+};
+// Four schema-only checks against the compiled envelope validator: two inputs
+// that must validate (hint present / hint absent) and two that must be rejected
+// (unknown `display` value / unknown property under `rendering`).
+describe('envelope-rendering-hint: meta.rendering shape (RFC 0055 §B)', () => {
+  // Compiled once when the describe callback runs and shared by every test below.
+  const validate = compileEnvelope();
+  it('accepts an envelope carrying a well-formed meta.rendering hint', () => {
+    const env = {
+      ...baseEnvelope,
+      meta: {
+        ...baseEnvelope.meta,
+        rendering: { display: 'image', mimeType: 'image/png', alt: 'Q3 revenue chart', title: 'Revenue' },
+      },
+    };
+    const ok = validate(env);
+    // The failure message embeds the validator's error list to aid diagnosis.
+    expect(
+      ok,
+      'ai-envelope.md §"Rendering hints": ' + `meta.rendering MUST validate; errors: ${JSON.stringify(validate.errors)}`,
+    ).toBe(true);
+  });
+  it('accepts an envelope with NO meta.rendering (proves the property is optional)', () => {
+    const ok = validate(baseEnvelope);
+    expect(
+      ok,
+      'ai-envelope.md §"Rendering hints": ' + 'meta.rendering MUST be optional — envelopes omitting it still validate',
+    ).toBe(true);
+  });
+  it('rejects an unknown display value (closed enum)', () => {
+    // 'hologram' stands in for a display value outside the schema's vocabulary.
+    const env = {
+      ...baseEnvelope,
+      meta: { ...baseEnvelope.meta, rendering: { display: 'hologram' } },
+    };
+    const ok = validate(env);
+    expect(
+      ok,
+      'ai-envelope.md §"Rendering hints": ' + 'display is a closed enum — unknown families MUST be rejected',
+    ).toBe(false);
+  });
+  it('rejects an unknown property under rendering (additionalProperties:false)', () => {
+    // `wat` is an extra key added alongside `display`; the test expects validation to fail.
+    const env = {
+      ...baseEnvelope,
+      meta: { ...baseEnvelope.meta, rendering: { display: 'markdown', wat: true } },
+    };
+    const ok = validate(env);
+    expect(
+      ok,
+      'ai-envelope.md §"Rendering hints": ' + 'rendering is additionalProperties:false',
+    ).toBe(false);
+  });
+});

package/src/scenarios/envelope-retry-attempted.test.ts CHANGED Viewed

@@ -59,7 +59,7 @@ describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: advertisement shape (RFC 0
   it('capabilities.envelopes.reliability (when present) conforms to RFC 0032 §C', async () => {
     const d = await readDiscovery();
     if (d === null) return;
-    const reliability = d.capabilities?.envelopes?.reliability;
+    const reliability = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability;
     if (reliability === undefined) return;
     expect(typeof reliability.supported, 'reliability.supported MUST be boolean').toBe('boolean');
     if (reliability.events !== undefined) {
@@ -114,6 +114,7 @@ describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: advertisement shape (RFC 0
 import { pollUntilTerminal } from '../lib/polling.js';
 import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const FIXTURE = 'conformance-envelope-retry-attempted';
 const NODE_ID = 'structured-call';

package/src/scenarios/envelope-tier-one-subset-static.test.ts CHANGED Viewed

@@ -34,6 +34,7 @@ import { readFileSync, existsSync } from 'node:fs';
 import { join } from 'node:path';
 import { driver } from '../lib/driver.js';
 import { SCHEMAS_DIR } from '../lib/paths.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -179,9 +180,9 @@ describe.skipIf(HTTP_SKIP)('envelope-tier-one-subset-static (RFC 0030 §B)', ()
   it('hosts advertising tierOneSubsetCompliance: "strict" have payload schemas that satisfy the Tier-1 intersection', async () => {
     const d = await readDiscovery();
     if (d === null) return; // host unreachable; soft-skip
-    const compliance = d.capabilities?.envelopes?.tierOneSubsetCompliance;
+    const compliance = capabilityFamily(d, 'envelopes')?.tierOneSubsetCompliance;
     if (compliance !== 'strict') return; // gated on "strict" only
-    const advertised = (d.capabilities?.supportedEnvelopes ?? []) as string[];
+    const advertised = (capabilityFamily(d, 'supportedEnvelopes') ?? []) as string[];
     if (advertised.length === 0) return;
     const violationsByKind: Record<string, Violation[]> = {};

package/src/scenarios/exec-not-protocol-tier.test.ts ADDED Viewed

@@ -0,0 +1,137 @@
+/**
+ * exec-class tools MUST NOT be protocol-tier (RFC 0069, `Draft`).
+ *
+ * Always-on, server-free structural assertion over the spec corpus. Verifies
+ * the SECURITY invariant `exec-must-not-be-protocol-tier`: the protocol
+ * defines NO arbitrary-command (`exec`-class) primitive under a
+ * protocol-owned namespace (`core.*` / `openwop.*`), NO exec capability
+ * flag in `capabilities.schema.json`, and NO exec-class entry in the
+ * canonical RunEventType vocabulary.
+ *
+ * This guards against an independent implementer reading the protocol's
+ * silence as permission to ship a `core.exec` RCE primitive other hosts
+ * would treat as canonical. The assertion is against the protocol's OWN
+ * surface — it must hold for every release of the corpus regardless of
+ * which host runs it. A `vendor.acme.exec` / `x-host-acme-exec` identifier
+ * is allowed (host-extension namespace); the check fires only on
+ * protocol-owned namespaces.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/host-extensions.md §"exec-class tools"
+ *   - https://github.com/openwop/openwop/blob/main/SECURITY/threat-model-prompt-injection.md §"exec tools"
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0069-exec-class-tool-host-extension-safety-contract.md
+ */
+import { describe, it, expect } from 'vitest';
+import { readFileSync, readdirSync } from 'node:fs';
+import { join } from 'node:path';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+/**
+ * Assertion-message helper for server-free scenarios.
+ *
+ * Produces the "spec — requirement" message shape by joining the spec
+ * reference and the requirement text with an em dash surrounded by spaces.
+ *
+ * @param specRef - the spec section the assertion enforces.
+ * @param requirement - the normative statement being checked.
+ * @returns `specRef`, then " — ", then `requirement`.
+ */
+const why = (specRef: string, requirement: string): string => {
+  return [specRef, requirement].join(' — ');
+};
+/**
+ * Closed denylist of exec-class identifier *segments* (whole tokens). The
+ * check matches a protocol-owned namespaced id whose final segment IS one
+ * of these — it does NOT flag substrings like `execution` in
+ * `multi-agent-execution` or `subprocess` inside an unrelated word.
+ *
+ * Entries are stored lower-case and without hyphens: every lookup in this
+ * file lower-cases the candidate and strips hyphens before calling `has`,
+ * so a new entry must be written in that same normalized form to match.
+ */
+const EXEC_SEGMENTS = new Set([
+  'exec',
+  'shell',
+  'spawn',
+  'runcommand',
+  'runscript',
+  'subprocess',
+  'systemcall',
+  'eval',
+]);
+/**
+ * Protocol-owned namespace prefixes per host-extensions.md §"Canonical prefixes".
+ * Each entry keeps its trailing dot, so the `startsWith` test in `isExecClass`
+ * matches `core.x` but not an id such as `coreutils.x`.
+ */
+const PROTOCOL_PREFIXES = ['core.', 'openwop.'];
+/**
+ * Extract every quoted `core.*` / `openwop.*` identifier from a corpus file.
+ *
+ * An identifier counts only when a quote character (`"`, `'` or a backtick)
+ * sits immediately before the `core.` / `openwop.` prefix and another quote
+ * character follows the run of `[a-zA-Z0-9_.-]` characters.
+ *
+ * @param text - raw file contents to scan.
+ * @returns the matched identifiers in order of appearance, without their quotes.
+ */
+function protocolOwnedIds(text: string): string[] {
+  const quotedId = /["'`](core|openwop)\.[a-zA-Z0-9_.-]+["'`]/g;
+  // Every match includes its two surrounding quote characters; drop one from each end.
+  return Array.from(text.matchAll(quotedId), (hit) => hit[0].slice(1, -1));
+}
+/**
+ * Decide whether `id` is a protocol-owned identifier whose final dot-separated
+ * segment names an exec-class primitive.
+ *
+ * The final segment is lower-cased and stripped of BOTH hyphens and
+ * underscores before the denylist lookup, so `core.run-command`,
+ * `core.run_command` and `core.runCommand` are all treated alike. Stripping
+ * only hyphens would let snake_case spellings through, and
+ * `protocolOwnedIds` accepts `_` inside identifiers.
+ *
+ * @param id - candidate identifier, e.g. `core.exec` or `vendor.acme.exec`.
+ * @returns `true` only when `id` starts with a protocol-owned prefix AND its
+ *   normalized last segment is in `EXEC_SEGMENTS`; `false` otherwise
+ *   (including every id outside the protocol-owned namespaces).
+ */
+function isExecClass(id: string): boolean {
+  // Host-extension namespaces (anything not under a protocol prefix) are out of scope.
+  if (!PROTOCOL_PREFIXES.some((p) => id.startsWith(p))) return false;
+  const lastSegment = id.split('.').pop()?.toLowerCase().replace(/[-_]/g, '') ?? '';
+  return EXEC_SEGMENTS.has(lastSegment);
+}
+// Server-free checks that the schema corpus exposes no exec-class primitive:
+//   1. no quoted core.* / openwop.* identifier in any *.schema.json file,
+//   2. no exec-class property name in capabilities.schema.json,
+//   3. no exec-class entry in the RunEventType enum,
+//   4. controls proving the matcher ignores host-extension namespaces.
+describe('exec-not-protocol-tier: no exec-class primitive in the protocol corpus (RFC 0069, server-free)', () => {
+  // Single normalization used by the property-name and event-type checks:
+  // lower-case, then drop hyphens AND underscores, so kebab-case, snake_case
+  // and camelCase spellings of the same token all hit the denylist.
+  const normalizeSegment = (segment: string): string => segment.toLowerCase().replace(/[-_]/g, '');
+  it('no protocol-owned (core.* / openwop.*) identifier denotes arbitrary command execution', () => {
+    const schemaFiles = readdirSync(SCHEMAS_DIR).filter((f) => f.endsWith('.schema.json'));
+    const offenders: string[] = [];
+    for (const f of schemaFiles) {
+      const text = readFileSync(join(SCHEMAS_DIR, f), 'utf8');
+      for (const id of protocolOwnedIds(text)) {
+        if (isExecClass(id)) offenders.push(`${f}: ${id}`);
+      }
+    }
+    expect(
+      offenders,
+      why(
+        'host-extensions.md §exec-class tools',
+        'the protocol corpus MUST NOT define a core.*/openwop.* exec-class identifier',
+      ),
+    ).toEqual([]);
+  });
+  it('no capabilities.schema.json property name denotes arbitrary command execution', () => {
+    const caps = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'capabilities.schema.json'), 'utf8')) as Record<string, unknown>;
+    const offenders: string[] = [];
+    // Recursive walk over nested `properties` maps, recording the dotted path of any offending key.
+    // NOTE(review): only `properties` is descended; names declared under `$defs`,
+    // `items`, `allOf`/`anyOf`/`oneOf` or `patternProperties` are not visited —
+    // confirm whether capabilities.schema.json relies on any of those.
+    const walkProps = (node: unknown, path: string): void => {
+      if (!node || typeof node !== 'object') return;
+      const obj = node as Record<string, unknown>;
+      const props = obj.properties as Record<string, unknown> | undefined;
+      if (props) {
+        for (const key of Object.keys(props)) {
+          if (EXEC_SEGMENTS.has(normalizeSegment(key))) {
+            offenders.push(`${path}.${key}`);
+          }
+          walkProps(props[key], `${path}.${key}`);
+        }
+      }
+    };
+    walkProps(caps, 'capabilities');
+    expect(
+      offenders,
+      why('host-extensions.md §exec-class tools', 'capabilities.schema.json MUST NOT declare an exec-class capability flag'),
+    ).toEqual([]);
+  });
+  it('the canonical RunEventType vocabulary contains no exec-class event', () => {
+    const runEvent = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'run-event.schema.json'), 'utf8')) as {
+      $defs?: { RunEventType?: { enum?: string[] } };
+    };
+    // A missing $defs / RunEventType / enum yields an empty list, i.e. no offenders.
+    const enumVals = runEvent.$defs?.RunEventType?.enum ?? [];
+    // Only the final dot-separated segment of each event type is compared against the denylist.
+    const offenders = enumVals.filter((v) => EXEC_SEGMENTS.has(normalizeSegment(v.split('.').pop() ?? '')));
+    expect(
+      offenders,
+      why('host-extensions.md §exec-class tools', 'no RunEventType MUST denote arbitrary command execution'),
+    ).toEqual([]);
+  });
+  it('positive control: a vendor / x-host exec identifier is allowed (host-extension namespace)', () => {
+    expect(isExecClass('vendor.acme.exec')).toBe(false);
+    expect(isExecClass('x-host-acme-exec')).toBe(false);
+    expect(isExecClass('private.host.shell')).toBe(false);
+    // And the denylist actually fires on a protocol-owned id:
+    expect(isExecClass('core.exec')).toBe(true);
+    expect(isExecClass('openwop.shell')).toBe(true);
+    // Negative control: a benign substring is not flagged.
+    expect(isExecClass('core.workflowChain.event')).toBe(false);
+  });
+});

package/src/scenarios/experimental-tier-shape.test.ts CHANGED Viewed

@@ -29,6 +29,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
 import { experimentalGate } from '../lib/behavior-gate.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -57,7 +58,7 @@ async function readDiscovery(): Promise<DiscoveryDoc | null> {
 describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC 0042 §A)', () => {
   it('multiAgent.executionModel.tier (when present) MUST be one of {stable, experimental}', async (ctx) => {
     const d = await readDiscovery();
-    const em = d?.capabilities?.multiAgent?.executionModel;
+    const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
     if (em === undefined) {
       ctx.skip();
       return;
@@ -77,7 +78,7 @@ describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC
   it('when tier === "experimental", experimentalUntil MUST be present + valid date', async (ctx) => {
     const d = await readDiscovery();
-    const em = d?.capabilities?.multiAgent?.executionModel;
+    const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
     if (em === undefined || em.tier !== 'experimental') {
       ctx.skip();
       return;
@@ -112,7 +113,7 @@ describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC
   it('experimentalUntil MUST be ≤ 365 days in the future (sunset bound)', async (ctx) => {
     const d = await readDiscovery();
-    const em = d?.capabilities?.multiAgent?.executionModel;
+    const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
     if (em === undefined || em.tier !== 'experimental') {
       ctx.skip();
       return;
@@ -135,7 +136,7 @@ describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC
   it('sunset detection: experimentalUntil in the past is non-conformant', async (ctx) => {
     const d = await readDiscovery();
-    const em = d?.capabilities?.multiAgent?.executionModel;
+    const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
     if (em === undefined || em.tier !== 'experimental') {
       ctx.skip();
       return;

package/src/scenarios/feedback-capability-shape.test.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * feedback-capability-shape — RFC 0056 §A. The `capabilities.feedback`
+ * advertisement block is either absent or a well-formed object.
+ *
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage
+ * lives in the sibling `feedback-*.test.ts` scenarios, gated on
+ * `capabilities.feedback.supported`.
+ *
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §A
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readFeedbackCap } from '../lib/feedback.js';
+// Advertisement-shape check for `capabilities.feedback`. A null capability is
+// accepted as "not advertised". Otherwise `supported` must be a boolean, and
+// `targets` / `signals`, when they are arrays, may contain only the values
+// listed below. Every assertion carries a spec-cited failure message.
+describe('feedback-capability-shape: advertisement (RFC 0056 §A)', () => {
+  it('capabilities.feedback is absent or a well-formed object', async () => {
+    const cap = await readFeedbackCap();
+    if (cap === null) return; // not advertised — valid
+    expect(
+      typeof cap.supported,
+      driver.describe('capabilities.schema.json §feedback', 'capabilities.feedback.supported MUST be a boolean when present'),
+    ).toBe('boolean');
+    // Non-array `targets` / `signals` values are not inspected by this scenario.
+    if (Array.isArray(cap.targets)) {
+      for (const t of cap.targets) {
+        expect(
+          ['run', 'event', 'node'],
+          driver.describe('capabilities.schema.json §feedback', 'capabilities.feedback.targets entries MUST be one of run | event | node'),
+        ).toContain(t);
+      }
+    }
+    if (Array.isArray(cap.signals)) {
+      for (const s of cap.signals) {
+        expect(
+          ['rating', 'correction', 'label', 'flag'],
+          driver.describe('capabilities.schema.json §feedback', 'capabilities.feedback.signals entries MUST be one of rating | correction | label | flag'),
+        ).toContain(s);
+      }
+    }
+  });
+});

package/src/scenarios/feedback-correction-redaction.test.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * feedback-correction-redaction — RFC 0056 §E + SECURITY/invariants.yaml
+ * `annotation-content-redaction`. `signal.correction` and `note` are
+ * untrusted user content; secret-shaped material MUST be redacted under
+ * SR-1 before persistence/listing/export.
+ *
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §E
+ * @see SECURITY/invariants.yaml — annotation-content-redaction
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readFeedbackCap, seedRun } from '../lib/feedback.js';
+// Secret-shaped marker string submitted in the annotation; it must not appear
+// anywhere in the serialized annotation list afterwards.
+const CANARY = 'sk-canary-rfc0056-do-not-leak-abc123';
+describe('feedback-correction-redaction (RFC 0056 §E)', () => {
+  it('secret-shaped material in correction/note is redacted in the annotation list', async () => {
+    // Soft-skip unless the feedback capability reports supported === true.
+    const cap = await readFeedbackCap();
+    if (cap?.supported !== true) return;
+    // Soft-skip when no run id comes back from seeding.
+    const runId = await seedRun('feedback-redact');
+    if (!runId) return;
+    // The canary is placed in both fields: embedded in the correction text and as the entire note.
+    const post = await driver.post(`/v1/runs/${runId}/annotations`, {
+      signal: { kind: 'correction', correction: `please use ${CANARY}` },
+      note: CANARY,
+    });
+    // 501 / 404 are treated as "endpoint not available" and soft-skip; any other status must be 201.
+    if (post.status === 501 || post.status === 404) return;
+    expect(post.status).toBe(201);
+    const list = await driver.get(`/v1/runs/${runId}/annotations`);
+    // Substring search over the whole serialized list body, so the canary is caught in any field.
+    // NOTE(review): the list response status is not asserted; when `list.json` is
+    // null/undefined an empty object is serialized and this check passes — confirm
+    // that a failed list call is meant to pass here.
+    expect(
+      JSON.stringify(list.json ?? {}).includes(CANARY),
+      driver.describe('RFC 0056 §E', 'secret-shaped material MUST be redacted before persistence/listing (SR-1)'),
+    ).toBe(false);
+  });
+});

package/src/scenarios/feedback-cross-tenant-isolation.test.ts ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * feedback-cross-tenant-isolation — RFC 0056 §E + SECURITY/invariants.yaml
+ * `annotation-cross-tenant-isolation`. A run's annotation list MUST contain
+ * only that run's annotations (mirrors CTI-1).
+ *
+ * The run-scoped check runs against any feedback host. The full cross-tenant
+ * proof (tenant B cannot read tenant A's run) needs a multi-tenant auth seam
+ * not yet standardized for this surface — that half soft-skips, mirroring
+ * `kv-cross-tenant-isolation`'s seam gate.
+ *
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §E
+ * @see SECURITY/invariants.yaml — annotation-cross-tenant-isolation
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readFeedbackCap, seedRun } from '../lib/feedback.js';
+// Run-scoped half of the isolation check: posts one label annotation to a
+// freshly seeded run, lists that run's annotations, and requires every listed
+// annotation to target the same run id.
+describe('feedback-cross-tenant-isolation (RFC 0056 §E)', () => {
+  it('a run\'s annotation list contains only that run\'s annotations', async () => {
+    // Soft-skip unless the feedback capability reports supported === true.
+    const cap = await readFeedbackCap();
+    if (cap?.supported !== true) return;
+    // Soft-skip when no run id comes back from seeding.
+    const runId = await seedRun('feedback-cti');
+    if (!runId) return;
+    const post = await driver.post(`/v1/runs/${runId}/annotations`, { signal: { kind: 'label', label: 'cti-probe' } });
+    // 501 / 404 are treated as "endpoint not available" and soft-skip; any other status must be 201.
+    if (post.status === 501 || post.status === 404) return;
+    expect(post.status).toBe(201);
+    const list = await driver.get(`/v1/runs/${runId}/annotations`);
+    // A missing body or missing `annotations` key is treated as an empty list.
+    const ann = (list.json as { annotations?: Array<{ target?: { runId?: string } }> } | undefined)?.annotations ?? [];
+    // NOTE(review): when `ann` is empty the loop body never runs and the test
+    // passes without asserting anything, even though an annotation was just
+    // created — confirm whether a non-empty list should be required here.
+    for (const a of ann) {
+      expect(
+        a.target?.runId,
+        driver.describe('RFC 0056 §E', 'an annotation list MUST contain only this run\'s annotations (CTI-1)'),
+      ).toBe(runId);
+    }
+  });
+});