npm - @openwop/openwop-conformance - Versions diffs - 1.6.1 → 1.10.0 - Mend

@openwop/openwop-conformance 1.6.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (159) hide show

package/src/scenarios/run-execution-bounds-shape.test.ts ADDED Viewed

@@ -0,0 +1,133 @@
+/**
+ * run-execution-bounds-shape — RFC 0058 advertisement-shape + breach-contract
+ * verification for the two run-scoped execution bounds.
+ *
+ * Status: ACTIVE. RFC 0058 (run execution bounds) is `Active`. The
+ * `capabilities.limits.{maxRunDurationMs,maxLoopIterations}` fields and the
+ * `run-duration` / `loop-iterations` kinds on `cap.breached` have landed in
+ * `schemas/capabilities.schema.json` + `schemas/run-event-payloads.schema.json`.
+ *
+ * Always runs (shape-only): when the host advertises either limit, its value
+ * MUST be well-formed. Behavior is capability- AND fixture-gated. The
+ * `run-duration` (wall-clock timeout) block is now enforced + green against the
+ * in-memory reference host. The `loop-iterations` behavior check is deferred (this
+ * file contains no such block yet) until an execution-loop host advertises
+ * `multiAgent.executionModel` (RFC 0061), mirroring the RFC 0052 scheduling pattern.
+ *
+ * What this scenario asserts:
+ *   1. `capabilities.limits.maxRunDurationMs`, when present, is an integer ≥ 1000.
+ *   2. `capabilities.limits.maxLoopIterations`, when present, is an integer ≥ 1.
+ *   3. (gated) A run with `configurable.runTimeoutMs` below its real duration
+ *      reaches terminal `failed` with `error.code = "run_timeout"` and emits
+ *      `cap.breached { kind: "run-duration" }` whose `observed > limit`.
+ *
+ * @see RFCS/0058-run-execution-bounds.md
+ * @see spec/v1/run-options.md §Reserved keys (runTimeoutMs / maxLoopIterations)
+ * @see spec/v1/capabilities.md §"Engine-enforced limits and the cap.breached event"
+ * @see schemas/run-event-payloads.schema.json §capBreached
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
+interface DiscoveryLimits {
+  maxRunDurationMs?: number;
+  maxLoopIterations?: number;
+}
+interface DiscoveryDoc {
+  capabilities?: { limits?: DiscoveryLimits };
+}
+interface RunEvent {
+  readonly type: string;
+  readonly sequence: number;
+  readonly payload?: unknown;
+}
+const TIMEOUT_FIXTURE = 'conformance-run-duration-breach';
+async function readLimits(): Promise<DiscoveryLimits | null> {
+  const res = await driver.get('/.well-known/openwop');
+  const body = res.json as DiscoveryDoc | undefined;
+  return capabilityFamily(body, 'limits') ?? null;
+}
+describe('run-execution-bounds-shape: advertisement shape (RFC 0058)', () => {
+  it('maxRunDurationMs is an integer >= 1000 when present', async () => {
+    const limits = await readLimits();
+    if (limits?.maxRunDurationMs === undefined) return; // not advertised
+    expect(
+      Number.isInteger(limits.maxRunDurationMs) && limits.maxRunDurationMs >= 1000,
+      driver.describe(
+        'capabilities.schema.json §limits.maxRunDurationMs',
+        `capabilities.limits.maxRunDurationMs MUST be an integer >= 1000, got: ${limits.maxRunDurationMs}`,
+      ),
+    ).toBe(true);
+  });
+  it('maxLoopIterations is an integer >= 1 when present', async () => {
+    const limits = await readLimits();
+    if (limits?.maxLoopIterations === undefined) return; // not advertised
+    expect(
+      Number.isInteger(limits.maxLoopIterations) && limits.maxLoopIterations >= 1,
+      driver.describe(
+        'capabilities.schema.json §limits.maxLoopIterations',
+        `capabilities.limits.maxLoopIterations MUST be an integer >= 1, got: ${limits.maxLoopIterations}`,
+      ),
+    ).toBe(true);
+  });
+});
+// Behavior: capability- AND fixture-gated (the gate below checks the fixture). Skips on
+// hosts that do not advertise it; the in-memory reference host now enforces this bound.
+const SKIP_TIMEOUT = !isFixtureAdvertised(TIMEOUT_FIXTURE);
+describe.skipIf(SKIP_TIMEOUT)('run-execution-bounds: run-duration breach (RFC 0058)', () => {
+  it('a run with runTimeoutMs below its real duration fails with run_timeout + cap.breached{run-duration}', async () => {
+    const create = await driver.post('/v1/runs', {
+      workflowId: TIMEOUT_FIXTURE,
+      configurable: { runTimeoutMs: 1000 },
+    });
+    expect(create.status, driver.describe(
+      'rest-endpoints.md POST /v1/runs',
+      'run creation MUST accept a runTimeoutMs override',
+    )).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const terminal = await pollUntilTerminal(runId);
+    expect(terminal.status, driver.describe(
+      'run-options.md §runTimeoutMs',
+      'a run exceeding its runTimeoutMs MUST reach terminal `failed`',
+    )).toBe('failed');
+    expect(terminal.error?.code, driver.describe(
+      'rest-endpoints.md §run_timeout',
+      'RunSnapshot.error.code MUST equal "run_timeout" on wall-clock timeout',
+    )).toBe('run_timeout');
+    const eventsRes = await driver.get(
+      `/v1/runs/${encodeURIComponent(runId)}/events/poll?lastSequence=0&timeout=1`,
+    );
+    const events = (eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? [];
+    const breach = events.find((e) => e.type === 'cap.breached');
+    expect(breach, driver.describe(
+      'capabilities.md §Engine-enforced limits',
+      'a cap.breached event MUST be emitted on run-duration breach',
+    )).toBeDefined();
+    const payload = breach!.payload as { kind?: string; limit?: number; observed?: number } | undefined;
+    expect(payload?.kind, driver.describe(
+      'run-event-payloads.schema.json §capBreached.kind',
+      'cap.breached payload MUST carry kind="run-duration"',
+    )).toBe('run-duration');
+    expect(
+      typeof payload?.observed === 'number' && typeof payload?.limit === 'number' && payload!.observed > payload!.limit,
+      driver.describe(
+        'run-event-payloads.schema.json §capBreached.observed',
+        'observed (elapsedMs) MUST be strictly greater than limit (resolved timeout)',
+      ),
+    ).toBe(true);
+  });
+});

package/src/scenarios/sandbox-memory-cap.test.ts CHANGED Viewed

@@ -15,6 +15,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -26,7 +27,7 @@ async function readSandbox(): Promise<{ supported: boolean; memoryLimitBytes?: n
   try {
     const r = await driver.get('/.well-known/openwop');
     if (r.status !== 200) return null;
-    const sb = (r.json as D).capabilities?.sandbox;
+    const sb = capabilityFamily((r.json as D), 'sandbox');
     if (!sb || sb.supported !== true) return null;
     return {
       supported: true,

package/src/scenarios/sandbox-mvp-behavior.test.ts CHANGED Viewed

@@ -37,6 +37,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -66,7 +67,7 @@ async function isSandboxAdvertised(): Promise<boolean> {
   try {
     const res = await driver.get('/.well-known/openwop');
     if (res.status !== 200) return false;
-    return (res.json as DiscoveryDoc).capabilities?.sandbox?.supported === true;
+    return capabilityFamily((res.json as DiscoveryDoc), 'sandbox')?.supported === true;
   } catch {
     return false;
   }

package/src/scenarios/sandbox-no-host-fs-escape.test.ts CHANGED Viewed

@@ -26,6 +26,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -45,7 +46,7 @@ async function readSandboxCaps(): Promise<SandboxCaps | null> {
   try {
     const res = await driver.get('/.well-known/openwop');
     if (res.status !== 200) return null;
-    return (res.json as DiscoveryDoc).capabilities?.sandbox ?? null;
+    return capabilityFamily((res.json as DiscoveryDoc), 'sandbox') ?? null;
   } catch {
     return null;
   }

package/src/scenarios/sandbox-timeout-cap.test.ts CHANGED Viewed

@@ -15,6 +15,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -26,7 +27,7 @@ async function readSandbox(): Promise<{ supported: boolean; wallClockLimitMs?: n
   try {
     const r = await driver.get('/.well-known/openwop');
     if (r.status !== 200) return null;
-    const sb = (r.json as D).capabilities?.sandbox;
+    const sb = capabilityFamily((r.json as D), 'sandbox');
     if (!sb || sb.supported !== true) return null;
     return {
       supported: true,

package/src/scenarios/scheduling-capability-shape.test.ts CHANGED Viewed

@@ -20,6 +20,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 interface DiscoveryScheduling {
   supported?: boolean;
@@ -39,7 +40,7 @@ const ISO_DURATION = /^P(?:\d+Y)?(?:\d+M)?(?:\d+W)?(?:\d+D)?(?:T(?:\d+H)?(?:\d+M
 async function readScheduling(): Promise<DiscoveryScheduling | null> {
   const res = await driver.get('/.well-known/openwop');
   const body = res.json as DiscoveryDoc | undefined;
-  return body?.capabilities?.scheduling ?? null;
+  return capabilityFamily(body, 'scheduling') ?? null;
 }
 describe('scheduling-capability-shape: advertisement shape (RFC 0052 §A)', () => {

package/src/scenarios/scheduling-cron-fires-once.test.ts CHANGED Viewed

@@ -26,6 +26,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 interface DiscoveryDoc {
   capabilities?: { scheduling?: { supported?: boolean; cron?: boolean } };
@@ -33,7 +34,7 @@ interface DiscoveryDoc {
 async function readScheduling(): Promise<{ supported?: boolean; cron?: boolean } | null> {
   const res = await driver.get('/.well-known/openwop');
-  return (res.json as DiscoveryDoc | undefined)?.capabilities?.scheduling ?? null;
+  return capabilityFamily((res.json as DiscoveryDoc | undefined), 'scheduling') ?? null;
 }
 describe('scheduling-cron-fires-once: once-per-tick + missed-tick (RFC 0052 §B)', () => {

package/src/scenarios/secret-leakage-otel-attribute.test.ts CHANGED Viewed

@@ -55,6 +55,7 @@ import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
 import { pollUntilTerminal } from '../lib/polling.js';
 import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
 const BYOK_WORKFLOW_ID = 'openwop-smoke-byok-roundtrip';
@@ -99,8 +100,8 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
         return;
       }
       const d = await readDiscovery();
-      const secretsOk = d?.capabilities?.secrets?.supported === true;
-      const seamOk = d?.capabilities?.observability?.testSeams?.otelScrape === true;
+      const secretsOk = capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported === true;
+      const seamOk = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.otelScrape === true;
       if (!secretsOk || !seamOk) {
         ctx.skip();
         return;
@@ -168,8 +169,8 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
         return;
       }
       const d = await readDiscovery();
-      const secretsOk = d?.capabilities?.secrets?.supported === true;
-      const seamOk = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
+      const secretsOk = capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported === true;
+      const seamOk = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.debugBundleExport === true;
       if (!secretsOk || !seamOk) {
         ctx.skip();
         return;
@@ -209,11 +210,11 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
   () => {
     it('when secrets.supported is true, observability.testSeams advertisements MUST be boolean if present', async (ctx) => {
       const d = await readDiscovery();
-      if (d?.capabilities?.secrets?.supported !== true) {
+      if (capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported !== true) {
         ctx.skip();
         return;
       }
-      const seams = d?.capabilities?.observability?.testSeams;
+      const seams = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams;
       if (seams === undefined) {
         ctx.skip(); // host honest about not exposing the seams — Drift #17 path
         return;

package/src/scenarios/spec-corpus-validity.test.ts CHANGED Viewed

@@ -1019,7 +1019,7 @@ describe('spec-corpus: AsyncAPI 3.1 spec is structurally valid', () => {
     // `run.annotated` (RFC 0056) is a live SSE notification carrying an
     // Annotation — NOT a RunEventDoc and deliberately NOT in the RunEventType
     // enum (annotations are a side-resource, excluded from fork/replay).
-    const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any', 'run.annotated']);
+    const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any', 'run.annotated', 'heartbeat.evaluated', 'heartbeat.stateChanged']);
     expect(messageNames.length, 'AsyncAPI MUST declare named SSE messages').toBeGreaterThan(0);

package/src/scenarios/subrun-approval-fail-closed.test.ts ADDED Viewed

@@ -0,0 +1,33 @@
+/**
+ * subrun-approval-fail-closed — RFC 0063 §C. A parent that terminates or whose
+ * approval interrupt expires WITHOUT an `accept`/`edit-accept` MUST NOT merge the
+ * child outputs. Absence of an approval is denial — backs the proposed
+ * protocol-tier SECURITY invariant `subrun-merge-approval-fail-closed` (lands
+ * with this test promoted to load-bearing at reference-host implementation).
+ *
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §C
+ * @see SECURITY/invariants.yaml — subrun-merge-approval-fail-closed (lands at impl)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
+describe('subrun-approval-fail-closed (RFC 0063 §C)', () => {
+  it('no accept/edit-accept (terminated or expired) MUST NOT merge', async () => {
+    if ((await readSubRunAttestationCap()) !== true) return;
+    // approvalAction omitted models a run that terminated without a response.
+    const res = await invokeSubRunAttest({
+      childOutputs: { artifact: 'unverified' },
+      outputAttestation: { requireApproval: true },
+    });
+    if (res === null) return; // seam absent — soft-skip
+    expect(
+      res.merged,
+      driver.describe('RFC 0063 §C', 'an unresolved approval MUST fail closed — outputs MUST NOT be merged'),
+    ).toBe(false);
+  });
+});

package/src/scenarios/subrun-approval-gate.test.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * subrun-approval-gate — RFC 0063 §C. When `requireApproval: true`, the host
+ * suspends before merge; `accept` merges the child outputs, `reject` does not.
+ *
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §C
+ * @see spec/v1/interrupt.md — `approval` kind + resume actions (RFC 0051, reused)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
+describe('subrun-approval-gate (RFC 0063 §C)', () => {
+  it('accept merges the child outputs; reject does not', async () => {
+    if ((await readSubRunAttestationCap()) !== true) return;
+    const base = { childOutputs: { artifact: 'x' }, outputAttestation: { requireApproval: true } };
+    const accepted = await invokeSubRunAttest({ ...base, approvalAction: 'accept' });
+    if (accepted === null) return; // seam absent — soft-skip
+    expect(
+      accepted.merged,
+      driver.describe('RFC 0063 §C', 'an `accept` approval MUST merge the child outputs'),
+    ).toBe(true);
+    const rejected = await invokeSubRunAttest({ ...base, approvalAction: 'reject' });
+    if (rejected === null) return;
+    expect(
+      rejected.merged,
+      driver.describe('RFC 0063 §C', 'a `reject` approval MUST NOT merge the child outputs'),
+    ).toBe(false);
+  });
+});

package/src/scenarios/subrun-attestation-shape.test.ts ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * subrun-attestation-shape — RFC 0063 §A. The `capabilities.agents.subRunAttestation`
+ * advertisement flag is either absent or a boolean.
+ *
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage lives
+ * in the sibling subrun-*.test.ts scenarios, gated on the flag + the host
+ * sub-run attestation seam.
+ *
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §A
+ * @see spec/v1/node-packs.md §"`outputAttestation` — verify-before-merge"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readSubRunAttestationCap } from '../lib/subRunAttestation.js';
+describe('subrun-attestation-shape: advertisement (RFC 0063 §A)', () => {
+  it('capabilities.agents.subRunAttestation is absent or a boolean', async () => {
+    const cap = await readSubRunAttestationCap();
+    // null = unadvertised (no agents block OR flag omitted) — valid.
+    if (cap === null) return;
+    expect(
+      typeof cap,
+      driver.describe(
+        'capabilities.schema.json §agents.subRunAttestation',
+        'agents.subRunAttestation MUST be a boolean when present',
+      ),
+    ).toBe('boolean');
+  });
+});

package/src/scenarios/subrun-checksum-stable.test.ts ADDED Viewed

@@ -0,0 +1,43 @@
+/**
+ * subrun-checksum-stable — RFC 0063 §B. A child's output checksum is byte-stable
+ * for identical outputs and host-independent (the RFC 8785 JCS + SHA-256 recipe
+ * pinned in replay.md), and is surfaced as the `attestation` object on the
+ * existing `core.workflowChain.event { phase: 'output.harvested' }`.
+ *
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §B
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
+describe('subrun-checksum-stable (RFC 0063 §B)', () => {
+  it('identical child outputs produce an identical sha256 attestation checksum', async () => {
+    if ((await readSubRunAttestationCap()) !== true) return;
+    const childOutputs = { report: 'done', score: 0.9, tags: ['a', 'b'] };
+    const a = await invokeSubRunAttest({ childOutputs, outputAttestation: { checksum: true } });
+    if (a === null) return; // seam absent — soft-skip
+    // Key-reordered but value-identical: JCS canonicalization MUST yield the same hash.
+    const b = await invokeSubRunAttest({
+      childOutputs: { tags: ['a', 'b'], score: 0.9, report: 'done' },
+      outputAttestation: { checksum: true },
+    });
+    if (b === null) return;
+    const att = a.attestation ?? {};
+    expect(
+      typeof att.checksum === 'string' && (att.checksum as string).length > 0,
+      driver.describe('RFC 0063 §B', 'output.harvested MUST carry a non-empty attestation.checksum when checksum:true'),
+    ).toBe(true);
+    expect(
+      att.algorithm,
+      driver.describe('RFC 0063 §B', 'attestation.algorithm MUST be "sha256" (the v1 recipe)'),
+    ).toBe('sha256');
+    expect(
+      (b.attestation ?? {}).checksum,
+      driver.describe('RFC 0063 §B', 'JCS canonicalization MUST make the checksum invariant to key order — same content, same hash'),
+    ).toBe(att.checksum);
+  });
+});

package/src/scenarios/tool-hooks-authorization-fail-closed.test.ts ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * tool-hooks-authorization-fail-closed — RFC 0064 §C. A principal lacking a
+ * tool's required scope (or whose authorization cannot be evaluated) gets
+ * `agent.toolReturned { status: 'forbidden' }` and the tool is never invoked —
+ * the per-tool application of RFC 0049's `authorization-fail-closed` invariant.
+ *
+ * Gated on `capabilities.toolHooks.perToolAuthorization` + the host tool-hooks
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §C
+ * @see SECURITY/invariants.yaml — authorization-fail-closed (RFC 0049, reused)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
+describe('tool-hooks-authorization-fail-closed (RFC 0064 §C)', () => {
+  it('a principal lacking a tool scope is denied and the tool is not invoked', async () => {
+    const cap = await readToolHooksCap();
+    if (cap?.perToolAuthorization !== true) return;
+    // A principal with no scopes against a tool requiring one MUST be denied.
+    const res = await invokeToolHook({
+      principal: 'conformance-unprivileged',
+      toolName: 'db.delete',
+      requiredScopes: ['db:write'],
+      args: {},
+    });
+    if (res === null) return; // seam absent — soft-skip
+    expect(
+      (res.toolReturned ?? {}).status,
+      driver.describe('RFC 0064 §C', 'a missing/unevaluable tool scope MUST fail closed → status:"forbidden"'),
+    ).toBe('forbidden');
+    expect(
+      (res.toolReturned ?? {}).durationMs,
+      driver.describe('RFC 0064 §C', 'a forbidden call never starts, so durationMs MUST be absent'),
+    ).toBeUndefined();
+  });
+});

package/src/scenarios/tool-hooks-content-free.test.ts ADDED Viewed

@@ -0,0 +1,40 @@
+/**
+ * tool-hooks-content-free — RFC 0064 §B. When `prePostEvents` is advertised, a
+ * tool call's `agent.toolCalled` carries `argsHash` (the content-free, SIEM-safe
+ * alternative to raw `inputs`) and `agent.toolReturned` carries `status`, plus
+ * `durationMs` (asserted here only when `status` is `ok`).
+ *
+ * Gated on `capabilities.toolHooks.prePostEvents` + the host tool-hooks seam;
+ * soft-skips when either is absent.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
+describe('tool-hooks-content-free (RFC 0064 §B)', () => {
+  it('toolCalled carries argsHash; toolReturned carries status + durationMs', async () => {
+    const cap = await readToolHooksCap();
+    if (cap?.prePostEvents !== true) return;
+    const res = await invokeToolHook({ principal: 'core.system', toolName: 'web.search', args: { q: 'openwop' } });
+    if (res === null) return; // seam absent — soft-skip
+    const called = res.toolCalled ?? {};
+    const returned = res.toolReturned ?? {};
+    expect(
+      typeof called.argsHash === 'string' && (called.argsHash as string).length > 0,
+      driver.describe('RFC 0064 §B', 'agent.toolCalled MUST carry a non-empty argsHash when prePostEvents'),
+    ).toBe(true);
+    expect(
+      ['ok', 'error', 'forbidden', 'rate_limited'].includes(returned.status as string),
+      driver.describe('RFC 0064 §B', 'agent.toolReturned MUST carry a tool-hooks status'),
+    ).toBe(true);
+    if (returned.status === 'ok') {
+      expect(
+        typeof returned.durationMs === 'number' && (returned.durationMs as number) >= 0,
+        driver.describe('RFC 0064 §B', 'a completed tool call MUST record a non-negative durationMs'),
+      ).toBe(true);
+    }
+  });
+});

package/src/scenarios/tool-hooks-rate-limit.test.ts ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * tool-hooks-rate-limit — RFC 0064 §D. Exhausting a `(principal, tool)` token
+ * bucket → `agent.toolReturned { status: 'rate_limited' }` and the tool is not
+ * invoked, surfacing the existing `rate_limited` (429) error.
+ *
+ * Gated on `capabilities.toolHooks.perToolRateLimit` + the host tool-hooks
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §D
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
+describe('tool-hooks-rate-limit (RFC 0064 §D)', () => {
+  it('an exhausted (principal, tool) bucket yields status:"rate_limited"', async () => {
+    const cap = await readToolHooksCap();
+    if (cap?.perToolRateLimit !== true) return;
+    const res = await invokeToolHook({
+      principal: 'core.system',
+      toolName: 'web.search',
+      args: { q: 'x' },
+      simulateRateLimitExhausted: true,
+    });
+    if (res === null) return; // seam absent — soft-skip
+    expect(
+      (res.toolReturned ?? {}).status,
+      driver.describe('RFC 0064 §D', 'an exhausted token bucket MUST yield status:"rate_limited" without invoking the tool'),
+    ).toBe('rate_limited');
+  });
+});

package/src/scenarios/tool-hooks-secret-redaction.test.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * tool-hooks-secret-redaction — RFC 0064 §B/§E. A tool arg containing a
+ * resolved secret is redacted (SR-1) before hashing; the raw value never
+ * appears in `argsHash` or anywhere in the emitted `agent.toolCalled` /
+ * `agent.toolReturned` pair.
+ *
+ * Gated on `capabilities.toolHooks.prePostEvents` + the host tool-hooks seam;
+ * soft-skips when either is absent.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B (argsHash SR-1 redaction), §E (credentials)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
+const CANARY = 'sk-canary-rfc0064-do-not-leak-xyz789';
+describe('tool-hooks-secret-redaction (RFC 0064 §E)', () => {
+  it('a secret-shaped tool arg never appears in the emitted events', async () => {
+    const cap = await readToolHooksCap();
+    if (cap?.prePostEvents !== true) return;
+    const res = await invokeToolHook({
+      principal: 'core.system',
+      toolName: 'web.search',
+      args: { apiKey: CANARY, q: 'openwop' },
+    });
+    if (res === null) return; // seam absent — soft-skip
+    expect(
+      JSON.stringify(res).includes(CANARY),
+      driver.describe('RFC 0064 §B', 'a resolved secret MUST be redacted before hashing; the raw value MUST NOT appear in argsHash or any emitted field (SR-1)'),
+    ).toBe(false);
+  });
+});

package/src/scenarios/tool-hooks-shape.test.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * tool-hooks-shape — RFC 0064 §A. The `capabilities.toolHooks` advertisement
+ * block is either absent or a well-formed object.
+ *
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage lives
+ * in the sibling tool-hooks-*.test.ts scenarios, gated on the sub-flags + the
+ * host tool-hooks seam.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §A
+ * @see spec/v1/host-capabilities.md §host.toolHooks
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap } from '../lib/toolHooks.js';
+describe('tool-hooks-shape: advertisement (RFC 0064 §A)', () => {
+  it('capabilities.toolHooks is absent or a well-formed object', async () => {
+    const cap = await readToolHooksCap();
+    if (cap === null) return; // not advertised — valid
+    expect(
+      typeof cap.supported,
+      driver.describe('capabilities.schema.json §toolHooks', 'toolHooks.supported MUST be a boolean when the block is present'),
+    ).toBe('boolean');
+    for (const k of ['prePostEvents', 'perToolAuthorization', 'perToolRateLimit'] as const) {
+      if (cap[k] !== undefined) {
+        expect(
+          typeof cap[k],
+          driver.describe('capabilities.schema.json §toolHooks', `toolHooks.${k} MUST be a boolean when present`),
+        ).toBe('boolean');
+      }
+    }
+  });
+});

package/src/scenarios/wasm-pack-abi-version-rejection.test.ts CHANGED Viewed

@@ -26,6 +26,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const MISBEHAVING_PACK_NAME = 'vendor.openwop.misbehaving-abi';
 const WELL_BEHAVED_PACK_NAME = 'vendor.openwop.rust-hello';
@@ -34,9 +35,7 @@ describe('wasm-pack-abi-version-rejection: host advertises supported ABI version
   it('abiVersions[] contains positive integers; loader rejects unsupported versions', async () => {
     const disco = await driver.get('/.well-known/openwop');
     const wasm =
-      (disco.json as {
-        capabilities?: { nodePackRuntimes?: { wasm?: { supported?: boolean; abiVersions?: unknown } } };
-      }).capabilities?.nodePackRuntimes?.wasm;
+      capabilityFamily<{ wasm?: Record<string, unknown> }>(disco.json, 'nodePackRuntimes')?.wasm;
     if (!wasm?.supported) return;
@@ -62,13 +61,7 @@ describe('wasm-pack-abi-version-rejection: positive path via misbehaving pack',
   it('misbehaving-abi pack (declares ABI 999) MUST NOT appear in loadedPacks[]', async () => {
     const disco = await driver.get('/.well-known/openwop');
     const wasm =
-      (disco.json as {
-        capabilities?: {
-          nodePackRuntimes?: {
-            wasm?: { supported?: boolean; loadedPacks?: unknown };
-          };
-        };
-      }).capabilities?.nodePackRuntimes?.wasm;
+      capabilityFamily<{ wasm?: Record<string, unknown> }>(disco.json, 'nodePackRuntimes')?.wasm;
     if (!wasm?.supported) return;