npm - @openwop/openwop-conformance - Versions diffs - 1.6.1 → 1.11.0 - Mend

@openwop/openwop-conformance 1.6.1 → 1.11.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Files changed (200) hide show

package/src/scenarios/sandbox-mvp-behavior.test.ts CHANGED Viewed

@@ -37,6 +37,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -66,7 +67,7 @@ async function isSandboxAdvertised(): Promise<boolean> {
   try {
     const res = await driver.get('/.well-known/openwop');
     if (res.status !== 200) return false;
-    return (res.json as DiscoveryDoc).capabilities?.sandbox?.supported === true;
+    return capabilityFamily((res.json as DiscoveryDoc), 'sandbox')?.supported === true;
   } catch {
     return false;
   }

package/src/scenarios/sandbox-no-host-fs-escape.test.ts CHANGED Viewed

@@ -26,6 +26,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -45,7 +46,7 @@ async function readSandboxCaps(): Promise<SandboxCaps | null> {
   try {
     const res = await driver.get('/.well-known/openwop');
     if (res.status !== 200) return null;
-    return (res.json as DiscoveryDoc).capabilities?.sandbox ?? null;
+    return capabilityFamily((res.json as DiscoveryDoc), 'sandbox') ?? null;
   } catch {
     return null;
   }

package/src/scenarios/sandbox-timeout-cap.test.ts CHANGED Viewed

@@ -15,6 +15,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -26,7 +27,7 @@ async function readSandbox(): Promise<{ supported: boolean; wallClockLimitMs?: n
   try {
     const r = await driver.get('/.well-known/openwop');
     if (r.status !== 200) return null;
-    const sb = (r.json as D).capabilities?.sandbox;
+    const sb = capabilityFamily((r.json as D), 'sandbox');
     if (!sb || sb.supported !== true) return null;
     return {
       supported: true,

package/src/scenarios/scheduling-capability-shape.test.ts CHANGED Viewed

@@ -20,6 +20,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 interface DiscoveryScheduling {
   supported?: boolean;
@@ -39,7 +40,7 @@ const ISO_DURATION = /^P(?:\d+Y)?(?:\d+M)?(?:\d+W)?(?:\d+D)?(?:T(?:\d+H)?(?:\d+M
 async function readScheduling(): Promise<DiscoveryScheduling | null> {
   const res = await driver.get('/.well-known/openwop');
   const body = res.json as DiscoveryDoc | undefined;
-  return body?.capabilities?.scheduling ?? null;
+  return capabilityFamily(body, 'scheduling') ?? null;
 }
 describe('scheduling-capability-shape: advertisement shape (RFC 0052 §A)', () => {

package/src/scenarios/scheduling-cron-fires-once.test.ts CHANGED Viewed

@@ -26,6 +26,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 interface DiscoveryDoc {
   capabilities?: { scheduling?: { supported?: boolean; cron?: boolean } };
@@ -33,7 +34,7 @@ interface DiscoveryDoc {
 async function readScheduling(): Promise<{ supported?: boolean; cron?: boolean } | null> {
   const res = await driver.get('/.well-known/openwop');
-  return (res.json as DiscoveryDoc | undefined)?.capabilities?.scheduling ?? null;
+  return capabilityFamily((res.json as DiscoveryDoc | undefined), 'scheduling') ?? null;
 }
 describe('scheduling-cron-fires-once: once-per-tick + missed-tick (RFC 0052 §B)', () => {

package/src/scenarios/secret-leakage-otel-attribute.test.ts CHANGED Viewed

@@ -55,6 +55,7 @@ import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
 import { pollUntilTerminal } from '../lib/polling.js';
 import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
 const BYOK_WORKFLOW_ID = 'openwop-smoke-byok-roundtrip';
@@ -99,8 +100,8 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
         return;
       }
       const d = await readDiscovery();
-      const secretsOk = d?.capabilities?.secrets?.supported === true;
-      const seamOk = d?.capabilities?.observability?.testSeams?.otelScrape === true;
+      const secretsOk = capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported === true;
+      const seamOk = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.otelScrape === true;
       if (!secretsOk || !seamOk) {
         ctx.skip();
         return;
@@ -168,8 +169,8 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
         return;
       }
       const d = await readDiscovery();
-      const secretsOk = d?.capabilities?.secrets?.supported === true;
-      const seamOk = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
+      const secretsOk = capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported === true;
+      const seamOk = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.debugBundleExport === true;
       if (!secretsOk || !seamOk) {
         ctx.skip();
         return;
@@ -209,11 +210,11 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
   () => {
     it('when secrets.supported is true, observability.testSeams advertisements MUST be boolean if present', async (ctx) => {
       const d = await readDiscovery();
-      if (d?.capabilities?.secrets?.supported !== true) {
+      if (capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported !== true) {
         ctx.skip();
         return;
       }
-      const seams = d?.capabilities?.observability?.testSeams;
+      const seams = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams;
       if (seams === undefined) {
         ctx.skip(); // host honest about not exposing the seams — Drift #17 path
         return;

package/src/scenarios/spec-corpus-validity.test.ts CHANGED Viewed

@@ -384,16 +384,32 @@ function extractReadmeDocumentIndex(readme: string): string {
   return readme.slice(start, end);
 }
-function listMarkdownFilesRecursive(dir: string): string[] {
+function listMarkdownFilesRecursive(dir: string, repoRoot: string = dir): string[] {
   const ignoredDirs = new Set(['.git', 'node_modules', 'dist']);
+  // Repo-relative directory paths to prune. These are subtrees whose
+  // content shouldn't be link-checked because either (a) they're
+  // generated build output (`site/out`) or (b) they're a vendored
+  // mirror of a canonical source whose READMEs use links relative to
+  // the canonical path, not the vendored path:
+  //
+  //  - `apps/workflow-engine/packs/` mirrors repo-root `packs/`, synced
+  //    via `apps/workflow-engine/scripts/sync-packs.sh` so the Cloud
+  //    Run image's `apps/workflow-engine/` build context can ship them.
+  //    Pack READMEs use `../../RFCS/...` / `../../spec/v1/...` links
+  //    that resolve from the canonical location (which this walker
+  //    DOES check) but break from the deeper vendored path. The
+  //    canonical copies are authoritative; the vendored copies are
+  //    byte-for-byte identical via cp -R.
+  const prunedRepoRelative = new Set(['site/out', 'apps/workflow-engine/packs']);
   const files: string[] = [];
   for (const entry of readdirSync(dir, { withFileTypes: true })) {
     if (entry.isDirectory()) {
       if (ignoredDirs.has(entry.name)) continue;
       const child = join(dir, entry.name);
-      if (relative(dir, child).startsWith('site/out')) continue;
-      files.push(...listMarkdownFilesRecursive(child));
+      const repoRelChild = relative(repoRoot, child);
+      if (prunedRepoRelative.has(repoRelChild)) continue;
+      files.push(...listMarkdownFilesRecursive(child, repoRoot));
       continue;
     }
     if (entry.isFile() && entry.name.endsWith('.md')) {
@@ -1019,7 +1035,7 @@ describe('spec-corpus: AsyncAPI 3.1 spec is structurally valid', () => {
     // `run.annotated` (RFC 0056) is a live SSE notification carrying an
     // Annotation — NOT a RunEventDoc and deliberately NOT in the RunEventType
     // enum (annotations are a side-resource, excluded from fork/replay).
-    const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any', 'run.annotated']);
+    const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any', 'run.annotated', 'heartbeat.evaluated', 'heartbeat.stateChanged']);
     expect(messageNames.length, 'AsyncAPI MUST declare named SSE messages').toBeGreaterThan(0);

package/src/scenarios/subrun-approval-fail-closed.test.ts ADDED Viewed

@@ -0,0 +1,33 @@
+/**
+ * subrun-approval-fail-closed — RFC 0063 §C. A parent that terminates or whose
+ * approval interrupt expires WITHOUT an `accept`/`edit-accept` MUST NOT merge the
+ * child outputs. Absence of an approval is denial — backs the proposed
+ * protocol-tier SECURITY invariant `subrun-merge-approval-fail-closed` (lands
+ * with this test promoted to load-bearing at reference-host implementation).
+ *
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §C
+ * @see SECURITY/invariants.yaml — subrun-merge-approval-fail-closed (lands at impl)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
+describe('subrun-approval-fail-closed (RFC 0063 §C)', () => {
+  it('no accept/edit-accept (terminated or expired) MUST NOT merge', async () => {
+    if ((await readSubRunAttestationCap()) !== true) return;
+    // approvalAction omitted models a run that terminated without a response.
+    const res = await invokeSubRunAttest({
+      childOutputs: { artifact: 'unverified' },
+      outputAttestation: { requireApproval: true },
+    });
+    if (res === null) return; // seam absent — soft-skip
+    expect(
+      res.merged,
+      driver.describe('RFC 0063 §C', 'an unresolved approval MUST fail closed — outputs MUST NOT be merged'),
+    ).toBe(false);
+  });
+});

package/src/scenarios/subrun-approval-gate.test.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * subrun-approval-gate — RFC 0063 §C. When `requireApproval: true`, the host
+ * suspends before merge; `accept` merges the child outputs, `reject` does not.
+ *
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §C
+ * @see spec/v1/interrupt.md — `approval` kind + resume actions (RFC 0051, reused)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
+describe('subrun-approval-gate (RFC 0063 §C)', () => {
+  it('accept merges the child outputs; reject does not', async () => {
+    if ((await readSubRunAttestationCap()) !== true) return;
+    const base = { childOutputs: { artifact: 'x' }, outputAttestation: { requireApproval: true } };
+    const accepted = await invokeSubRunAttest({ ...base, approvalAction: 'accept' });
+    if (accepted === null) return; // seam absent — soft-skip
+    expect(
+      accepted.merged,
+      driver.describe('RFC 0063 §C', 'an `accept` approval MUST merge the child outputs'),
+    ).toBe(true);
+    const rejected = await invokeSubRunAttest({ ...base, approvalAction: 'reject' });
+    if (rejected === null) return;
+    expect(
+      rejected.merged,
+      driver.describe('RFC 0063 §C', 'a `reject` approval MUST NOT merge the child outputs'),
+    ).toBe(false);
+  });
+});

package/src/scenarios/subrun-attestation-shape.test.ts ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * subrun-attestation-shape — RFC 0063 §A. The `capabilities.agents.subRunAttestation`
+ * advertisement flag is either absent or a boolean.
+ *
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage lives
+ * in the sibling subrun-*.test.ts scenarios, gated on the flag + the host
+ * sub-run attestation seam.
+ *
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §A
+ * @see spec/v1/node-packs.md §"`outputAttestation` — verify-before-merge"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readSubRunAttestationCap } from '../lib/subRunAttestation.js';
+describe('subrun-attestation-shape: advertisement (RFC 0063 §A)', () => {
+  it('capabilities.agents.subRunAttestation is absent or a boolean', async () => {
+    const cap = await readSubRunAttestationCap();
+    // null = unadvertised (no agents block OR flag omitted) — valid.
+    if (cap === null) return;
+    expect(
+      typeof cap,
+      driver.describe(
+        'capabilities.schema.json §agents.subRunAttestation',
+        'agents.subRunAttestation MUST be a boolean when present',
+      ),
+    ).toBe('boolean');
+  });
+});

package/src/scenarios/subrun-checksum-stable.test.ts ADDED Viewed

@@ -0,0 +1,43 @@
+/**
+ * subrun-checksum-stable — RFC 0063 §B. A child's output checksum is byte-stable
+ * for identical outputs and host-independent (the RFC 8785 JCS + SHA-256 recipe
+ * pinned in replay.md), and is surfaced as the `attestation` object on the
+ * existing `core.workflowChain.event { phase: 'output.harvested' }`.
+ *
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §B
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
+describe('subrun-checksum-stable (RFC 0063 §B)', () => {
+  it('identical child outputs produce an identical sha256 attestation checksum', async () => {
+    if ((await readSubRunAttestationCap()) !== true) return;
+    const childOutputs = { report: 'done', score: 0.9, tags: ['a', 'b'] };
+    const a = await invokeSubRunAttest({ childOutputs, outputAttestation: { checksum: true } });
+    if (a === null) return; // seam absent — soft-skip
+    // Key-reordered but value-identical: JCS canonicalization MUST yield the same hash.
+    const b = await invokeSubRunAttest({
+      childOutputs: { tags: ['a', 'b'], score: 0.9, report: 'done' },
+      outputAttestation: { checksum: true },
+    });
+    if (b === null) return;
+    const att = a.attestation ?? {};
+    expect(
+      typeof att.checksum === 'string' && (att.checksum as string).length > 0,
+      driver.describe('RFC 0063 §B', 'output.harvested MUST carry a non-empty attestation.checksum when checksum:true'),
+    ).toBe(true);
+    expect(
+      att.algorithm,
+      driver.describe('RFC 0063 §B', 'attestation.algorithm MUST be "sha256" (the v1 recipe)'),
+    ).toBe('sha256');
+    expect(
+      (b.attestation ?? {}).checksum,
+      driver.describe('RFC 0063 §B', 'JCS canonicalization MUST make the checksum invariant to key order — same content, same hash'),
+    ).toBe(att.checksum);
+  });
+});

package/src/scenarios/tool-descriptor-shape.test.ts ADDED Viewed

@@ -0,0 +1,133 @@
+/**
+ * Portable tool catalog — descriptor + capability + session-event shapes (RFC 0078).
+ *
+ * Always-on, server-free schema-shape probe. Verifies that:
+ *   - `tool-descriptor.schema.json` compiles and round-trips a conforming
+ *     `ToolDescriptor`, and rejects a descriptor missing the REQUIRED
+ *     `safetyTier`.
+ *   - the §C-1 / §F-4 cross-field MUST is enforced: a `safetyTier: "exec"`
+ *     descriptor MUST carry `source: "host-extension"` (RFC 0069 — exec is never
+ *     protocol-tier); an `exec` + `node-pack` descriptor is rejected, an `exec`
+ *     + `host-extension` descriptor is accepted.
+ *   - `capabilities.toolCatalog` is declared with its `supported` / `sources` /
+ *     `sessionLifecycle` sub-flags.
+ *   - the `tool.session.opened` / `tool.session.closed` payload $defs validate
+ *     conforming content-free records and reject malformed ones (a `closed`
+ *     missing `outcome`; an out-of-enum `outcome`), and both event names appear
+ *     in the RunEventType enum.
+ *
+ * Behavioral assertions (a live `GET /v1/tools` returning authorization-scoped
+ * descriptors, the `404` non-disclosure, the `tool.session.*` bracket ordering)
+ * are gated on `capabilities.toolCatalog.supported` and land in
+ * `tool-catalog-projection.test.ts` + `tool-session-lifecycle.test.ts` (deferred
+ * per RFC 0078 §Conformance — reference host deferred). This scenario asserts the
+ * wire contract, not host behavior.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/tool-catalog.md
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0069-exec-class-tool-host-extension-safety-contract.md (exec ⇒ host-extension)
+ */
+import { describe, it, expect } from 'vitest';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import Ajv2020 from 'ajv/dist/2020.js';
+import addFormats from 'ajv-formats';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+const why = (specRef: string, requirement: string): string => `${specRef} — ${requirement}`;
+function loadSchema(name: string): Record<string, unknown> {
+  return JSON.parse(readFileSync(join(SCHEMAS_DIR, name), 'utf8')) as Record<string, unknown>;
+}
+describe('tool-descriptor-shape: ToolDescriptor (RFC 0078 §C, server-free)', () => {
+  const ajv = addFormats(new Ajv2020({ strict: false }));
+  const validate = ajv.compile(loadSchema('tool-descriptor.schema.json'));
+  it('a conforming descriptor validates', () => {
+    expect(
+      validate({
+        toolId: 'mcp:fs.read', source: 'mcp', title: 'Read file',
+        inputSchema: { type: 'object' }, auth: { scopes: ['tools:fs:read'] },
+        egress: 'none', approval: 'never', replayPolicy: 'idempotent',
+        safetyTier: 'read', costHint: 'low', latencyHint: 'low',
+      }),
+      why('tool-catalog.md §C', 'a conforming ToolDescriptor MUST validate'),
+    ).toBe(true);
+  });
+  it('a descriptor missing the REQUIRED safetyTier is rejected', () => {
+    expect(
+      validate({ toolId: 'x', source: 'mcp' }),
+      why('tool-catalog.md §C', 'safetyTier is REQUIRED'),
+    ).toBe(false);
+  });
+  it('enforces exec ⇒ host-extension (RFC 0069; §C-1/§F-4)', () => {
+    expect(
+      validate({ toolId: 'x-host-acme-shell', source: 'host-extension', safetyTier: 'exec', approval: 'always', egress: 'host-owned' }),
+      why('tool-catalog.md §C-1', 'an exec tool sourced from host-extension MUST validate'),
+    ).toBe(true);
+    expect(
+      validate({ toolId: 'openwop:run-shell', source: 'node-pack', safetyTier: 'exec' }),
+      why('tool-catalog.md §C-1 / RFC 0069', 'an exec tool MUST NOT be protocol-tier (node-pack)'),
+    ).toBe(false);
+  });
+  it('rejects an unknown property (additionalProperties:false)', () => {
+    expect(
+      validate({ toolId: 'x', source: 'mcp', safetyTier: 'read', danger: true }),
+      why('tool-catalog.md §C', 'ToolDescriptor MUST be additionalProperties:false'),
+    ).toBe(false);
+  });
+});
+describe('tool-descriptor-shape: capability advertisement (RFC 0078 §A, server-free)', () => {
+  it('capabilities.toolCatalog is declared with its sub-flags', () => {
+    const caps = loadSchema('capabilities.schema.json');
+    const toolCatalog = (caps.properties as Record<string, { properties?: Record<string, unknown> }>).toolCatalog;
+    expect(
+      toolCatalog,
+      why('capabilities.md §toolCatalog', 'capabilities.toolCatalog MUST be declared'),
+    ).toBeDefined();
+    for (const flag of ['supported', 'sources', 'sessionLifecycle']) {
+      expect(
+        toolCatalog?.properties?.[flag],
+        why('tool-catalog.md §A', `capabilities.toolCatalog.${flag} MUST be declared`),
+      ).toBeDefined();
+    }
+  });
+});
+describe('tool-descriptor-shape: session lifecycle events (RFC 0078 §D, server-free)', () => {
+  const payloads = loadSchema('run-event-payloads.schema.json');
+  const ajv = addFormats(new Ajv2020({ strict: false }));
+  const compile = (defName: string) => ajv.compile({
+    $schema: 'https://json-schema.org/draft/2020-12/schema',
+    $defs: (payloads as { $defs: Record<string, unknown> }).$defs,
+    $ref: `#/$defs/${defName}`,
+  } as Record<string, unknown>);
+  it('tool.session.opened validates a content-free record', () => {
+    const v = compile('toolSessionOpened');
+    expect(v({ sessionId: 's1', toolId: 'mcp:fs.read' }), why('tool-catalog.md §D', 'opened MUST validate')).toBe(true);
+    expect(v({ toolId: 'mcp:fs.read' }), why('tool-catalog.md §D', 'opened requires sessionId')).toBe(false);
+  });
+  it('tool.session.closed validates + enforces the closed outcome enum', () => {
+    const v = compile('toolSessionClosed');
+    expect(v({ sessionId: 's1', toolId: 'mcp:fs.read', outcome: 'completed' }), why('tool-catalog.md §D', 'closed MUST validate')).toBe(true);
+    expect(v({ sessionId: 's1', toolId: 'mcp:fs.read' }), why('tool-catalog.md §D', 'closed requires outcome')).toBe(false);
+    expect(v({ sessionId: 's1', toolId: 'mcp:fs.read', outcome: 'exploded' }), why('tool-catalog.md §D', 'outcome is a closed enum')).toBe(false);
+  });
+  it('both session event names appear in the RunEventType enum', () => {
+    const runEvent = loadSchema('run-event.schema.json');
+    const enumVals = ((runEvent.$defs as Record<string, { enum?: string[] }>).RunEventType?.enum) ?? [];
+    for (const name of ['tool.session.opened', 'tool.session.closed']) {
+      expect(enumVals.includes(name), why('run-event.schema.json', `${name} MUST be in the RunEventType enum`)).toBe(true);
+    }
+  });
+});

package/src/scenarios/tool-hooks-authorization-fail-closed.test.ts ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * tool-hooks-authorization-fail-closed — RFC 0064 §C. A principal lacking a
+ * tool's required scope (or whose authorization cannot be evaluated) gets
+ * `agent.toolReturned { status: 'forbidden' }` and the tool is never invoked —
+ * the per-tool application of RFC 0049's `authorization-fail-closed` invariant.
+ *
+ * Gated on `capabilities.toolHooks.perToolAuthorization` + the host tool-hooks
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §C
+ * @see SECURITY/invariants.yaml — authorization-fail-closed (RFC 0049, reused)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
+describe('tool-hooks-authorization-fail-closed (RFC 0064 §C)', () => {
+  it('a principal lacking a tool scope is denied and the tool is not invoked', async () => {
+    const cap = await readToolHooksCap();
+    if (cap?.perToolAuthorization !== true) return;
+    // A principal with no scopes against a tool requiring one MUST be denied.
+    const res = await invokeToolHook({
+      principal: 'conformance-unprivileged',
+      toolName: 'db.delete',
+      requiredScopes: ['db:write'],
+      args: {},
+    });
+    if (res === null) return; // seam absent — soft-skip
+    expect(
+      (res.toolReturned ?? {}).status,
+      driver.describe('RFC 0064 §C', 'a missing/unevaluable tool scope MUST fail closed → status:"forbidden"'),
+    ).toBe('forbidden');
+    expect(
+      (res.toolReturned ?? {}).durationMs,
+      driver.describe('RFC 0064 §C', 'a forbidden call never starts, so durationMs MUST be absent'),
+    ).toBeUndefined();
+  });
+});

package/src/scenarios/tool-hooks-content-free.test.ts ADDED Viewed

@@ -0,0 +1,40 @@
+/**
+ * tool-hooks-content-free — RFC 0064 §B. When `prePostEvents`, a tool call's
+ * `agent.toolCalled` carries `argsHash` (the content-free, SIEM-safe
+ * alternative to raw `inputs`) + `agent.toolReturned` carries `status` +
+ * `durationMs`.
+ *
+ * Gated on `capabilities.toolHooks.prePostEvents` + the host tool-hooks seam;
+ * soft-skips when either is absent.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
+describe('tool-hooks-content-free (RFC 0064 §B)', () => {
+  it('toolCalled carries argsHash; toolReturned carries status + durationMs', async () => {
+    const cap = await readToolHooksCap();
+    if (cap?.prePostEvents !== true) return;
+    const res = await invokeToolHook({ principal: 'core.system', toolName: 'web.search', args: { q: 'openwop' } });
+    if (res === null) return; // seam absent — soft-skip
+    const called = res.toolCalled ?? {};
+    const returned = res.toolReturned ?? {};
+    expect(
+      typeof called.argsHash === 'string' && (called.argsHash as string).length > 0,
+      driver.describe('RFC 0064 §B', 'agent.toolCalled MUST carry a non-empty argsHash when prePostEvents'),
+    ).toBe(true);
+    expect(
+      ['ok', 'error', 'forbidden', 'rate_limited'].includes(returned.status as string),
+      driver.describe('RFC 0064 §B', 'agent.toolReturned MUST carry a tool-hooks status'),
+    ).toBe(true);
+    if (returned.status === 'ok') {
+      expect(
+        typeof returned.durationMs === 'number' && (returned.durationMs as number) >= 0,
+        driver.describe('RFC 0064 §B', 'a completed tool call MUST record a non-negative durationMs'),
+      ).toBe(true);
+    }
+  });
+});

package/src/scenarios/tool-hooks-rate-limit.test.ts ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * tool-hooks-rate-limit — RFC 0064 §D. Exhausting a `(principal, tool)` token
+ * bucket → `agent.toolReturned { status: 'rate_limited' }` and the tool is not
+ * invoked, surfacing the existing `rate_limited` (429) error.
+ *
+ * Gated on `capabilities.toolHooks.perToolRateLimit` + the host tool-hooks
+ * seam; soft-skips when either is absent.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §D
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
+describe('tool-hooks-rate-limit (RFC 0064 §D)', () => {
+  it('an exhausted (principal, tool) bucket yields status:"rate_limited"', async () => {
+    const cap = await readToolHooksCap();
+    if (cap?.perToolRateLimit !== true) return;
+    const res = await invokeToolHook({
+      principal: 'core.system',
+      toolName: 'web.search',
+      args: { q: 'x' },
+      simulateRateLimitExhausted: true,
+    });
+    if (res === null) return; // seam absent — soft-skip
+    expect(
+      (res.toolReturned ?? {}).status,
+      driver.describe('RFC 0064 §D', 'an exhausted token bucket MUST yield status:"rate_limited" without invoking the tool'),
+    ).toBe('rate_limited');
+  });
+});

package/src/scenarios/tool-hooks-secret-redaction.test.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * tool-hooks-secret-redaction — RFC 0064 §B/§E. A tool arg containing a
+ * resolved secret is redacted (SR-1) before hashing; the raw value never
+ * appears in `argsHash` or anywhere in the emitted `agent.toolCalled` /
+ * `agent.toolReturned` pair.
+ *
+ * Gated on `capabilities.toolHooks.prePostEvents` + the host tool-hooks seam;
+ * soft-skips when either is absent.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B (argsHash SR-1 redaction), §E (credentials)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
+const CANARY = 'sk-canary-rfc0064-do-not-leak-xyz789';
+describe('tool-hooks-secret-redaction (RFC 0064 §E)', () => {
+  it('a secret-shaped tool arg never appears in the emitted events', async () => {
+    const cap = await readToolHooksCap();
+    if (cap?.prePostEvents !== true) return;
+    const res = await invokeToolHook({
+      principal: 'core.system',
+      toolName: 'web.search',
+      args: { apiKey: CANARY, q: 'openwop' },
+    });
+    if (res === null) return; // seam absent — soft-skip
+    expect(
+      JSON.stringify(res).includes(CANARY),
+      driver.describe('RFC 0064 §B', 'a resolved secret MUST be redacted before hashing; the raw value MUST NOT appear in argsHash or any emitted field (SR-1)'),
+    ).toBe(false);
+  });
+});

package/src/scenarios/tool-hooks-shape.test.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * tool-hooks-shape — RFC 0064 §A. The `capabilities.toolHooks` advertisement
+ * block is either absent or a well-formed object.
+ *
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage lives
+ * in the sibling tool-hooks-*.test.ts scenarios, gated on the sub-flags + the
+ * host tool-hooks seam.
+ *
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §A
+ * @see spec/v1/host-capabilities.md §host.toolHooks
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { readToolHooksCap } from '../lib/toolHooks.js';
+describe('tool-hooks-shape: advertisement (RFC 0064 §A)', () => {
+  it('capabilities.toolHooks is absent or a well-formed object', async () => {
+    const cap = await readToolHooksCap();
+    if (cap === null) return; // not advertised — valid
+    expect(
+      typeof cap.supported,
+      driver.describe('capabilities.schema.json §toolHooks', 'toolHooks.supported MUST be a boolean when the block is present'),
+    ).toBe('boolean');
+    for (const k of ['prePostEvents', 'perToolAuthorization', 'perToolRateLimit'] as const) {
+      if (cap[k] !== undefined) {
+        expect(
+          typeof cap[k],
+          driver.describe('capabilities.schema.json §toolHooks', `toolHooks.${k} MUST be a boolean when present`),
+        ).toBe('boolean');
+      }
+    }
+  });
+});