npm - @openwop/openwop-conformance - Versions diffs - 1.6.1 → 1.11.0 - Mend

@openwop/openwop-conformance 1.6.1 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (200) hide show

package/src/scenarios/run-execution-bounds-shape.test.ts ADDED Viewed

@@ -0,0 +1,133 @@
+/**
+ * run-execution-bounds-shape — RFC 0058 advertisement-shape + breach-contract
+ * verification for the two run-scoped execution bounds.
+ *
+ * Status: ACTIVE. RFC 0058 (run execution bounds) is `Active`. The
+ * `capabilities.limits.{maxRunDurationMs,maxLoopIterations}` fields and the
+ * `run-duration` / `loop-iterations` kinds on `cap.breached` have landed in
+ * `schemas/capabilities.schema.json` + `schemas/run-event-payloads.schema.json`.
+ *
+ * Always runs (shape-only): when the host advertises either limit, its value
+ * MUST be well-formed. Behavior is capability- AND fixture-gated. The
+ * `run-duration` (wall-clock timeout) block is now enforced + green against the
+ * in-memory reference host. The `loop-iterations` block stays soft-skipped until
+ * an execution-loop host advertises `multiAgent.executionModel` (RFC 0061),
+ * mirroring the RFC 0052 scheduling pattern.
+ *
+ * What this scenario asserts:
+ *   1. `capabilities.limits.maxRunDurationMs`, when present, is an integer ≥ 1000.
+ *   2. `capabilities.limits.maxLoopIterations`, when present, is an integer ≥ 1.
+ *   3. (gated) A run with `configurable.runTimeoutMs` below its real duration
+ *      reaches terminal `failed` with `error.code = "run_timeout"` and emits
+ *      `cap.breached { kind: "run-duration" }` whose `observed > limit`.
+ *
+ * @see RFCS/0058-run-execution-bounds.md
+ * @see spec/v1/run-options.md §Reserved keys (runTimeoutMs / maxLoopIterations)
+ * @see spec/v1/capabilities.md §"Engine-enforced limits and the cap.breached event"
+ * @see schemas/run-event-payloads.schema.json §capBreached
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
+/** The two run-scoped bounds read from `capabilities.limits`; both are optional. */
+interface DiscoveryLimits {
+  maxRunDurationMs?: number;
+  maxLoopIterations?: number;
+}
+/** The subset of the `/.well-known/openwop` response body this scenario reads. */
+interface DiscoveryDoc {
+  capabilities?: { limits?: DiscoveryLimits };
+}
+/** One element of the `events` array returned by the run events poll endpoint. */
+interface RunEvent {
+  readonly type: string;
+  readonly sequence: number;
+  readonly payload?: unknown;
+}
+/** Fixture name checked by `isFixtureAdvertised` and sent as `workflowId` when creating the run. */
+const TIMEOUT_FIXTURE = 'conformance-run-duration-breach';
+/**
+ * Fetches the discovery document and extracts its `limits` capability family.
+ *
+ * @returns the value `capabilityFamily` yields for `'limits'`, or `null` when
+ *   that value is nullish.
+ */
+async function readLimits(): Promise<DiscoveryLimits | null> {
+  const discoveryRes = await driver.get('/.well-known/openwop');
+  const discovery = discoveryRes.json as DiscoveryDoc | undefined;
+  const limits = capabilityFamily(discovery, 'limits');
+  return limits ?? null;
+}
+describe('run-execution-bounds-shape: advertisement shape (RFC 0058)', () => {
+  // Shape-only checks: a limit the host does not advertise passes trivially.
+  it('maxRunDurationMs is an integer >= 1000 when present', async () => {
+    const advertised = (await readLimits())?.maxRunDurationMs;
+    if (advertised === undefined) {
+      return; // not advertised
+    }
+    const wellFormed = Number.isInteger(advertised) && advertised >= 1000;
+    const message = driver.describe(
+      'capabilities.schema.json §limits.maxRunDurationMs',
+      `capabilities.limits.maxRunDurationMs MUST be an integer >= 1000, got: ${advertised}`,
+    );
+    expect(wellFormed, message).toBe(true);
+  });
+  it('maxLoopIterations is an integer >= 1 when present', async () => {
+    const advertised = (await readLimits())?.maxLoopIterations;
+    if (advertised === undefined) {
+      return; // not advertised
+    }
+    const wellFormed = Number.isInteger(advertised) && advertised >= 1;
+    const message = driver.describe(
+      'capabilities.schema.json §limits.maxLoopIterations',
+      `capabilities.limits.maxLoopIterations MUST be an integer >= 1, got: ${advertised}`,
+    );
+    expect(wellFormed, message).toBe(true);
+  });
+});
+// Behavior: gated on `isFixtureAdvertised(TIMEOUT_FIXTURE)` — the whole block is
+// skipped when the host does not advertise the run-duration breach fixture.
+const SKIP_TIMEOUT = !isFixtureAdvertised(TIMEOUT_FIXTURE);
+describe.skipIf(SKIP_TIMEOUT)('run-execution-bounds: run-duration breach (RFC 0058)', () => {
+  it('a run with runTimeoutMs below its real duration fails with run_timeout + cap.breached{run-duration}', async () => {
+    // Start the fixture workflow with a 1000 ms wall-clock override.
+    const create = await driver.post('/v1/runs', {
+      workflowId: TIMEOUT_FIXTURE,
+      configurable: { runTimeoutMs: 1000 },
+    });
+    expect(create.status, driver.describe(
+      'rest-endpoints.md POST /v1/runs',
+      'run creation MUST accept a runTimeoutMs override',
+    )).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const terminal = await pollUntilTerminal(runId);
+    expect(terminal.status, driver.describe(
+      'run-options.md §runTimeoutMs',
+      'a run exceeding its runTimeoutMs MUST reach terminal `failed`',
+    )).toBe('failed');
+    expect(terminal.error?.code, driver.describe(
+      'rest-endpoints.md §run_timeout',
+      'RunSnapshot.error.code MUST equal "run_timeout" on wall-clock timeout',
+    )).toBe('run_timeout');
+    const eventsRes = await driver.get(
+      `/v1/runs/${encodeURIComponent(runId)}/events/poll?lastSequence=0&timeout=1`,
+    );
+    const events = (eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? [];
+    type BreachPayload = { kind?: string; limit?: number; observed?: number };
+    // Collect every cap.breached event rather than only the first one: a host
+    // may emit breaches of other kinds before the run-duration breach.
+    const breaches = events.filter((e) => e.type === 'cap.breached');
+    expect(breaches[0], driver.describe(
+      'capabilities.md §Engine-enforced limits',
+      'a cap.breached event MUST be emitted on run-duration breach',
+    )).toBeDefined();
+    const payloads = breaches.map((e) => e.payload as BreachPayload | undefined);
+    // Prefer the run-duration breach; fall back to the first payload so a
+    // failing kind assertion still reports what the host actually sent.
+    const payload = payloads.find((p) => p?.kind === 'run-duration') ?? payloads[0];
+    expect(payload?.kind, driver.describe(
+      'run-event-payloads.schema.json §capBreached.kind',
+      'cap.breached payload MUST carry kind="run-duration"',
+    )).toBe('run-duration');
+    const observed = payload?.observed;
+    const limit = payload?.limit;
+    expect(
+      typeof observed === 'number' && typeof limit === 'number' && observed > limit,
+      driver.describe(
+        'run-event-payloads.schema.json §capBreached.observed',
+        'observed (elapsedMs) MUST be strictly greater than limit (resolved timeout)',
+      ),
+    ).toBe(true);
+  });
+});

package/src/scenarios/runtime-requires-install-gate.test.ts ADDED Viewed

@@ -0,0 +1,92 @@
+/**
+ * Pack runtime-requirements install gate — `registry-operations.md`
+ * §"Runtime-requirement install gate" + `node-packs.md` §"Runtime platform
+ * requirements" (RFC 0076 §A).
+ *
+ * Seam-gated behavioral scenarios for the install-time gate. A sandbox host MUST
+ * evaluate a pack's `runtime.requires[]` against the primitives it will grant
+ * and refuse install (`pack_runtime_requirement_unmet`) for any it won't grant —
+ * rather than silently installing and failing at first invocation (the
+ * `node:dns/promises` trial-load failure that motivated RFC 0076). A non-gating
+ * host SHOULD instead project `runtime.requires[]` onto the pack's inventory
+ * entry for operator visibility.
+ *
+ *   1. install-grant — requires ⊆ grant-set ⇒ install succeeds.
+ *   2. install-refuse — a required primitive the host won't grant ⇒
+ *      `pack_runtime_requirement_unmet { unmet, manifest, advice? }`, reusing the
+ *      `capability_not_provided` envelope shape.
+ *   3. non-sandbox projection — a host that does NOT gate platform access
+ *      installs and projects the declared requires[] for visibility (the §A SHOULD).
+ *
+ * All three drive `POST /v1/host/sample/packs/install-gate` and soft-skip when
+ * the host doesn't wire the seam (404). Behavior grade is `host-pending` until a
+ * runtime-requires-gating host (MyndHyve is the first adopter) lights it up.
+ *
+ * @see spec/v1/registry-operations.md §"Runtime-requirement install gate"
+ * @see spec/v1/host-sample-test-seams.md §"Open seams"
+ * @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md §A
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { installGate } from '../lib/runtimeRequires.js';
+/**
+ * Builds the single-node example pack manifest used by every install-gate case.
+ *
+ * @param requires tokens placed verbatim in `runtime.requires`
+ * @returns a fresh manifest object for `vendor.example.http` 1.0.0
+ */
+function manifest(requires: string[]) {
+  const runtime = { language: 'javascript', entry: 'index.mjs', requires };
+  const fetchNode = {
+    typeId: 'vendor.example.http.fetch',
+    version: '1.0.0',
+    category: 'integration',
+    role: 'side-effect',
+  };
+  return {
+    name: 'vendor.example.http',
+    version: '1.0.0',
+    engines: { openwop: '>=1.1 <2.0.0' },
+    runtime,
+    nodes: [fetchNode],
+  };
+}
+describe('runtime-requires install gate (RFC 0076 §A)', () => {
+  // Spec section cited by every gate assertion below.
+  const GATE_SECTION = 'registry-operations.md §"Runtime-requirement install gate"';
+  const gateCite = (requirement: string) => driver.describe(GATE_SECTION, requirement);
+
+  it('install-grant: requires ⊆ grant-set ⇒ install succeeds', async () => {
+    const res = await installGate({
+      manifest: manifest(['net.dns']),
+      grantSet: ['net.dns', 'net.outbound'],
+    });
+    if (res === null) {
+      return; // seam absent — soft-skip
+    }
+    expect(
+      res.status,
+      gateCite('a pack whose runtime.requires are all grantable MUST install (no refusal)'),
+    ).toBe(200);
+    expect(res.body.outcome, gateCite('a granted install reports outcome:"installed"')).toBe('installed');
+  });
+
+  it('install-refuse: an ungrantable primitive ⇒ pack_runtime_requirement_unmet', async () => {
+    const res = await installGate({ manifest: manifest(['net.dns']), grantSet: [] });
+    if (res === null) {
+      return; // seam absent — soft-skip
+    }
+    expect(
+      res.status,
+      gateCite('a pack requiring an ungranted primitive MUST be refused at install (not at first invocation)'),
+    ).toBe(400);
+    expect(
+      res.body.error,
+      gateCite('the refusal MUST carry error code pack_runtime_requirement_unmet'),
+    ).toBe('pack_runtime_requirement_unmet');
+    expect(
+      Array.isArray(res.body.unmet) && (res.body.unmet as unknown[]).includes('net.dns'),
+      gateCite('unmet[] MUST list the ungranted primitive(s) (capability_not_provided envelope)'),
+    ).toBe(true);
+    expect(
+      typeof res.body.manifest === 'string' && (res.body.manifest as string).includes('vendor.example.http'),
+      gateCite('the refusal MUST name the offending manifest (name@version)'),
+    ).toBe(true);
+  });
+
+  it('non-sandbox projection: a non-gating host installs and projects requires[] (§A SHOULD)', async () => {
+    const res = await installGate({ manifest: manifest(['net.dns', 'net.outbound']), gating: false });
+    if (res === null) {
+      return; // seam absent — soft-skip
+    }
+    // A non-gating host installs unconditionally, and the projection is only a SHOULD.
+    // A host that gates anyway (non-200) is outside the SHOULD, so either install shape is tolerated.
+    if (res.status !== 200) {
+      return;
+    }
+    if (res.body.requiresProjected === undefined) {
+      return; // SHOULD, not MUST — a non-projecting host is conformant
+    }
+    const projected = res.body.requiresProjected as unknown;
+    const surfacesAllDeclared =
+      Array.isArray(projected) &&
+      ['net.dns', 'net.outbound'].every((token) => (projected as unknown[]).includes(token));
+    expect(
+      surfacesAllDeclared,
+      driver.describe('node-packs.md §"Runtime platform requirements"', 'a non-gating host that projects SHOULD surface the declared runtime.requires[] on the inventory entry verbatim'),
+    ).toBe(true);
+  });
+});

package/src/scenarios/runtime-requires-shape.test.ts ADDED Viewed

@@ -0,0 +1,134 @@
+/**
+ * Pack runtime-requirements vocabulary + shape — `node-packs.md`
+ * §"Runtime platform requirements" + `schemas/node-pack-manifest.schema.json`
+ * `$defs/Runtime.requires` (RFC 0076 §A).
+ *
+ * Server-free schema-validation scenario. The `runtime.requires[]` field is an
+ * OPTIONAL, closed, runtime-agnostic vocabulary a pack uses to declare the
+ * platform primitives its code exercises, so a sandbox host can gate at install
+ * time instead of trial-load. This file exercises the schema layer (the §A
+ * "vocabulary-validation" normative behavior — a raw builtin name is rejected —
+ * plus the additive/empty-array shape contract):
+ *
+ *   1. Positive: a manifest declaring valid primitives validates cleanly.
+ *   2. Positive: the field is OPTIONAL — a manifest omitting it validates.
+ *   3. Positive: an empty array (`requires: []`) validates and is equivalent to
+ *      omission (no host may read a distinct meaning into it; §A).
+ *   4. Positive: every one of the 8 vocabulary tokens individually validates.
+ *   5. Negative — raw builtin name: `"node:dns/promises"` (the value that
+ *      motivated the abstract vocabulary) is rejected; the registry/host
+ *      surfaces this as `invalid_manifest`.
+ *   6. Negative — duplicate token: `uniqueItems` is enforced.
+ *
+ * The install-time GATE behavior (grant / refuse → `pack_runtime_requirement_unmet`,
+ * and the non-sandbox-host SHOULD-projection) is host behavior and lives in the
+ * seam-gated `runtime-requires-install-gate.test.ts`.
+ *
+ * @see spec/v1/node-packs.md §"Runtime platform requirements"
+ * @see spec/v1/registry-operations.md §"Runtime-requirement install gate"
+ * @see schemas/node-pack-manifest.schema.json
+ * @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md
+ */
+import { describe, it, expect } from 'vitest';
+import { readFileSync, readdirSync } from 'node:fs';
+import { join } from 'node:path';
+import Ajv2020 from 'ajv/dist/2020.js';
+import addFormats from 'ajv-formats';
+import type { ErrorObject, ValidateFunction } from 'ajv';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+// Location of the node-pack manifest schema inside SCHEMAS_DIR; this is the schema the suite validates against.
+const SCHEMA_PATH = join(SCHEMAS_DIR, 'node-pack-manifest.schema.json');
+// The eight `runtime.requires` tokens the per-token positive test expects the schema to accept.
+const VOCABULARY = [
+  'net.dns',
+  'net.outbound',
+  'crypto',
+  'subprocess',
+  'fs.read',
+  'fs.write',
+  'env.read',
+  'clock',
+] as const;
+/**
+ * Builds the example pack manifest fed to the schema validator.
+ *
+ * @param requires value placed in `runtime.requires`; when `undefined` the
+ *   key is omitted entirely so the "field is optional" case can be exercised
+ * @returns a fresh manifest object for `vendor.example.http` 1.0.0
+ */
+function manifest(requires?: unknown) {
+  const runtime: Record<string, unknown> =
+    requires === undefined
+      ? { language: 'javascript', entry: 'index.mjs' }
+      : { language: 'javascript', entry: 'index.mjs', requires };
+  const fetchNode = {
+    typeId: 'vendor.example.http.fetch',
+    version: '1.0.0',
+    category: 'integration',
+    role: 'side-effect',
+  };
+  return {
+    name: 'vendor.example.http',
+    version: '1.0.0',
+    engines: { openwop: '>=1.1 <2.0.0' },
+    runtime,
+    nodes: [fetchNode],
+  };
+}
+// Server-free suite: compiles the node-pack manifest schema once and validates
+// hand-built manifests against it. Setup runs at collection time, inside the
+// describe callback, so every test shares the same `validate` function.
+describe('category: runtime.requires vocabulary + shape (RFC 0076 §A)', () => {
+  const ajv = new Ajv2020({ allErrors: true, strict: false });
+  addFormats(ajv);
+  // Register every schema first so cross-$refs resolve (node-pack-manifest
+  // references agent-manifest.schema.json for its agents[] branch). addSchema
+  // registers without compiling; the target compiles below.
+  for (const file of readdirSync(SCHEMAS_DIR)) {
+    if (!file.endsWith('.schema.json')) continue;
+    try {
+      ajv.addSchema(JSON.parse(readFileSync(join(SCHEMAS_DIR, file), 'utf8')) as Record<string, unknown>);
+    } catch {
+      /* duplicate/already-registered — the target is compiled below */
+      // NOTE(review): this catch has no binding, so read and JSON-parse failures
+      // are swallowed as well, not only duplicate registrations.
+    }
+  }
+  const schema = JSON.parse(readFileSync(SCHEMA_PATH, 'utf8'));
+  // Reuse the validator registered under the schema's $id when there is one; otherwise compile it now.
+  const validate = (ajv.getSchema(schema['$id'] as string) ?? ajv.compile(schema)) as ValidateFunction;
+  // Asserts that `m` is rejected and returns the validator's error list (empty when none is set).
+  const errorsOn = (m: unknown): ErrorObject[] => {
+    expect(validate(m)).toBe(false);
+    return validate.errors ?? [];
+  };
+  it('positive: a manifest declaring valid primitives validates cleanly', () => {
+    const ok = validate(manifest(['net.dns', 'net.outbound']));
+    expect(
+      ok,
+      `node-packs.md §"Runtime platform requirements": a well-formed runtime.requires MUST validate. Errors: ${JSON.stringify(validate.errors)}`,
+    ).toBe(true);
+  });
+  it('positive: runtime.requires is OPTIONAL — a manifest omitting it validates (additive)', () => {
+    expect(
+      validate(manifest(undefined)),
+      'node-pack-manifest.schema.json: runtime.requires is additive/OPTIONAL — packs predating RFC 0076 validate unchanged',
+    ).toBe(true);
+  });
+  it('positive: an empty requires[] validates (equivalent to omission per §A)', () => {
+    expect(
+      validate(manifest([])),
+      'node-packs.md §"Runtime platform requirements": runtime.requires:[] is valid and equivalent to omission',
+    ).toBe(true);
+  });
+  it('positive: every vocabulary token individually validates', () => {
+    for (const token of VOCABULARY) {
+      // The message reads validate.errors; arguments evaluate left to right, so
+      // it reflects the validate() call made in the first argument.
+      expect(
+        validate(manifest([token])),
+        `node-pack-manifest.schema.json: "${token}" is in the RFC 0076 §A vocabulary. Errors: ${JSON.stringify(validate.errors)}`,
+      ).toBe(true);
+    }
+  });
+  it('negative: a raw builtin name (node:dns/promises) is rejected (→ invalid_manifest)', () => {
+    const errs = errorsOn(manifest(['node:dns/promises']));
+    // The rejection must be attributed to the requires array, not to some unrelated part of the manifest.
+    expect(
+      errs.some((e) => e.instancePath.includes('/runtime/requires')),
+      'node-packs.md §"Runtime platform requirements": raw language builtin names are NOT in the closed vocabulary — the abstract net.dns is the portable equivalent; the registry/host surfaces this as invalid_manifest',
+    ).toBe(true);
+  });
+  it('negative: a duplicate token is rejected (uniqueItems)', () => {
+    const errs = errorsOn(manifest(['net.dns', 'net.dns']));
+    expect(
+      errs.some((e) => e.keyword === 'uniqueItems'),
+      'node-pack-manifest.schema.json: runtime.requires has uniqueItems:true',
+    ).toBe(true);
+  });
+});

package/src/scenarios/safefetch-behavior.test.ts ADDED Viewed

@@ -0,0 +1,99 @@
+/**
+ * Host-provided safe-fetch behavior — `host-capabilities.md` §host.http
+ * (`ctx.http.safeFetch`) + RFC 0076 §B.
+ *
+ * Seam-gated behavioral scenarios for the pack-facing `ctx.http.safeFetch`. When
+ * a host advertises `capabilities.httpClient.safeFetch.supported`, the
+ * host-mediated fetch MUST apply the §host.http SSRF guard (resolve→pin→connect)
+ * so a pack can do outbound HTTP without reaching for `node:dns` / raw sockets:
+ *
+ *   1. SSRF block — a loopback / RFC 1918 / cloud-metadata target ⇒
+ *      `{ outcome: "blocked", blocked: "ssrf" }`; the host MUST NOT connect.
+ *   2. DNS-rebinding — a public name re-resolving to a blocked address
+ *      (`simulateRebindTo`) ⇒ also blocked (the resolved IP is pinned).
+ *   3. Connection-upgrade refusal — `Connection: upgrade` ⇒
+ *      `{ outcome: "blocked", blocked: "upgrade" }` (no 101 socket-hijack escape).
+ *   4. Audit-when-both — when `toolHooks.prePostEvents` is also advertised, a
+ *      fetched call emits the `agent.toolCalled` / `agent.toolReturned` pair
+ *      (`transport: "http"`).
+ *
+ * All drive `POST /v1/host/sample/http/safe-fetch` and soft-skip when the host
+ * doesn't advertise `safeFetch` or doesn't wire the seam (404). Behavior grade
+ * is `host-pending` until a `safeFetch` host lights it up. The SSRF *guarantee*
+ * reuses the `http-client-ssrf-guard` SECURITY invariant — no new invariant.
+ *
+ * @see spec/v1/host-capabilities.md §host.http
+ * @see spec/v1/host-sample-test-seams.md §"Open seams"
+ * @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md §B
+ * @see SECURITY/invariants.yaml id: http-client-ssrf-guard
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { isSafeFetchSupported, isToolHookAuditOn, safeFetch } from '../lib/safeFetch.js';
+describe('safefetch-behavior (RFC 0076 §B / §host.http)', () => {
+  // Every assertion in this suite cites the same spec section.
+  const HOST_HTTP = 'host-capabilities.md §host.http';
+  const cite = (requirement: string) => driver.describe(HOST_HTTP, requirement);
+
+  it('blocks a metadata-endpoint target (SSRF guard)', async () => {
+    const supported = await isSafeFetchSupported();
+    if (!supported) {
+      return; // capability absent — soft-skip
+    }
+    const res = await safeFetch({ url: 'http://169.254.169.254/latest/meta-data/' });
+    if (res === null) {
+      return; // seam absent — soft-skip
+    }
+    expect(res.outcome, cite('safeFetch MUST NOT connect to a cloud-metadata address')).toBe('blocked');
+    expect(
+      res.blocked,
+      cite('a blocked SSRF target reports blocked:"ssrf" (http-client-ssrf-guard invariant)'),
+    ).toBe('ssrf');
+  });
+
+  it('blocks a loopback target (SSRF guard)', async () => {
+    const supported = await isSafeFetchSupported();
+    if (!supported) {
+      return;
+    }
+    const res = await safeFetch({ url: 'http://127.0.0.1:6379/' });
+    if (res === null) {
+      return;
+    }
+    expect(res.outcome, cite('safeFetch MUST NOT connect to loopback')).toBe('blocked');
+  });
+
+  it('blocks DNS-rebinding (resolved IP is pinned for the connection)', async () => {
+    const supported = await isSafeFetchSupported();
+    if (!supported) {
+      return;
+    }
+    const res = await safeFetch({ url: 'http://example.com/', simulateRebindTo: '169.254.169.254' });
+    if (res === null) {
+      return;
+    }
+    expect(
+      res.outcome,
+      cite('a public name that re-resolves to a blocked address MUST be blocked (rebinding defeat)'),
+    ).toBe('blocked');
+  });
+
+  it('refuses a Connection: upgrade request (no 101 socket-hijack escape)', async () => {
+    const supported = await isSafeFetchSupported();
+    if (!supported) {
+      return;
+    }
+    const res = await safeFetch({ url: 'https://example.com/', init: { headers: { Connection: 'upgrade' } } });
+    if (res === null) {
+      return;
+    }
+    expect(res.outcome, cite('safeFetch MUST refuse a connection-upgrade attempt')).toBe('blocked');
+    expect(res.blocked, cite('a refused upgrade reports blocked:"upgrade"')).toBe('upgrade');
+  });
+
+  it('emits the tool-hooks audit pair when prePostEvents is also advertised', async () => {
+    const supported = await isSafeFetchSupported();
+    if (!supported) {
+      return;
+    }
+    const auditOn = await isToolHookAuditOn();
+    if (!auditOn) {
+      return; // audit MUST applies only when both advertised
+    }
+    const res = await safeFetch({ url: 'https://example.com/' });
+    if (res === null) {
+      return;
+    }
+    if (res.outcome !== 'fetched') {
+      return; // only a completed call carries the pair
+    }
+    const hasPair = res.toolCalled !== undefined && res.toolReturned !== undefined;
+    expect(
+      hasPair,
+      cite('when toolHooks.prePostEvents + safeFetch are both advertised, a safeFetch call MUST emit the agent.toolCalled/agent.toolReturned pair'),
+    ).toBe(true);
+    const called = res.toolCalled as { transport?: string } | undefined;
+    expect(called?.transport, cite('the audit pair carries transport:"http"')).toBe('http');
+  });
+});

package/src/scenarios/safefetch-live-audit.test.ts ADDED Viewed

@@ -0,0 +1,175 @@
+/**
+ * Live-run safe-fetch audit emission — `host-capabilities.md` §host.http
+ * (`ctx.http.safeFetch`) + RFC 0076 §B + RFC 0064 §B.
+ *
+ * Closes the seam-vs-production gap left by `safefetch-behavior.test.ts`. That
+ * scenario drives `POST /v1/host/sample/http/safe-fetch` and reads the audit
+ * pair the SEAM returns INLINE — it never proves the *production* per-ctx
+ * `ctx.http.safeFetch` (the client injected into a real run) emits anything. A
+ * host can co-advertise `toolHooks.prePostEvents` + `httpClient.safeFetch`,
+ * pass the seam, and still ship a production `createSafeFetch()` with no audit
+ * hooks — the "quiet bypass" §host.http line "centralizing egress in the host
+ * must increase auditability, not become a quiet bypass" forbids.
+ *
+ * The normative MUST (host-capabilities.md §host.http; RFC 0076 §B):
+ *   When `toolHooks.prePostEvents: true` AND `httpClient.safeFetch.supported:
+ *   true` are BOTH advertised, the host MUST emit the `agent.toolCalled` /
+ *   `agent.toolReturned` pair (`transport: "http"`) **for every `safeFetch`
+ *   invocation** — including a *refused* one (a blocked egress attempt is
+ *   exactly the security-relevant event the audit log must capture).
+ *
+ * This scenario verifies that MUST against the DURABLE run event log, not the
+ * seam's inline echo, and does so **without depending on outbound egress** so
+ * the bar can never pass vacuously:
+ *   1. EGRESS-FREE FLOOR (required): drive one `ctx.http.safeFetch` to a
+ *      guaranteed-blocked link-local / cloud-metadata URL inside a REAL run via
+ *      `POST /v1/host/sample/http/safe-fetch-run`. A conformant SSRF guard
+ *      refuses it on every host with zero connectivity, yet the production
+ *      injection + auditHooks path is still exercised, so the durable pair MUST
+ *      be present. This removes the "no public egress ⇒ green-but-proves-nothing"
+ *      hole that a `fetched`-only assertion left.
+ *   2. SUCCESS-PATH COVERAGE (best-effort): drive a public URL; when it actually
+ *      `fetched`, assert the same durable pair (catches a host that audits only
+ *      the reject path). Skipped — not failed — where the environment has no
+ *      public egress; the floor already proved emission.
+ *   3. Read each run's persisted events via the test event-log seam
+ *      (`GET /v1/host/sample/test/runs/:runId/events`) and assert a `callId`-
+ *      paired `agent.toolCalled` (`transport:"http"`) / `agent.toolReturned`.
+ *
+ * Gating: `behaviorGate('openwop-safefetch-live-audit', <both flags>)` — NOT an
+ * inline soft-skip. So it skips-with-reason in default mode but FAILS under
+ * `OPENWOP_REQUIRE_BEHAVIOR=true` when a host advertises both flags yet does not
+ * emit. This is the RFC 0076 §B → Accepted bar a non-steward host validates
+ * against. The run seam itself (`safe-fetch-run`) is host-pending: a 404 from a
+ * not-yet-wired seam soft-skips even in strict mode (the seam is test-only
+ * infrastructure, distinct from the advertised production capability).
+ * The SSRF guarantee reuses the existing `http-client-ssrf-guard` invariant —
+ * no new SECURITY invariant; the audit MUST is RFC 0064's existing posture.
+ *
+ * @see spec/v1/host-capabilities.md §host.http
+ * @see spec/v1/host-sample-test-seams.md §"Open seams" (safe-fetch-run)
+ * @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md §B
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { behaviorGate } from '../lib/behavior-gate.js';
+import { isSafeFetchLiveAuditAdvertised, safeFetchViaRun } from '../lib/safeFetch.js';
+import { queryTestEvents } from '../lib/event-log-query.js';
+// Profile name handed to behaviorGate(); also used as the prefix of this file's console warnings.
+const PROFILE = 'openwop-safefetch-live-audit';
+// Spec citation passed as the first argument of driver.describe() for the §host.http assertions.
+const CITE = 'host-capabilities.md §host.http';
+// A link-local / cloud-metadata URL the SSRF guard MUST refuse — reachable on
+// EVERY host regardless of outbound egress, so the durable-pair assertion never
+// passes vacuously. Per §host.http the audit MUST is per-invocation: a *blocked*
+// safeFetch still emits the agent.toolCalled/agent.toolReturned pair (the
+// toolReturned carries the forbidden status). cf. `http-client-ssrf-guard`.
+const BLOCKED_URL = 'http://169.254.169.254/latest/meta-data/';
+// A public URL the guard SHOULD allow — best-effort coverage of the *success*
+// path; skipped (not failed) where the environment has no public egress.
+const FETCH_URL = 'https://example.com/';
+/**
+ * Read the durable run event log for `runId` and assert a `callId`-paired
+ * `agent.toolCalled` (`transport:"http"`) / `agent.toolReturned` exists, with
+ * the RFC 0002 §B causation chain tolerated when the host surfaces it.
+ *
+ * Events that carry no payload are treated as non-matching rather than
+ * crashing the lookup, so a malformed log fails with the conformance message
+ * instead of a raw TypeError.
+ *
+ * @param runId run whose persisted events are queried
+ * @param label short tag (e.g. `blocked`, `fetched`) prefixed to assertion messages
+ * @returns `false` (caller treats as host-pending soft-skip) only when the
+ *   event-log query seam is unavailable; otherwise asserts and returns `true`
+ */
+async function assertDurableHttpPair(runId: string, label: string): Promise<boolean> {
+  // The two queries are independent of each other, so issue them concurrently.
+  const [calledQ, returnedQ] = await Promise.all([
+    queryTestEvents(runId, { type: 'agent.toolCalled' }),
+    queryTestEvents(runId, { type: 'agent.toolReturned' }),
+  ]);
+  if (!calledQ.ok || !returnedQ.ok) {
+    // eslint-disable-next-line no-console
+    console.warn(`[${PROFILE}] event-log query seam unavailable; host-pending — skipping`);
+    return false;
+  }
+  // The HTTP-transport tool call: a durable agent.toolCalled with transport:"http".
+  const httpCall = calledQ.events.find(
+    (e) => (e.payload as { transport?: string } | null | undefined)?.transport === 'http',
+  );
+  expect(
+    httpCall !== undefined,
+    driver.describe(
+      CITE,
+      `(${label}) when toolHooks.prePostEvents + safeFetch are both advertised, a production ctx.http.safeFetch call MUST persist an agent.toolCalled with transport:"http" to the durable run event log (not just the seam echo), for EVERY invocation incl. blocked ones`,
+    ),
+  ).toBe(true);
+  // The assertion above already throws when httpCall is missing; this guard only narrows the type.
+  if (!httpCall) return true;
+  const callId = (httpCall.payload as { callId?: string } | null | undefined)?.callId;
+  expect(
+    typeof callId === 'string' && callId.length > 0,
+    driver.describe(CITE, `(${label}) the persisted agent.toolCalled MUST carry the required callId (run-event-payloads.schema.json §agentToolCalled)`),
+  ).toBe(true);
+  // The paired agent.toolReturned — matched by the required callId (RFC 0002 §B pairing).
+  const paired = returnedQ.events.find(
+    (e) => (e.payload as { callId?: string } | null | undefined)?.callId === callId,
+  );
+  expect(
+    paired !== undefined,
+    driver.describe(CITE, `(${label}) the agent.toolCalled MUST be followed by a callId-paired agent.toolReturned in the durable log (no quiet bypass)`),
+  ).toBe(true);
+  // Stricter, when the host surfaces causation: RFC 0002 §B says
+  // toolReturned.causationId === the paired toolCalled.eventId. Tolerate
+  // hosts that omit causationId (callId pairing already proven above).
+  if (paired && typeof paired.causationId === 'string') {
+    expect(
+      paired.causationId,
+      driver.describe('RFC 0002 §B', 'agent.toolReturned.causationId MUST equal the paired agent.toolCalled.eventId when surfaced'),
+    ).toBe(httpCall.eventId);
+  }
+  return true;
+}
+describe('safefetch-live-audit (RFC 0076 §B / RFC 0064 §B — production path, durable log)', () => {
+  it('a BLOCKED real-run safeFetch emits the durable agent.toolCalled/agent.toolReturned pair (transport:"http") — egress-free floor', async () => {
+    const bothAdvertised = await isSafeFetchLiveAuditAdvertised();
+    const gateOpen = behaviorGate(PROFILE, bothAdvertised);
+    if (!gateOpen) {
+      return; // default-skip; strict-fail when both flags advertised
+    }
+    // The run seam is only the test vehicle and is host-pending: an unwired
+    // seam soft-skips even in strict mode. Capability co-advertisement was
+    // already enforced by behaviorGate above.
+    const run = await safeFetchViaRun({ url: BLOCKED_URL });
+    if (run === null) {
+      // eslint-disable-next-line no-console
+      console.warn(`[${PROFILE}] safe-fetch-run seam unwired (404); host-pending — skipping`);
+      return;
+    }
+    // A conformant SSRF guard refuses the metadata IP (that contract belongs to
+    // http-client-ssrf.test.ts). Whatever the outcome, the production injection
+    // path ran, so the durable audit pair MUST exist — the egress-independent
+    // floor that keeps this bar from passing vacuously.
+    const runId = run.runId;
+    const hasRunId = typeof runId === 'string' && (runId as string).length > 0;
+    expect(
+      hasRunId,
+      driver.describe(CITE, 'the safe-fetch-run seam MUST return the runId of the real run it executed the safeFetch in'),
+    ).toBe(true);
+    await assertDurableHttpPair(runId as string, 'blocked');
+  });
+  it('a FETCHED real-run safeFetch also emits the durable pair (success-path coverage — skipped without public egress)', async () => {
+    const bothAdvertised = await isSafeFetchLiveAuditAdvertised();
+    const gateOpen = behaviorGate(PROFILE, bothAdvertised);
+    if (!gateOpen) {
+      return;
+    }
+    const run = await safeFetchViaRun({ url: FETCH_URL });
+    if (run === null) {
+      return; // seam unwired — already warned by the floor test
+    }
+    if (run.outcome !== 'fetched') {
+      // This environment has no public egress. The blocked-path floor already
+      // proved the production audit path emits, so success-path coverage is
+      // skipped rather than failed.
+      // eslint-disable-next-line no-console
+      console.warn(
+        `[${PROFILE}] ${FETCH_URL} did not fetch (outcome=${run.outcome ?? 'n/a'}); no public egress — success-path coverage skipped (the blocked floor covers emission)`,
+      );
+      return;
+    }
+    const runId = run.runId;
+    const hasRunId = typeof runId === 'string' && (runId as string).length > 0;
+    expect(
+      hasRunId,
+      driver.describe(CITE, 'the safe-fetch-run seam MUST return the runId of the real run it executed the fetch in'),
+    ).toBe(true);
+    await assertDurableHttpPair(runId as string, 'fetched');
+  });
+});

package/src/scenarios/sandbox-memory-cap.test.ts CHANGED Viewed

@@ -15,6 +15,7 @@
 import { describe, it, expect } from 'vitest';
 import { driver } from '../lib/driver.js';
+import { capabilityFamily } from '../lib/discovery-capabilities.js';
 const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
@@ -26,7 +27,7 @@ async function readSandbox(): Promise<{ supported: boolean; memoryLimitBytes?: n
   try {
     const r = await driver.get('/.well-known/openwop');
     if (r.status !== 200) return null;
-    const sb = (r.json as D).capabilities?.sandbox;
+    const sb = capabilityFamily((r.json as D), 'sandbox');
     if (!sb || sb.supported !== true) return null;
     return {
       supported: true,