npm - @openwop/openwop-conformance - Versions diffs - 1.1.1 → 1.2.0 - Mend

@openwop/openwop-conformance 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

package/CHANGELOG.md +25 -0
package/README.md +2 -2
package/coverage.md +26 -14
package/fixtures/conformance-agent-low-confidence.json +7 -4
package/fixtures/conformance-agent-pack-handoff-schema-validation.json +30 -0
package/fixtures/conformance-agent-reasoning.json +23 -4
package/fixtures/conformance-dispatch-cross-worker-handoff-child-a.json +27 -0
package/fixtures/conformance-dispatch-cross-worker-handoff-child-b.json +25 -0
package/fixtures/conformance-dispatch-cross-worker-handoff.json +60 -0
package/fixtures/conformance-dispatch-input-mapping-child.json +25 -0
package/fixtures/conformance-dispatch-input-mapping.json +49 -0
package/fixtures/conformance-dispatch-output-mapping-child.json +27 -0
package/fixtures/conformance-dispatch-output-mapping.json +49 -0
package/fixtures/conformance-subworkflow-input-mapping-child.json +27 -0
package/fixtures/conformance-subworkflow-input-mapping.json +33 -0
package/fixtures.md +12 -2
package/package.json +1 -1
package/schemas/README.md +7 -0
package/schemas/agent-ref.schema.json +1 -1
package/schemas/ai-envelope.schema.json +106 -0
package/schemas/capabilities.schema.json +248 -0
package/schemas/core-conformance-mock-agent-config.schema.json +147 -0
package/schemas/dispatch-config.schema.json +26 -0
package/schemas/envelopes/clarification.request.schema.json +43 -0
package/schemas/envelopes/error.schema.json +26 -0
package/schemas/envelopes/schema.request.schema.json +22 -0
package/schemas/envelopes/schema.response.schema.json +22 -0
package/schemas/node-pack-manifest.schema.json +5 -0
package/schemas/pack-lockfile.schema.json +16 -0
package/schemas/workflow-chain-pack-manifest.schema.json +226 -0
package/src/lib/webhook-receiver.ts +137 -0
package/src/lib/workflow-chain-expansion.ts +213 -0
package/src/scenarios/agentPackCatalog.test.ts +216 -0
package/src/scenarios/agentPackHandoffSchemaValidation.test.ts +146 -0
package/src/scenarios/agentReasoningEvents.test.ts +58 -7
package/src/scenarios/agents-run-tool-allowlist.test.ts +182 -0
package/src/scenarios/ai-envelope-shape.test.ts +362 -0
package/src/scenarios/aiEnvelope.capBreached.test.ts +173 -0
package/src/scenarios/aiEnvelope.contractRefusal.test.ts +150 -0
package/src/scenarios/aiEnvelope.correlationReplay.test.ts +69 -0
package/src/scenarios/aiEnvelope.redaction.test.ts +73 -0
package/src/scenarios/aiEnvelope.schemaDrift.test.ts +87 -0
package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +143 -0
package/src/scenarios/aiEnvelope.universalKinds.test.ts +176 -0
package/src/scenarios/append-ordering.test.ts +44 -0
package/src/scenarios/artifact-auth.test.ts +58 -0
package/src/scenarios/blob-cross-tenant-isolation.test.ts +66 -0
package/src/scenarios/blob-presign-expiry.test.ts +66 -0
package/src/scenarios/blob-roundtrip.test.ts +48 -0
package/src/scenarios/cache-cross-tenant-isolation.test.ts +61 -0
package/src/scenarios/cache-ttl-expiry.test.ts +47 -0
package/src/scenarios/dispatch-cross-worker-handoff.test.ts +98 -0
package/src/scenarios/dispatch-input-mapping.test.ts +94 -0
package/src/scenarios/dispatch-output-mapping.test.ts +65 -0
package/src/scenarios/fs-path-traversal.test.ts +124 -0
package/src/scenarios/idempotency-key-determinism.test.ts +230 -0
package/src/scenarios/interrupt-token-matrix.test.ts +126 -0
package/src/scenarios/kv-atomic-increment.test.ts +74 -0
package/src/scenarios/kv-cas.test.ts +75 -0
package/src/scenarios/kv-cross-tenant-isolation.test.ts +85 -0
package/src/scenarios/kv-ttl-expiry.test.ts +47 -0
package/src/scenarios/mcp-server-elicitation-bridge.test.ts +92 -0
package/src/scenarios/mcp-server-prompt-roundtrip.test.ts +80 -0
package/src/scenarios/mcp-server-resource-roundtrip.test.ts +82 -0
package/src/scenarios/mcp-server-sampling-bridge.test.ts +84 -0
package/src/scenarios/mcp-server-tool-roundtrip.test.ts +107 -0
package/src/scenarios/mcp-server-untrusted-args.test.ts +105 -0
package/src/scenarios/pause-resume.test.ts +43 -0
package/src/scenarios/queue-ack-nack-dlq.test.ts +67 -0
package/src/scenarios/queue-cross-tenant-isolation.test.ts +66 -0
package/src/scenarios/queue-publish-consume-roundtrip.test.ts +48 -0
package/src/scenarios/search-bm25-roundtrip.test.ts +47 -0
package/src/scenarios/spec-corpus-validity.test.ts +17 -1
package/src/scenarios/sql-injection-rejection.test.ts +84 -0
package/src/scenarios/sql-transaction-atomicity.test.ts +66 -0
package/src/scenarios/stream-subscribe-from-beginning.test.ts +66 -0
package/src/scenarios/subworkflow-input-mapping.test.ts +100 -0
package/src/scenarios/table-cross-tenant-isolation.test.ts +65 -0
package/src/scenarios/table-cursor-pagination.test.ts +47 -0
package/src/scenarios/table-schema-enforcement.test.ts +47 -0
package/src/scenarios/vector-knn-roundtrip.test.ts +48 -0
package/src/scenarios/webhook-receiver-adversarial.test.ts +210 -0
package/src/scenarios/workflow-chain-expansion.test.ts +366 -0
package/src/scenarios/workflow-chain-pack-manifest-validation.test.ts +232 -0
package/src/scenarios/workflow-chain-pack-signature-verification.test.ts +138 -0
package/src/scenarios/workflow-chain-unresolvable-typeid.test.ts +170 -0

package/src/scenarios/agentPackHandoffSchemaValidation.test.ts ADDED Viewed

@@ -0,0 +1,146 @@
+/**
+ * Multi-Agent Shift Phase 2 — handoff-schema validation at dispatch (HV-1).
+ * Normative reference: RFCS/0003-agent-packs.md §D (handoff schema resolution)
+ *
+ * Verifies that when an agent's manifest carries `handoff.taskSchemaRef`, the
+ * host MUST validate inbound dispatch payloads against the referenced JSON
+ * Schema (resolved at install time per RFC 0003 §D) BEFORE dispatching the
+ * agent. Invalid payloads MUST be rejected with a structured error envelope
+ * — the agent MUST NOT see the malformed payload.
+ *
+ * Symmetric assertion on `handoff.returnSchemaRef`: when an agent returns a
+ * payload that fails return-schema validation, the host MUST reject before
+ * persistence and surface a structured error rather than silently storing
+ * an off-contract result.
+ *
+ * Capability-gated: skips when host doesn't advertise
+ * `capabilities.agents.supported: true` AND `capabilities.agents.dispatch: true`.
+ * Fixture-gated: requires `conformance-agent-pack-handoff-schema-validation`.
+ *
+ * @see RFCS/0003-agent-packs.md §D
+ * @see schemas/agent-manifest.schema.json #/properties/handoff
+ * @see packs/core.openwop.agent-examples/agents[structured-fixture]
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { isAgentSupported } from '../lib/multi-agent-capabilities.js';
+const FIXTURE = 'conformance-agent-pack-handoff-schema-validation';
+const SKIP = !isAgentSupported() || !isFixtureAdvertised(FIXTURE);
+/** Event-log entry shape read from `GET /v1/runs/{runId}/events`. */
+type RunEvent = { type: string; payload?: Record<string, unknown> };
+/**
+ * Creates a run of the fixture workflow and asserts the host answered `201`.
+ * @param inputs - Workflow inputs; `scenario` selects the fixture branch.
+ * @returns The `runId` read from the create response.
+ */
+async function startRun(inputs: Record<string, unknown>): Promise<string> {
+  const create = await driver.post('/v1/runs', { workflowId: FIXTURE, inputs });
+  expect(create.status).toBe(201);
+  return (create.json as { runId: string }).runId;
+}
+/**
+ * Polls the run snapshot (up to 40 times, 100 ms apart) until its status is in
+ * `settled`. Returns the settled status, or `undefined` on timeout. A response
+ * without a string `status` counts as "not settled yet" instead of throwing.
+ */
+async function awaitStatus(runId: string, settled: readonly string[]): Promise<string | undefined> {
+  for (let attempt = 0; attempt < 40; attempt++) {
+    const res = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
+    const status = (res.json as { status?: unknown } | null | undefined)?.status;
+    if (typeof status === 'string' && settled.includes(status)) return status;
+    await new Promise((r) => setTimeout(r, 100));
+  }
+  return undefined;
+}
+/**
+ * Fetches the run's event log and asserts the endpoint answered `200`.
+ * @param runId - Run whose events are read.
+ * @returns The `events` array, or `[]` when the response carries none.
+ */
+async function fetchEvents(runId: string): Promise<RunEvent[]> {
+  const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
+  expect(events.status).toBe(200);
+  return (events.json as { events?: RunEvent[] } | null | undefined)?.events ?? [];
+}
+/**
+ * Tests whether `event.payload.error` is an object carrying one of `codes`.
+ * @param event - Event-log entry to inspect.
+ * @param codes - Accepted values for `error.code`.
+ */
+function hasErrorCode(event: RunEvent, codes: readonly string[]): boolean {
+  const error = event.payload?.error;
+  if (typeof error !== 'object' || error === null) return false;
+  const code = (error as Record<string, unknown>).code;
+  return typeof code === 'string' && codes.includes(code);
+}
+describe.skipIf(SKIP)('agentPackHandoffSchemaValidation: handoff schema enforcement at dispatch', () => {
+  it('valid task payload that matches taskSchemaRef is dispatched and completes', async () => {
+    // The fixture workflow dispatches `core.openwop.agent-examples.structured-fixture`
+    // with a VALID task payload matching schemas/structured-fixture.task.schema.json
+    // (`{ text: string, extractionFields: string[], language?: string }`).
+    const runId = await startRun({
+      scenario: 'valid-task',
+      text: 'Acme Corp invoiced $1,200 on 2026-04-15 for Q2 consulting.',
+      extractionFields: ['vendor', 'amount', 'date'],
+    });
+    // `waiting-approval` also ends the poll so a parked run fails fast instead
+    // of exhausting the polling window.
+    const status = await awaitStatus(runId, ['completed', 'failed', 'waiting-approval']);
+    expect(status, 'HV-1a: run did not settle within the polling window').toBeDefined();
+    expect(status, 'HV-1a: valid task payload should NOT be rejected by handoff-schema validation').toBe('completed');
+  });
+  it('invalid task payload (missing required field) is rejected before dispatch with structured error', async () => {
+    const runId = await startRun({
+      scenario: 'invalid-task',
+      // intentionally missing required `extractionFields`
+      text: 'Some input text',
+    });
+    const status = await awaitStatus(runId, ['completed', 'failed']);
+    expect(status, 'HV-1b: run did not settle within the polling window').toBeDefined();
+    expect(status, 'HV-1b: invalid task payload MUST cause the run to fail rather than silently dispatch off-contract').toBe('failed');
+    const list = await fetchEvents(runId);
+    const validationFailure = list.find(
+      (e) =>
+        e.type === 'node.failed' &&
+        hasErrorCode(e, ['handoff_task_schema_violation', 'agent_dispatch_validation_failed']),
+    );
+    expect(
+      validationFailure,
+      'HV-1b: failure event payload MUST carry a recognizable handoff-validation error code',
+    ).toBeDefined();
+  });
+  it('agent return payload that fails returnSchemaRef is rejected before persistence', async () => {
+    // The fixture's `mock-return-violation` scenario causes the agent runtime
+    // to emit a return payload that violates schemas/structured-fixture.return.schema.json
+    // (e.g., omits the required `extracted` field while not declaring `error`).
+    const runId = await startRun({ scenario: 'mock-return-violation' });
+    const status = await awaitStatus(runId, ['completed', 'failed']);
+    // Hosts MAY surface return-schema violations as either a failed run OR a
+    // run that completes with a flagged error envelope, but the persisted
+    // result MUST NOT carry an off-schema body. Tolerate both outcomes here;
+    // the strict assertion is that downstream readers can detect the violation.
+    expect(['completed', 'failed'], 'HV-1c: run did not settle within the polling window').toContain(status);
+    const list = await fetchEvents(runId);
+    const returnViolation = list.find(
+      (e) =>
+        (e.type === 'node.failed' || e.type === 'agent.error') &&
+        hasErrorCode(e, ['handoff_return_schema_violation', 'agent_return_validation_failed']),
+    );
+    expect(
+      returnViolation,
+      'HV-1c: off-schema return payload MUST surface a structured violation event before persistence',
+    ).toBeDefined();
+  });
+});

package/src/scenarios/agentReasoningEvents.test.ts CHANGED Viewed

@@ -47,26 +47,77 @@ describe.skipIf(SKIP)('agentReasoningEvents: agent.* event family emission', ()
     const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
     expect(events.status).toBe(200);
-    const list = (events.json as { events?: Array<{ type: string; payload?: Record<string, unknown> }> })
-      .events ?? [];
+    const list = (events.json as {
+      events?: Array<{
+        type: string;
+        eventId?: string;
+        causationId?: string;
+        payload?: Record<string, unknown>;
+      }>;
+    }).events ?? [];
     const agentEvents = list.filter((e) => REASONING_EVENT_TYPES.has(e.type));
     expect(agentEvents.length).toBeGreaterThan(0);
-    // Every agent.* event payload MUST carry `agentId` (per RFC 0002 §C).
+    // Every agent.* event payload MUST identify the agent. Per
+    // `run-event-payloads.schema.json` §`agent*` shapes, four of the
+    // five events (`reasoned`, `toolCalled`, `toolReturned`, `decided`)
+    // carry `agentId`; `agent.handoff` carries `fromAgentId` + `toAgentId`
+    // instead. Allow either shape.
     for (const ev of agentEvents) {
-      expect(typeof ev.payload?.agentId).toBe('string');
-      expect((ev.payload!.agentId as string).length).toBeGreaterThanOrEqual(3);
+      const p = (ev.payload ?? {}) as Record<string, unknown>;
+      if (ev.type === 'agent.handoff') {
+        expect(typeof p.fromAgentId).toBe('string');
+        expect(typeof p.toAgentId).toBe('string');
+        expect((p.fromAgentId as string).length).toBeGreaterThanOrEqual(3);
+        expect((p.toAgentId as string).length).toBeGreaterThanOrEqual(3);
+      } else {
+        expect(typeof p.agentId).toBe('string');
+        expect((p.agentId as string).length).toBeGreaterThanOrEqual(3);
+      }
     }
-    // agent.toolCalled / agent.toolReturned MUST share a `callId` correlation.
+    // agent.toolCalled / agent.toolReturned pairing — two normative
+    // requirements per RFC 0002 §B (`agentToolReturned`):
+    //
+    //   1. callId correlation. The pair shares a host-minted `callId`
+    //      on their payloads; readers correlate request → response by
+    //      this id even when the events arrive interleaved with other
+    //      agent.* activity.
+    //
+    //   2. causationId === eventId of the paired agent.toolCalled.
+    //      RFC 0002 §B states "`causationId` MUST equal the `eventId`
+    //      of the corresponding `agent.toolCalled`." This is stricter
+    //      than callId-pairing alone — it threads the event-log identity
+    //      through the correlation chain so replay-determinism guarantees
+    //      (`spec/v1/replay.md` §"Determinism with non-deterministic
+    //      agents") survive event-id reuse and out-of-order delivery.
+    //      Hosts whose `appendEvent` surface doesn't return the eventId
+    //      synchronously need to extend it so the node can thread the
+    //      paired eventId through.
     const calls = agentEvents.filter((e) => e.type === 'agent.toolCalled');
     const returns = agentEvents.filter((e) => e.type === 'agent.toolReturned');
     for (const ret of returns) {
       const callId = ret.payload?.callId as string | undefined;
       if (callId === undefined) continue;
       const matched = calls.find((c) => c.payload?.callId === callId);
-      expect(matched, `agent.toolReturned.callId=${callId} MUST pair with a prior agent.toolCalled`).toBeDefined();
+      expect(
+        matched,
+        `agent.toolReturned.callId=${callId} MUST pair with a prior agent.toolCalled`,
+      ).toBeDefined();
+      // Strict causationId chain — only assert when the host actually
+      // surfaces eventId on the matched toolCalled event. Hosts that
+      // omit eventId from their `/events` projection skip this check
+      // (and SHOULD add it — RFC 0002 §B's chain integrity depends on
+      // it).
+      const matchedEventId = matched?.eventId;
+      if (typeof matchedEventId === 'string' && matchedEventId.length > 0) {
+        expect(
+          ret.causationId,
+          `agent.toolReturned (callId=${callId}) MUST carry causationId === paired agent.toolCalled.eventId per RFC 0002 §B`,
+        ).toBe(matchedEventId);
+      }
     }
   });
 });

package/src/scenarios/agents-run-tool-allowlist.test.ts ADDED Viewed

@@ -0,0 +1,182 @@
+/**
+ * core.openwop.agents.run — tool-allowlist enforcement contract
+ *
+ * Closes `OPENWOP-AUDIT-2026-003`: the 1.0.0 pack invoked workflow-supplied
+ * `tool.handler` as raw JS in its fallback loop, breaking the spec's
+ * `prompt-injection-tool-allowlist` invariant (`threat-model-prompt-injection.md`
+ * §"Authority bypass"). 1.0.1 refuses function-typed handlers outright; this
+ * scenario locks the refusal in as a CI gate so a future pack reimplementation
+ * cannot silently regress.
+ *
+ * Server-free. Loads the pack via dynamic import and asserts:
+ *
+ *   1. `tools[]` entries with `typeof handler === 'function'` are rejected
+ *      with `INVALID_TOOL_DECLARATION` BEFORE any LLM call. The defect path.
+ *   2. `tools[]` entries missing a `name` are rejected (declaration discipline).
+ *   3. `tools[]` entries missing a `kind` discriminator are rejected (the host
+ *      cannot resolve an unkinded tool through its connector registry).
+ *   4. Tool-driven runs (`tools.length > 0`) WITHOUT `ctx.agentRuntime` refuse
+ *      with `HOST_CAPABILITY_MISSING` — the inline fallback that invoked raw
+ *      handlers was removed in 1.0.1; there is no longer a host-less path for
+ *      tool dispatch.
+ *   5. Tool-less runs (`tools.length === 0`) succeed via `ctx.callAIWithTools`
+ *      with no tool dispatch (safe path preserved across the fix).
+ *   6. The preferred `ctx.agentRuntime.run` path threads through unchanged.
+ *
+ * Skip-conditions: soft-skips when `packs/core.openwop.agents/index.mjs` is not
+ * present (published-conformance-package context where pack source isn't shipped).
+ *
+ * @see SECURITY/internal-pre-audit-findings.json#OPENWOP-AUDIT-2026-003
+ * @see SECURITY/threat-model-prompt-injection.md §"Authority bypass" + §"prompt-injection-tool-allowlist"
+ * @see SECURITY/invariants.yaml#agents-run-no-raw-handler
+ * @see packs/core.openwop.agents/index.mjs (1.0.1)
+ */
+import { describe, it, expect, beforeAll } from 'vitest';
+import { existsSync } from 'node:fs';
+import { dirname, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+const __dirname = dirname(fileURLToPath(import.meta.url)); // directory containing this test module (ESM has no built-in __dirname)
+const PACK_PATH = resolve(__dirname, '../../../packs/core.openwop.agents/index.mjs'); // pack entry point, resolved three directories above this module; its existence is checked before import
+interface AgentRunCtx { // context object this scenario hands to the pack's `agentRun`; every member is optional
+  config?: Record<string, unknown>; // the tests in this scenario always pass `{}`
+  inputs?: {
+    userPrompt?: string;
+    tools?: unknown[]; // tool declarations under test; typed loosely so malformed entries can be supplied
+    memory?: unknown; // not set by any test shown in this scenario
+    outputParser?: unknown; // not set by any test shown in this scenario
+  };
+  agentRuntime?: { run?: (...args: unknown[]) => Promise<unknown> }; // host runtime hook; stubbed only in the "threads through unchanged" test
+  callAIWithTools?: (...args: unknown[]) => Promise<{ text?: string; usage?: unknown; toolCalls?: unknown[] }>; // stubbed only in the tool-less test
+  emit?: (...args: unknown[]) => Promise<void>; // not set by any test shown in this scenario
+}
+type AgentRunFn = (ctx: AgentRunCtx) => Promise<{ status: 'success'; outputs: Record<string, unknown> }>; // shape this scenario expects of the pack's `agentRun` export
+/**
+ * Invokes `fn` and asserts that it failed with an `Error` whose `code` property
+ * equals `expectedCode`.
+ * @param fn - Thunk under test; both a synchronous throw and a rejected promise count as failure.
+ * @param expectedCode - Value the failure's `code` must equal.
+ * @param description - Label attached to both assertions.
+ */
+async function expectRejection(fn: () => Promise<unknown>, expectedCode: string, description: string): Promise<void> {
+  // Resolves to whatever `fn` threw or rejected with, or `undefined` when it succeeded.
+  const captureFailure = async (): Promise<unknown> => {
+    try {
+      await fn();
+      return undefined;
+    } catch (reason) {
+      return reason;
+    }
+  };
+  const failure = await captureFailure();
+  expect(failure, description).toBeInstanceOf(Error);
+  const { code } = failure as Error & { code?: string };
+  expect(code, `${description} → code`).toBe(expectedCode);
+}
+describe('category: core.openwop.agents.run — tool-allowlist enforcement (OPENWOP-AUDIT-2026-003)', () => {
+  // Both bindings are filled in by `beforeAll`; `runAgent` is only assigned
+  // when the pack file exists on disk.
+  let runAgent: AgentRunFn;
+  let packPresent: boolean;
+  beforeAll(async () => {
+    packPresent = existsSync(PACK_PATH);
+    if (packPresent) {
+      const loaded = (await import(PACK_PATH)) as { agentRun?: AgentRunFn };
+      const exported = loaded.agentRun;
+      if (typeof exported !== 'function') {
+        throw new Error(`expected packs/core.openwop.agents/index.mjs to export agentRun; got ${typeof exported}`);
+      }
+      runAgent = exported;
+    }
+  });
+  it('skips cleanly when pack source is not bundled', () => {
+    if (packPresent) {
+      expect(packPresent).toBe(true);
+      return;
+    }
+    console.warn('[agents-run-tool-allowlist] pack source not present; skipping');
+    expect(packPresent).toBe(false);
+  });
+  it('rejects function-typed tool.handler (the defect path)', async () => {
+    if (!packPresent) return;
+    // 1.0.0 awaited a workflow-supplied `tools[].handler` directly, handing it
+    // ctx — i.e. arbitrary JS from the workflow author. 1.0.1 validates at the
+    // run boundary and throws INVALID_TOOL_DECLARATION BEFORE any LLM call.
+    const hostileCtx: AgentRunCtx = {
+      config: {},
+      inputs: {
+        userPrompt: 'x',
+        tools: [{ name: 'evil', kind: 'function', handler: () => 'rce' }],
+      },
+    };
+    await expectRejection(
+      () => runAgent(hostileCtx),
+      'INVALID_TOOL_DECLARATION',
+      'function-typed handler MUST be refused',
+    );
+  });
+  it('rejects tool declaration missing a name', async () => {
+    if (!packPresent) return;
+    const unnamedToolCtx: AgentRunCtx = {
+      config: {},
+      inputs: { userPrompt: 'x', tools: [{ kind: 'workflow' }] },
+    };
+    await expectRejection(
+      () => runAgent(unnamedToolCtx),
+      'INVALID_TOOL_DECLARATION',
+      'unnamed tool MUST be refused',
+    );
+  });
+  it('rejects tool declaration missing a kind discriminator', async () => {
+    if (!packPresent) return;
+    const unkindedToolCtx: AgentRunCtx = {
+      config: {},
+      inputs: { userPrompt: 'x', tools: [{ name: 't1' }] },
+    };
+    await expectRejection(
+      () => runAgent(unkindedToolCtx),
+      'INVALID_TOOL_DECLARATION',
+      'unkinded tool MUST be refused — host cannot resolve through its registry',
+    );
+  });
+  it('rejects tool-driven runs when host does not provide agentRuntime', async () => {
+    if (!packPresent) return;
+    // With the 1.0.0 inline-handler fallback removed, a well-formed tool list
+    // still needs a host-resolved runtime; none is supplied here.
+    const runtimelessCtx: AgentRunCtx = {
+      config: {},
+      inputs: { userPrompt: 'x', tools: [{ name: 't1', kind: 'workflow' }] },
+    };
+    await expectRejection(
+      () => runAgent(runtimelessCtx),
+      'HOST_CAPABILITY_MISSING',
+      'tools[] with no agentRuntime MUST refuse',
+    );
+  });
+  it('tool-less run succeeds via callAIWithTools (safe fallback preserved)', async () => {
+    if (!packPresent) return;
+    // Sentinel distinguishes "stub never invoked" from "invoked with no tools".
+    let observedTools: unknown = 'never-called';
+    const stubCallAI = async (args: unknown) => {
+      observedTools = (args as { tools?: unknown[] }).tools;
+      return { text: 'hello back', usage: { input_tokens: 1, output_tokens: 1 } };
+    };
+    const toolLessCtx: AgentRunCtx = {
+      config: {},
+      inputs: { userPrompt: 'hi', tools: [] },
+      callAIWithTools: stubCallAI,
+    };
+    const { outputs } = await runAgent(toolLessCtx);
+    expect(outputs.result).toBe('hello back');
+    expect(outputs.finishReason).toBe('complete');
+    expect(observedTools, 'tool-less fallback MUST pass an empty tools array — no LLM-driven dispatch').toEqual([]);
+  });
+  it('agentRuntime.run path threads through unchanged when host provides it', async () => {
+    if (!packPresent) return;
+    let forwardedTools: unknown;
+    const stubRuntimeRun = async (req: unknown) => {
+      forwardedTools = (req as { tools?: unknown[] }).tools;
+      return { result: 'from-runtime', toolCalls: [{ name: 't1' }] };
+    };
+    const hostedCtx: AgentRunCtx = {
+      config: {},
+      inputs: {
+        userPrompt: 'x',
+        tools: [{ name: 't1', kind: 'workflow', ref: 'vendor.acme.demo' }],
+      },
+      agentRuntime: { run: stubRuntimeRun },
+    };
+    const { outputs } = await runAgent(hostedCtx);
+    expect(outputs.result).toBe('from-runtime');
+    expect(forwardedTools, 'host MUST receive the validated tools array').toEqual([
+      { name: 't1', kind: 'workflow', ref: 'vendor.acme.demo' },
+    ]);
+  });
+});