@openwop/openwop-conformance 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/CHANGELOG.md +25 -0
  2. package/README.md +2 -2
  3. package/coverage.md +26 -14
  4. package/fixtures/conformance-agent-low-confidence.json +7 -4
  5. package/fixtures/conformance-agent-pack-handoff-schema-validation.json +30 -0
  6. package/fixtures/conformance-agent-reasoning.json +23 -4
  7. package/fixtures/conformance-dispatch-cross-worker-handoff-child-a.json +27 -0
  8. package/fixtures/conformance-dispatch-cross-worker-handoff-child-b.json +25 -0
  9. package/fixtures/conformance-dispatch-cross-worker-handoff.json +60 -0
  10. package/fixtures/conformance-dispatch-input-mapping-child.json +25 -0
  11. package/fixtures/conformance-dispatch-input-mapping.json +49 -0
  12. package/fixtures/conformance-dispatch-output-mapping-child.json +27 -0
  13. package/fixtures/conformance-dispatch-output-mapping.json +49 -0
  14. package/fixtures/conformance-subworkflow-input-mapping-child.json +27 -0
  15. package/fixtures/conformance-subworkflow-input-mapping.json +33 -0
  16. package/fixtures.md +12 -2
  17. package/package.json +1 -1
  18. package/schemas/README.md +7 -0
  19. package/schemas/agent-ref.schema.json +1 -1
  20. package/schemas/ai-envelope.schema.json +106 -0
  21. package/schemas/capabilities.schema.json +248 -0
  22. package/schemas/core-conformance-mock-agent-config.schema.json +147 -0
  23. package/schemas/dispatch-config.schema.json +26 -0
  24. package/schemas/envelopes/clarification.request.schema.json +43 -0
  25. package/schemas/envelopes/error.schema.json +26 -0
  26. package/schemas/envelopes/schema.request.schema.json +22 -0
  27. package/schemas/envelopes/schema.response.schema.json +22 -0
  28. package/schemas/node-pack-manifest.schema.json +5 -0
  29. package/schemas/pack-lockfile.schema.json +16 -0
  30. package/schemas/workflow-chain-pack-manifest.schema.json +226 -0
  31. package/src/lib/webhook-receiver.ts +137 -0
  32. package/src/lib/workflow-chain-expansion.ts +213 -0
  33. package/src/scenarios/agentPackCatalog.test.ts +216 -0
  34. package/src/scenarios/agentPackHandoffSchemaValidation.test.ts +146 -0
  35. package/src/scenarios/agentReasoningEvents.test.ts +58 -7
  36. package/src/scenarios/agents-run-tool-allowlist.test.ts +182 -0
  37. package/src/scenarios/ai-envelope-shape.test.ts +362 -0
  38. package/src/scenarios/aiEnvelope.capBreached.test.ts +173 -0
  39. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +150 -0
  40. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +69 -0
  41. package/src/scenarios/aiEnvelope.redaction.test.ts +73 -0
  42. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +87 -0
  43. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +143 -0
  44. package/src/scenarios/aiEnvelope.universalKinds.test.ts +176 -0
  45. package/src/scenarios/append-ordering.test.ts +44 -0
  46. package/src/scenarios/artifact-auth.test.ts +58 -0
  47. package/src/scenarios/blob-cross-tenant-isolation.test.ts +66 -0
  48. package/src/scenarios/blob-presign-expiry.test.ts +66 -0
  49. package/src/scenarios/blob-roundtrip.test.ts +48 -0
  50. package/src/scenarios/cache-cross-tenant-isolation.test.ts +61 -0
  51. package/src/scenarios/cache-ttl-expiry.test.ts +47 -0
  52. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +98 -0
  53. package/src/scenarios/dispatch-input-mapping.test.ts +94 -0
  54. package/src/scenarios/dispatch-output-mapping.test.ts +65 -0
  55. package/src/scenarios/fs-path-traversal.test.ts +124 -0
  56. package/src/scenarios/idempotency-key-determinism.test.ts +230 -0
  57. package/src/scenarios/interrupt-token-matrix.test.ts +126 -0
  58. package/src/scenarios/kv-atomic-increment.test.ts +74 -0
  59. package/src/scenarios/kv-cas.test.ts +75 -0
  60. package/src/scenarios/kv-cross-tenant-isolation.test.ts +85 -0
  61. package/src/scenarios/kv-ttl-expiry.test.ts +47 -0
  62. package/src/scenarios/mcp-server-elicitation-bridge.test.ts +92 -0
  63. package/src/scenarios/mcp-server-prompt-roundtrip.test.ts +80 -0
  64. package/src/scenarios/mcp-server-resource-roundtrip.test.ts +82 -0
  65. package/src/scenarios/mcp-server-sampling-bridge.test.ts +84 -0
  66. package/src/scenarios/mcp-server-tool-roundtrip.test.ts +107 -0
  67. package/src/scenarios/mcp-server-untrusted-args.test.ts +105 -0
  68. package/src/scenarios/pause-resume.test.ts +43 -0
  69. package/src/scenarios/queue-ack-nack-dlq.test.ts +67 -0
  70. package/src/scenarios/queue-cross-tenant-isolation.test.ts +66 -0
  71. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +48 -0
  72. package/src/scenarios/search-bm25-roundtrip.test.ts +47 -0
  73. package/src/scenarios/spec-corpus-validity.test.ts +17 -1
  74. package/src/scenarios/sql-injection-rejection.test.ts +84 -0
  75. package/src/scenarios/sql-transaction-atomicity.test.ts +66 -0
  76. package/src/scenarios/stream-subscribe-from-beginning.test.ts +66 -0
  77. package/src/scenarios/subworkflow-input-mapping.test.ts +100 -0
  78. package/src/scenarios/table-cross-tenant-isolation.test.ts +65 -0
  79. package/src/scenarios/table-cursor-pagination.test.ts +47 -0
  80. package/src/scenarios/table-schema-enforcement.test.ts +47 -0
  81. package/src/scenarios/vector-knn-roundtrip.test.ts +48 -0
  82. package/src/scenarios/webhook-receiver-adversarial.test.ts +210 -0
  83. package/src/scenarios/workflow-chain-expansion.test.ts +366 -0
  84. package/src/scenarios/workflow-chain-pack-manifest-validation.test.ts +232 -0
  85. package/src/scenarios/workflow-chain-pack-signature-verification.test.ts +138 -0
  86. package/src/scenarios/workflow-chain-unresolvable-typeid.test.ts +170 -0
@@ -0,0 +1,146 @@
1
+ /**
2
+ * Multi-Agent Shift Phase 2 — handoff-schema validation at dispatch (HV-1).
3
+ * Normative reference: RFCS/0003-agent-packs.md §D (handoff schema resolution)
4
+ *
5
+ * Verifies that when an agent's manifest carries `handoff.taskSchemaRef`, the
6
+ * host MUST validate inbound dispatch payloads against the referenced JSON
7
+ * Schema (resolved at install time per RFC 0003 §D) BEFORE dispatching the
8
+ * agent. Invalid payloads MUST be rejected with a structured error envelope
9
+ * — the agent MUST NOT see the malformed payload.
10
+ *
11
+ * Symmetric assertion on `handoff.returnSchemaRef`: when an agent returns a
12
+ * payload that fails return-schema validation, the host MUST reject before
13
+ * persistence and surface a structured error rather than silently storing
14
+ * an off-contract result.
15
+ *
16
+ * Capability-gated: skips when host doesn't advertise
17
+ * `capabilities.agents.supported: true` AND `capabilities.agents.dispatch: true`.
18
+ * Fixture-gated: requires `conformance-agent-pack-handoff-schema-validation`.
19
+ *
20
+ * @see RFCS/0003-agent-packs.md §D
21
+ * @see schemas/agent-manifest.schema.json #/properties/handoff
22
+ * @see packs/core.openwop.agent-examples/agents[structured-fixture]
23
+ */
24
+
25
+ import { describe, it, expect } from 'vitest';
26
+ import { driver } from '../lib/driver.js';
27
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
28
+ import { isAgentSupported } from '../lib/multi-agent-capabilities.js';
29
+
30
+ const FIXTURE = 'conformance-agent-pack-handoff-schema-validation';
31
+ const SKIP = !isAgentSupported() || !isFixtureAdvertised(FIXTURE);
32
+
33
+ describe.skipIf(SKIP)('agentPackHandoffSchemaValidation: handoff schema enforcement at dispatch', () => {
34
+ it('valid task payload that matches taskSchemaRef is dispatched and completes', async () => {
35
+ // The fixture workflow dispatches `core.openwop.agent-examples.structured-fixture`
36
+ // with a VALID task payload matching schemas/structured-fixture.task.schema.json
37
+ // (`{ text: string, extractionFields: string[], language?: string }`).
38
+ const create = await driver.post('/v1/runs', {
39
+ workflowId: FIXTURE,
40
+ inputs: {
41
+ scenario: 'valid-task',
42
+ text: 'Acme Corp invoiced $1,200 on 2026-04-15 for Q2 consulting.',
43
+ extractionFields: ['vendor', 'amount', 'date'],
44
+ },
45
+ });
46
+ expect(create.status).toBe(201);
47
+ const runId = (create.json as { runId: string }).runId;
48
+
49
+ let snap: { status: string } | undefined;
50
+ for (let i = 0; i < 40; i++) {
51
+ const res = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
52
+ const body = res.json as { status: string };
53
+ if (['completed', 'failed', 'waiting-approval'].includes(body.status)) {
54
+ snap = body;
55
+ break;
56
+ }
57
+ await new Promise((r) => setTimeout(r, 100));
58
+ }
59
+ expect(snap?.status, 'HV-1a: valid task payload should NOT be rejected by handoff-schema validation').toBe('completed');
60
+ });
61
+
62
+ it('invalid task payload (missing required field) is rejected before dispatch with structured error', async () => {
63
+ const create = await driver.post('/v1/runs', {
64
+ workflowId: FIXTURE,
65
+ inputs: {
66
+ scenario: 'invalid-task',
67
+ // intentionally missing required `extractionFields`
68
+ text: 'Some input text',
69
+ },
70
+ });
71
+ expect(create.status).toBe(201);
72
+ const runId = (create.json as { runId: string }).runId;
73
+
74
+ let snap: { status: string } | undefined;
75
+ for (let i = 0; i < 40; i++) {
76
+ const res = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
77
+ const body = res.json as { status: string };
78
+ if (['completed', 'failed'].includes(body.status)) {
79
+ snap = body;
80
+ break;
81
+ }
82
+ await new Promise((r) => setTimeout(r, 100));
83
+ }
84
+ expect(snap?.status, 'HV-1b: invalid task payload MUST cause the run to fail rather than silently dispatch off-contract').toBe('failed');
85
+
86
+ const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
87
+ const list = (events.json as { events?: Array<{ type: string; payload?: Record<string, unknown> }> })
88
+ .events ?? [];
89
+
90
+ const validationFailure = list.find(
91
+ (e) =>
92
+ e.type === 'node.failed' &&
93
+ typeof e.payload?.error === 'object' &&
94
+ ((e.payload?.error as Record<string, unknown>)?.code === 'handoff_task_schema_violation' ||
95
+ (e.payload?.error as Record<string, unknown>)?.code === 'agent_dispatch_validation_failed'),
96
+ );
97
+ expect(
98
+ validationFailure,
99
+ 'HV-1b: failure event payload MUST carry a recognizable handoff-validation error code',
100
+ ).toBeDefined();
101
+ });
102
+
103
+ it('agent return payload that fails returnSchemaRef is rejected before persistence', async () => {
104
+ // The fixture's `mock-return-violation` scenario causes the agent runtime
105
+ // to emit a return payload that violates schemas/structured-fixture.return.schema.json
106
+ // (e.g., omits the required `extracted` field while not declaring `error`).
107
+ const create = await driver.post('/v1/runs', {
108
+ workflowId: FIXTURE,
109
+ inputs: { scenario: 'mock-return-violation' },
110
+ });
111
+ expect(create.status).toBe(201);
112
+ const runId = (create.json as { runId: string }).runId;
113
+
114
+ let snap: { status: string } | undefined;
115
+ for (let i = 0; i < 40; i++) {
116
+ const res = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
117
+ const body = res.json as { status: string };
118
+ if (['completed', 'failed'].includes(body.status)) {
119
+ snap = body;
120
+ break;
121
+ }
122
+ await new Promise((r) => setTimeout(r, 100));
123
+ }
124
+ // Hosts MAY surface return-schema violations as either a failed run OR a
125
+ // run that completes with a flagged error envelope, but the persisted
126
+ // result MUST NOT carry an off-schema body. Tolerate both outcomes here;
127
+ // the strict assertion is that downstream readers can detect the violation.
128
+ expect(['completed', 'failed']).toContain(snap?.status);
129
+
130
+ const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
131
+ const list = (events.json as { events?: Array<{ type: string; payload?: Record<string, unknown> }> })
132
+ .events ?? [];
133
+
134
+ const returnViolation = list.find(
135
+ (e) =>
136
+ (e.type === 'node.failed' || e.type === 'agent.error') &&
137
+ typeof e.payload?.error === 'object' &&
138
+ ((e.payload?.error as Record<string, unknown>)?.code === 'handoff_return_schema_violation' ||
139
+ (e.payload?.error as Record<string, unknown>)?.code === 'agent_return_validation_failed'),
140
+ );
141
+ expect(
142
+ returnViolation,
143
+ 'HV-1c: off-schema return payload MUST surface a structured violation event before persistence',
144
+ ).toBeDefined();
145
+ });
146
+ });
@@ -47,26 +47,77 @@ describe.skipIf(SKIP)('agentReasoningEvents: agent.* event family emission', ()
47
47
 
48
48
  const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
49
49
  expect(events.status).toBe(200);
50
- const list = (events.json as { events?: Array<{ type: string; payload?: Record<string, unknown> }> })
51
- .events ?? [];
50
+ const list = (events.json as {
51
+ events?: Array<{
52
+ type: string;
53
+ eventId?: string;
54
+ causationId?: string;
55
+ payload?: Record<string, unknown>;
56
+ }>;
57
+ }).events ?? [];
52
58
 
53
59
  const agentEvents = list.filter((e) => REASONING_EVENT_TYPES.has(e.type));
54
60
  expect(agentEvents.length).toBeGreaterThan(0);
55
61
 
56
- // Every agent.* event payload MUST carry `agentId` (per RFC 0002 §C).
62
+ // Every agent.* event payload MUST identify the agent. Per
63
+ // `run-event-payloads.schema.json` §`agent*` shapes, four of the
64
+ // five events (`reasoned`, `toolCalled`, `toolReturned`, `decided`)
65
+ // carry `agentId`; `agent.handoff` carries `fromAgentId` + `toAgentId`
66
+ // instead. Allow either shape.
57
67
  for (const ev of agentEvents) {
58
- expect(typeof ev.payload?.agentId).toBe('string');
59
- expect((ev.payload!.agentId as string).length).toBeGreaterThanOrEqual(3);
68
+ const p = (ev.payload ?? {}) as Record<string, unknown>;
69
+ if (ev.type === 'agent.handoff') {
70
+ expect(typeof p.fromAgentId).toBe('string');
71
+ expect(typeof p.toAgentId).toBe('string');
72
+ expect((p.fromAgentId as string).length).toBeGreaterThanOrEqual(3);
73
+ expect((p.toAgentId as string).length).toBeGreaterThanOrEqual(3);
74
+ } else {
75
+ expect(typeof p.agentId).toBe('string');
76
+ expect((p.agentId as string).length).toBeGreaterThanOrEqual(3);
77
+ }
60
78
  }
61
79
 
62
- // agent.toolCalled / agent.toolReturned MUST share a `callId` correlation.
80
+ // agent.toolCalled / agent.toolReturned pairing — two normative
81
+ // requirements per RFC 0002 §B (`agentToolReturned`):
82
+ //
83
+ // 1. callId correlation. The pair shares a host-minted `callId`
84
+ // on their payloads; readers correlate request → response by
85
+ // this id even when the events arrive interleaved with other
86
+ // agent.* activity.
87
+ //
88
+ // 2. causationId === eventId of the paired agent.toolCalled.
89
+ // RFC 0002 §B states "`causationId` MUST equal the `eventId`
90
+ // of the corresponding `agent.toolCalled`." This is stricter
91
+ // than callId-pairing alone — it threads the event-log identity
92
+ // through the correlation chain so replay-determinism guarantees
93
+ // (`spec/v1/replay.md` §"Determinism with non-deterministic
94
+ // agents") survive event-id reuse and out-of-order delivery.
95
+ // Hosts whose `appendEvent` surface doesn't return the eventId
96
+ // synchronously need to extend it so the node can thread the
97
+ // paired eventId through.
63
98
  const calls = agentEvents.filter((e) => e.type === 'agent.toolCalled');
64
99
  const returns = agentEvents.filter((e) => e.type === 'agent.toolReturned');
65
100
  for (const ret of returns) {
66
101
  const callId = ret.payload?.callId as string | undefined;
67
102
  if (callId === undefined) continue;
68
103
  const matched = calls.find((c) => c.payload?.callId === callId);
69
- expect(matched, `agent.toolReturned.callId=${callId} MUST pair with a prior agent.toolCalled`).toBeDefined();
104
+ expect(
105
+ matched,
106
+ `agent.toolReturned.callId=${callId} MUST pair with a prior agent.toolCalled`,
107
+ ).toBeDefined();
108
+
109
+ // Strict causationId chain — only assert when the host actually
110
+ // surfaces eventId on the matched toolCalled event. Hosts that
111
+ // omit eventId from their `/events` projection skip this check
112
+ // (and SHOULD add it — RFC 0002 §B's chain integrity depends on
113
+ // it).
114
+ const matchedEventId = matched?.eventId;
115
+ if (typeof matchedEventId === 'string' && matchedEventId.length > 0) {
116
+ expect(
117
+ ret.causationId,
118
+ `agent.toolReturned (callId=${callId}) MUST carry causationId === paired agent.toolCalled.eventId per RFC 0002 §B`,
119
+ ).toBe(matchedEventId);
120
+ }
70
121
  }
71
122
  });
72
123
  });
@@ -0,0 +1,182 @@
1
+ /**
2
+ * core.openwop.agents.run — tool-allowlist enforcement contract
3
+ *
4
+ * Closes `OPENWOP-AUDIT-2026-003`: the 1.0.0 pack invoked workflow-supplied
5
+ * `tool.handler` as raw JS in its fallback loop, breaking the spec's
6
+ * `prompt-injection-tool-allowlist` invariant (`threat-model-prompt-injection.md`
7
+ * §"Authority bypass"). 1.0.1 refuses function-typed handlers outright; this
8
+ * scenario locks the refusal in as a CI gate so a future pack reimplementation
9
+ * cannot silently regress.
10
+ *
11
+ * Server-free. Loads the pack via dynamic import and asserts:
12
+ *
13
+ * 1. `tools[]` entries with `typeof handler === 'function'` are rejected
14
+ * with `INVALID_TOOL_DECLARATION` BEFORE any LLM call. The defect path.
15
+ * 2. `tools[]` entries missing a `name` are rejected (declaration discipline).
16
+ * 3. `tools[]` entries missing a `kind` discriminator are rejected (the host
17
+ * cannot resolve an unkinded tool through its connector registry).
18
+ * 4. Tool-driven runs (`tools.length > 0`) WITHOUT `ctx.agentRuntime` refuse
19
+ * with `HOST_CAPABILITY_MISSING` — the inline fallback that invoked raw
20
+ * handlers was removed in 1.0.1; there is no longer a host-less path for
21
+ * tool dispatch.
22
+ * 5. Tool-less runs (`tools.length === 0`) succeed via `ctx.callAIWithTools`
23
+ * with no tool dispatch (safe path preserved across the fix).
24
+ * 6. The preferred `ctx.agentRuntime.run` path threads through unchanged.
25
+ *
26
+ * Skip-conditions: soft-skips when `packs/core.openwop.agents/index.mjs` is not
27
+ * present (published-conformance-package context where pack source isn't shipped).
28
+ *
29
+ * @see SECURITY/internal-pre-audit-findings.json#OPENWOP-AUDIT-2026-003
30
+ * @see SECURITY/threat-model-prompt-injection.md §"Authority bypass" + §"prompt-injection-tool-allowlist"
31
+ * @see SECURITY/invariants.yaml#agents-run-no-raw-handler
32
+ * @see packs/core.openwop.agents/index.mjs (1.0.1)
33
+ */
34
+
35
+ import { describe, it, expect, beforeAll } from 'vitest';
36
+ import { existsSync } from 'node:fs';
37
+ import { dirname, resolve } from 'node:path';
38
+ import { fileURLToPath } from 'node:url';
39
+
40
+ const __dirname = dirname(fileURLToPath(import.meta.url));
41
+ const PACK_PATH = resolve(__dirname, '../../../packs/core.openwop.agents/index.mjs');
42
+
43
+ interface AgentRunCtx {
44
+ config?: Record<string, unknown>;
45
+ inputs?: {
46
+ userPrompt?: string;
47
+ tools?: unknown[];
48
+ memory?: unknown;
49
+ outputParser?: unknown;
50
+ };
51
+ agentRuntime?: { run?: (...args: unknown[]) => Promise<unknown> };
52
+ callAIWithTools?: (...args: unknown[]) => Promise<{ text?: string; usage?: unknown; toolCalls?: unknown[] }>;
53
+ emit?: (...args: unknown[]) => Promise<void>;
54
+ }
55
+
56
+ type AgentRunFn = (ctx: AgentRunCtx) => Promise<{ status: 'success'; outputs: Record<string, unknown> }>;
57
+
58
+ async function expectRejection(fn: () => Promise<unknown>, expectedCode: string, description: string): Promise<void> {
59
+ let caught: unknown;
60
+ try {
61
+ await fn();
62
+ } catch (err) {
63
+ caught = err;
64
+ }
65
+ expect(caught, description).toBeInstanceOf(Error);
66
+ expect((caught as Error & { code?: string }).code, `${description} → code`).toBe(expectedCode);
67
+ }
68
+
69
+ describe('category: core.openwop.agents.run — tool-allowlist enforcement (OPENWOP-AUDIT-2026-003)', () => {
70
+ let agentRun: AgentRunFn;
71
+ let packAvailable: boolean;
72
+
73
+ beforeAll(async () => {
74
+ packAvailable = existsSync(PACK_PATH);
75
+ if (!packAvailable) return;
76
+ const mod = (await import(PACK_PATH)) as { agentRun?: AgentRunFn };
77
+ if (typeof mod.agentRun !== 'function') {
78
+ throw new Error(`expected packs/core.openwop.agents/index.mjs to export agentRun; got ${typeof mod.agentRun}`);
79
+ }
80
+ agentRun = mod.agentRun;
81
+ });
82
+
83
+ it('skips cleanly when pack source is not bundled', () => {
84
+ if (!packAvailable) {
85
+ console.warn('[agents-run-tool-allowlist] pack source not present; skipping');
86
+ expect(packAvailable).toBe(false);
87
+ return;
88
+ }
89
+ expect(packAvailable).toBe(true);
90
+ });
91
+
92
+ it('rejects function-typed tool.handler (the defect path)', async () => {
93
+ if (!packAvailable) return;
94
+ // The 1.0.0 defect: a workflow author could supply executable JS via
95
+ // tools[].handler and the pack would await it directly with ctx. Closed
96
+ // in 1.0.1 — the validator throws INVALID_TOOL_DECLARATION at the run
97
+ // boundary, BEFORE any LLM call.
98
+ await expectRejection(
99
+ () => agentRun({
100
+ config: {},
101
+ inputs: {
102
+ userPrompt: 'x',
103
+ tools: [{ name: 'evil', kind: 'function', handler: () => 'rce' }],
104
+ },
105
+ }),
106
+ 'INVALID_TOOL_DECLARATION',
107
+ 'function-typed handler MUST be refused',
108
+ );
109
+ });
110
+
111
+ it('rejects tool declaration missing a name', async () => {
112
+ if (!packAvailable) return;
113
+ await expectRejection(
114
+ () => agentRun({ config: {}, inputs: { userPrompt: 'x', tools: [{ kind: 'workflow' }] } }),
115
+ 'INVALID_TOOL_DECLARATION',
116
+ 'unnamed tool MUST be refused',
117
+ );
118
+ });
119
+
120
+ it('rejects tool declaration missing a kind discriminator', async () => {
121
+ if (!packAvailable) return;
122
+ await expectRejection(
123
+ () => agentRun({ config: {}, inputs: { userPrompt: 'x', tools: [{ name: 't1' }] } }),
124
+ 'INVALID_TOOL_DECLARATION',
125
+ 'unkinded tool MUST be refused — host cannot resolve through its registry',
126
+ );
127
+ });
128
+
129
+ it('rejects tool-driven runs when host does not provide agentRuntime', async () => {
130
+ if (!packAvailable) return;
131
+ // Tool dispatch MUST go through a host-resolved runtime — the 1.0.0
132
+ // inline-handler fallback is gone.
133
+ await expectRejection(
134
+ () => agentRun({
135
+ config: {},
136
+ inputs: { userPrompt: 'x', tools: [{ name: 't1', kind: 'workflow' }] },
137
+ }),
138
+ 'HOST_CAPABILITY_MISSING',
139
+ 'tools[] with no agentRuntime MUST refuse',
140
+ );
141
+ });
142
+
143
+ it('tool-less run succeeds via callAIWithTools (safe fallback preserved)', async () => {
144
+ if (!packAvailable) return;
145
+ let toolsSeen: unknown = 'never-called';
146
+ const ctx: AgentRunCtx = {
147
+ config: {},
148
+ inputs: { userPrompt: 'hi', tools: [] },
149
+ callAIWithTools: async (args: unknown) => {
150
+ toolsSeen = (args as { tools?: unknown[] }).tools;
151
+ return { text: 'hello back', usage: { input_tokens: 1, output_tokens: 1 } };
152
+ },
153
+ };
154
+ const result = await agentRun(ctx);
155
+ expect(result.outputs.result).toBe('hello back');
156
+ expect(result.outputs.finishReason).toBe('complete');
157
+ expect(toolsSeen, 'tool-less fallback MUST pass an empty tools array — no LLM-driven dispatch').toEqual([]);
158
+ });
159
+
160
+ it('agentRuntime.run path threads through unchanged when host provides it', async () => {
161
+ if (!packAvailable) return;
162
+ let receivedTools: unknown;
163
+ const ctx: AgentRunCtx = {
164
+ config: {},
165
+ inputs: {
166
+ userPrompt: 'x',
167
+ tools: [{ name: 't1', kind: 'workflow', ref: 'vendor.acme.demo' }],
168
+ },
169
+ agentRuntime: {
170
+ run: async (req: unknown) => {
171
+ receivedTools = (req as { tools?: unknown[] }).tools;
172
+ return { result: 'from-runtime', toolCalls: [{ name: 't1' }] };
173
+ },
174
+ },
175
+ };
176
+ const result = await agentRun(ctx);
177
+ expect(result.outputs.result).toBe('from-runtime');
178
+ expect(receivedTools, 'host MUST receive the validated tools array').toEqual([
179
+ { name: 't1', kind: 'workflow', ref: 'vendor.acme.demo' },
180
+ ]);
181
+ });
182
+ });