@openwop/openwop-conformance 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/README.md +2 -2
- package/coverage.md +29 -17
- package/fixtures/conformance-agent-low-confidence.json +7 -4
- package/fixtures/conformance-agent-pack-handoff-schema-validation.json +30 -0
- package/fixtures/conformance-agent-reasoning.json +23 -4
- package/fixtures/conformance-dispatch-cross-worker-handoff-child-a.json +27 -0
- package/fixtures/conformance-dispatch-cross-worker-handoff-child-b.json +25 -0
- package/fixtures/conformance-dispatch-cross-worker-handoff.json +60 -0
- package/fixtures/conformance-dispatch-input-mapping-child.json +25 -0
- package/fixtures/conformance-dispatch-input-mapping.json +49 -0
- package/fixtures/conformance-dispatch-output-mapping-child.json +27 -0
- package/fixtures/conformance-dispatch-output-mapping.json +49 -0
- package/fixtures/conformance-subworkflow-input-mapping-child.json +27 -0
- package/fixtures/conformance-subworkflow-input-mapping.json +33 -0
- package/fixtures.md +12 -2
- package/package.json +1 -1
- package/schemas/README.md +7 -0
- package/schemas/agent-ref.schema.json +1 -1
- package/schemas/ai-envelope.schema.json +106 -0
- package/schemas/capabilities.schema.json +300 -3
- package/schemas/core-conformance-mock-agent-config.schema.json +147 -0
- package/schemas/dispatch-config.schema.json +26 -0
- package/schemas/envelopes/clarification.request.schema.json +43 -0
- package/schemas/envelopes/error.schema.json +26 -0
- package/schemas/envelopes/schema.request.schema.json +22 -0
- package/schemas/envelopes/schema.response.schema.json +22 -0
- package/schemas/node-pack-manifest.schema.json +5 -0
- package/schemas/pack-lockfile.schema.json +16 -0
- package/schemas/run-event-payloads.schema.json +18 -2
- package/schemas/run-event.schema.json +2 -1
- package/schemas/workflow-chain-pack-manifest.schema.json +226 -0
- package/src/lib/behavior-gate.ts +44 -5
- package/src/lib/env.ts +27 -0
- package/src/lib/webhook-receiver.ts +137 -0
- package/src/lib/workflow-chain-expansion.ts +213 -0
- package/src/scenarios/agentPackCatalog.test.ts +216 -0
- package/src/scenarios/agentPackHandoffSchemaValidation.test.ts +146 -0
- package/src/scenarios/agentReasoningEvents.test.ts +58 -7
- package/src/scenarios/agents-run-tool-allowlist.test.ts +182 -0
- package/src/scenarios/ai-envelope-shape.test.ts +362 -0
- package/src/scenarios/aiEnvelope.capBreached.test.ts +173 -0
- package/src/scenarios/aiEnvelope.contractRefusal.test.ts +150 -0
- package/src/scenarios/aiEnvelope.correlationReplay.test.ts +69 -0
- package/src/scenarios/aiEnvelope.redaction.test.ts +73 -0
- package/src/scenarios/aiEnvelope.schemaDrift.test.ts +87 -0
- package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +143 -0
- package/src/scenarios/aiEnvelope.universalKinds.test.ts +176 -0
- package/src/scenarios/append-ordering.test.ts +44 -0
- package/src/scenarios/artifact-auth.test.ts +58 -0
- package/src/scenarios/blob-cross-tenant-isolation.test.ts +66 -0
- package/src/scenarios/blob-presign-expiry.test.ts +66 -0
- package/src/scenarios/blob-roundtrip.test.ts +48 -0
- package/src/scenarios/cache-cross-tenant-isolation.test.ts +61 -0
- package/src/scenarios/cache-ttl-expiry.test.ts +47 -0
- package/src/scenarios/dispatch-cross-worker-handoff.test.ts +98 -0
- package/src/scenarios/dispatch-input-mapping.test.ts +94 -0
- package/src/scenarios/dispatch-output-mapping.test.ts +65 -0
- package/src/scenarios/fs-path-traversal.test.ts +124 -0
- package/src/scenarios/idempotency-key-determinism.test.ts +230 -0
- package/src/scenarios/interrupt-token-matrix.test.ts +126 -0
- package/src/scenarios/kv-atomic-increment.test.ts +74 -0
- package/src/scenarios/kv-cas.test.ts +75 -0
- package/src/scenarios/kv-cross-tenant-isolation.test.ts +85 -0
- package/src/scenarios/kv-ttl-expiry.test.ts +47 -0
- package/src/scenarios/mcp-server-elicitation-bridge.test.ts +92 -0
- package/src/scenarios/mcp-server-prompt-roundtrip.test.ts +80 -0
- package/src/scenarios/mcp-server-resource-roundtrip.test.ts +82 -0
- package/src/scenarios/mcp-server-sampling-bridge.test.ts +84 -0
- package/src/scenarios/mcp-server-tool-roundtrip.test.ts +107 -0
- package/src/scenarios/mcp-server-untrusted-args.test.ts +105 -0
- package/src/scenarios/mcp-tool-roundtrip.test.ts +13 -6
- package/src/scenarios/memory-compaction-event-emitted.test.ts +121 -0
- package/src/scenarios/memory-compaction-provenance-tag.test.ts +116 -0
- package/src/scenarios/memory-compaction-sr1-carry-forward.test.ts +127 -0
- package/src/scenarios/multi-region-idempotency.test.ts +39 -4
- package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +139 -0
- package/src/scenarios/pause-resume.test.ts +43 -0
- package/src/scenarios/queue-ack-nack-dlq.test.ts +67 -0
- package/src/scenarios/queue-cross-tenant-isolation.test.ts +66 -0
- package/src/scenarios/queue-publish-consume-roundtrip.test.ts +48 -0
- package/src/scenarios/registry-public.test.ts +91 -0
- package/src/scenarios/search-bm25-roundtrip.test.ts +47 -0
- package/src/scenarios/spec-corpus-validity.test.ts +28 -7
- package/src/scenarios/sql-injection-rejection.test.ts +84 -0
- package/src/scenarios/sql-transaction-atomicity.test.ts +66 -0
- package/src/scenarios/stream-subscribe-from-beginning.test.ts +66 -0
- package/src/scenarios/subworkflow-input-mapping.test.ts +100 -0
- package/src/scenarios/table-cross-tenant-isolation.test.ts +65 -0
- package/src/scenarios/table-cursor-pagination.test.ts +47 -0
- package/src/scenarios/table-schema-enforcement.test.ts +47 -0
- package/src/scenarios/vector-knn-roundtrip.test.ts +48 -0
- package/src/scenarios/webhook-receiver-adversarial.test.ts +210 -0
- package/src/scenarios/workflow-chain-expansion.test.ts +366 -0
- package/src/scenarios/workflow-chain-pack-manifest-validation.test.ts +232 -0
- package/src/scenarios/workflow-chain-pack-signature-verification.test.ts +138 -0
- package/src/scenarios/workflow-chain-unresolvable-typeid.test.ts +170 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Agent Shift — `core.openwop.agents.{deep-research, react, supervisor}`
|
|
3
|
+
* pack-catalog evidence.
|
|
4
|
+
*
|
|
5
|
+
* The three reference agent packs published 2026-05-17 are registry-signed
|
|
6
|
+
* (keyId `openwop-team-1`) but had no in-tree conformance scenarios
|
|
7
|
+
* proving their `agents[]` manifests are reachable via the host pack
|
|
8
|
+
* surface AND that each manifest's contents match the contract documented
|
|
9
|
+
* in `RFCS/0003-agent-packs.md` + `schemas/agent-manifest.schema.json`.
|
|
10
|
+
*
|
|
11
|
+
* This file closes that gap. Three test groups, one per pack. Each group:
|
|
12
|
+
* 1. Skips when the host doesn't advertise `capabilities.agents.supported`
|
|
13
|
+
* OR doesn't expose a pack-listing endpoint (`/v1/packs` returning
|
|
14
|
+
* 404/501 → soft-skip).
|
|
15
|
+
* 2. Locates the pack by name in the host's pack list.
|
|
16
|
+
* 3. Validates the pack's `agents[]` entry against the AgentManifest
|
|
17
|
+
* contract: required fields, agentId namespace pattern, modelClass
|
|
18
|
+
* enum, toolAllowlist format, handoff schema refs.
|
|
19
|
+
*
|
|
20
|
+
* Behavioral assertions (the agent actually researches / reacts / supervises)
|
|
21
|
+
* require an LLM + real agentRuntime host and live outside the public
|
|
22
|
+
* conformance suite. The advertisement-shape + manifest-validity coverage
|
|
23
|
+
* here is the wire-level guarantee a third-party host MUST satisfy to
|
|
24
|
+
* claim "I ship the reference agent packs."
|
|
25
|
+
*
|
|
26
|
+
* @see RFCS/0003-agent-packs.md
|
|
27
|
+
* @see schemas/agent-manifest.schema.json
|
|
28
|
+
* @see packs/core.openwop.agents.{deep-research,react,supervisor}/pack.json
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { describe, it, expect } from 'vitest';
|
|
32
|
+
import { driver } from '../lib/driver.js';
|
|
33
|
+
import { isAgentSupported } from '../lib/multi-agent-capabilities.js';
|
|
34
|
+
|
|
35
|
+
/**
 * One `agents[]` entry of a listed pack. Every field is optional because the
 * value is cast straight from the `/v1/packs` response body without validation.
 */
interface PackAgentEntry {
  agentId?: string;
  persona?: string;
  modelClass?: string;
  systemPrompt?: string;
  systemPromptRef?: string;
  toolAllowlist?: string[];
  memoryShape?: Record<string, unknown>;
  handoff?: { taskSchemaRef?: string; returnSchemaRef?: string };
}

/** One pack in the listing, with the agents it declares (if any). */
interface PackEntry {
  name?: string;
  version?: string;
  agents?: PackAgentEntry[];
}

/** Subset of the `GET /v1/packs` response body that this file reads. */
interface PackList {
  packs?: PackEntry[];
}
|
|
51
|
+
|
|
52
|
+
// Namespace-tier pattern for AgentManifest.agentId, mirroring
// schemas/agent-manifest.schema.json: a tier keyword followed by at least
// two dot-separated segments.
const AGENT_ID_PATTERN = /^(core|vendor|community|private|local)\.[a-z][a-z0-9_-]*(\.[a-z][a-zA-Z0-9_-]*)+$/;

// modelClass values this suite recognizes.
const VALID_MODEL_CLASSES = new Set([
  'reasoning',
  'tool-using',
  'chat',
  'code',
  'vision',
  'multimodal',
  'embedding',
  'classification',
  'retrieval',
  'research',
  'delegate',
]);

// Prefixes a toolAllowlist entry is allowed to start with.
const VALID_TOOL_SCOPES = [
  'openwop:',
  'mcp:',
  'vendor.',
  'community.',
  'private.',
  'local.',
  'host:',
];
|
|
59
|
+
|
|
60
|
+
/**
 * Looks up a pack by exact `name` in the host's `GET /v1/packs` listing.
 *
 * Resolves to `null` when the endpoint answers with any status other than
 * 200 (404 and 501 included), when the body carries no `packs` array, or
 * when no listed pack has the requested name. Callers in this file treat
 * `null` as "nothing to verify" and return early.
 *
 * @param name Exact pack name compared against each entry's `name`.
 * @returns The matching pack entry, or `null` when it cannot be located.
 */
async function findPack(name: string): Promise<NonNullable<PackList['packs']>[number] | null> {
  const res = await driver.get('/v1/packs');
  if (res.status !== 200) return null;

  // The body is unvalidated wire data, so a null or non-object JSON body
  // must not throw here; it is handled as "no pack list".
  const body = res.json as PackList | null | undefined;
  const packs = body?.packs;
  if (!Array.isArray(packs)) return null;

  // `?.` guards against null entries inside the array.
  return packs.find((p) => p?.name === name) ?? null;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Asserts that one `agents[]` entry of a listed pack has the AgentManifest
 * shape this suite requires, then applies the caller's per-pack expectations.
 *
 * Checks performed, in order:
 *  - `agentId`, `persona` and `modelClass` are strings;
 *  - `agentId` matches `AGENT_ID_PATTERN`;
 *  - `modelClass` is a member of `VALID_MODEL_CLASSES`;
 *  - exactly one of `systemPrompt` / `systemPromptRef` is a non-empty string;
 *  - every `toolAllowlist` entry is a string starting with a `VALID_TOOL_SCOPES` prefix;
 *  - the optional `minTools`, `agentIdEndsWith` and `modelClass` expectations.
 *
 * @param agent Agent entry taken from the pack listing (unvalidated wire data).
 * @param expectations Optional per-pack constraints; omitted keys are not checked.
 * @throws Whatever `expect` throws on the first failing assertion.
 */
function assertAgentManifestShape(
  agent: NonNullable<NonNullable<PackList['packs']>[number]['agents']>[number],
  expectations: { agentIdEndsWith?: string; modelClass?: string; minTools?: number },
): void {
  const agentId = agent.agentId ?? '';

  // Required: agentId, persona, modelClass.
  expect(typeof agent.agentId, 'AgentManifest.agentId MUST be a string').toBe('string');
  expect(typeof agent.persona, 'AgentManifest.persona MUST be a string').toBe('string');
  expect(typeof agent.modelClass, 'AgentManifest.modelClass MUST be a string').toBe('string');

  // agentId pattern (RFCS/0003 §A namespace tiers).
  expect(
    AGENT_ID_PATTERN.test(agentId),
    driver.describe(
      'schemas/agent-manifest.schema.json §agentId',
      `agentId "${agent.agentId}" MUST match the namespace-tier pattern`,
    ),
  ).toBe(true);

  // modelClass enum check (loose — the schema declares an enum but
  // hosts MAY extend with research/delegate per the reference packs).
  if (agent.modelClass !== undefined) {
    expect(
      VALID_MODEL_CLASSES.has(agent.modelClass),
      `AgentManifest.modelClass "${agent.modelClass}" SHOULD be a recognized class`,
    ).toBe(true);
  }

  // systemPrompt XOR systemPromptRef.
  const hasInline = typeof agent.systemPrompt === 'string' && agent.systemPrompt.length > 0;
  const hasRef = typeof agent.systemPromptRef === 'string' && agent.systemPromptRef.length > 0;
  expect(
    hasInline !== hasRef,
    'AgentManifest MUST have exactly one of systemPrompt | systemPromptRef',
  ).toBe(true);

  // toolAllowlist: optional, but when present each entry MUST start with a
  // recognized scope. Entries are treated as unknown because the manifest is
  // unvalidated wire data: a non-string entry fails the assertion instead of
  // raising a TypeError from `startsWith`.
  if (Array.isArray(agent.toolAllowlist)) {
    for (const tool of agent.toolAllowlist as unknown[]) {
      expect(
        typeof tool === 'string' && VALID_TOOL_SCOPES.some((scope) => tool.startsWith(scope)),
        `toolAllowlist entry "${String(tool)}" MUST start with a recognized scope`,
      ).toBe(true);
    }
  }

  // Minimum tool count is enforced even when toolAllowlist is absent: a
  // missing allowlist counts as zero tools rather than skipping the check.
  if (expectations.minTools !== undefined) {
    const toolCount = Array.isArray(agent.toolAllowlist) ? agent.toolAllowlist.length : 0;
    expect(
      toolCount,
      `agent's toolAllowlist MUST have at least ${expectations.minTools} entries`,
    ).toBeGreaterThanOrEqual(expectations.minTools);
  }

  // Per-pack expectations.
  // NOTE(review): despite the option's name this is a containment check, not a
  // suffix check — confirm against the reference packs' agentIds before tightening.
  if (expectations.agentIdEndsWith !== undefined) {
    expect(agentId).toContain(expectations.agentIdEndsWith);
  }
  if (expectations.modelClass !== undefined) {
    expect(agent.modelClass).toBe(expectations.modelClass);
  }
}
|
|
130
|
+
|
|
131
|
+
// Gate for every suite in this file: skipped unless isAgentSupported() is true.
const SKIP = !isAgentSupported();

describe.skipIf(SKIP)('core.openwop.agents.deep-research — pack catalog evidence', () => {
  it('host pack-list includes deep-research with a well-formed AgentManifest', async () => {
    const pack = await findPack('core.openwop.agents.deep-research');
    // host doesn't expose /v1/packs or doesn't have this pack
    if (pack === null) {
      return;
    }

    expect(pack.version, 'pack version MUST be present').toBeDefined();
    const shipsSingleAgent = Array.isArray(pack.agents) && pack.agents.length === 1;
    expect(shipsSingleAgent, 'deep-research ships exactly one agent').toBe(true);

    // Safe after the assertion above: the array holds exactly one entry.
    const agent = pack.agents![0]!;
    assertAgentManifestShape(agent, {
      agentIdEndsWith: 'deep-research',
      modelClass: 'research',
      minTools: 1,
    });

    // Domain-specific: deep-research uses long-term memory + RAG retrievers.
    const allowedTools = agent.toolAllowlist ?? [];
    const allowsRetrieval = allowedTools.some((t) => t.includes('rag') || t.includes('retriever'));
    expect(allowsRetrieval, 'deep-research SHOULD allow at least one rag/retriever tool').toBe(true);
    expect(
      agent.memoryShape?.longTerm,
      'deep-research MUST request longTerm memory (it persists facts across runs)',
    ).toBe(true);
  });
});
|
|
156
|
+
|
|
157
|
+
describe.skipIf(SKIP)('core.openwop.agents.react — pack catalog evidence', () => {
  it('host pack-list includes react with a well-formed AgentManifest', async () => {
    const pack = await findPack('core.openwop.agents.react');
    // Nothing to verify when the pack cannot be located.
    if (pack === null) {
      return;
    }

    expect(pack.version).toBeDefined();
    const shipsAnAgent = Array.isArray(pack.agents) && pack.agents.length >= 1;
    expect(shipsAnAgent, 'react ships at least one agent').toBe(true);

    // Only the first declared agent is inspected.
    const agent = pack.agents![0]!;
    assertAgentManifestShape(agent, { agentIdEndsWith: 'react' });

    // ReAct pattern requires handoff schemas (task + return).
    const { handoff } = agent;
    expect(handoff, 'react AgentManifest MUST include a handoff block').toBeDefined();
    expect(typeof handoff?.taskSchemaRef, 'handoff.taskSchemaRef MUST be a string').toBe('string');
    expect(typeof handoff?.returnSchemaRef, 'handoff.returnSchemaRef MUST be a string').toBe('string');
  });
});
|
|
173
|
+
|
|
174
|
+
describe.skipIf(SKIP)('core.openwop.agents.supervisor — pack catalog evidence', () => {
  it('host pack-list includes supervisor with a well-formed AgentManifest', async () => {
    const pack = await findPack('core.openwop.agents.supervisor');
    // Nothing to verify when the pack cannot be located.
    if (pack === null) {
      return;
    }

    expect(pack.version).toBeDefined();
    const shipsAnAgent = Array.isArray(pack.agents) && pack.agents.length >= 1;
    expect(shipsAnAgent, 'supervisor ships at least one agent').toBe(true);

    // Only the first declared agent is inspected.
    const agent = pack.agents![0]!;
    assertAgentManifestShape(agent, { agentIdEndsWith: 'supervisor' });

    // Supervisor pattern delegates to crew members; its modelClass should
    // be `delegate` or `reasoning` (it makes orchestration decisions).
    const modelClass = agent.modelClass;
    const isOrchestratorClass = modelClass === 'delegate' || modelClass === 'reasoning';
    expect(
      isOrchestratorClass,
      `supervisor SHOULD have modelClass=delegate|reasoning, got "${modelClass}"`,
    ).toBe(true);

    // Supervisor needs handoff schemas to dispatch work.
    expect(agent.handoff, 'supervisor MUST include handoff schemas').toBeDefined();
  });
});
|
|
194
|
+
|
|
195
|
+
describe.skipIf(SKIP)('agent-pack catalog summary', () => {
  it('all three 2026-05-17 reference agent packs are catalog-reachable', async () => {
    const referencePacks = [
      'core.openwop.agents.deep-research',
      'core.openwop.agents.react',
      'core.openwop.agents.supervisor',
    ];

    // Look the packs up one at a time, keeping the names that resolve.
    const reachable: string[] = [];
    for (const packName of referencePacks) {
      const located = await findPack(packName);
      if (located !== null) {
        reachable.push(packName);
      }
    }

    // Either none are present (host doesn't ship these — skip) OR all are
    // present (host ships the full reference batch). Half-shipping is a
    // configuration error worth flagging.
    if (reachable.length === 0) {
      return;
    }
    expect(
      reachable.length,
      'host SHOULD ship the reference agent packs as a coherent batch (none, or all three)',
    ).toBe(referencePacks.length);
  });
});
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Agent Shift Phase 2 — handoff-schema validation at dispatch (HV-1).
|
|
3
|
+
* Normative reference: RFCS/0003-agent-packs.md §D (handoff schema resolution)
|
|
4
|
+
*
|
|
5
|
+
* Verifies that when an agent's manifest carries `handoff.taskSchemaRef`, the
|
|
6
|
+
* host MUST validate inbound dispatch payloads against the referenced JSON
|
|
7
|
+
* Schema (resolved at install time per RFC 0003 §D) BEFORE dispatching the
|
|
8
|
+
* agent. Invalid payloads MUST be rejected with a structured error envelope
|
|
9
|
+
* — the agent MUST NOT see the malformed payload.
|
|
10
|
+
*
|
|
11
|
+
* Symmetric assertion on `handoff.returnSchemaRef`: when an agent returns a
|
|
12
|
+
* payload that fails return-schema validation, the host MUST reject before
|
|
13
|
+
* persistence and surface a structured error rather than silently storing
|
|
14
|
+
* an off-contract result.
|
|
15
|
+
*
|
|
16
|
+
* Capability-gated: skips when host doesn't advertise
|
|
17
|
+
* `capabilities.agents.supported: true` AND `capabilities.agents.dispatch: true`.
|
|
18
|
+
* Fixture-gated: requires `conformance-agent-pack-handoff-schema-validation`.
|
|
19
|
+
*
|
|
20
|
+
* @see RFCS/0003-agent-packs.md §D
|
|
21
|
+
* @see schemas/agent-manifest.schema.json #/properties/handoff
|
|
22
|
+
* @see packs/core.openwop.agent-examples/agents[structured-fixture]
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { describe, it, expect } from 'vitest';
|
|
26
|
+
import { driver } from '../lib/driver.js';
|
|
27
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
28
|
+
import { isAgentSupported } from '../lib/multi-agent-capabilities.js';
|
|
29
|
+
|
|
30
|
+
const FIXTURE = 'conformance-agent-pack-handoff-schema-validation';
const SKIP = !isAgentSupported() || !isFixtureAdvertised(FIXTURE);

// Polling budget shared by every test below: 40 polls, 100 ms apart.
const POLL_ATTEMPTS = 40;
const POLL_INTERVAL_MS = 100;

/** Minimal view of a run event as read from `/v1/runs/{runId}/events`. */
type HandoffRunEvent = { type: string; payload?: Record<string, unknown> };

/** Starts a run of the fixture workflow, asserts the 201 response and returns its runId. */
async function startFixtureRun(inputs: Record<string, unknown>): Promise<string> {
  const create = await driver.post('/v1/runs', { workflowId: FIXTURE, inputs });
  expect(create.status).toBe(201);
  return (create.json as { runId: string }).runId;
}

/**
 * Polls the run snapshot until its status is one of `settled`.
 * Returns the snapshot, or `undefined` when the polling budget runs out.
 */
async function pollUntilSettled(
  runId: string,
  settled: readonly string[],
): Promise<{ status: string } | undefined> {
  for (let attempt = 0; attempt < POLL_ATTEMPTS; attempt++) {
    const res = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
    // A null/absent JSON body is treated as "not settled yet" instead of throwing.
    const body = res.json as { status: string } | null | undefined;
    if (body != null && settled.includes(body.status)) {
      return body;
    }
    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
  }
  return undefined;
}

/** Fetches the run's event list, asserting the endpoint answered 200. */
async function fetchRunEvents(runId: string): Promise<HandoffRunEvent[]> {
  const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
  expect(events.status).toBe(200);
  return (events.json as { events?: HandoffRunEvent[] } | null | undefined)?.events ?? [];
}

/** True when the event's `payload.error` is an object whose `code` is one of `codes`. */
function carriesErrorCode(event: HandoffRunEvent, codes: readonly string[]): boolean {
  const error = event.payload?.error;
  if (typeof error !== 'object' || error === null) return false;
  const code = (error as Record<string, unknown>).code;
  return typeof code === 'string' && codes.includes(code);
}

describe.skipIf(SKIP)('agentPackHandoffSchemaValidation: handoff schema enforcement at dispatch', () => {
  it('valid task payload that matches taskSchemaRef is dispatched and completes', async () => {
    // The fixture workflow dispatches `core.openwop.agent-examples.structured-fixture`
    // with a VALID task payload matching schemas/structured-fixture.task.schema.json
    // (`{ text: string, extractionFields: string[], language?: string }`).
    const runId = await startFixtureRun({
      scenario: 'valid-task',
      text: 'Acme Corp invoiced $1,200 on 2026-04-15 for Q2 consulting.',
      extractionFields: ['vendor', 'amount', 'date'],
    });

    const snap = await pollUntilSettled(runId, ['completed', 'failed', 'waiting-approval']);
    expect(snap?.status, 'HV-1a: valid task payload should NOT be rejected by handoff-schema validation').toBe('completed');
  });

  it('invalid task payload (missing required field) is rejected before dispatch with structured error', async () => {
    const runId = await startFixtureRun({
      scenario: 'invalid-task',
      // intentionally missing required `extractionFields`
      text: 'Some input text',
    });

    const snap = await pollUntilSettled(runId, ['completed', 'failed']);
    expect(snap?.status, 'HV-1b: invalid task payload MUST cause the run to fail rather than silently dispatch off-contract').toBe('failed');

    const list = await fetchRunEvents(runId);
    const validationFailure = list.find(
      (e) =>
        e.type === 'node.failed' &&
        carriesErrorCode(e, ['handoff_task_schema_violation', 'agent_dispatch_validation_failed']),
    );
    expect(
      validationFailure,
      'HV-1b: failure event payload MUST carry a recognizable handoff-validation error code',
    ).toBeDefined();
  });

  it('agent return payload that fails returnSchemaRef is rejected before persistence', async () => {
    // The fixture's `mock-return-violation` scenario causes the agent runtime
    // to emit a return payload that violates schemas/structured-fixture.return.schema.json
    // (e.g., omits the required `extracted` field while not declaring `error`).
    const runId = await startFixtureRun({ scenario: 'mock-return-violation' });

    const snap = await pollUntilSettled(runId, ['completed', 'failed']);
    // Hosts MAY surface return-schema violations as either a failed run OR a
    // run that completes with a flagged error envelope, but the persisted
    // result MUST NOT carry an off-schema body. Tolerate both outcomes here;
    // the strict assertion is that downstream readers can detect the violation.
    expect(['completed', 'failed']).toContain(snap?.status);

    const list = await fetchRunEvents(runId);
    const returnViolation = list.find(
      (e) =>
        (e.type === 'node.failed' || e.type === 'agent.error') &&
        carriesErrorCode(e, ['handoff_return_schema_violation', 'agent_return_validation_failed']),
    );
    expect(
      returnViolation,
      'HV-1c: off-schema return payload MUST surface a structured violation event before persistence',
    ).toBeDefined();
  });
});
|
|
@@ -47,26 +47,77 @@ describe.skipIf(SKIP)('agentReasoningEvents: agent.* event family emission', ()
|
|
|
47
47
|
|
|
48
48
|
const events = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
|
|
49
49
|
expect(events.status).toBe(200);
|
|
50
|
-
const list = (events.json as {
|
|
51
|
-
|
|
50
|
+
const list = (events.json as {
|
|
51
|
+
events?: Array<{
|
|
52
|
+
type: string;
|
|
53
|
+
eventId?: string;
|
|
54
|
+
causationId?: string;
|
|
55
|
+
payload?: Record<string, unknown>;
|
|
56
|
+
}>;
|
|
57
|
+
}).events ?? [];
|
|
52
58
|
|
|
53
59
|
const agentEvents = list.filter((e) => REASONING_EVENT_TYPES.has(e.type));
|
|
54
60
|
expect(agentEvents.length).toBeGreaterThan(0);
|
|
55
61
|
|
|
56
|
-
// Every agent.* event payload MUST
|
|
62
|
+
// Every agent.* event payload MUST identify the agent. Per
|
|
63
|
+
// `run-event-payloads.schema.json` §`agent*` shapes, four of the
|
|
64
|
+
// five events (`reasoned`, `toolCalled`, `toolReturned`, `decided`)
|
|
65
|
+
// carry `agentId`; `agent.handoff` carries `fromAgentId` + `toAgentId`
|
|
66
|
+
// instead. Allow either shape.
|
|
57
67
|
for (const ev of agentEvents) {
|
|
58
|
-
|
|
59
|
-
|
|
68
|
+
const p = (ev.payload ?? {}) as Record<string, unknown>;
|
|
69
|
+
if (ev.type === 'agent.handoff') {
|
|
70
|
+
expect(typeof p.fromAgentId).toBe('string');
|
|
71
|
+
expect(typeof p.toAgentId).toBe('string');
|
|
72
|
+
expect((p.fromAgentId as string).length).toBeGreaterThanOrEqual(3);
|
|
73
|
+
expect((p.toAgentId as string).length).toBeGreaterThanOrEqual(3);
|
|
74
|
+
} else {
|
|
75
|
+
expect(typeof p.agentId).toBe('string');
|
|
76
|
+
expect((p.agentId as string).length).toBeGreaterThanOrEqual(3);
|
|
77
|
+
}
|
|
60
78
|
}
|
|
61
79
|
|
|
62
|
-
// agent.toolCalled / agent.toolReturned
|
|
80
|
+
// agent.toolCalled / agent.toolReturned pairing — two normative
|
|
81
|
+
// requirements per RFC 0002 §B (`agentToolReturned`):
|
|
82
|
+
//
|
|
83
|
+
// 1. callId correlation. The pair shares a host-minted `callId`
|
|
84
|
+
// on their payloads; readers correlate request → response by
|
|
85
|
+
// this id even when the events arrive interleaved with other
|
|
86
|
+
// agent.* activity.
|
|
87
|
+
//
|
|
88
|
+
// 2. causationId === eventId of the paired agent.toolCalled.
|
|
89
|
+
// RFC 0002 §B states "`causationId` MUST equal the `eventId`
|
|
90
|
+
// of the corresponding `agent.toolCalled`." This is stricter
|
|
91
|
+
// than callId-pairing alone — it threads the event-log identity
|
|
92
|
+
// through the correlation chain so replay-determinism guarantees
|
|
93
|
+
// (`spec/v1/replay.md` §"Determinism with non-deterministic
|
|
94
|
+
// agents") survive event-id reuse and out-of-order delivery.
|
|
95
|
+
// Hosts whose `appendEvent` surface doesn't return the eventId
|
|
96
|
+
// synchronously need to extend it so the node can thread the
|
|
97
|
+
// paired eventId through.
|
|
63
98
|
const calls = agentEvents.filter((e) => e.type === 'agent.toolCalled');
|
|
64
99
|
const returns = agentEvents.filter((e) => e.type === 'agent.toolReturned');
|
|
65
100
|
for (const ret of returns) {
|
|
66
101
|
const callId = ret.payload?.callId as string | undefined;
|
|
67
102
|
if (callId === undefined) continue;
|
|
68
103
|
const matched = calls.find((c) => c.payload?.callId === callId);
|
|
69
|
-
expect(
|
|
104
|
+
expect(
|
|
105
|
+
matched,
|
|
106
|
+
`agent.toolReturned.callId=${callId} MUST pair with a prior agent.toolCalled`,
|
|
107
|
+
).toBeDefined();
|
|
108
|
+
|
|
109
|
+
// Strict causationId chain — only assert when the host actually
|
|
110
|
+
// surfaces eventId on the matched toolCalled event. Hosts that
|
|
111
|
+
// omit eventId from their `/events` projection skip this check
|
|
112
|
+
// (and SHOULD add it — RFC 0002 §B's chain integrity depends on
|
|
113
|
+
// it).
|
|
114
|
+
const matchedEventId = matched?.eventId;
|
|
115
|
+
if (typeof matchedEventId === 'string' && matchedEventId.length > 0) {
|
|
116
|
+
expect(
|
|
117
|
+
ret.causationId,
|
|
118
|
+
`agent.toolReturned (callId=${callId}) MUST carry causationId === paired agent.toolCalled.eventId per RFC 0002 §B`,
|
|
119
|
+
).toBe(matchedEventId);
|
|
120
|
+
}
|
|
70
121
|
}
|
|
71
122
|
});
|
|
72
123
|
});
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* core.openwop.agents.run — tool-allowlist enforcement contract
|
|
3
|
+
*
|
|
4
|
+
* Closes `OPENWOP-AUDIT-2026-003`: the 1.0.0 pack invoked workflow-supplied
|
|
5
|
+
* `tool.handler` as raw JS in its fallback loop, breaking the spec's
|
|
6
|
+
* `prompt-injection-tool-allowlist` invariant (`threat-model-prompt-injection.md`
|
|
7
|
+
* §"Authority bypass"). 1.0.1 refuses function-typed handlers outright; this
|
|
8
|
+
* scenario locks the refusal in as a CI gate so a future pack reimplementation
|
|
9
|
+
* cannot silently regress.
|
|
10
|
+
*
|
|
11
|
+
* Server-free. Loads the pack via dynamic import and asserts:
|
|
12
|
+
*
|
|
13
|
+
* 1. `tools[]` entries with `typeof handler === 'function'` are rejected
|
|
14
|
+
* with `INVALID_TOOL_DECLARATION` BEFORE any LLM call. The defect path.
|
|
15
|
+
* 2. `tools[]` entries missing a `name` are rejected (declaration discipline).
|
|
16
|
+
* 3. `tools[]` entries missing a `kind` discriminator are rejected (the host
|
|
17
|
+
* cannot resolve an unkinded tool through its connector registry).
|
|
18
|
+
* 4. Tool-driven runs (`tools.length > 0`) WITHOUT `ctx.agentRuntime` refuse
|
|
19
|
+
* with `HOST_CAPABILITY_MISSING` — the inline fallback that invoked raw
|
|
20
|
+
* handlers was removed in 1.0.1; there is no longer a host-less path for
|
|
21
|
+
* tool dispatch.
|
|
22
|
+
* 5. Tool-less runs (`tools.length === 0`) succeed via `ctx.callAIWithTools`
|
|
23
|
+
* with no tool dispatch (safe path preserved across the fix).
|
|
24
|
+
* 6. The preferred `ctx.agentRuntime.run` path threads through unchanged.
|
|
25
|
+
*
|
|
26
|
+
* Skip-conditions: soft-skips when `packs/core.openwop.agents/index.mjs` is not
|
|
27
|
+
* present (published-conformance-package context where pack source isn't shipped).
|
|
28
|
+
*
|
|
29
|
+
* @see SECURITY/internal-pre-audit-findings.json#OPENWOP-AUDIT-2026-003
|
|
30
|
+
* @see SECURITY/threat-model-prompt-injection.md §"Authority bypass" + §"prompt-injection-tool-allowlist"
|
|
31
|
+
* @see SECURITY/invariants.yaml#agents-run-no-raw-handler
|
|
32
|
+
* @see packs/core.openwop.agents/index.mjs (1.0.1)
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
import { describe, it, expect, beforeAll } from 'vitest';
|
|
36
|
+
import { existsSync } from 'node:fs';
|
|
37
|
+
import { dirname, resolve } from 'node:path';
|
|
38
|
+
import { fileURLToPath } from 'node:url';
|
|
39
|
+
|
|
40
|
+
// Directory containing this module, derived from `import.meta.url` by converting
// the module's file URL to a filesystem path and taking its parent directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
41
|
+
// Absolute path to the agents pack entry point, resolved three directories above
// this file. The suite checks the file exists before importing it and soft-skips
// when it does not.
const PACK_PATH = resolve(__dirname, '../../../packs/core.openwop.agents/index.mjs');
|
|
42
|
+
|
|
43
|
+
/**
 * Minimal shape of the context object this suite hands to the pack's `agentRun`
 * export. Every member is optional so that each scenario can supply only the
 * host capabilities it wants to exercise (or deliberately omit one).
 */
interface AgentRunCtx {
  /** Node configuration; every scenario in this file passes `{}`. */
  config?: Record<string, unknown>;

  /** Inputs for the run. */
  inputs?: {
    /** Prompt text for the run. */
    userPrompt?: string;
    /** Tool declarations; kept as `unknown[]` so malformed entries can be passed in. */
    tools?: unknown[];
    /** Not exercised by this suite. */
    memory?: unknown;
    /** Not exercised by this suite. */
    outputParser?: unknown;
  };

  /** Host-provided agent runtime; `run` receives the request, including `tools`. */
  agentRuntime?: { run?: (...args: unknown[]) => Promise<unknown> };

  /** Host-provided LLM call used by the tool-less scenario. */
  callAIWithTools?: (...args: unknown[]) => Promise<{ text?: string; usage?: unknown; toolCalls?: unknown[] }>;

  /** NOTE(review): presumably an event emitter supplied by the host; not exercised here, confirm against the pack. */
  emit?: (...args: unknown[]) => Promise<void>;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Signature this suite expects of the pack's `agentRun` export: it takes a run
 * context and resolves to a success envelope carrying the run's outputs.
 */
type AgentRunFn = (
  ctx: AgentRunCtx,
) => Promise<{
  status: 'success';
  outputs: Record<string, unknown>;
}>;
|
|
57
|
+
|
|
58
|
+
/**
 * Asserts that invoking `fn` fails with an `Error` whose `code` property equals
 * `expectedCode`.
 *
 * A run that resolves normally leaves nothing captured, so the `instanceof Error`
 * assertion fails and the scenario is reported as a failure.
 *
 * @param fn - Thunk that performs the call expected to fail.
 * @param expectedCode - Value the thrown error's `code` must equal.
 * @param description - Label attached to both assertion messages.
 */
async function expectRejection(fn: () => Promise<unknown>, expectedCode: string, description: string): Promise<void> {
  // Turns the outcome of `fn` into a value: the thrown/rejected reason, or
  // `undefined` when `fn` completes without failing.
  const captureFailure = async (): Promise<unknown> => {
    try {
      await fn();
    } catch (reason) {
      return reason;
    }
    return undefined;
  };

  const failure = await captureFailure();
  expect(failure, description).toBeInstanceOf(Error);

  // Only reached when `failure` is an Error, so reading `code` is safe.
  const { code } = failure as Error & { code?: string };
  expect(code, `${description} → code`).toBe(expectedCode);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Conformance category for `core.openwop.agents.run` tool-allowlist enforcement
 * (OPENWOP-AUDIT-2026-003).
 *
 * The pack is loaded once in `beforeAll`. When the pack source is absent, every
 * scenario returns early (and is therefore reported as passed) instead of failing.
 */
describe('category: core.openwop.agents.run — tool-allowlist enforcement (OPENWOP-AUDIT-2026-003)', () => {
  let agentRun: AgentRunFn;
  let packAvailable: boolean;

  // Probe for the pack on disk and, when present, import it and grab `agentRun`.
  // A present pack that does not export a function is a hard failure.
  beforeAll(async () => {
    packAvailable = existsSync(PACK_PATH);
    if (packAvailable) {
      const { agentRun: exported } = (await import(PACK_PATH)) as { agentRun?: AgentRunFn };
      if (typeof exported !== 'function') {
        throw new Error(`expected packs/core.openwop.agents/index.mjs to export agentRun; got ${typeof exported}`);
      }
      agentRun = exported;
    }
  });

  it('skips cleanly when pack source is not bundled', () => {
    if (packAvailable) {
      expect(packAvailable).toBe(true);
      return;
    }
    console.warn('[agents-run-tool-allowlist] pack source not present; skipping');
    expect(packAvailable).toBe(false);
  });

  it('rejects function-typed tool.handler (the defect path)', async () => {
    if (!packAvailable) {
      return;
    }
    // In 1.0.0 a workflow author could hand executable JS to the pack through
    // tools[].handler and the pack would await it directly with ctx. 1.0.1
    // closes this: the validator throws INVALID_TOOL_DECLARATION at the run
    // boundary, BEFORE any LLM call.
    const hostileCtx: AgentRunCtx = {
      config: {},
      inputs: {
        userPrompt: 'x',
        tools: [{ name: 'evil', kind: 'function', handler: () => 'rce' }],
      },
    };
    await expectRejection(
      () => agentRun(hostileCtx),
      'INVALID_TOOL_DECLARATION',
      'function-typed handler MUST be refused',
    );
  });

  it('rejects tool declaration missing a name', async () => {
    if (!packAvailable) {
      return;
    }
    const unnamedToolCtx: AgentRunCtx = {
      config: {},
      inputs: { userPrompt: 'x', tools: [{ kind: 'workflow' }] },
    };
    await expectRejection(
      () => agentRun(unnamedToolCtx),
      'INVALID_TOOL_DECLARATION',
      'unnamed tool MUST be refused',
    );
  });

  it('rejects tool declaration missing a kind discriminator', async () => {
    if (!packAvailable) {
      return;
    }
    const unkindedToolCtx: AgentRunCtx = {
      config: {},
      inputs: { userPrompt: 'x', tools: [{ name: 't1' }] },
    };
    await expectRejection(
      () => agentRun(unkindedToolCtx),
      'INVALID_TOOL_DECLARATION',
      'unkinded tool MUST be refused — host cannot resolve through its registry',
    );
  });

  it('rejects tool-driven runs when host does not provide agentRuntime', async () => {
    if (!packAvailable) {
      return;
    }
    // The 1.0.0 inline-handler fallback no longer exists, so tool dispatch
    // MUST go through a host-resolved runtime. This context has none.
    const runtimelessCtx: AgentRunCtx = {
      config: {},
      inputs: { userPrompt: 'x', tools: [{ name: 't1', kind: 'workflow' }] },
    };
    await expectRejection(
      () => agentRun(runtimelessCtx),
      'HOST_CAPABILITY_MISSING',
      'tools[] with no agentRuntime MUST refuse',
    );
  });

  it('tool-less run succeeds via callAIWithTools (safe fallback preserved)', async () => {
    if (!packAvailable) {
      return;
    }
    // Sentinel value distinguishes "stub never invoked" from "invoked with no tools".
    let observedTools: unknown = 'never-called';
    const hostCtx: AgentRunCtx = {
      config: {},
      inputs: { userPrompt: 'hi', tools: [] },
      callAIWithTools: async (request: unknown) => {
        observedTools = (request as { tools?: unknown[] }).tools;
        return { text: 'hello back', usage: { input_tokens: 1, output_tokens: 1 } };
      },
    };

    const { outputs } = await agentRun(hostCtx);

    expect(outputs.result).toBe('hello back');
    expect(outputs.finishReason).toBe('complete');
    expect(observedTools, 'tool-less fallback MUST pass an empty tools array — no LLM-driven dispatch').toEqual([]);
  });

  it('agentRuntime.run path threads through unchanged when host provides it', async () => {
    if (!packAvailable) {
      return;
    }
    let forwardedTools: unknown;
    const hostCtx: AgentRunCtx = {
      config: {},
      inputs: {
        userPrompt: 'x',
        tools: [{ name: 't1', kind: 'workflow', ref: 'vendor.acme.demo' }],
      },
      agentRuntime: {
        // Stub runtime: records the tools it was handed and returns a canned result.
        run: async (request: unknown) => {
          forwardedTools = (request as { tools?: unknown[] }).tools;
          return { result: 'from-runtime', toolCalls: [{ name: 't1' }] };
        },
      },
    };

    const { outputs } = await agentRun(hostCtx);

    expect(outputs.result).toBe('from-runtime');
    // Compared against a fresh literal (not the input object) so that any
    // mutation of the declaration by the pack is detected.
    expect(forwardedTools, 'host MUST receive the validated tools array').toEqual([
      { name: 't1', kind: 'workflow', ref: 'vendor.acme.demo' },
    ]);
  });
});
|