@crewhaus/ir-passes 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +42 -0
- package/src/index.test.ts +380 -0
- package/src/index.ts +441 -0
- package/src/wellformedness.test.ts +118 -0
package/package.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@crewhaus/ir-passes",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Idempotent IR optimization passes (dead-tool-elimination, prompt-cache-prefix-sort, redundant-mcp-server-collapse, permission-rule-canonicalize)",
|
|
6
|
+
"main": "src/index.ts",
|
|
7
|
+
"types": "src/index.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": "./src/index.ts"
|
|
10
|
+
},
|
|
11
|
+
"scripts": {
|
|
12
|
+
"test": "bun test src"
|
|
13
|
+
},
|
|
14
|
+
"dependencies": {
|
|
15
|
+
"@crewhaus/errors": "0.0.0",
|
|
16
|
+
"@crewhaus/ir": "0.0.0"
|
|
17
|
+
},
|
|
18
|
+
"license": "Apache-2.0",
|
|
19
|
+
"author": {
|
|
20
|
+
"name": "Max Meier",
|
|
21
|
+
"email": "max@studiomax.io",
|
|
22
|
+
"url": "https://studiomax.io"
|
|
23
|
+
},
|
|
24
|
+
"repository": {
|
|
25
|
+
"type": "git",
|
|
26
|
+
"url": "git+https://github.com/crewhaus/factory.git",
|
|
27
|
+
"directory": "packages/ir-passes"
|
|
28
|
+
},
|
|
29
|
+
"homepage": "https://github.com/crewhaus/factory/tree/main/packages/ir-passes#readme",
|
|
30
|
+
"bugs": {
|
|
31
|
+
"url": "https://github.com/crewhaus/factory/issues"
|
|
32
|
+
},
|
|
33
|
+
"publishConfig": {
|
|
34
|
+
"access": "restricted"
|
|
35
|
+
},
|
|
36
|
+
"files": [
|
|
37
|
+
"src",
|
|
38
|
+
"README.md",
|
|
39
|
+
"LICENSE",
|
|
40
|
+
"NOTICE"
|
|
41
|
+
]
|
|
42
|
+
}
|
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Section 28 — `ir-passes` tests:
|
|
3
|
+
* - T1 per built-in pass
|
|
4
|
+
* - T9 idempotence (apply(apply(x)) === apply(x))
|
|
5
|
+
* - T4 fixture replay
|
|
6
|
+
*/
|
|
7
|
+
import { describe, expect, test } from "bun:test";
|
|
8
|
+
import type { IrNode, IrV0 } from "@crewhaus/ir";
|
|
9
|
+
import {
|
|
10
|
+
DEFAULT_PIPELINE,
|
|
11
|
+
IrPassError,
|
|
12
|
+
applyPasses,
|
|
13
|
+
deadToolElimination,
|
|
14
|
+
permissionRuleCanonicalize,
|
|
15
|
+
promptCachePrefixSort,
|
|
16
|
+
redundantMcpServerCollapse,
|
|
17
|
+
transactionPolicyEnforcement,
|
|
18
|
+
} from "./index";
|
|
19
|
+
|
|
20
|
+
/**
 * Builds a minimal cli-target IR fixture for the tests below.
 *
 * @param overrides Top-level fields that replace the defaults (shallow merge:
 *   an overridden field replaces the default value wholesale).
 * @returns A fresh IR object on every call.
 */
function makeCli(overrides: Partial<IrV0> = {}): IrV0 {
  const defaults: IrV0 = {
    version: 0,
    name: "test",
    target: "cli",
    agent: { model: "claude-opus-4-7", instructions: "be helpful" },
    tools: [],
    toolConfigs: {},
    mcp_servers: {},
    permissions: { rules: [] },
    subAgents: [],
    compaction: {},
  };
  return { ...defaults, ...overrides };
}
|
|
35
|
+
|
|
36
|
+
describe("ir-passes — deadToolElimination (T1)", () => {
  // Shorthand for an `alwaysAllow` rule on the given pattern.
  const allow = (pattern: string) => ({ type: "alwaysAllow" as const, pattern });

  // Runs the pass on a cli IR and returns the surviving tool names, sorted.
  const survivors = (input: IrV0): string[] => {
    const result = deadToolElimination(input) as IrV0;
    return [...result.tools].sort();
  };

  test("no rules + no sub-agents → returns input unchanged (no inference)", () => {
    const input = makeCli({ tools: ["Read", "Write", "Bash"] });
    const result = deadToolElimination(input) as IrV0;
    expect(result.tools).toEqual(input.tools);
  });

  test("with rules referring only to Read+Bash → drops Write", () => {
    const input = makeCli({
      tools: ["Read", "Write", "Bash"],
      permissions: { rules: [allow("Read"), allow("Bash(*)")] },
    });
    expect(survivors(input)).toEqual(["Bash", "Read"]);
  });

  test("sub-agent that uses Write keeps Write in the parent's tool list", () => {
    const input = makeCli({
      tools: ["Read", "Write"],
      permissions: { rules: [allow("Read")] },
      subAgents: [
        {
          name: "writer",
          description: "writes files",
          instructions: "x",
          tools: ["Write"],
          permissions: "inherit",
          inheritBypass: false,
        },
      ],
    });
    expect(survivors(input)).toEqual(["Read", "Write"]);
  });

  test("non-cli targets pass through unchanged", () => {
    // Fixture-shaped non-cli IR; the pass must hand back the same reference.
    const workflow = {
      version: 0,
      name: "wf",
      target: "workflow",
      steps: [],
    } as unknown as IrNode;
    expect(deadToolElimination(workflow)).toBe(workflow);
  });
});
|
|
87
|
+
|
|
88
|
+
describe("ir-passes — redundantMcpServerCollapse (T1)", () => {
  // Fixture builders for the two transports exercised here.
  const stdio = (command: string, args: string[]) => ({
    transport: "stdio" as const,
    command,
    args,
  });
  const sse = (url: string) => ({ transport: "sse" as const, url });

  // Runs the pass and returns the surviving server keys, sorted.
  const survivingKeys = (input: IrV0): string[] => {
    const result = redundantMcpServerCollapse(input) as IrV0;
    return Object.keys(result.mcp_servers).sort();
  };

  test("dedup stdio servers by (command, args)", () => {
    const input = makeCli({
      mcp_servers: {
        a: stdio("npx", ["@x"]),
        b: stdio("npx", ["@x"]),
        c: stdio("npx", ["@y"]),
      },
    });
    expect(survivingKeys(input)).toEqual(["a", "c"]);
  });

  test("dedup sse servers by url", () => {
    const input = makeCli({
      mcp_servers: {
        a: sse("http://x"),
        b: sse("http://x"),
        c: sse("http://y"),
      },
    });
    expect(survivingKeys(input)).toEqual(["a", "c"]);
  });

  test("returns input unchanged when no duplicates", () => {
    const input = makeCli({ mcp_servers: { a: stdio("x", []) } });
    expect(redundantMcpServerCollapse(input)).toBe(input);
  });
});
|
|
122
|
+
|
|
123
|
+
describe("ir-passes — permissionRuleCanonicalize (T1)", () => {
  // One builder per rule tier keeps the fixtures below readable.
  const deny = (pattern: string) => ({ type: "alwaysDeny" as const, pattern });
  const ask = (pattern: string) => ({ type: "alwaysAsk" as const, pattern });
  const allow = (pattern: string) => ({ type: "alwaysAllow" as const, pattern });

  test("sorts by tier (deny > ask > allow) then alpha within tier", () => {
    const input = makeCli({
      permissions: {
        rules: [allow("Read"), deny("Bash(rm *)"), ask("Edit"), allow("Bash"), deny("Write")],
      },
    });
    const result = permissionRuleCanonicalize(input) as IrV0;
    const rendered = result.permissions.rules.map((r) => `${r.type}:${r.pattern}`);
    expect(rendered).toEqual([
      "alwaysDeny:Bash(rm *)",
      "alwaysDeny:Write",
      "alwaysAsk:Edit",
      "alwaysAllow:Bash",
      "alwaysAllow:Read",
    ]);
  });

  test("dedups exact duplicates", () => {
    const input = makeCli({
      permissions: { rules: [allow("Read"), allow("Read"), allow("Bash")] },
    });
    const result = permissionRuleCanonicalize(input) as IrV0;
    expect(result.permissions.rules.length).toBe(2);
  });

  test("returns input unchanged when already canonical", () => {
    const input = makeCli({
      permissions: { rules: [deny("Bash(rm *)"), allow("Read")] },
    });
    // Same reference back: the pass must not allocate when nothing changes.
    expect(permissionRuleCanonicalize(input)).toBe(input);
  });
});
|
|
173
|
+
|
|
174
|
+
describe("ir-passes — promptCachePrefixSort (T1 stub)", () => {
  test("v0 stub returns input unchanged", () => {
    // The stub must hand back the very same object, not a copy.
    const input = makeCli();
    const result = promptCachePrefixSort(input);
    expect(result).toBe(input);
  });
});
|
|
180
|
+
|
|
181
|
+
describe("ir-passes — applyPasses + idempotence (T9)", () => {
  // Fixture builders shared by the pipeline tests.
  const allow = (pattern: string) => ({ type: "alwaysAllow" as const, pattern });
  const deny = (pattern: string) => ({ type: "alwaysDeny" as const, pattern });
  const stdio = (command: string, args: string[]) => ({
    transport: "stdio" as const,
    command,
    args,
  });

  test("applyPasses runs the default pipeline", () => {
    const input = makeCli({
      tools: ["Read", "Write", "Bash"],
      permissions: { rules: [allow("Read"), allow("Read")] },
      mcp_servers: { a: stdio("x", []), b: stdio("x", []) },
    });
    const result = applyPasses(input) as IrV0;
    expect([...result.tools].sort()).toEqual(["Read"]);
    expect(result.permissions.rules.length).toBe(1);
    expect(Object.keys(result.mcp_servers).length).toBe(1);
  });

  test("idempotence: applyPasses(applyPasses(x)) === applyPasses(x)", () => {
    const input = makeCli({
      tools: ["Read", "Write", "Bash"],
      permissions: { rules: [allow("Read"), deny("Bash")] },
      mcp_servers: { a: stdio("x", ["y"]), b: stdio("x", ["y"]) },
    });
    const first = applyPasses(input);
    const second = applyPasses(first);
    // Compared structurally via their JSON serialisation.
    expect(JSON.stringify(second)).toBe(JSON.stringify(first));
  });

  test("custom pipeline applied in order", () => {
    const calls: string[] = [];
    // Builds an identity pass that records its label when invoked.
    const recording = (label: string) => (node: IrNode) => {
      calls.push(label);
      return node;
    };
    applyPasses(makeCli(), { passes: [recording("a"), recording("b")] });
    expect(calls).toEqual(["a", "b"]);
  });

  test("DEFAULT_PIPELINE has 6 passes (+ wellFormednessCheck from Track F §57)", () => {
    expect(DEFAULT_PIPELINE.length).toBe(6);
  });
});
|
|
241
|
+
|
|
242
|
+
describe("ir-passes — transactionPolicyEnforcement (T1, T8)", () => {
  // Baseline fixtures: one chain, plus a wallet and a contract bound to it.
  const baseChain = {
    id: "base-mainnet",
    kind: "evm" as const,
    rpcUrls: [{ kind: "literal" as const, value: "https://rpc.test" }],
    rpcPolicy: "single" as const,
    finality: { kind: "finalized" as const },
    reorgTolerant: true,
  };
  const treasuryWallet = {
    id: "treasury",
    chainId: "base-mainnet",
    custody: "user-controlled" as const,
    signingPolicy: "explicit-user-approval" as const,
  };
  const usdcContract = {
    id: "usdc",
    chainId: "base-mainnet",
    address: "0xusdc",
    abiRef: "abi://erc20",
  };
  // Policy that waives write approval; shared by the two approval=none tests.
  const noApprovalPolicy = {
    defaultWriteApproval: "none" as const,
    allowedContracts: ["usdc"],
    simulationRequired: false,
  };

  // Wraps the pass in a thunk so `expect(...).toThrow(...)` can invoke it.
  const enforcing = (node: IrNode) => () => transactionPolicyEnforcement(node);

  test("empty subsystem is a no-op (existing specs untouched)", () => {
    const input = makeCli();
    expect(transactionPolicyEnforcement(input)).toBe(input);
  });

  test("valid subsystem passes through unchanged", () => {
    const input = makeCli({
      chains: [baseChain],
      wallets: [treasuryWallet],
      contracts: [usdcContract],
      transactionPolicy: {
        defaultWriteApproval: "required",
        allowedContracts: ["usdc"],
        simulationRequired: true,
      },
    });
    expect(transactionPolicyEnforcement(input)).toBe(input);
  });

  test("rejects wallets[].chainId not declared in chains[]", () => {
    const input = makeCli({
      chains: [baseChain],
      wallets: [{ ...treasuryWallet, chainId: "polygon-mainnet" }],
    });
    expect(enforcing(input)).toThrow(IrPassError);
  });

  test("rejects contracts[].chainId not declared in chains[]", () => {
    const input = makeCli({
      chains: [baseChain],
      contracts: [{ ...usdcContract, chainId: "polygon-mainnet" }],
    });
    expect(enforcing(input)).toThrow(/not declared in chains\[\]/);
  });

  test("rejects allowedContracts entry not in contracts[].id", () => {
    const input = makeCli({
      chains: [baseChain],
      contracts: [usdcContract],
      transactionPolicy: {
        defaultWriteApproval: "required",
        allowedContracts: ["unknown-token"],
        simulationRequired: true,
      },
    });
    expect(enforcing(input)).toThrow(/unknown-token/);
  });

  test("rejects approval=none with non-automated wallet", () => {
    const input = makeCli({
      chains: [baseChain],
      wallets: [treasuryWallet],
      contracts: [usdcContract],
      transactionPolicy: noApprovalPolicy,
    });
    expect(enforcing(input)).toThrow(/automated/);
  });

  test("accepts approval=none when every wallet is automated", () => {
    const input = makeCli({
      chains: [baseChain],
      wallets: [
        {
          ...treasuryWallet,
          custody: "kms",
          signingPolicy: "automated",
          keyRef: { kind: "env", name: "KMS_KEY" },
        },
      ],
      contracts: [usdcContract],
      transactionPolicy: noApprovalPolicy,
    });
    expect(transactionPolicyEnforcement(input)).toBe(input);
  });

  test("rejects kms wallet without keyRef", () => {
    const input = makeCli({
      chains: [baseChain],
      wallets: [{ ...treasuryWallet, custody: "kms" }],
    });
    expect(enforcing(input)).toThrow(/keyRef/);
  });

  test("rejects duplicate chains[].id", () => {
    const input = makeCli({ chains: [baseChain, baseChain] });
    expect(enforcing(input)).toThrow(/duplicate chains/);
  });

  test("rejects duplicate wallets[].id", () => {
    const input = makeCli({
      chains: [baseChain],
      wallets: [treasuryWallet, treasuryWallet],
    });
    expect(enforcing(input)).toThrow(/duplicate wallets/);
  });

  test("non-blockchain shapes (managed, voice, browser, eval) pass through unchanged", () => {
    // Only the managed shape is constructed here.
    const managed: IrNode = {
      version: 0,
      name: "test-mgd",
      target: "managed",
      agent: { model: "claude-opus-4-7", instructions: "hi" },
      tenants: [{ id: "t1", budget: { maxInputTokens: 1, maxOutputTokens: 1 } }],
      permissions: { rules: [] },
      compaction: {},
    };
    expect(transactionPolicyEnforcement(managed)).toBe(managed);
  });
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Section 28 — `ir-passes`. Idempotent IR optimization passes; each pass
|
|
3
|
+
* is a pure `(IrNode) → IrNode` function. The pipeline is composable and
|
|
4
|
+
* deterministic: same input → same output regardless of pass order
|
|
5
|
+
* (within the published "safe order" — see `applyPasses`).
|
|
6
|
+
*
|
|
7
|
+
* Built-in passes:
|
|
8
|
+
 * - `deadToolElimination` — drop entries from `tools` that no sub-agent
 *   or permission rule references. Catches the common case where a spec
 *   declares `tools: [Read, Write, Bash]` but only ever mentions Read and
 *   Bash in its permission rules and sub-agents. A rule of any type counts
 *   as a reference, so a tool named only by an `alwaysDeny` rule is kept.
|
|
12
|
+
* - `redundantMcpServerCollapse` — dedup `mcp_servers` map entries by
|
|
13
|
+
* `(transport, command, args)` signature so two specs that import the
|
|
14
|
+
* same server under different keys collapse into one boot per process.
|
|
15
|
+
* - `permissionRuleCanonicalize` — sort + dedup `permissions.rules` by
|
|
16
|
+
* canonical (type, pattern) tuples; preserves source priority order
|
|
17
|
+
* (alwaysDeny > alwaysAsk > alwaysAllow) but de-dupes identical entries.
|
|
18
|
+
* - `promptCachePrefixSort` — TODO: re-orders system-block segments so
|
|
19
|
+
* the cache prefix is maximised. v0 stub returns IR unchanged so the
|
|
20
|
+
* pipeline contract holds; v1 follow-up wires this once we land
|
|
21
|
+
* multi-block system prompts in IR.
|
|
22
|
+
*
|
|
23
|
+
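 * Pipeline order in `applyPasses` (the safe default) for the passes above:
 *   deadToolElimination → redundantMcpServerCollapse →
 *   permissionRuleCanonicalize → promptCachePrefixSort
 * NOTE(review): the test suite expects six entries in `DEFAULT_PIPELINE`,
 * so this list is incomplete — presumably `transactionPolicyEnforcement`
 * and `wellFormednessCheck` follow; confirm against `DEFAULT_PIPELINE`.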
|
|
26
|
+
*/
|
|
27
|
+
import { CrewhausError } from "@crewhaus/errors";
|
|
28
|
+
import type {
|
|
29
|
+
IrChainBinding,
|
|
30
|
+
IrChannelV0,
|
|
31
|
+
IrContractBinding,
|
|
32
|
+
IrCrewV0,
|
|
33
|
+
IrGraphV0,
|
|
34
|
+
IrManagedV0,
|
|
35
|
+
IrMcpServerConfig,
|
|
36
|
+
IrMcpServers,
|
|
37
|
+
IrNode,
|
|
38
|
+
IrPermissionRule,
|
|
39
|
+
IrPermissions,
|
|
40
|
+
IrTransactionPolicy,
|
|
41
|
+
IrV0,
|
|
42
|
+
IrWalletBinding,
|
|
43
|
+
} from "@crewhaus/ir";
|
|
44
|
+
|
|
45
|
+
/**
 * Error thrown by an IR pass when the IR it inspects violates a rule the
 * pass enforces.
 *
 * Always passes `"compiler"` as the first argument to `CrewhausError`
 * (presumably the error category — confirm against `@crewhaus/errors`).
 */
export class IrPassError extends CrewhausError {
  /** Fixed class tag so callers and logs can identify pass failures. */
  override readonly name = "IrPassError";

  /**
   * @param message Human-readable description of the violated rule.
   * @param cause Optional underlying error or value, forwarded to the base class.
   */
  constructor(message: string, cause?: unknown) {
    super("compiler", message, cause);
  }
}
|
|
51
|
+
|
|
52
|
+
/** A single pipeline step: receives an IR node and returns the node to hand to the next step. */
export type IrPass = (ir: IrNode) => IrNode;

/** Options accepted by `applyPasses`. */
export type ApplyPassesOptions = {
  /** Passes to run, in order, instead of `DEFAULT_PIPELINE`. */
  readonly passes?: ReadonlyArray<IrPass>;
};

/**
 * Threads `ir` through a sequence of passes, feeding each pass the result of
 * the previous one.
 *
 * @param ir The IR node to start from.
 * @param opts Optional overrides; when `opts.passes` is absent the
 *   `DEFAULT_PIPELINE` is used.
 * @returns The node returned by the last pass, or `ir` itself when the
 *   pipeline is empty.
 */
export function applyPasses(ir: IrNode, opts: ApplyPassesOptions = {}): IrNode {
  const pipeline = opts.passes ?? DEFAULT_PIPELINE;
  // Wrap the call so each pass receives exactly one argument.
  return pipeline.reduce<IrNode>((node, pass) => pass(node), ir);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Pass 1 — drop tools that no permission rule and no sub-agent references.
 *
 * Only the cli target (`IrV0`) is inspected, since that is where `tools`,
 * `subAgents` and `permissions` live in the same shape; every other target is
 * returned untouched.
 *
 * A tool counts as referenced when its name matches, case-insensitively,
 * either the tool name of any permission rule pattern (rules of every type
 * count) or an entry in any sub-agent's `tools` list.
 *
 * @param ir The IR node to inspect.
 * @returns `ir` itself when nothing is dropped (non-cli target, empty tool
 *   list, no references at all, or every tool referenced); otherwise a
 *   shallow copy whose `tools` is a frozen array of the surviving names in
 *   their original order.
 */
export function deadToolElimination(ir: IrNode): IrNode {
  if (ir.target !== "cli") return ir;
  const cli = ir as IrV0;
  const tools = cli.tools ?? [];
  if (tools.length === 0) return ir;

  // Lower-cased names of every tool with a use site. Names are folded once
  // here so the filter below is a plain set lookup per tool; comparison is
  // case-insensitive since tool registration uses the lowercase variant.
  const usedLower = new Set<string>();

  // Permission rules name a tool either bare ("Bash") or with an invocation
  // pattern in parentheses ("Bash(rm *)").
  for (const rule of cli.permissions?.rules ?? []) {
    const toolName = extractToolFromPattern(rule.pattern);
    if (toolName) usedLower.add(toolName.toLowerCase());
  }

  // Sub-agents list the tools they use — those count as references too.
  for (const subAgent of cli.subAgents ?? []) {
    for (const toolName of subAgent.tools) usedLower.add(toolName.toLowerCase());
  }

  // With no references at all (rules and sub-agents both empty) the tool list
  // is the implicit baseline — eliminating every tool would be wrong.
  if (usedLower.size === 0) return ir;

  const kept = tools.filter((tool) => usedLower.has(tool.toLowerCase()));
  if (kept.length === tools.length) return ir;
  return { ...cli, tools: Object.freeze(kept) } as IrNode;
}

/**
 * Extracts the tool name from a permission rule pattern by taking the text
 * before the first `(` and trimming whitespace: "Bash(rm *)" → "Bash".
 *
 * @param pattern The rule pattern.
 * @returns The tool name, or `undefined` when the pattern is empty or has no
 *   text before the first `(`.
 */
function extractToolFromPattern(pattern: string): string | undefined {
  if (!pattern) return undefined;
  const parenIdx = pattern.indexOf("(");
  const head = (parenIdx === -1 ? pattern : pattern.slice(0, parenIdx)).trim();
  return head === "" ? undefined : head;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Pass 2 — collapse `mcp_servers` entries that describe the same server:
 * same `(transport, command, args)` for stdio, same `(transport, url)`
 * otherwise. The first key wins on collision; later duplicates are dropped.
 * Surviving entries keep their relative order.
 *
 * Only cli, channel and managed targets are inspected.
 *
 * @param ir The IR node to inspect.
 * @returns `ir` itself when nothing is dropped (other target, fewer than two
 *   servers, or no duplicates); otherwise a shallow copy whose `mcp_servers`
 *   is a frozen map of the surviving entries.
 */
export function redundantMcpServerCollapse(ir: IrNode): IrNode {
  // mcp_servers lives on cli, channel, managed
  const carriesMcp = (n: IrNode): n is IrV0 | IrChannelV0 | IrManagedV0 =>
    n.target === "cli" || n.target === "channel" || n.target === "managed";
  if (!carriesMcp(ir)) return ir;

  const servers = (ir as { mcp_servers?: IrMcpServers }).mcp_servers;
  if (!servers) return ir;
  const entries = Object.entries(servers);
  if (entries.length < 2) return ir;

  const seenSignatures = new Set<string>();
  const kept: Record<string, IrMcpServerConfig> = {};
  let keptCount = 0;
  for (const [key, config] of entries) {
    const signature = mcpSignature(config);
    if (seenSignatures.has(signature)) continue; // duplicate of an earlier key
    seenSignatures.add(signature);
    kept[key] = config;
    keptCount += 1;
  }

  if (keptCount === entries.length) return ir;
  return { ...ir, mcp_servers: Object.freeze(kept) } as IrNode;
}

/**
 * Builds the identity key used to detect duplicate MCP server configs.
 *
 * The key is a JSON-encoded tuple rather than a delimiter-joined string, so
 * argument boundaries are preserved: `["a b"]` and `["a", "b"]` are different
 * servers and must not collapse. A missing `args` is treated as `[]`.
 *
 * NOTE(review): only command/args (stdio) or url (other transports) take
 * part in the key; if configs can carry further distinguishing fields,
 * confirm whether they should be included.
 */
function mcpSignature(c: IrMcpServerConfig): string {
  if (c.transport === "stdio") {
    return JSON.stringify(["stdio", c.command, c.args ?? []]);
  }
  return JSON.stringify(["sse", c.url]);
}
|
|
154
|
+
|
|
155
|
+
/**
 * Pass 3 — sort + dedup permission rules.
 *
 * Rules are de-duplicated on the exact `(type, pattern)` pair (first
 * occurrence wins), then ordered by precedence tier
 * (alwaysDeny > alwaysAsk > alwaysAllow) and, within a tier, by pattern.
 *
 * Patterns are compared by UTF-16 code unit rather than with
 * `localeCompare`, so the canonical order is the same on every host
 * regardless of locale or ICU data, and two different patterns never
 * compare as equal.
 *
 * Only cli, channel and managed targets are inspected.
 *
 * @param ir The IR node to inspect.
 * @returns `ir` itself when it has no permissions or the rules are already
 *   canonical; otherwise a shallow copy whose `permissions.rules` is a frozen
 *   canonical array.
 */
export function permissionRuleCanonicalize(ir: IrNode): IrNode {
  const carriesPerms = (n: IrNode): n is IrV0 | IrChannelV0 | IrManagedV0 =>
    n.target === "cli" || n.target === "channel" || n.target === "managed";
  if (!carriesPerms(ir)) return ir;
  const perms = (ir as { permissions?: IrPermissions }).permissions;
  if (!perms) return ir;

  const tier = (t: IrPermissionRule["type"]): number =>
    t === "alwaysDeny" ? 0 : t === "alwaysAsk" ? 1 : 2;

  // Drop exact duplicates, keeping the first occurrence of each pair.
  const seen = new Set<string>();
  const canonical: IrPermissionRule[] = [];
  for (const rule of perms.rules) {
    const key = `${rule.type}:${rule.pattern}`;
    if (seen.has(key)) continue;
    seen.add(key);
    canonical.push(rule);
  }

  canonical.sort((a, b) => {
    const byTier = tier(a.type) - tier(b.type);
    if (byTier !== 0) return byTier;
    // Locale-independent ordering keeps the output deterministic.
    return a.pattern < b.pattern ? -1 : a.pattern > b.pattern ? 1 : 0;
  });

  // Rule objects are reused, so reference equality at every index means the
  // input was already canonical and can be returned without copying.
  const alreadyCanonical =
    canonical.length === perms.rules.length &&
    canonical.every((rule, i) => rule === perms.rules[i]);
  if (alreadyCanonical) return ir;

  const newPerms: IrPermissions = {
    ...perms,
    rules: Object.freeze(canonical),
  };
  return { ...ir, permissions: newPerms } as IrNode;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Pass 4 — placeholder for re-ordering system-block segments so the
 * prompt-cache prefix is as long as possible.
 *
 * In v0 the IR holds the system prompt as a single string, so there is
 * nothing to reorder and the input is handed back as-is. Keeping the stub in
 * place keeps the pipeline contract stable until multi-block system prompts
 * exist in the IR (Section 31's Studio v1 paths).
 *
 * @param ir The IR node.
 * @returns The same `ir` reference, unmodified.
 */
export function promptCachePrefixSort(ir: IrNode): IrNode {
  // Intentionally a no-op in v0.
  return ir;
}
|
|
211
|
+
|
|
212
|
+
/** Optional blockchain blocks read by `transactionPolicyEnforcement`. */
type CarriesChainSubsystem = {
  readonly chains?: ReadonlyArray<IrChainBinding>;
  readonly wallets?: ReadonlyArray<IrWalletBinding>;
  readonly contracts?: ReadonlyArray<IrContractBinding>;
  readonly transactionPolicy?: IrTransactionPolicy;
};

/** True for the targets whose blockchain blocks this pass validates. */
function carriesChainSubsystem(ir: IrNode): ir is IrNode & CarriesChainSubsystem {
  switch (ir.target) {
    case "cli":
    case "workflow":
    case "channel":
    case "graph":
    case "crew":
    case "research":
    case "batch":
      return true;
    default:
      return false;
  }
}

/**
 * Pass 5 — §47 transaction-policy enforcement. Validates the cross-cutting
 * blockchain blocks at compile time so referential integrity holds before
 * any bundle is emitted:
 *
 *   - `chains[].id`, `wallets[].id` and `contracts[].id` are each unique
 *   - every `wallets[].chainId` references a declared `chains[].id`
 *   - every wallet whose custody is not `user-controlled` has a `keyRef`
 *   - every `contracts[].chainId` references a declared `chains[].id`
 *   - every `transactionPolicy.allowedContracts` entry references a
 *     declared `contracts[].id`
 *   - `transactionPolicy.defaultWriteApproval = "none"` is only permitted
 *     when every wallet has `signingPolicy = "automated"`
 *
 * Targets outside the chain-capable set, and nodes with no chains, wallets,
 * contracts or policy, are passed through without inspection.
 *
 * @param ir The IR node to validate.
 * @returns The same `ir` reference — this pass never rewrites the IR.
 * @throws IrPassError on the first violated rule, checked in the order
 *   chains → wallets → contracts → policy.
 */
export function transactionPolicyEnforcement(ir: IrNode): IrNode {
  if (!carriesChainSubsystem(ir)) return ir;
  const { chains, wallets, contracts, transactionPolicy: policy } = ir;

  // Empty subsystem is a no-op (existing specs untouched).
  const isAbsent = (list: ReadonlyArray<unknown> | undefined): boolean =>
    list === undefined || list.length === 0;
  if (isAbsent(chains) && isAbsent(wallets) && isAbsent(contracts) && policy === undefined) {
    return ir;
  }

  // Lookup sets for the referential checks below.
  const declaredChains = new Set<string>((chains ?? []).map((chain) => chain.id));
  const declaredContracts = new Set<string>((contracts ?? []).map((contract) => contract.id));

  // chains[] uniqueness
  if (chains !== undefined) {
    const chainIdsSeen = new Set<string>();
    for (const chain of chains) {
      if (chainIdsSeen.has(chain.id)) {
        throw new IrPassError(`duplicate chains[].id "${chain.id}"`);
      }
      chainIdsSeen.add(chain.id);
    }
  }

  // wallets[]: unique ids, declared chain, keyRef unless user-controlled
  if (wallets !== undefined) {
    const walletIdsSeen = new Set<string>();
    for (const wallet of wallets) {
      if (walletIdsSeen.has(wallet.id)) {
        throw new IrPassError(`duplicate wallets[].id "${wallet.id}"`);
      }
      walletIdsSeen.add(wallet.id);

      if (!declaredChains.has(wallet.chainId)) {
        throw new IrPassError(
          `wallets[].id "${wallet.id}" references chainId "${wallet.chainId}" which is not declared in chains[]`,
        );
      }

      const needsKeyRef = wallet.custody !== "user-controlled";
      if (needsKeyRef && wallet.keyRef === undefined) {
        throw new IrPassError(
          `wallets[].id "${wallet.id}" has custody="${wallet.custody}" but no keyRef; kms/hsm/local custody requires keyRef`,
        );
      }
    }
  }

  // contracts[]: unique ids, declared chain
  if (contracts !== undefined) {
    const contractIdsSeen = new Set<string>();
    for (const contract of contracts) {
      if (contractIdsSeen.has(contract.id)) {
        throw new IrPassError(`duplicate contracts[].id "${contract.id}"`);
      }
      contractIdsSeen.add(contract.id);

      if (!declaredChains.has(contract.chainId)) {
        throw new IrPassError(
          `contracts[].id "${contract.id}" references chainId "${contract.chainId}" which is not declared in chains[]`,
        );
      }
    }
  }

  if (policy === undefined) return ir;

  // transaction_policy.allowed_contracts ⊆ contracts[].id
  for (const contractId of policy.allowedContracts) {
    if (!declaredContracts.has(contractId)) {
      throw new IrPassError(
        `transaction_policy.allowedContracts entry "${contractId}" is not a declared contracts[].id`,
      );
    }
  }

  // Waiving write approval is only allowed when no wallet needs a human to
  // sign; with zero wallets the condition holds vacuously.
  if (policy.defaultWriteApproval === "none") {
    const someNeedApproval = (wallets ?? []).some((wallet) => wallet.signingPolicy !== "automated");
    if (someNeedApproval) {
      throw new IrPassError(
        'transaction_policy.defaultWriteApproval="none" requires every wallet to have signingPolicy="automated"',
      );
    }
  }

  // No structural rewrite — the pass validates and passes through.
  return ir;
}
|
|
341
|
+
|
|
342
|
+
/**
 * Track F (Section 57) — well-formedness check for typed multi-agent
 * graphs. Source: AgentFlow (arxiv 2604.20801). Before any candidate
 * harness is sent for expensive LLM evaluation, this pass:
 *
 *   1. Verifies the entry names a declared node (graphs) or role (crews).
 *   2. Verifies every graph edge connects declared nodes.
 *   3. Verifies the graph is connected (every node reachable from entry,
 *      following edges in their `from` → `to` direction).
 *   4. Verifies every edge's `schema` either is not `named` or resolves
 *      to a declared `messageSchemas` entry.
 *   5. Verifies `messageSchemas` names are unique, for graphs and crews
 *      alike, so a named schema never resolves ambiguously.
 *   6. For crews: verifies routing.match sources and targets all reference
 *      declared roles (a subset of what `parseSpec` does; we re-check here
 *      so `applyPasses(ir)` is safe to call standalone).
 *
 * Failing this check is fast and cheap, which means the search budget
 * for upstream optimizers (Tracks D, E) goes to well-formed harnesses
 * only. Cited paper: AgentFlow (arxiv 2604.20801).
 *
 * @param ir - IR to validate. Any `target` other than `"graph"` or
 *   `"crew"` is returned without being inspected.
 * @returns The same `ir` object that was passed in; nothing is rewritten.
 * @throws IrPassError describing the first violation found.
 */
export function wellFormednessCheck(ir: IrNode): IrNode {
  if (ir.target === "graph") {
    assertGraphWellFormed(ir as IrGraphV0);
  } else if (ir.target === "crew") {
    assertCrewWellFormed(ir as IrCrewV0);
  }
  return ir;
}

/** Throws IrPassError unless the graph's entry, edges, schemas and reachability are all valid. */
function assertGraphWellFormed(g: IrGraphV0): void {
  const nodeNames = new Set(g.nodes.map((n) => n.name));
  if (!nodeNames.has(g.entry)) {
    throw new IrPassError(
      `graph entry "${g.entry}" is not a declared node (nodes: ${[...nodeNames].join(", ")})`,
    );
  }

  const schemas = g.messageSchemas ?? [];
  const schemaNames = new Set(schemas.map((s) => s.name));

  // Validate each edge and, in the same sweep, record it in an adjacency
  // map so reachability below is a single linear traversal.
  const successors = new Map<string, string[]>();
  for (const e of g.edges) {
    if (!nodeNames.has(e.from)) {
      throw new IrPassError(`graph edge from "${e.from}" references an undeclared node`);
    }
    if (!nodeNames.has(e.to)) {
      throw new IrPassError(`graph edge to "${e.to}" references an undeclared node`);
    }
    if (
      e.schema !== undefined &&
      e.schema.kind === "named" &&
      !schemaNames.has(e.schema.name)
    ) {
      throw new IrPassError(
        `graph edge ${e.from}→${e.to} references undeclared message schema "${e.schema.name}"`,
      );
    }
    const targets = successors.get(e.from);
    if (targets === undefined) {
      successors.set(e.from, [e.to]);
    } else {
      targets.push(e.to);
    }
  }

  // Reachability from entry: worklist traversal. Each node is queued at
  // most once, so cycles terminate.
  const reachable = new Set<string>([g.entry]);
  const pending: string[] = [g.entry];
  let current = pending.pop();
  while (current !== undefined) {
    for (const next of successors.get(current) ?? []) {
      if (!reachable.has(next)) {
        reachable.add(next);
        pending.push(next);
      }
    }
    current = pending.pop();
  }
  // Report in declaration order so the first unreachable node listed in
  // `nodes` is the one named in the error.
  for (const n of g.nodes) {
    if (!reachable.has(n.name)) {
      throw new IrPassError(`graph node "${n.name}" is unreachable from entry "${g.entry}"`);
    }
  }

  // A Set collapses repeated names, so a size mismatch means duplicates.
  // Checked last so graphs that already failed an earlier check keep
  // reporting that earlier error.
  if (schemaNames.size !== schemas.length) {
    throw new IrPassError("graph messageSchemas contains duplicate names");
  }
}

/** Throws IrPassError unless the crew's entry, routing.match rules and schema names are all valid. */
function assertCrewWellFormed(c: IrCrewV0): void {
  const roleNames = new Set(c.roles.map((r) => r.name));
  if (!roleNames.has(c.entry)) {
    throw new IrPassError(
      `crew entry "${c.entry}" is not a declared role (roles: ${[...roleNames].join(", ")})`,
    );
  }

  if (c.routing?.kind === "match" && c.routing.match !== undefined) {
    for (const [from, rules] of Object.entries(c.routing.match)) {
      if (!roleNames.has(from)) {
        throw new IrPassError(`crew routing.match["${from}"]: source role not declared`);
      }
      for (const rule of rules) {
        if (!roleNames.has(rule.to)) {
          throw new IrPassError(
            `crew routing.match["${from}"].to "${rule.to}": target role not declared`,
          );
        }
      }
    }
  }

  // Schema declarations are optional, but if present they must be uniquely
  // named. This catches the easy authoring bug of declaring two schemas
  // with the same name.
  const schemas = c.messageSchemas ?? [];
  if (new Set(schemas.map((s) => s.name)).size !== schemas.length) {
    throw new IrPassError("crew messageSchemas contains duplicate names");
  }
}
|
|
433
|
+
|
|
434
|
+
/**
 * The default list of IR passes, in declaration order. The two
 * validation passes (`transactionPolicyEnforcement`,
 * `wellFormednessCheck`) sit after the first three passes and before
 * `promptCachePrefixSort`.
 *
 * The array is frozen, so the list cannot be mutated at runtime.
 * NOTE(review): presumably consumed in order by `applyPasses` — confirm
 * against the caller.
 */
const defaultPasses: IrPass[] = [
  deadToolElimination,
  redundantMcpServerCollapse,
  permissionRuleCanonicalize,
  transactionPolicyEnforcement,
  wellFormednessCheck,
  promptCachePrefixSort,
];
export const DEFAULT_PIPELINE: ReadonlyArray<IrPass> = Object.freeze(defaultPasses);
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Track F (§57) — wellformedness check tests. Source: AgentFlow
|
|
3
|
+
* (arxiv 2604.20801).
|
|
4
|
+
*/
|
|
5
|
+
import { describe, expect, test } from "bun:test";
|
|
6
|
+
import type { IrCrewV0, IrGraphV0 } from "@crewhaus/ir";
|
|
7
|
+
import { IrPassError, wellFormednessCheck } from "./index";
|
|
8
|
+
|
|
9
|
+
/** Builds a tool-less graph node fixture with the given name and instructions. */
function graphNodeFixture(name: string, instructions: string): IrGraphV0["nodes"][number] {
  return { name, instructions, model: "m", tools: [], toolConfigs: {} };
}

/**
 * Smallest valid graph used by the suite: entry "a", nodes "a" and "b",
 * and one untyped edge a → b. Individual tests spread this object and
 * override a single field to produce one specific violation.
 */
const baseGraph: IrGraphV0 = {
  version: 0,
  name: "test-graph",
  target: "graph",
  entry: "a",
  nodes: [graphNodeFixture("a", "node a"), graphNodeFixture("b", "node b")],
  edges: [{ from: "a", to: "b" }],
  permissions: { rules: [] },
  compaction: {},
};
|
|
22
|
+
|
|
23
|
+
// Graph-target cases. Each negative test changes exactly one field of
// `baseGraph` so the failure can only come from the check under test.
describe("Track F — wellFormednessCheck (graph)", () => {
  test("accepts a well-formed graph", () => {
    expect(() => wellFormednessCheck(baseGraph)).not.toThrow();
  });

  test("rejects edge referencing undeclared node", () => {
    const bad: IrGraphV0 = {
      ...baseGraph,
      edges: [{ from: "a", to: "ghost" }],
    };
    expect(() => wellFormednessCheck(bad)).toThrow(IrPassError);
  });

  test("rejects unreachable node", () => {
    const bad: IrGraphV0 = {
      ...baseGraph,
      nodes: [
        ...baseGraph.nodes,
        { name: "orphan", instructions: "x", model: "m", tools: [], toolConfigs: {} },
      ],
    };
    expect(() => wellFormednessCheck(bad)).toThrow(IrPassError);
  });

  // Reachability follows edge direction: an edge *out of* a node does not
  // make that node reachable from the entry.
  test("rejects node that only has an outgoing edge into the reachable set", () => {
    const bad: IrGraphV0 = {
      ...baseGraph,
      nodes: [
        ...baseGraph.nodes,
        { name: "orphan", instructions: "x", model: "m", tools: [], toolConfigs: {} },
      ],
      edges: [...baseGraph.edges, { from: "orphan", to: "a" }],
    };
    expect(() => wellFormednessCheck(bad)).toThrow(IrPassError);
  });

  // Guards against a reachability traversal that never terminates on cycles.
  test("accepts a cyclic graph", () => {
    const g: IrGraphV0 = {
      ...baseGraph,
      edges: [
        { from: "a", to: "b" },
        { from: "b", to: "a" },
      ],
    };
    expect(() => wellFormednessCheck(g)).not.toThrow();
  });

  // The edge list is deliberately not in traversal order: "c" is only
  // discoverable after "b" has been reached via the later a → b edge.
  test("accepts node reachable through multiple hops with edges out of order", () => {
    const g: IrGraphV0 = {
      ...baseGraph,
      nodes: [
        ...baseGraph.nodes,
        { name: "c", instructions: "node c", model: "m", tools: [], toolConfigs: {} },
      ],
      edges: [
        { from: "b", to: "c" },
        { from: "a", to: "b" },
      ],
    };
    expect(() => wellFormednessCheck(g)).not.toThrow();
  });

  test("rejects entry that's not a declared node", () => {
    const bad: IrGraphV0 = { ...baseGraph, entry: "missing" };
    expect(() => wellFormednessCheck(bad)).toThrow(IrPassError);
  });

  test("rejects edge schema that references undeclared schema", () => {
    const bad: IrGraphV0 = {
      ...baseGraph,
      edges: [{ from: "a", to: "b", schema: { kind: "named", name: "ghost-schema" } }],
    };
    expect(() => wellFormednessCheck(bad)).toThrow(IrPassError);
  });

  test("accepts edge with untyped schema", () => {
    const g: IrGraphV0 = {
      ...baseGraph,
      edges: [{ from: "a", to: "b", schema: { kind: "untyped" } }],
    };
    expect(() => wellFormednessCheck(g)).not.toThrow();
  });

  test("accepts edge with declared named schema", () => {
    const g: IrGraphV0 = {
      ...baseGraph,
      messageSchemas: [{ name: "decision", schema: { type: "object" } }],
      edges: [{ from: "a", to: "b", schema: { kind: "named", name: "decision" } }],
    };
    expect(() => wellFormednessCheck(g)).not.toThrow();
  });
});
|
|
77
|
+
|
|
78
|
+
/** Builds a crew role fixture with no tools and no sub-agents. */
function crewRoleFixture(name: string, instructions: string): IrCrewV0["roles"][number] {
  return { name, model: "m", instructions, tools: [], toolConfigs: {}, subAgents: [] };
}

/**
 * Smallest valid crew used by the suite: entry "alpha", roles "alpha"
 * and "beta", no routing and no message schemas. Individual tests spread
 * this object and add the single field under test.
 */
const baseCrew: IrCrewV0 = {
  version: 0,
  name: "test-crew",
  target: "crew",
  entry: "alpha",
  roles: [crewRoleFixture("alpha", "a"), crewRoleFixture("beta", "b")],
  mcp_servers: {},
  permissions: { rules: [] },
  compaction: {},
};
|
|
91
|
+
|
|
92
|
+
// Crew-target cases. Each negative test changes exactly one field of
// `baseCrew` so the failure can only come from the check under test.
describe("Track F — wellFormednessCheck (crew)", () => {
  test("accepts a well-formed crew", () => {
    expect(() => wellFormednessCheck(baseCrew)).not.toThrow();
  });

  test("rejects entry that's not a declared role", () => {
    const bad: IrCrewV0 = { ...baseCrew, entry: "missing" };
    expect(() => wellFormednessCheck(bad)).toThrow(IrPassError);
  });

  test("accepts routing.match between declared roles", () => {
    const c: IrCrewV0 = {
      ...baseCrew,
      routing: {
        kind: "match",
        match: { alpha: [{ contains: "x", to: "beta" }] },
      },
    };
    expect(() => wellFormednessCheck(c)).not.toThrow();
  });

  test("rejects routing.match to undeclared role", () => {
    const bad: IrCrewV0 = {
      ...baseCrew,
      routing: {
        kind: "match",
        match: { alpha: [{ contains: "x", to: "ghost" }] },
      },
    };
    expect(() => wellFormednessCheck(bad)).toThrow(IrPassError);
  });

  // The rule's target is valid here; only the source key is undeclared.
  test("rejects routing.match from undeclared role", () => {
    const bad: IrCrewV0 = {
      ...baseCrew,
      routing: {
        kind: "match",
        match: { ghost: [{ contains: "x", to: "beta" }] },
      },
    };
    expect(() => wellFormednessCheck(bad)).toThrow(IrPassError);
  });

  test("rejects duplicate messageSchemas", () => {
    const bad: IrCrewV0 = {
      ...baseCrew,
      messageSchemas: [
        { name: "dup", schema: { type: "object" } },
        { name: "dup", schema: { type: "object" } },
      ],
    };
    expect(() => wellFormednessCheck(bad)).toThrow(IrPassError);
  });
});
|