@aigne/ash 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DESIGN.md +41 -0
- package/dist/ai-dev-loop/ash-run-result.cjs +12 -0
- package/dist/ai-dev-loop/ash-run-result.d.cts +28 -0
- package/dist/ai-dev-loop/ash-run-result.d.cts.map +1 -0
- package/dist/ai-dev-loop/ash-run-result.d.mts +28 -0
- package/dist/ai-dev-loop/ash-run-result.d.mts.map +1 -0
- package/dist/ai-dev-loop/ash-run-result.mjs +11 -0
- package/dist/ai-dev-loop/ash-run-result.mjs.map +1 -0
- package/dist/ai-dev-loop/ash-typed-error.cjs +51 -0
- package/dist/ai-dev-loop/ash-typed-error.d.cts +54 -0
- package/dist/ai-dev-loop/ash-typed-error.d.cts.map +1 -0
- package/dist/ai-dev-loop/ash-typed-error.d.mts +54 -0
- package/dist/ai-dev-loop/ash-typed-error.d.mts.map +1 -0
- package/dist/ai-dev-loop/ash-typed-error.mjs +50 -0
- package/dist/ai-dev-loop/ash-typed-error.mjs.map +1 -0
- package/dist/ai-dev-loop/ash-validate.cjs +27 -0
- package/dist/ai-dev-loop/ash-validate.d.cts +7 -0
- package/dist/ai-dev-loop/ash-validate.d.cts.map +1 -0
- package/dist/ai-dev-loop/ash-validate.d.mts +7 -0
- package/dist/ai-dev-loop/ash-validate.d.mts.map +1 -0
- package/dist/ai-dev-loop/ash-validate.mjs +28 -0
- package/dist/ai-dev-loop/ash-validate.mjs.map +1 -0
- package/dist/ai-dev-loop/dev-loop.cjs +134 -0
- package/dist/ai-dev-loop/dev-loop.d.cts +28 -0
- package/dist/ai-dev-loop/dev-loop.d.cts.map +1 -0
- package/dist/ai-dev-loop/dev-loop.d.mts +28 -0
- package/dist/ai-dev-loop/dev-loop.d.mts.map +1 -0
- package/dist/ai-dev-loop/dev-loop.mjs +135 -0
- package/dist/ai-dev-loop/dev-loop.mjs.map +1 -0
- package/dist/ai-dev-loop/index.cjs +24 -0
- package/dist/ai-dev-loop/index.d.cts +9 -0
- package/dist/ai-dev-loop/index.d.mts +9 -0
- package/dist/ai-dev-loop/index.mjs +10 -0
- package/dist/ai-dev-loop/live-mode.cjs +17 -0
- package/dist/ai-dev-loop/live-mode.d.cts +24 -0
- package/dist/ai-dev-loop/live-mode.d.cts.map +1 -0
- package/dist/ai-dev-loop/live-mode.d.mts +24 -0
- package/dist/ai-dev-loop/live-mode.d.mts.map +1 -0
- package/dist/ai-dev-loop/live-mode.mjs +17 -0
- package/dist/ai-dev-loop/live-mode.mjs.map +1 -0
- package/dist/ai-dev-loop/meta-tools.cjs +123 -0
- package/dist/ai-dev-loop/meta-tools.d.cts +24 -0
- package/dist/ai-dev-loop/meta-tools.d.cts.map +1 -0
- package/dist/ai-dev-loop/meta-tools.d.mts +24 -0
- package/dist/ai-dev-loop/meta-tools.d.mts.map +1 -0
- package/dist/ai-dev-loop/meta-tools.mjs +120 -0
- package/dist/ai-dev-loop/meta-tools.mjs.map +1 -0
- package/dist/ai-dev-loop/structured-runner.cjs +154 -0
- package/dist/ai-dev-loop/structured-runner.d.cts +12 -0
- package/dist/ai-dev-loop/structured-runner.d.cts.map +1 -0
- package/dist/ai-dev-loop/structured-runner.d.mts +12 -0
- package/dist/ai-dev-loop/structured-runner.d.mts.map +1 -0
- package/dist/ai-dev-loop/structured-runner.mjs +155 -0
- package/dist/ai-dev-loop/structured-runner.mjs.map +1 -0
- package/dist/ai-dev-loop/system-prompt.cjs +55 -0
- package/dist/ai-dev-loop/system-prompt.d.cts +20 -0
- package/dist/ai-dev-loop/system-prompt.d.cts.map +1 -0
- package/dist/ai-dev-loop/system-prompt.d.mts +20 -0
- package/dist/ai-dev-loop/system-prompt.d.mts.map +1 -0
- package/dist/ai-dev-loop/system-prompt.mjs +54 -0
- package/dist/ai-dev-loop/system-prompt.mjs.map +1 -0
- package/dist/ast.d.cts +140 -0
- package/dist/ast.d.cts.map +1 -0
- package/dist/ast.d.mts +140 -0
- package/dist/ast.d.mts.map +1 -0
- package/dist/compiler.cjs +802 -0
- package/dist/compiler.d.cts +103 -0
- package/dist/compiler.d.cts.map +1 -0
- package/dist/compiler.d.mts +103 -0
- package/dist/compiler.d.mts.map +1 -0
- package/dist/compiler.mjs +802 -0
- package/dist/compiler.mjs.map +1 -0
- package/dist/index.cjs +14 -0
- package/dist/index.d.cts +7 -0
- package/dist/index.d.mts +7 -0
- package/dist/index.mjs +7 -0
- package/dist/lexer.cjs +451 -0
- package/dist/lexer.d.cts +14 -0
- package/dist/lexer.d.cts.map +1 -0
- package/dist/lexer.d.mts +14 -0
- package/dist/lexer.d.mts.map +1 -0
- package/dist/lexer.mjs +451 -0
- package/dist/lexer.mjs.map +1 -0
- package/dist/parser.cjs +734 -0
- package/dist/parser.d.cts +40 -0
- package/dist/parser.d.cts.map +1 -0
- package/dist/parser.d.mts +40 -0
- package/dist/parser.d.mts.map +1 -0
- package/dist/parser.mjs +734 -0
- package/dist/parser.mjs.map +1 -0
- package/dist/reference.cjs +130 -0
- package/dist/reference.d.cts +11 -0
- package/dist/reference.d.cts.map +1 -0
- package/dist/reference.d.mts +11 -0
- package/dist/reference.d.mts.map +1 -0
- package/dist/reference.mjs +130 -0
- package/dist/reference.mjs.map +1 -0
- package/dist/template.cjs +85 -0
- package/dist/template.mjs +84 -0
- package/dist/template.mjs.map +1 -0
- package/dist/type-checker.cjs +582 -0
- package/dist/type-checker.d.cts +31 -0
- package/dist/type-checker.d.cts.map +1 -0
- package/dist/type-checker.d.mts +31 -0
- package/dist/type-checker.d.mts.map +1 -0
- package/dist/type-checker.mjs +573 -0
- package/dist/type-checker.mjs.map +1 -0
- package/package.json +29 -0
- package/src/ai-dev-loop/ash-run-result.test.ts +113 -0
- package/src/ai-dev-loop/ash-run-result.ts +46 -0
- package/src/ai-dev-loop/ash-typed-error.test.ts +136 -0
- package/src/ai-dev-loop/ash-typed-error.ts +50 -0
- package/src/ai-dev-loop/ash-validate.test.ts +54 -0
- package/src/ai-dev-loop/ash-validate.ts +34 -0
- package/src/ai-dev-loop/dev-loop.test.ts +364 -0
- package/src/ai-dev-loop/dev-loop.ts +156 -0
- package/src/ai-dev-loop/dry-run.test.ts +107 -0
- package/src/ai-dev-loop/e2e-multi-fix.test.ts +473 -0
- package/src/ai-dev-loop/e2e.test.ts +324 -0
- package/src/ai-dev-loop/index.ts +15 -0
- package/src/ai-dev-loop/invariants.test.ts +253 -0
- package/src/ai-dev-loop/live-mode.test.ts +63 -0
- package/src/ai-dev-loop/live-mode.ts +33 -0
- package/src/ai-dev-loop/meta-tools.test.ts +120 -0
- package/src/ai-dev-loop/meta-tools.ts +142 -0
- package/src/ai-dev-loop/structured-runner.test.ts +159 -0
- package/src/ai-dev-loop/structured-runner.ts +209 -0
- package/src/ai-dev-loop/system-prompt.test.ts +102 -0
- package/src/ai-dev-loop/system-prompt.ts +81 -0
- package/src/ast.ts +186 -0
- package/src/compiler.test.ts +2933 -0
- package/src/compiler.ts +1103 -0
- package/src/e2e.test.ts +552 -0
- package/src/index.ts +16 -0
- package/src/lexer.test.ts +538 -0
- package/src/lexer.ts +222 -0
- package/src/parser.test.ts +1024 -0
- package/src/parser.ts +835 -0
- package/src/reference.test.ts +166 -0
- package/src/reference.ts +125 -0
- package/src/template.test.ts +210 -0
- package/src/template.ts +139 -0
- package/src/type-checker.test.ts +1494 -0
- package/src/type-checker.ts +785 -0
- package/tsconfig.json +9 -0
- package/tsdown.config.ts +12 -0
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* End-to-End Tests for ASH AI Dev Loop.
|
|
3
|
+
*
|
|
4
|
+
* These tests use REAL ASH compilation, validation, and execution.
|
|
5
|
+
* Only ctx.think() is faked (no real LLM calls).
|
|
6
|
+
*
|
|
7
|
+
* Verifies the full chain: think → validate → compile → execute → observe.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, it, expect, vi } from "vitest";
|
|
11
|
+
import { runDevLoop } from "./dev-loop.js";
|
|
12
|
+
import { runStructured } from "./structured-runner.js";
|
|
13
|
+
import { ashValidate } from "./ash-validate.js";
|
|
14
|
+
import type { WorldInterface, JobLogger, JobContext } from "../compiler.js";
|
|
15
|
+
import { isAshTypedError } from "./ash-typed-error.js";
|
|
16
|
+
|
|
17
|
+
// ── Real World + Context ─────────────────────────────────
|
|
18
|
+
|
|
19
|
+
/**
 * Build an in-memory world test double.
 *
 * Reads are served from `data`; a path with no entry yields an empty array.
 * Writes and publishes are not applied to `data` — they are recorded in the
 * `written` / `published` maps (last call per key wins) so tests can assert
 * on whether side effects were attempted.
 *
 * @param data - Seed records keyed by path.
 * @returns The world implementation plus the `written` and `published` recorders.
 */
function makeWorld(data: Record<string, unknown[]> = {}): WorldInterface & {
  written: Record<string, unknown[]>;
  published: Record<string, unknown[]>;
} {
  const written: Record<string, unknown[]> = {};
  const published: Record<string, unknown[]> = {};

  return {
    read(path: string) {
      return data[path] ?? [];
    },
    write(path: string, records: unknown[]) {
      written[path] = records;
    },
    publish(topic: string, records: unknown[]) {
      published[topic] = records;
    },
    written,
    published,
  };
}
|
|
33
|
+
|
|
34
|
+
/**
 * Build a job context backed by a fresh in-memory world.
 *
 * The context carries the wildcard capability `"*"` and a logger whose
 * `log` method does nothing.
 *
 * @param data - Seed records keyed by path, forwarded to `makeWorld`.
 * @returns `ctx` for the runner and the same `world` instance for assertions.
 */
function makeCtx(data: Record<string, unknown[]> = {}) {
  const world = makeWorld(data);
  return {
    ctx: {
      world,
      caps: new Set(["*"]),
      logger: { log() {} } as JobLogger,
    } as JobContext,
    world,
  };
}
|
|
45
|
+
|
|
46
|
+
// ── Fake Think Helper ────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
/**
 * Create a fake `think` callback that replays a fixed list of scripts.
 *
 * Each call resolves to a "completed" response containing a single `ash_run`
 * tool call whose JSON arguments carry the next script. Once the list is
 * exhausted the script is the empty string.
 *
 * @param scripts - ASH sources to hand out, one per call, in order.
 * @returns A vitest mock so call counts can be asserted.
 */
function thinkReturning(scripts: string[]) {
  let i = 0;
  return vi.fn(async () => ({
    kind: "completed" as const,
    response: {
      content: "",
      tool_calls: [{
        // `id` is evaluated before `i++` below, so ids run tc-0, tc-1, …
        id: `tc-${i}`,
        name: "ash_run",
        arguments: JSON.stringify({ script: scripts[i++] ?? "" }),
      }],
    },
    toolResults: [],
  }));
}
|
|
63
|
+
|
|
64
|
+
// ── Real runner + validate wrappers ──────────────────────
|
|
65
|
+
|
|
66
|
+
/**
 * Wrap the real structured runner as a dev-loop `runner` callback.
 *
 * @param ctx - Job context passed to every run.
 * @param mode - Execution mode forwarded to `runStructured`; defaults to "dry-run".
 * @returns An async function that runs the given ASH source with `ctx` and `mode`.
 */
function realRunner(ctx: JobContext, mode: "dry-run" | "live" = "dry-run") {
  return async (source: string) => runStructured(source, ctx, { mode });
}
|
|
69
|
+
|
|
70
|
+
/**
 * Wrap the real validator as a dev-loop `validate` callback.
 *
 * @returns An async function that passes the given ASH source to `ashValidate`.
 */
function realValidate() {
  return async (source: string) => ashValidate(source);
}
|
|
73
|
+
|
|
74
|
+
// ── E2E Tests ────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
// End-to-end suite: real validation and execution, with only `think` faked.
describe("E2E: Full AI Dev Loop with Real ASH", () => {
  it("valid script → real compile + execute → success on first try", async () => {
    const { ctx } = makeCtx({ "/users": [{ id: 1, name: "Alice" }] });
    const think = thinkReturning(['job "e2e" { find /users }']);
    const observe = vi.fn(async () => {});

    const result = await runDevLoop({
      intent: "find all users",
      max_iterations: 3,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe,
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(1);
    expect(result.finalResult).toBeDefined();
    expect(result.finalResult!.status).toBe("ok");
    expect(observe).toHaveBeenCalledTimes(1);
  });

  it("invalid syntax → validate catches it → correction → second script succeeds", async () => {
    const { ctx } = makeCtx({ "/users": [{ id: 1 }] });
    const think = thinkReturning([
      'job "broken" { foobar }', // invalid command → validate catches
      'job "fixed" { find /users }', // valid
    ]);
    const observe = vi.fn(async () => {});

    const result = await runDevLoop({
      intent: "find users",
      max_iterations: 3,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe,
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(2);
    // First iteration: validate error, no runner call
    // Second iteration: validate ok, runner ok
    expect(observe).toHaveBeenCalledTimes(2);
    // First observation should have validation_errors
    const firstObs = (observe.mock.calls as any)[0][0];
    expect(firstObs.validation_errors).toBeDefined();
    expect(firstObs.validation_errors.length).toBeGreaterThan(0);
  });

  it("dry-run mode: world is NOT written to", async () => {
    const { ctx, world } = makeCtx({ "/users": [{ id: 1, name: "Alice" }] });
    const think = thinkReturning([
      'job "write-test" { find /users | save /output }',
    ]);

    const result = await runDevLoop({
      intent: "copy users to output",
      max_iterations: 1,
      think,
      runner: realRunner(ctx, "dry-run"),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(result.mode).toBe("dry-run");
    // World should NOT have been written to
    expect(world.written["/output"]).toBeUndefined();
    expect(Object.keys(world.written)).toHaveLength(0);
  });

  it("live mode: world IS written to", async () => {
    const { ctx, world } = makeCtx({ "/users": [{ id: 1, name: "Alice" }] });
    const think = thinkReturning([
      'job "write-test" { find /users | save /output }',
    ]);

    const result = await runDevLoop({
      intent: "copy users to output",
      max_iterations: 1,
      mode: "live",
      think,
      runner: realRunner(ctx, "live"),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(result.mode).toBe("live");
    // World SHOULD have been written to
    expect(world.written["/output"]).toBeDefined();
  });

  it("all iterations fail with bad syntax → returns error with all observations", async () => {
    const { ctx } = makeCtx();
    const think = thinkReturning([
      'job "bad1" { foobar }',
      'job "bad2" { bazqux }',
      'job "bad3" { nope }',
    ]);
    const observe = vi.fn(async () => {});

    const result = await runDevLoop({
      intent: "do something",
      max_iterations: 3,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe,
    });

    expect(result.status).toBe("error");
    expect(result.iterations).toBe(3);
    expect(result.observations).toHaveLength(3);
    // Every observation should have validation errors
    for (const obs of result.observations) {
      expect(obs.validation_errors).toBeDefined();
    }
  });

  it("correction prompt includes error details from real validation failure", async () => {
    const { ctx } = makeCtx({ "/users": [{ id: 1 }] });
    let correctionMessage: string | undefined;
    let callCount = 0;

    const think = vi.fn(async (req: any) => {
      callCount++;
      if (callCount === 2) {
        // Capture the correction prompt on second call
        correctionMessage = req.messages?.[0]?.content;
      }
      const scripts = [
        'job "broken" { foobar }',
        'job "fixed" { find /users }',
      ];
      return {
        kind: "completed" as const,
        response: {
          content: "",
          tool_calls: [{
            id: `tc-${callCount}`,
            name: "ash_run",
            arguments: JSON.stringify({ script: scripts[callCount - 1] }),
          }],
        },
        toolResults: [],
      };
    });

    const result = await runDevLoop({
      intent: "find users",
      max_iterations: 3,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(correctionMessage).toBeDefined();
    expect(correctionMessage).toContain("previous ASH script failed");
    expect(correctionMessage).toContain("ParseError");
    expect(correctionMessage).toContain("foobar");
  });

  it("observations contain real AshRunResult from execution", async () => {
    const { ctx } = makeCtx({ "/users": [{ id: 1 }, { id: 2 }] });
    const think = thinkReturning(['job "real" { find /users }']);
    const observe = vi.fn(async () => {});

    await runDevLoop({
      intent: "find users",
      max_iterations: 1,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe,
    });

    const obs = (observe.mock.calls as any)[0][0];
    expect(obs.result).toBeDefined();
    expect(obs.result.status).toBe("ok");
    expect(obs.result.duration_ms).toBeGreaterThanOrEqual(0);
    expect(Array.isArray(obs.result.steps)).toBe(true);
  });

  // Title corrected: this case asserts that reading a path with no data is a
  // success, not a runtime error, so no correction round is triggered.
  it("find on a path with no data is not an error → loop stops after the first iteration", async () => {
    const { ctx } = makeCtx({}); // empty world
    const think = thinkReturning([
      'job "fetch" { find /nonexistent }', // compiles, runs, returns empty
      'job "fetch2" { find /users }', // never requested: the first script already succeeds
    ]);

    const result = await runDevLoop({
      intent: "find data",
      max_iterations: 2,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    // find with no data is still "ok" (empty result)
    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(1); // first one already succeeds (empty find is ok)
  });

  it("e2e with real validate catching error → real run on corrected script → world unchanged (dry-run)", async () => {
    const { ctx, world } = makeCtx({ "/data": [{ x: 1 }] });
    const think = thinkReturning([
      'job "bad" { unknowncmd }', // validation error
      'job "good" { find /data | save /out }', // valid, will run in dry-run
    ]);

    const result = await runDevLoop({
      intent: "process data",
      max_iterations: 3,
      think,
      runner: realRunner(ctx, "dry-run"),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(2);
    // INV-1: dry-run → world unchanged
    expect(Object.keys(world.written)).toHaveLength(0);
    expect(Object.keys(world.published)).toHaveLength(0);
  });

  it("max_iterations=1 with valid script → single pass e2e", async () => {
    const { ctx } = makeCtx({ "/items": [{ a: 1 }] });
    const think = thinkReturning(['job "single" { find /items }']);

    const result = await runDevLoop({
      intent: "get items",
      max_iterations: 1,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(1);
  });
});
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AI Dev Loop — bounded generate → validate → run → observe → correct loop.
|
|
3
|
+
*/
|
|
4
|
+
export { runDevLoop } from "./dev-loop.js";
|
|
5
|
+
export type { DevLoopConfig, DevLoopResult } from "./dev-loop.js";
|
|
6
|
+
export { runStructured } from "./structured-runner.js";
|
|
7
|
+
export { ashValidate } from "./ash-validate.js";
|
|
8
|
+
export { isAshRunSuccess, isAshRunFailure } from "./ash-run-result.js";
|
|
9
|
+
export type { AshRunResult, AshRunFailure, AshRunSuccess, AshStepResult } from "./ash-run-result.js";
|
|
10
|
+
export { isAshTypedError, fromJobError } from "./ash-typed-error.js";
|
|
11
|
+
export type { AshTypedError } from "./ash-typed-error.js";
|
|
12
|
+
export { checkLivePermission } from "./live-mode.js";
|
|
13
|
+
export type { LiveModePolicy } from "./live-mode.js";
|
|
14
|
+
export { buildSystemPrompt, buildCorrectionPrompt } from "./system-prompt.js";
|
|
15
|
+
export { ashRunToolDef, ashValidateToolDef, ashExplainErrorToolDef, META_TOOL_DEFS, executeMetaTool } from "./meta-tools.js";
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
import { describe, it, expect, vi } from "vitest";
|
|
2
|
+
import { runDevLoop } from "./dev-loop.js";
|
|
3
|
+
import type { AshRunResult, AshRunFailure } from "./ash-run-result.js";
|
|
4
|
+
import { isAshTypedError } from "./ash-typed-error.js";
|
|
5
|
+
|
|
6
|
+
// ── Helpers ──────────────────────────────────────────────
|
|
7
|
+
|
|
8
|
+
/**
 * Create a fake `think` callback that replays a fixed list of scripts.
 *
 * Each call resolves to a "completed" response with one `ash_run` tool call
 * whose JSON arguments hold the next script; after the list runs out the
 * script is the empty string.
 *
 * @param scripts - Scripts to hand out, one per call, in order.
 * @returns A vitest mock so call counts can be asserted.
 */
function fakeThink(scripts: string[]) {
  let i = 0;
  return vi.fn(async () => ({
    kind: "completed" as const,
    response: {
      content: "",
      // `id` reads `i` before `scripts[i++]` advances it.
      tool_calls: [{ id: `tc-${i}`, name: "ash_run", arguments: JSON.stringify({ script: scripts[i++] ?? "" }) }],
    },
    toolResults: [],
  }));
}
|
|
19
|
+
|
|
20
|
+
/**
 * Create a fake runner that looks results up by script source.
 *
 * @param results - Canned results keyed by the exact script text.
 * @returns A vitest mock resolving to the mapped result, or to an empty
 *   successful run when the script has no entry.
 */
function fakeRunner(results: Map<string, AshRunResult>) {
  return vi.fn(async (source: string): Promise<AshRunResult> =>
    results.get(source) ?? { status: "ok", steps: [], output: [], duration_ms: 0 },
  );
}
|
|
25
|
+
|
|
26
|
+
// ── INV-1: Default Dry-Run ───────────────────────────────
|
|
27
|
+
|
|
28
|
+
// INV-1: the reported mode is "dry-run" unless "live" is requested explicitly.
describe("INV-1: Default execution is dry-run", () => {
  it("default mode is dry-run", async () => {
    const result = await runDevLoop({
      intent: "test",
      max_iterations: 1,
      think: fakeThink(["script"]),
      runner: vi.fn(async () => ({ status: "ok" as const, steps: [], output: [], duration_ms: 0 })),
      validate: vi.fn(async () => []),
      observe: vi.fn(async () => {}),
    });
    expect(result.mode).toBe("dry-run");
  });

  it("explicit live mode is live", async () => {
    const result = await runDevLoop({
      intent: "test",
      max_iterations: 1,
      mode: "live",
      think: fakeThink(["script"]),
      runner: vi.fn(async () => ({ status: "ok" as const, steps: [], output: [], duration_ms: 0 })),
      validate: vi.fn(async () => []),
      observe: vi.fn(async () => {}),
    });
    expect(result.mode).toBe("live");
  });
});
|
|
54
|
+
|
|
55
|
+
// ── INV-2: Structured Typed Errors ───────────────────────
|
|
56
|
+
|
|
57
|
+
// INV-2: every error shape used by the loop is recognised by isAshTypedError.
describe("INV-2: Every error is AshTypedError", () => {
  // Synchronous check, so the callback is not marked async.
  it("AshRunFailure.failedAt is always AshTypedError", () => {
    const failure: AshRunFailure = {
      status: "error",
      steps: [],
      failedAt: { kind: "RuntimeError", message: "boom" },
      duration_ms: 0,
    };
    expect(isAshTypedError(failure.failedAt)).toBe(true);
  });

  it("IntentDenied includes invariant + suggestion", () => {
    const err = { kind: "IntentDenied" as const, invariant: "INV-1", message: "denied", suggestion: "use dry-run" };
    expect(isAshTypedError(err)).toBe(true);
    expect(err.invariant).toBeDefined();
    expect(err.suggestion).toBeDefined();
  });

  it("ToolNotFound includes available[] list", () => {
    const err = { kind: "ToolNotFound" as const, name: "foo", available: ["bar", "baz"], message: "not found" };
    expect(isAshTypedError(err)).toBe(true);
    expect(err.available).toEqual(["bar", "baz"]);
  });

  it("each error kind has enough info for AI self-repair", () => {
    const errors = [
      { kind: "IntentDenied", invariant: "INV-1", message: "denied" },
      { kind: "CapabilityMissing", capability: "net", message: "missing" },
      { kind: "ToolNotFound", name: "x", available: [], message: "not found" },
      { kind: "ValidationFailed", field: "f", expected: "string", got: "number", message: "mismatch" },
      { kind: "BudgetExceeded", device: "llm", limit: 100, used: 101, message: "over" },
      { kind: "Timeout", step: "find", limit_ms: 1000, message: "slow" },
      { kind: "ParseError", message: "syntax", line: 1 },
      { kind: "RuntimeError", message: "crash" },
    ];
    for (const err of errors) {
      expect(isAshTypedError(err)).toBe(true);
      expect(err.message).toBeDefined();
    }
  });
});
|
|
98
|
+
|
|
99
|
+
// ── INV-3: Observe Only, Never Commit ────────────────────
|
|
100
|
+
|
|
101
|
+
// INV-3: each iteration, successful or not, is reported through `observe`.
describe("INV-3: Observe only, never commit", () => {
  it("every iteration calls observe", async () => {
    const observe = vi.fn(async () => {});
    await runDevLoop({
      intent: "test",
      max_iterations: 3,
      think: fakeThink(["bad", "bad", "good"]),
      runner: fakeRunner(new Map([
        ["bad", { status: "error", steps: [], failedAt: { kind: "RuntimeError", message: "x" }, duration_ms: 0 } as AshRunResult],
        ["good", { status: "ok", steps: [], output: [], duration_ms: 0 }],
      ])),
      validate: vi.fn(async () => []),
      observe,
    });
    expect(observe).toHaveBeenCalledTimes(3);
  });

  it("no commit function is called (observe-only API)", async () => {
    // The DevLoopConfig has observe but no commit — enforced by type system
    const observe = vi.fn(async () => {});
    await runDevLoop({
      intent: "test",
      max_iterations: 1,
      think: fakeThink(["script"]),
      runner: vi.fn(async () => ({ status: "ok" as const, steps: [], output: [], duration_ms: 0 })),
      validate: vi.fn(async () => []),
      observe,
    });
    // If we got here with no commit call, INV-3 holds (type system prevents commit)
    expect(observe).toHaveBeenCalledTimes(1);
  });

  it("failed iterations are observed", async () => {
    const observe = vi.fn(async () => {});
    await runDevLoop({
      intent: "test",
      max_iterations: 2,
      think: fakeThink(["fail1", "fail2"]),
      runner: fakeRunner(new Map([
        ["fail1", { status: "error", steps: [], failedAt: { kind: "RuntimeError", message: "x" }, duration_ms: 0 } as AshRunResult],
        ["fail2", { status: "error", steps: [], failedAt: { kind: "RuntimeError", message: "y" }, duration_ms: 0 } as AshRunResult],
      ])),
      validate: vi.fn(async () => []),
      observe,
    });
    expect(observe).toHaveBeenCalledTimes(2);
  });
});
|
|
149
|
+
|
|
150
|
+
// ── INV-4: Bounded Iterations ────────────────────────────
|
|
151
|
+
|
|
152
|
+
// INV-4: the loop never runs more than `max_iterations` times.
describe("INV-4: Bounded loop", () => {
  it("max_iterations=5 → loop runs at most 5 times", async () => {
    const think = fakeThink(["f1", "f2", "f3", "f4", "f5", "f6"]);
    const runner = vi.fn(async (): Promise<AshRunResult> => ({
      status: "error", steps: [], failedAt: { kind: "RuntimeError", message: "fail" }, duration_ms: 0,
    }));

    const result = await runDevLoop({
      intent: "test",
      max_iterations: 5,
      think,
      runner,
      validate: vi.fn(async () => []),
      observe: vi.fn(async () => {}),
    });

    expect(result.iterations).toBe(5);
    expect(think).toHaveBeenCalledTimes(5);
  });

  it("max_iterations=1 → loop runs exactly 1 time", async () => {
    const think = fakeThink(["script"]);
    const result = await runDevLoop({
      intent: "test",
      max_iterations: 1,
      think,
      runner: vi.fn(async () => ({ status: "ok" as const, steps: [], output: [], duration_ms: 0 })),
      validate: vi.fn(async () => []),
      observe: vi.fn(async () => {}),
    });
    expect(result.iterations).toBe(1);
    expect(think).toHaveBeenCalledTimes(1);
  });

  it("exceeding max returns last state + all observations", async () => {
    const result = await runDevLoop({
      intent: "test",
      max_iterations: 3,
      think: fakeThink(["a", "b", "c"]),
      runner: vi.fn(async (): Promise<AshRunResult> => ({
        status: "error", steps: [], failedAt: { kind: "RuntimeError", message: "fail" }, duration_ms: 0,
      })),
      validate: vi.fn(async () => []),
      observe: vi.fn(async () => {}),
    });
    expect(result.status).toBe("error");
    expect(result.observations).toHaveLength(3);
    expect(result.finalResult).toBeDefined();
  });

  it("no infinite retry possible", async () => {
    // Even with a runner that always fails, the loop terminates
    const think = fakeThink(Array.from({ length: 100 }, (_, i) => `script${i}`));
    const result = await runDevLoop({
      intent: "test",
      max_iterations: 10,
      think,
      runner: vi.fn(async (): Promise<AshRunResult> => ({
        status: "error", steps: [], failedAt: { kind: "RuntimeError", message: "always fail" }, duration_ms: 0,
      })),
      validate: vi.fn(async () => []),
      observe: vi.fn(async () => {}),
    });
    expect(result.iterations).toBe(10);
    expect(think).toHaveBeenCalledTimes(10);
  });
});
|
|
219
|
+
|
|
220
|
+
// ── INV-5: ASH is Ephemeral ─────────────────────────────
|
|
221
|
+
|
|
222
|
+
// INV-5: scripts surface only inside observations, never as a result field.
describe("INV-5: ASH scripts are ephemeral", () => {
  it("scripts are not persisted — only in observations (transient)", async () => {
    const observe = vi.fn(async () => {});
    const result = await runDevLoop({
      intent: "test",
      max_iterations: 1,
      think: fakeThink(["ephemeral script"]),
      runner: vi.fn(async () => ({ status: "ok" as const, steps: [], output: [], duration_ms: 0 })),
      validate: vi.fn(async () => []),
      observe,
    });
    // Script appears in observations (transient) but DevLoopResult has no persisted script field
    expect(result.observations[0].script).toBe("ephemeral script");
    expect((result as any).script).toBeUndefined();
  });

  it("scripts are not committed to memory — observe only", async () => {
    // Same as INV-3: observe callback gets data, no commit API exists
    const observe = vi.fn(async () => {});
    await runDevLoop({
      intent: "test",
      max_iterations: 1,
      think: fakeThink(["temp script"]),
      runner: vi.fn(async () => ({ status: "ok" as const, steps: [], output: [], duration_ms: 0 })),
      validate: vi.fn(async () => []),
      observe,
    });
    const observed = (observe.mock.calls as any)[0][0];
    expect(observed.script).toBe("temp script");
    // No commit was called — type system prevents it
  });
});
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { describe, it, expect, vi } from "vitest";
|
|
2
|
+
import { checkLivePermission, type LiveModePolicy } from "./live-mode.js";
|
|
3
|
+
|
|
4
|
+
// Covers each grant path of checkLivePermission and their priority.
describe("Live Mode Permission Check", () => {
  // ── Happy Path ─────────────────────────────────────────

  it("proc with ash.live capability → allowed", async () => {
    const policy: LiveModePolicy = { capabilities: new Set(["ash.live"]) };
    expect(await checkLivePermission(policy)).toBe(true);
  });

  it("user confirmation returns true → allowed", async () => {
    const policy: LiveModePolicy = {
      capabilities: new Set(),
      confirm: async () => true,
    };
    expect(await checkLivePermission(policy)).toBe(true);
  });

  // Title corrected: LiveModePolicy has no scope field, only the debug flag is exercised.
  it("debug profile → allowed", async () => {
    const policy: LiveModePolicy = {
      capabilities: new Set(),
      debug: true,
    };
    expect(await checkLivePermission(policy)).toBe(true);
  });

  // ── Bad Path ───────────────────────────────────────────

  it("no capability + no confirmation → rejected", async () => {
    const policy: LiveModePolicy = { capabilities: new Set() };
    expect(await checkLivePermission(policy)).toBe(false);
  });

  it("user confirmation returns false → rejected", async () => {
    const policy: LiveModePolicy = {
      capabilities: new Set(),
      confirm: async () => false,
    };
    expect(await checkLivePermission(policy)).toBe(false);
  });

  it("non-debug profile without capability → rejected", async () => {
    const policy: LiveModePolicy = {
      capabilities: new Set(),
      debug: false,
    };
    expect(await checkLivePermission(policy)).toBe(false);
  });

  // ── Edge Cases ─────────────────────────────────────────

  it("capability + confirmation both present → capability takes priority", async () => {
    const confirm = vi.fn(async () => false);
    const policy: LiveModePolicy = {
      capabilities: new Set(["ash.live"]),
      confirm,
    };
    expect(await checkLivePermission(policy)).toBe(true);
    // confirm should not be called since capability was sufficient
    expect(confirm).not.toHaveBeenCalled();
  });
});
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live Mode Permission — gates upgrade from dry-run to live execution.
|
|
3
|
+
*
|
|
4
|
+
* Per INTENT §6.1, live mode requires one of:
|
|
5
|
+
* a) ash.live capability in proc caps
|
|
6
|
+
* b) User confirmation callback
|
|
7
|
+
* c) Debug profile flag
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/** Inputs consulted when deciding whether live execution is permitted. */
export interface LiveModePolicy {
  /** Capability names held by the caller; `"ash.live"` grants live mode. */
  capabilities: Set<string>;
  /** Optional confirmation callback; resolving to `true` grants live mode. */
  confirm?: () => Promise<boolean>;
  /** Optional debug flag; when set, live mode is granted. */
  debug?: boolean;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Decide whether execution may be upgraded from dry-run to live mode.
 *
 * Grants are evaluated in priority order and the first match wins:
 *   1. Capability — `policy.capabilities` contains `"ash.live"`
 *   2. Debug flag — `policy.debug` is truthy
 *   3. User confirmation — `policy.confirm` resolves to exactly `true`
 *
 * `policy.confirm` is invoked only when neither of the first two grants
 * applies. With no grant available the result is `false`.
 *
 * @param policy - Capabilities, optional debug flag and optional confirmation callback.
 * @returns `true` when live mode is permitted, otherwise `false`.
 * @throws Whatever `policy.confirm` rejects with; the rejection is not caught here.
 */
export async function checkLivePermission(policy: LiveModePolicy): Promise<boolean> {
  // 1. Capability takes priority
  if (policy.capabilities.has("ash.live")) return true;

  // 2. Debug profile
  if (policy.debug) return true;

  // 3. User confirmation
  if (!policy.confirm) return false;

  // Fail closed: an untyped caller could resolve `confirm` with a non-boolean
  // value; only a literal `true` counts as consent, and the result is always
  // a real boolean as the signature promises.
  return (await policy.confirm()) === true;
}
|