@aigne/ash 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DESIGN.md +41 -0
- package/dist/ai-dev-loop/ash-run-result.cjs +12 -0
- package/dist/ai-dev-loop/ash-run-result.d.cts +28 -0
- package/dist/ai-dev-loop/ash-run-result.d.cts.map +1 -0
- package/dist/ai-dev-loop/ash-run-result.d.mts +28 -0
- package/dist/ai-dev-loop/ash-run-result.d.mts.map +1 -0
- package/dist/ai-dev-loop/ash-run-result.mjs +11 -0
- package/dist/ai-dev-loop/ash-run-result.mjs.map +1 -0
- package/dist/ai-dev-loop/ash-typed-error.cjs +51 -0
- package/dist/ai-dev-loop/ash-typed-error.d.cts +54 -0
- package/dist/ai-dev-loop/ash-typed-error.d.cts.map +1 -0
- package/dist/ai-dev-loop/ash-typed-error.d.mts +54 -0
- package/dist/ai-dev-loop/ash-typed-error.d.mts.map +1 -0
- package/dist/ai-dev-loop/ash-typed-error.mjs +50 -0
- package/dist/ai-dev-loop/ash-typed-error.mjs.map +1 -0
- package/dist/ai-dev-loop/ash-validate.cjs +27 -0
- package/dist/ai-dev-loop/ash-validate.d.cts +7 -0
- package/dist/ai-dev-loop/ash-validate.d.cts.map +1 -0
- package/dist/ai-dev-loop/ash-validate.d.mts +7 -0
- package/dist/ai-dev-loop/ash-validate.d.mts.map +1 -0
- package/dist/ai-dev-loop/ash-validate.mjs +28 -0
- package/dist/ai-dev-loop/ash-validate.mjs.map +1 -0
- package/dist/ai-dev-loop/dev-loop.cjs +134 -0
- package/dist/ai-dev-loop/dev-loop.d.cts +28 -0
- package/dist/ai-dev-loop/dev-loop.d.cts.map +1 -0
- package/dist/ai-dev-loop/dev-loop.d.mts +28 -0
- package/dist/ai-dev-loop/dev-loop.d.mts.map +1 -0
- package/dist/ai-dev-loop/dev-loop.mjs +135 -0
- package/dist/ai-dev-loop/dev-loop.mjs.map +1 -0
- package/dist/ai-dev-loop/index.cjs +24 -0
- package/dist/ai-dev-loop/index.d.cts +9 -0
- package/dist/ai-dev-loop/index.d.mts +9 -0
- package/dist/ai-dev-loop/index.mjs +10 -0
- package/dist/ai-dev-loop/live-mode.cjs +17 -0
- package/dist/ai-dev-loop/live-mode.d.cts +24 -0
- package/dist/ai-dev-loop/live-mode.d.cts.map +1 -0
- package/dist/ai-dev-loop/live-mode.d.mts +24 -0
- package/dist/ai-dev-loop/live-mode.d.mts.map +1 -0
- package/dist/ai-dev-loop/live-mode.mjs +17 -0
- package/dist/ai-dev-loop/live-mode.mjs.map +1 -0
- package/dist/ai-dev-loop/meta-tools.cjs +123 -0
- package/dist/ai-dev-loop/meta-tools.d.cts +24 -0
- package/dist/ai-dev-loop/meta-tools.d.cts.map +1 -0
- package/dist/ai-dev-loop/meta-tools.d.mts +24 -0
- package/dist/ai-dev-loop/meta-tools.d.mts.map +1 -0
- package/dist/ai-dev-loop/meta-tools.mjs +120 -0
- package/dist/ai-dev-loop/meta-tools.mjs.map +1 -0
- package/dist/ai-dev-loop/structured-runner.cjs +154 -0
- package/dist/ai-dev-loop/structured-runner.d.cts +12 -0
- package/dist/ai-dev-loop/structured-runner.d.cts.map +1 -0
- package/dist/ai-dev-loop/structured-runner.d.mts +12 -0
- package/dist/ai-dev-loop/structured-runner.d.mts.map +1 -0
- package/dist/ai-dev-loop/structured-runner.mjs +155 -0
- package/dist/ai-dev-loop/structured-runner.mjs.map +1 -0
- package/dist/ai-dev-loop/system-prompt.cjs +55 -0
- package/dist/ai-dev-loop/system-prompt.d.cts +20 -0
- package/dist/ai-dev-loop/system-prompt.d.cts.map +1 -0
- package/dist/ai-dev-loop/system-prompt.d.mts +20 -0
- package/dist/ai-dev-loop/system-prompt.d.mts.map +1 -0
- package/dist/ai-dev-loop/system-prompt.mjs +54 -0
- package/dist/ai-dev-loop/system-prompt.mjs.map +1 -0
- package/dist/ast.d.cts +140 -0
- package/dist/ast.d.cts.map +1 -0
- package/dist/ast.d.mts +140 -0
- package/dist/ast.d.mts.map +1 -0
- package/dist/compiler.cjs +802 -0
- package/dist/compiler.d.cts +103 -0
- package/dist/compiler.d.cts.map +1 -0
- package/dist/compiler.d.mts +103 -0
- package/dist/compiler.d.mts.map +1 -0
- package/dist/compiler.mjs +802 -0
- package/dist/compiler.mjs.map +1 -0
- package/dist/index.cjs +14 -0
- package/dist/index.d.cts +7 -0
- package/dist/index.d.mts +7 -0
- package/dist/index.mjs +7 -0
- package/dist/lexer.cjs +451 -0
- package/dist/lexer.d.cts +14 -0
- package/dist/lexer.d.cts.map +1 -0
- package/dist/lexer.d.mts +14 -0
- package/dist/lexer.d.mts.map +1 -0
- package/dist/lexer.mjs +451 -0
- package/dist/lexer.mjs.map +1 -0
- package/dist/parser.cjs +734 -0
- package/dist/parser.d.cts +40 -0
- package/dist/parser.d.cts.map +1 -0
- package/dist/parser.d.mts +40 -0
- package/dist/parser.d.mts.map +1 -0
- package/dist/parser.mjs +734 -0
- package/dist/parser.mjs.map +1 -0
- package/dist/reference.cjs +130 -0
- package/dist/reference.d.cts +11 -0
- package/dist/reference.d.cts.map +1 -0
- package/dist/reference.d.mts +11 -0
- package/dist/reference.d.mts.map +1 -0
- package/dist/reference.mjs +130 -0
- package/dist/reference.mjs.map +1 -0
- package/dist/template.cjs +85 -0
- package/dist/template.mjs +84 -0
- package/dist/template.mjs.map +1 -0
- package/dist/type-checker.cjs +582 -0
- package/dist/type-checker.d.cts +31 -0
- package/dist/type-checker.d.cts.map +1 -0
- package/dist/type-checker.d.mts +31 -0
- package/dist/type-checker.d.mts.map +1 -0
- package/dist/type-checker.mjs +573 -0
- package/dist/type-checker.mjs.map +1 -0
- package/package.json +29 -0
- package/src/ai-dev-loop/ash-run-result.test.ts +113 -0
- package/src/ai-dev-loop/ash-run-result.ts +46 -0
- package/src/ai-dev-loop/ash-typed-error.test.ts +136 -0
- package/src/ai-dev-loop/ash-typed-error.ts +50 -0
- package/src/ai-dev-loop/ash-validate.test.ts +54 -0
- package/src/ai-dev-loop/ash-validate.ts +34 -0
- package/src/ai-dev-loop/dev-loop.test.ts +364 -0
- package/src/ai-dev-loop/dev-loop.ts +156 -0
- package/src/ai-dev-loop/dry-run.test.ts +107 -0
- package/src/ai-dev-loop/e2e-multi-fix.test.ts +473 -0
- package/src/ai-dev-loop/e2e.test.ts +324 -0
- package/src/ai-dev-loop/index.ts +15 -0
- package/src/ai-dev-loop/invariants.test.ts +253 -0
- package/src/ai-dev-loop/live-mode.test.ts +63 -0
- package/src/ai-dev-loop/live-mode.ts +33 -0
- package/src/ai-dev-loop/meta-tools.test.ts +120 -0
- package/src/ai-dev-loop/meta-tools.ts +142 -0
- package/src/ai-dev-loop/structured-runner.test.ts +159 -0
- package/src/ai-dev-loop/structured-runner.ts +209 -0
- package/src/ai-dev-loop/system-prompt.test.ts +102 -0
- package/src/ai-dev-loop/system-prompt.ts +81 -0
- package/src/ast.ts +186 -0
- package/src/compiler.test.ts +2933 -0
- package/src/compiler.ts +1103 -0
- package/src/e2e.test.ts +552 -0
- package/src/index.ts +16 -0
- package/src/lexer.test.ts +538 -0
- package/src/lexer.ts +222 -0
- package/src/parser.test.ts +1024 -0
- package/src/parser.ts +835 -0
- package/src/reference.test.ts +166 -0
- package/src/reference.ts +125 -0
- package/src/template.test.ts +210 -0
- package/src/template.ts +139 -0
- package/src/type-checker.test.ts +1494 -0
- package/src/type-checker.ts +785 -0
- package/tsconfig.json +9 -0
- package/tsdown.config.ts +12 -0
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Complex E2E: Multi-error correction chain.
|
|
3
|
+
*
|
|
4
|
+
* Simulates a realistic AI dev loop where the "LLM" makes multiple
|
|
5
|
+
* different mistakes, each requiring a different fix strategy.
|
|
6
|
+
* All ASH compilation, validation, and execution are REAL.
|
|
7
|
+
* Only think() is scripted.
|
|
8
|
+
*
|
|
9
|
+
* Scenario: AI tries to build an ETL pipeline that reads users,
|
|
10
|
+
* filters active ones, maps to summary, and saves to output.
|
|
11
|
+
* It makes 4 different errors before getting it right.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, it, expect, vi } from "vitest";
|
|
15
|
+
import { runDevLoop } from "./dev-loop.js";
|
|
16
|
+
import { runStructured } from "./structured-runner.js";
|
|
17
|
+
import { ashValidate } from "./ash-validate.js";
|
|
18
|
+
import type { WorldInterface, JobLogger, JobContext } from "../compiler.js";
|
|
19
|
+
|
|
20
|
+
// ── Real World ───────────────────────────────────────────
|
|
21
|
+
|
|
22
|
+
/**
 * Builds an in-memory world for tests.
 *
 * `read` returns the records stored in `data` under a path, or an empty array
 * when the path is absent. `write` and `publish` do not touch `data`; they
 * store the latest payload per key in the `written` / `published` maps, which
 * are exposed on the returned object so tests can inspect them.
 */
function makeWorld(data: Record<string, unknown[]> = {}): WorldInterface & {
  written: Record<string, unknown[]>;
  published: Record<string, unknown[]>;
} {
  const written: Record<string, unknown[]> = {};
  const published: Record<string, unknown[]> = {};

  const read = (path: string) => data[path] ?? [];

  const write = (path: string, records: unknown[]) => {
    written[path] = records;
  };

  const publish = (topic: string, records: unknown[]) => {
    published[topic] = records;
  };

  return { read, write, publish, written, published };
}
|
|
36
|
+
|
|
37
|
+
/**
 * Creates a job context around a fresh in-memory world.
 *
 * Returns both the context and the world it wraps, so a test can run jobs
 * through `ctx` and afterwards inspect what was written to `world`.
 */
function makeCtx(data: Record<string, unknown[]> = {}) {
  const world = makeWorld(data);
  // Capability set holding the single entry "*" (presumably a wildcard grant —
  // confirm against the compiler's capability check).
  const caps = new Set(["*"]);
  // Logger that discards everything it is given.
  const logger = { log() {} } as JobLogger;

  const ctx = { world, caps, logger } as JobContext;
  return { ctx, world };
}
|
|
48
|
+
|
|
49
|
+
// ── Scripted Think ───────────────────────────────────────
|
|
50
|
+
|
|
51
|
+
/**
 * Builds a scripted stand-in for the model.
 *
 * The returned `think` mock replays `scripts` one per call, each wrapped in an
 * `ash_run` tool call (an empty script is sent once the list is exhausted).
 * From the second call on, the content of the first request message is
 * collected into `corrections`.
 */
function scriptedThink(scripts: string[]) {
  const corrections: string[] = [];
  let calls = 0;

  const think = vi.fn(async (req: any) => {
    // The first call carries the intent, not a correction, so the request is
    // only inspected on later calls.
    const prompt = calls > 0 ? req.messages?.[0]?.content : undefined;
    if (prompt) {
      corrections.push(prompt);
    }

    const script = scripts[calls] ?? "";
    calls += 1;

    // The tool-call id uses the already-incremented counter: tc-1, tc-2, …
    const toolCall = {
      id: `tc-${calls}`,
      name: "ash_run",
      arguments: JSON.stringify({ script }),
    };

    return {
      kind: "completed" as const,
      response: {
        content: "",
        tool_calls: [toolCall],
      },
      toolResults: [],
    };
  });

  return { think, corrections };
}
|
|
79
|
+
|
|
80
|
+
/**
 * Returns a runner that executes an ASH source string with `runStructured`
 * against `ctx`, using `mode` ("dry-run" unless "live" is passed).
 */
function realRunner(ctx: JobContext, mode: "dry-run" | "live" = "dry-run") {
  return async function run(source: string) {
    // A fresh options object is passed on every invocation.
    return runStructured(source, ctx, { mode });
  };
}
|
|
83
|
+
|
|
84
|
+
/**
 * Returns an async validator that passes the source string straight to
 * `ashValidate` and resolves with its result.
 */
function realValidate() {
  return async function validate(source: string) {
    return ashValidate(source);
  };
}
|
|
87
|
+
|
|
88
|
+
// ── Tests ────────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
// End-to-end correction chains for the AI dev loop. Validation and execution
// go through the real ASH pipeline (realValidate / realRunner); only the
// model's replies are scripted. Multi-line script literals are written
// flush-left so no indentation leaks into the script text.
describe("E2E: Complex multi-error correction chain", () => {

  it("4 different errors → 4 corrections → 5th attempt succeeds", async () => {
    const { ctx } = makeCtx({
      "/world/users": [
        { id: 1, name: "Alice", active: true, score: 90 },
        { id: 2, name: "Bob", active: false, score: 60 },
        { id: 3, name: "Carol", active: true, score: 75 },
      ],
    });

    const { think, corrections } = scriptedThink([
      // Attempt 1: unknown command "fetch" (should be "find")
      'job etl { fetch /world/users }',

      // Attempt 2: unknown command "filter" (should be "where")
      'job etl { find /world/users | filter active == true }',

      // Attempt 3: missing closing brace
      'job etl { find /world/users | where active == true | save /world/output',

      // Attempt 4: type mismatch — save then where (save outputs none)
      `job etl {
find /world/users
| save /world/output
| where active == true
}`,

      // Attempt 5: correct!
      `job etl {
find /world/users
| where active == true
| save /world/output
}`,
    ]);

    const observe = vi.fn(async () => {});

    const result = await runDevLoop({
      intent: "Build an ETL pipeline: read users, filter active ones, save to /world/output",
      max_iterations: 6,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe,
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(5);
    expect(observe).toHaveBeenCalledTimes(5);

    // The first 4 observations must each report a failure, either as
    // validation errors or as an errored run result.
    for (let i = 0; i < 4; i++) {
      const obs = (observe.mock.calls as any)[i][0];
      expect(
        obs!.validation_errors?.length > 0 || obs!.result?.status === "error",
      ).toBe(true);
    }

    // 5th observation is success
    const lastObs = (observe.mock.calls as any)[4][0];
    expect(lastObs!.result.status).toBe("ok");

    // One correction prompt per failed attempt
    expect(corrections).toHaveLength(4);

    // Correction 1 (unknown command "fetch") is reported as a ParseError
    expect(corrections[0]).toContain("ParseError");

    // Correction 2 (unknown command "filter") is reported as a ParseError
    expect(corrections[1]).toContain("ParseError");

    // Correction 3 (missing closing brace) is reported as a ParseError
    expect(corrections[2]).toContain("ParseError");

    // Correction 4 (type mismatch) is only counted above; its text is not asserted.
  });

  it("readonly violation → correction → remove save → success", async () => {
    const { ctx } = makeCtx({
      "/public/data": [{ id: 1, value: "hello" }],
    });

    const { think } = scriptedThink([
      // Attempt 1: @readonly with save → violation
      '@readonly\njob read { find /public/data | save /public/out }',

      // Attempt 2: remove save, readonly is fine
      '@readonly\njob read { find /public/data }',
    ]);

    const result = await runDevLoop({
      intent: "read data in readonly mode",
      max_iterations: 3,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(2);
  });

  it("progressive fix: syntax → unknown command → success, with dry-run world isolation", async () => {
    const { ctx, world } = makeCtx({
      "/world/items": [
        { id: 1, name: "Widget", price: 10 },
        { id: 2, name: "Gadget", price: 25 },
      ],
    });

    const { think, corrections } = scriptedThink([
      // Attempt 1: syntax error — missing closing brace
      'job transform { find /world/items | map { name name, cost price }',

      // Attempt 2: unknown command
      'job transform { find /world/items | transform name | save /world/out }',

      // Attempt 3: correct
      `job transform {
find /world/items
| map { name name, cost price }
| save /world/out
}`,
    ]);

    const observe = vi.fn(async () => {});

    const result = await runDevLoop({
      intent: "transform items: extract name and price as cost",
      max_iterations: 5,
      think,
      runner: realRunner(ctx, "dry-run"),
      validate: realValidate(),
      observe,
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(3);

    // INV-1: dry-run — world must NOT be written to
    expect(Object.keys(world.written)).toHaveLength(0);

    // Each of the two failed attempts must have produced a correction prompt
    expect(corrections).toHaveLength(2);
  });

  it("annotation errors → fix → readonly violation → fix → success", async () => {
    const { ctx } = makeCtx({
      "/world/logs": [{ ts: 1, msg: "hello" }],
    });

    const { think } = scriptedThink([
      // Attempt 1: invalid annotation argument
      '@approval(robot)\njob audit { find /world/logs }',

      // Attempt 2: readonly + save = violation
      '@readonly\njob audit { find /world/logs | save /world/archive }',

      // Attempt 3: correct — readonly with no save
      '@readonly\njob audit { find /world/logs }',
    ]);

    const result = await runDevLoop({
      intent: "audit logs read-only",
      max_iterations: 4,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(3);
  });

  it("undefined variable → fix → duplicate variable → fix → success", async () => {
    const { ctx } = makeCtx({
      "/world/scores": [{ name: "A", score: 90 }, { name: "B", score: 50 }],
    });

    const { think } = scriptedThink([
      // Attempt 1: undefined variable $min
      'job filter { find /world/scores | where score > $min }',

      // Attempt 2: duplicate let
      'let min = 70\nlet min = 80\njob filter { find /world/scores | where score > $min }',

      // Attempt 3: correct
      'let min = 70\njob filter { find /world/scores | where score > $min }',
    ]);

    const result = await runDevLoop({
      intent: "filter scores above threshold",
      max_iterations: 4,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(3);
  });

  it("exhausts all iterations with different errors → returns all observations", async () => {
    const { ctx } = makeCtx({});

    const { think } = scriptedThink([
      'job x { foobar }', // unknown command
      'job x { find /a | unknowncmd }', // unknown command
      'job x { find /a | save /b | where x > 1 }', // type mismatch
    ]);

    const observe = vi.fn(async () => {});

    const result = await runDevLoop({
      intent: "do something",
      max_iterations: 3,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe,
    });

    expect(result.status).toBe("error");
    expect(result.iterations).toBe(3);
    expect(result.observations).toHaveLength(3);

    // Pull one error message per observation, preferring validation errors,
    // then the failed step of the run result, then the observation's own error.
    const errorMessages = result.observations.map((obs: any) => {
      if (obs.validation_errors) return obs.validation_errors[0]?.message;
      if (obs.result?.failedAt) return obs.result.failedAt.message;
      return obs.error;
    });
    // All should be defined
    for (const msg of errorMessages) {
      expect(msg).toBeDefined();
    }
    // At least two distinct messages are expected across the three attempts
    const unique = new Set(errorMessages);
    expect(unique.size).toBeGreaterThanOrEqual(2);
  });

  it("complex pipeline: find → where → map → save, with mid-pipeline error fix", async () => {
    const { ctx, world } = makeCtx({
      "/world/products": [
        { id: 1, name: "Laptop", category: "electronics", price: 999 },
        { id: 2, name: "Shirt", category: "clothing", price: 29 },
        { id: 3, name: "Phone", category: "electronics", price: 699 },
      ],
    });

    const { think } = scriptedThink([
      // Attempt 1: typo "wher" instead of "where"
      `job etl {
find /world/products
| wher category == "electronics"
| save /world/electronics
}`,

      // Attempt 2: correct complex pipeline
      `job etl {
find /world/products
| where category == "electronics"
| map { name name, price price }
| save /world/electronics
}`,
    ]);

    const result = await runDevLoop({
      intent: "extract electronics products",
      max_iterations: 3,
      mode: "live",
      think,
      runner: realRunner(ctx, "live"),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(2);
    expect(result.mode).toBe("live");
    // In live mode, world should be written to
    expect(world.written["/world/electronics"]).toBeDefined();
  });

  it("empty script from LLM → correction → valid script → success", async () => {
    const { ctx } = makeCtx({ "/data": [{ x: 1 }] });

    let callCount = 0;
    const think = vi.fn(async () => {
      callCount++;
      if (callCount === 1) {
        // LLM returns empty (no tool_calls, no content)
        return { kind: "completed", response: { content: "", tool_calls: [] }, toolResults: [] };
      }
      // Second call: valid script
      return {
        kind: "completed",
        response: {
          content: "",
          tool_calls: [{ id: "tc-2", name: "ash_run", arguments: JSON.stringify({ script: 'job x { find /data }' }) }],
        },
        toolResults: [],
      };
    });

    const result = await runDevLoop({
      intent: "read data",
      max_iterations: 3,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(2);
  });

  it("LLM sends script via content (not tool_call) → still works", async () => {
    const { ctx } = makeCtx({ "/items": [{ a: 1 }] });

    const think = vi.fn(async () => ({
      kind: "completed",
      response: {
        // No tool_calls — script in content directly
        content: 'job x { find /items }',
        tool_calls: undefined,
      },
      toolResults: [],
    }));

    const result = await runDevLoop({
      intent: "get items",
      max_iterations: 1,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe: vi.fn(async () => {}),
    });

    expect(result.status).toBe("ok");
  });

  it("stress: 5 consecutive different error types before success", async () => {
    const { ctx } = makeCtx({ "/d": [{ v: 1 }] });

    const { think, corrections } = scriptedThink([
      // 1: unknown command
      'job x { badcmd }',
      // 2: unterminated string
      'job x { find /d | output "hello }',
      // 3: missing brace
      'job x { find /d',
      // 4: invalid annotation
      '@timeout(-5)\njob x { find /d }',
      // 5: duplicate var
      'let a = 1\nlet a = 2\njob x { find /d }',
      // 6: success
      'job x { find /d }',
    ]);

    const observe = vi.fn(async () => {});

    const result = await runDevLoop({
      intent: "read data from /d",
      max_iterations: 7,
      think,
      runner: realRunner(ctx),
      validate: realValidate(),
      observe,
    });

    expect(result.status).toBe("ok");
    expect(result.iterations).toBe(6);
    expect(corrections).toHaveLength(5);

    // Every correction prompt must carry the "previous ASH script failed" preamble
    for (const c of corrections) {
      expect(c).toContain("previous ASH script failed");
    }
  });
});
|