@aigne/ash 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/DESIGN.md +41 -0
  2. package/dist/ai-dev-loop/ash-run-result.cjs +12 -0
  3. package/dist/ai-dev-loop/ash-run-result.d.cts +28 -0
  4. package/dist/ai-dev-loop/ash-run-result.d.cts.map +1 -0
  5. package/dist/ai-dev-loop/ash-run-result.d.mts +28 -0
  6. package/dist/ai-dev-loop/ash-run-result.d.mts.map +1 -0
  7. package/dist/ai-dev-loop/ash-run-result.mjs +11 -0
  8. package/dist/ai-dev-loop/ash-run-result.mjs.map +1 -0
  9. package/dist/ai-dev-loop/ash-typed-error.cjs +51 -0
  10. package/dist/ai-dev-loop/ash-typed-error.d.cts +54 -0
  11. package/dist/ai-dev-loop/ash-typed-error.d.cts.map +1 -0
  12. package/dist/ai-dev-loop/ash-typed-error.d.mts +54 -0
  13. package/dist/ai-dev-loop/ash-typed-error.d.mts.map +1 -0
  14. package/dist/ai-dev-loop/ash-typed-error.mjs +50 -0
  15. package/dist/ai-dev-loop/ash-typed-error.mjs.map +1 -0
  16. package/dist/ai-dev-loop/ash-validate.cjs +27 -0
  17. package/dist/ai-dev-loop/ash-validate.d.cts +7 -0
  18. package/dist/ai-dev-loop/ash-validate.d.cts.map +1 -0
  19. package/dist/ai-dev-loop/ash-validate.d.mts +7 -0
  20. package/dist/ai-dev-loop/ash-validate.d.mts.map +1 -0
  21. package/dist/ai-dev-loop/ash-validate.mjs +28 -0
  22. package/dist/ai-dev-loop/ash-validate.mjs.map +1 -0
  23. package/dist/ai-dev-loop/dev-loop.cjs +134 -0
  24. package/dist/ai-dev-loop/dev-loop.d.cts +28 -0
  25. package/dist/ai-dev-loop/dev-loop.d.cts.map +1 -0
  26. package/dist/ai-dev-loop/dev-loop.d.mts +28 -0
  27. package/dist/ai-dev-loop/dev-loop.d.mts.map +1 -0
  28. package/dist/ai-dev-loop/dev-loop.mjs +135 -0
  29. package/dist/ai-dev-loop/dev-loop.mjs.map +1 -0
  30. package/dist/ai-dev-loop/index.cjs +24 -0
  31. package/dist/ai-dev-loop/index.d.cts +9 -0
  32. package/dist/ai-dev-loop/index.d.mts +9 -0
  33. package/dist/ai-dev-loop/index.mjs +10 -0
  34. package/dist/ai-dev-loop/live-mode.cjs +17 -0
  35. package/dist/ai-dev-loop/live-mode.d.cts +24 -0
  36. package/dist/ai-dev-loop/live-mode.d.cts.map +1 -0
  37. package/dist/ai-dev-loop/live-mode.d.mts +24 -0
  38. package/dist/ai-dev-loop/live-mode.d.mts.map +1 -0
  39. package/dist/ai-dev-loop/live-mode.mjs +17 -0
  40. package/dist/ai-dev-loop/live-mode.mjs.map +1 -0
  41. package/dist/ai-dev-loop/meta-tools.cjs +123 -0
  42. package/dist/ai-dev-loop/meta-tools.d.cts +24 -0
  43. package/dist/ai-dev-loop/meta-tools.d.cts.map +1 -0
  44. package/dist/ai-dev-loop/meta-tools.d.mts +24 -0
  45. package/dist/ai-dev-loop/meta-tools.d.mts.map +1 -0
  46. package/dist/ai-dev-loop/meta-tools.mjs +120 -0
  47. package/dist/ai-dev-loop/meta-tools.mjs.map +1 -0
  48. package/dist/ai-dev-loop/structured-runner.cjs +154 -0
  49. package/dist/ai-dev-loop/structured-runner.d.cts +12 -0
  50. package/dist/ai-dev-loop/structured-runner.d.cts.map +1 -0
  51. package/dist/ai-dev-loop/structured-runner.d.mts +12 -0
  52. package/dist/ai-dev-loop/structured-runner.d.mts.map +1 -0
  53. package/dist/ai-dev-loop/structured-runner.mjs +155 -0
  54. package/dist/ai-dev-loop/structured-runner.mjs.map +1 -0
  55. package/dist/ai-dev-loop/system-prompt.cjs +55 -0
  56. package/dist/ai-dev-loop/system-prompt.d.cts +20 -0
  57. package/dist/ai-dev-loop/system-prompt.d.cts.map +1 -0
  58. package/dist/ai-dev-loop/system-prompt.d.mts +20 -0
  59. package/dist/ai-dev-loop/system-prompt.d.mts.map +1 -0
  60. package/dist/ai-dev-loop/system-prompt.mjs +54 -0
  61. package/dist/ai-dev-loop/system-prompt.mjs.map +1 -0
  62. package/dist/ast.d.cts +140 -0
  63. package/dist/ast.d.cts.map +1 -0
  64. package/dist/ast.d.mts +140 -0
  65. package/dist/ast.d.mts.map +1 -0
  66. package/dist/compiler.cjs +802 -0
  67. package/dist/compiler.d.cts +103 -0
  68. package/dist/compiler.d.cts.map +1 -0
  69. package/dist/compiler.d.mts +103 -0
  70. package/dist/compiler.d.mts.map +1 -0
  71. package/dist/compiler.mjs +802 -0
  72. package/dist/compiler.mjs.map +1 -0
  73. package/dist/index.cjs +14 -0
  74. package/dist/index.d.cts +7 -0
  75. package/dist/index.d.mts +7 -0
  76. package/dist/index.mjs +7 -0
  77. package/dist/lexer.cjs +451 -0
  78. package/dist/lexer.d.cts +14 -0
  79. package/dist/lexer.d.cts.map +1 -0
  80. package/dist/lexer.d.mts +14 -0
  81. package/dist/lexer.d.mts.map +1 -0
  82. package/dist/lexer.mjs +451 -0
  83. package/dist/lexer.mjs.map +1 -0
  84. package/dist/parser.cjs +734 -0
  85. package/dist/parser.d.cts +40 -0
  86. package/dist/parser.d.cts.map +1 -0
  87. package/dist/parser.d.mts +40 -0
  88. package/dist/parser.d.mts.map +1 -0
  89. package/dist/parser.mjs +734 -0
  90. package/dist/parser.mjs.map +1 -0
  91. package/dist/reference.cjs +130 -0
  92. package/dist/reference.d.cts +11 -0
  93. package/dist/reference.d.cts.map +1 -0
  94. package/dist/reference.d.mts +11 -0
  95. package/dist/reference.d.mts.map +1 -0
  96. package/dist/reference.mjs +130 -0
  97. package/dist/reference.mjs.map +1 -0
  98. package/dist/template.cjs +85 -0
  99. package/dist/template.mjs +84 -0
  100. package/dist/template.mjs.map +1 -0
  101. package/dist/type-checker.cjs +582 -0
  102. package/dist/type-checker.d.cts +31 -0
  103. package/dist/type-checker.d.cts.map +1 -0
  104. package/dist/type-checker.d.mts +31 -0
  105. package/dist/type-checker.d.mts.map +1 -0
  106. package/dist/type-checker.mjs +573 -0
  107. package/dist/type-checker.mjs.map +1 -0
  108. package/package.json +29 -0
  109. package/src/ai-dev-loop/ash-run-result.test.ts +113 -0
  110. package/src/ai-dev-loop/ash-run-result.ts +46 -0
  111. package/src/ai-dev-loop/ash-typed-error.test.ts +136 -0
  112. package/src/ai-dev-loop/ash-typed-error.ts +50 -0
  113. package/src/ai-dev-loop/ash-validate.test.ts +54 -0
  114. package/src/ai-dev-loop/ash-validate.ts +34 -0
  115. package/src/ai-dev-loop/dev-loop.test.ts +364 -0
  116. package/src/ai-dev-loop/dev-loop.ts +156 -0
  117. package/src/ai-dev-loop/dry-run.test.ts +107 -0
  118. package/src/ai-dev-loop/e2e-multi-fix.test.ts +473 -0
  119. package/src/ai-dev-loop/e2e.test.ts +324 -0
  120. package/src/ai-dev-loop/index.ts +15 -0
  121. package/src/ai-dev-loop/invariants.test.ts +253 -0
  122. package/src/ai-dev-loop/live-mode.test.ts +63 -0
  123. package/src/ai-dev-loop/live-mode.ts +33 -0
  124. package/src/ai-dev-loop/meta-tools.test.ts +120 -0
  125. package/src/ai-dev-loop/meta-tools.ts +142 -0
  126. package/src/ai-dev-loop/structured-runner.test.ts +159 -0
  127. package/src/ai-dev-loop/structured-runner.ts +209 -0
  128. package/src/ai-dev-loop/system-prompt.test.ts +102 -0
  129. package/src/ai-dev-loop/system-prompt.ts +81 -0
  130. package/src/ast.ts +186 -0
  131. package/src/compiler.test.ts +2933 -0
  132. package/src/compiler.ts +1103 -0
  133. package/src/e2e.test.ts +552 -0
  134. package/src/index.ts +16 -0
  135. package/src/lexer.test.ts +538 -0
  136. package/src/lexer.ts +222 -0
  137. package/src/parser.test.ts +1024 -0
  138. package/src/parser.ts +835 -0
  139. package/src/reference.test.ts +166 -0
  140. package/src/reference.ts +125 -0
  141. package/src/template.test.ts +210 -0
  142. package/src/template.ts +139 -0
  143. package/src/type-checker.test.ts +1494 -0
  144. package/src/type-checker.ts +785 -0
  145. package/tsconfig.json +9 -0
  146. package/tsdown.config.ts +12 -0
@@ -0,0 +1,364 @@
1
+ import { describe, it, expect, vi } from "vitest";
2
+ import { runDevLoop } from "./dev-loop.js";
3
+ import type { AshRunResult } from "./ash-run-result.js";
4
+
5
/**
 * Fake think: resolves to a completed turn holding a single `ash_run` tool call.
 * Every invocation consumes the next entry of `scripts`; once the list is
 * exhausted, both the content and the tool-call script are empty strings.
 */
function fakeThink(scripts: string[]) {
  let call = 0;
  return vi.fn(async (_req: any) => {
    // Content, tool-call id and tool-call script all refer to the same turn.
    const turn = call;
    call += 1;
    const script = scripts[turn] ?? "";
    return {
      kind: "completed" as const,
      response: {
        content: script,
        tool_calls: [
          { id: `tc-${turn}`, name: "ash_run", arguments: JSON.stringify({ script }) },
        ],
      },
      toolResults: [],
    };
  });
}
19
+
20
/**
 * Fake runner: reports a RuntimeError failure for every script contained in
 * `failScripts` and a one-step success for any other script.
 */
function fakeRunner(failScripts: Set<string> = new Set()) {
  // Fresh objects are built per call so tests never share result instances.
  const failure = (source: string): AshRunResult => ({
    status: "error",
    steps: [{ step: 1, command: "find", status: "error", duration_ms: 5 }],
    failedAt: { kind: "RuntimeError", message: `Failed: ${source}` },
    duration_ms: 5,
  });
  const success = (): AshRunResult => ({
    status: "ok",
    steps: [{ step: 1, command: "find", status: "ok", duration_ms: 5, output: [{ id: 1 }] }],
    output: [{ id: 1 }],
    duration_ms: 5,
  });

  return vi.fn(async (source: string): Promise<AshRunResult> =>
    failScripts.has(source) ? failure(source) : success(),
  );
}
39
+
40
/** Fake validate: a spy that reports no validation errors for any source. */
function fakeValidate() {
  const acceptEverything = async (_source: string) => [];
  return vi.fn(acceptEverything);
}
43
+
44
/** Fake observe: a spy that accepts any observation and does nothing with it. */
function fakeObserve() {
  const discard = async (_data: any) => {};
  return vi.fn(discard);
}
47
+
48
describe("AI Dev Loop Core", () => {
  type LoopConfig = Parameters<typeof runDevLoop>[0];
  type LoopSetup = Omit<LoopConfig, "validate" | "observe"> &
    Partial<Pick<LoopConfig, "validate" | "observe">>;

  /** Runs the loop, filling in fresh no-op validate/observe fakes a test does not supply. */
  const runLoop = (setup: LoopSetup) =>
    runDevLoop({ validate: fakeValidate(), observe: fakeObserve(), ...setup });

  /** Fake runner that rejects exactly the listed scripts. */
  const rejecting = (...scripts: string[]) => fakeRunner(new Set(scripts));

  // ── Happy Path ─────────────────────────────────────────

  it("success on first try", async () => {
    const think = fakeThink(['job "test" { find /users }']);

    const outcome = await runLoop({
      intent: "find all users",
      max_iterations: 3,
      think,
      runner: fakeRunner(),
    });

    expect(outcome.status).toBe("ok");
    expect(outcome.iterations).toBe(1);
    expect(outcome.finalResult?.status).toBe("ok");
    expect(think).toHaveBeenCalledTimes(1);
  });

  it("first run fails → LLM corrects → second run succeeds", async () => {
    const badScript = "bad script";
    const goodScript = 'job "fixed" { find /users }';
    const think = fakeThink([badScript, goodScript]);

    const outcome = await runLoop({
      intent: "find users",
      max_iterations: 5,
      think,
      runner: rejecting(badScript),
    });

    expect(outcome.status).toBe("ok");
    expect(outcome.iterations).toBe(2);
    expect(think).toHaveBeenCalledTimes(2);
  });

  it("returns final AshRunResult and all observations", async () => {
    const outcome = await runLoop({
      intent: "do something",
      max_iterations: 3,
      think: fakeThink(["script1"]),
      runner: fakeRunner(),
    });

    expect(outcome.finalResult).toBeDefined();
    expect(outcome.observations.length).toBeGreaterThanOrEqual(1);
  });

  it("each iteration calls observe with execution result", async () => {
    const observe = fakeObserve();

    await runLoop({
      intent: "test",
      max_iterations: 3,
      think: fakeThink(["s1", "s2"]),
      runner: rejecting("s1"),
      observe,
    });

    // NOTE(review): only the call count is asserted, not the observed payload.
    expect(observe).toHaveBeenCalledTimes(2);
  });

  it("calls validate before run", async () => {
    const callOrder: string[] = [];
    const validate = vi.fn(async () => {
      callOrder.push("validate");
      return [];
    });
    const runner = vi.fn(async (): Promise<AshRunResult> => {
      callOrder.push("run");
      return { status: "ok", steps: [], output: [], duration_ms: 0 };
    });

    await runLoop({
      intent: "test",
      max_iterations: 1,
      think: fakeThink(["script"]),
      runner,
      validate,
    });

    expect(callOrder).toEqual(["validate", "run"]);
  });

  it("successful loop returns iteration count", async () => {
    const outcome = await runLoop({
      intent: "test",
      max_iterations: 5,
      think: fakeThink(["a", "b", "c"]),
      runner: rejecting("a", "b"),
    });

    expect(outcome.iterations).toBe(3);
  });

  // ── Bad Path ───────────────────────────────────────────

  it("exceeds max_iterations → returns last failure (INV-4)", async () => {
    const outcome = await runLoop({
      intent: "will fail",
      max_iterations: 3,
      think: fakeThink(["bad1", "bad2", "bad3"]),
      runner: rejecting("bad1", "bad2", "bad3"),
    });

    expect(outcome.status).toBe("error");
    expect(outcome.iterations).toBe(3);
    expect(outcome.finalResult?.status).toBe("error");
    expect(outcome.observations).toHaveLength(3);
  });

  it("validate catches parse error → correction prompt without run", async () => {
    const validate = vi.fn(async (source: string) => {
      if (source !== "bad syntax") {
        return [];
      }
      return [{ kind: "ParseError" as const, message: "unexpected token", line: 1 }];
    });
    const runner = fakeRunner();

    const outcome = await runLoop({
      intent: "test",
      max_iterations: 3,
      think: fakeThink(["bad syntax", "good script"]),
      runner,
      validate,
    });

    // Runner should only be called for the good script
    expect(runner).toHaveBeenCalledTimes(1);
    expect(outcome.status).toBe("ok");
  });

  it("LLM returns empty response → treated as error", async () => {
    const think = vi.fn(async () => ({
      kind: "completed" as const,
      response: { content: "", tool_calls: undefined },
      toolResults: [],
    }));

    const outcome = await runLoop({
      intent: "test",
      max_iterations: 2,
      think,
      runner: fakeRunner(),
    });

    expect(outcome.status).toBe("error");
  });

  it("think throws → loop terminates with error", async () => {
    const think = vi.fn(async () => {
      throw new Error("LLM down");
    });

    const outcome = await runLoop({
      intent: "test",
      max_iterations: 3,
      think,
      runner: fakeRunner(),
    });

    expect(outcome.status).toBe("error");
    expect(outcome.error).toContain("LLM down");
  });

  it("budget exceeded mid-loop → loop terminates with error", async () => {
    let callCount = 0;
    const think = vi.fn(async () => {
      callCount++;
      // The second call simulates the budget running out.
      if (callCount === 2) throw new Error("BudgetExceeded: token limit reached");
      return {
        kind: "completed" as const,
        response: {
          content: "",
          tool_calls: [{ id: "tc-1", name: "ash_run", arguments: JSON.stringify({ script: "fail" }) }],
        },
        toolResults: [],
      };
    });

    const outcome = await runLoop({
      intent: "test",
      max_iterations: 5,
      think,
      runner: rejecting("fail"),
    });

    expect(outcome.status).toBe("error");
    expect(outcome.error).toContain("BudgetExceeded");
    expect(outcome.iterations).toBe(2);
  });

  it("extractScript: script from content fallback when no tool_calls", async () => {
    const think = vi.fn(async () => ({
      kind: "completed" as const,
      response: { content: 'job "fromContent" { find /x }', tool_calls: undefined },
      toolResults: [],
    }));

    const outcome = await runLoop({
      intent: "test",
      max_iterations: 1,
      think,
      runner: fakeRunner(),
    });

    expect(outcome.status).toBe("ok");
  });

  // ── Edge Cases ─────────────────────────────────────────

  it("max_iterations=1 → single attempt, no correction", async () => {
    const think = fakeThink(["script"]);

    const outcome = await runLoop({
      intent: "test",
      max_iterations: 1,
      think,
      runner: rejecting("script"),
    });

    expect(outcome.status).toBe("error");
    expect(outcome.iterations).toBe(1);
    expect(think).toHaveBeenCalledTimes(1);
  });

  it("LLM fixes on last iteration → success", async () => {
    const scripts = ["bad1", "bad2", "bad3", "bad4", "good"];

    const outcome = await runLoop({
      intent: "test",
      max_iterations: 5,
      think: fakeThink(scripts),
      runner: rejecting("bad1", "bad2", "bad3", "bad4"),
    });

    expect(outcome.status).toBe("ok");
    expect(outcome.iterations).toBe(5);
  });

  it("all iterations fail → returns array of all failures", async () => {
    const outcome = await runLoop({
      intent: "test",
      max_iterations: 2,
      think: fakeThink(["f1", "f2"]),
      runner: rejecting("f1", "f2"),
    });

    expect(outcome.observations).toHaveLength(2);
  });

  // ── Security ───────────────────────────────────────────

  it("default mode is dry-run (INV-1)", async () => {
    // No `mode` is supplied, so the loop must fall back to its default.
    const outcome = await runLoop({
      intent: "test",
      max_iterations: 1,
      think: fakeThink(["script"]),
      runner: fakeRunner(),
    });

    expect(outcome.mode).toBe("dry-run");
  });
});
@@ -0,0 +1,156 @@
1
+ /**
2
+ * AI Dev Loop Core — bounded generate → validate → run → observe → correct loop.
3
+ *
4
+ * Invariants enforced:
5
+ * - INV-1: Default mode is dry-run
6
+ * - INV-3: Observe only, never commit to long-term memory
7
+ * - INV-4: Bounded by max_iterations
8
+ */
9
+
10
+ import type { AshRunResult, AshRunFailure } from "./ash-run-result.js";
11
+ import type { AshTypedError } from "./ash-typed-error.js";
12
+ import { buildSystemPrompt, buildCorrectionPrompt } from "./system-prompt.js";
13
+
14
/** Inputs and collaborators consumed by `runDevLoop`. */
export interface DevLoopConfig {
  /** Task description; sent as the user message of the first iteration. */
  intent: string;
  /** Upper bound on loop iterations (INV-4); also passed to the system prompt builder. */
  max_iterations: number;
  /** Execution mode reported in the result; `runDevLoop` falls back to "dry-run" when omitted. */
  mode?: "dry-run" | "live";
  /** LLM call; invoked with `{ messages }` and expected to resolve to a response carrying tool calls or content. */
  think: (request: any) => Promise<any>;
  /** Executes an ASH script and resolves to its structured run result. */
  runner: (source: string) => Promise<AshRunResult>;
  /** Checks a script before it is run; a non-empty array means the run is skipped for that iteration. */
  validate: (source: string) => Promise<AshTypedError[]>;
  /** Receives every observation record the loop produces. */
  observe: (data: any) => Promise<void>;
}
23
+
24
/** Outcome returned by `runDevLoop`. */
export interface DevLoopResult {
  /** "ok" when a run succeeded, otherwise "error". */
  status: "ok" | "error";
  /** Number of iterations reported by the loop when it returned. */
  iterations: number;
  /** Mode the loop was configured with (or its "dry-run" default). */
  mode: "dry-run" | "live";
  /** The successful run, or the most recent failure when one is available. */
  finalResult?: AshRunResult;
  /** Every observation record produced during the loop, in order. */
  observations: any[];
  /** Message set when the loop stopped because `think` failed or the LLM returned no script. */
  error?: string;
}
32
+
33
/**
 * Run the AI dev loop: think → extract ASH → validate → run → observe → correct or return.
 *
 * Each iteration asks `config.think` for a script, validates it, runs it and
 * reports an observation. The loop returns as soon as a run reports "ok";
 * otherwise the failure is fed into the correction prompt of the next
 * iteration, up to `config.max_iterations` iterations (INV-4).
 *
 * @param config - Collaborators and limits for the loop. `mode` defaults to
 *   "dry-run" (INV-1); this function only echoes it in the result and does
 *   not pass it to `runner`.
 * @returns Status "ok" with the successful run, or status "error" with the
 *   failure information gathered so far. A rejection from `think` (or from
 *   building its prompt) becomes an "error" result; rejections from
 *   `validate`, `runner` and `observe` are not caught and propagate.
 */
export async function runDevLoop(config: DevLoopConfig): Promise<DevLoopResult> {
  const mode = config.mode ?? "dry-run";
  const observations: any[] = [];
  let lastFailure: AshRunFailure | undefined;
  let lastScript: string | undefined;
  // Iterations actually started, so the reported total stays accurate even
  // when max_iterations is zero, negative or fractional.
  let attempts = 0;

  for (let i = 0; i < config.max_iterations; i++) {
    attempts = i + 1;
    // `>=` rather than `===` so a fractional limit still recognizes its final pass.
    const isLastIteration = attempts >= config.max_iterations;

    // 1. Think — ask LLM for ASH script
    let thinkResult: any;
    try {
      // NOTE(review): correction rounds send only the correction prompt, without
      // the system prompt or the original intent — confirm `think` keeps that
      // context itself.
      const messages: any[] =
        i === 0
          ? [
              { role: "system", content: buildSystemPrompt({ max_iterations: config.max_iterations }) },
              { role: "user", content: config.intent },
            ]
          : [{ role: "user", content: buildCorrectionPrompt(lastFailure!, lastScript!) }];
      thinkResult = await config.think({ messages });
    } catch (err: unknown) {
      return { status: "error", iterations: attempts, mode, observations, error: describeThrown(err) };
    }

    // 2. Extract script from tool call or content
    const script = extractScript(thinkResult);
    if (!script) {
      const obs = { iteration: attempts, error: "empty_response" };
      observations.push(obs);
      await config.observe(obs);
      // If no script, treat as failure for this iteration
      if (isLastIteration) {
        return { status: "error", iterations: attempts, mode, observations, error: "LLM returned no script" };
      }
      // Create a synthetic failure so the next iteration has something to correct
      lastFailure = toRunFailure({ kind: "RuntimeError", message: "LLM returned no ASH script" } as AshTypedError);
      lastScript = "";
      continue;
    }

    lastScript = script;

    // 3. Validate — a script with validation errors is never run
    const errors = await config.validate(script);
    if (errors.length > 0) {
      const obs = { iteration: attempts, script, validation_errors: errors };
      observations.push(obs);
      await config.observe(obs);

      // Only the first validation error is carried forward as the failure.
      const failure = toRunFailure(errors[0] as AshTypedError);
      if (isLastIteration) {
        return { status: "error", iterations: attempts, mode, observations, finalResult: failure };
      }
      lastFailure = failure;
      continue;
    }

    // 4. Run
    const result = await config.runner(script);

    // 5. Observe
    const obs = { iteration: attempts, script, result };
    observations.push(obs);
    await config.observe(obs);

    // 6. Check result
    if (result.status === "ok") {
      return { status: "ok", iterations: attempts, mode, finalResult: result, observations };
    }

    // Failed — prepare for correction
    lastFailure = result as AshRunFailure;
  }

  // Exhausted all iterations
  return {
    status: "error",
    iterations: attempts,
    mode,
    finalResult: lastFailure,
    observations,
  };
}

/** Wraps a typed error in a zero-step, zero-duration failed run result. */
function toRunFailure(failedAt: AshTypedError): AshRunFailure {
  return { status: "error", steps: [], failedAt, duration_ms: 0 };
}

/** Turns any thrown value (Error, error-like object, string, null, …) into a message string. */
function describeThrown(thrown: unknown): string {
  // Optional access keeps `throw null` / `throw undefined` from crashing the handler.
  const message = (thrown as any)?.message;
  return typeof message === "string" ? message : String(thrown);
}
138
+
139
/**
 * Pull the ASH script out of a think result.
 *
 * The first `ash_run` tool call whose arguments hold a non-blank string
 * `script` wins and is returned unmodified. Otherwise the trimmed response
 * content is used when it is a non-blank string.
 *
 * @param thinkResult - Value resolved by `think`; only `response.tool_calls`
 *   and `response.content` are read, and any of them may be missing.
 * @returns The script, or `undefined` when neither source provides one.
 */
function extractScript(thinkResult: any): string | undefined {
  const response = thinkResult?.response;

  // Try tool_calls first (ash_run call)
  const toolCalls = response?.tool_calls;
  if (Array.isArray(toolCalls)) {
    for (const toolCall of toolCalls) {
      // Optional access: a null or primitive entry must not abort extraction.
      if (toolCall?.name !== "ash_run") continue;

      let args: any;
      try {
        args = typeof toolCall.arguments === "string" ? JSON.parse(toolCall.arguments) : toolCall.arguments;
      } catch {
        // Malformed JSON arguments: deliberately skipped so a later tool call
        // or the content fallback can still supply a script.
        continue;
      }

      // Only a non-blank string counts; numbers, objects and whitespace-only
      // values would otherwise reach validate/runner as a "script".
      const candidate = args?.script;
      if (typeof candidate === "string" && candidate.trim() !== "") return candidate;
    }
  }

  // Fall back to content
  const content = response?.content;
  if (typeof content === "string") {
    const trimmed = content.trim();
    if (trimmed) return trimmed;
  }
  return undefined;
}
@@ -0,0 +1,107 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { runStructured } from "./structured-runner.js";
3
+ import type { WorldInterface, JobLogger } from "../compiler.js";
4
+ import { isAshRunSuccess, isAshRunFailure } from "./ash-run-result.js";
5
+
6
/**
 * Builds an in-memory world for tests. Reads are served from `data` (an empty
 * list for unknown paths); writes and publishes are recorded in the exposed
 * `written` / `published` maps so tests can inspect them.
 */
function makeWorld(data: Record<string, unknown[]> = {}): WorldInterface & {
  written: Record<string, unknown[]>;
  published: Record<string, unknown[]>;
} {
  const written: Record<string, unknown[]> = {};
  const published: Record<string, unknown[]> = {};

  const read = (path: string) => data[path] ?? [];
  const write = (path: string, records: unknown[]) => {
    written[path] = records;
  };
  const publish = (topic: string, records: unknown[]) => {
    published[topic] = records;
  };

  return { read, write, publish, written, published };
}
20
+
21
/** Builds a logger whose `log` method does nothing. */
function makeLogger(): JobLogger {
  const silent: JobLogger = {
    log() {},
  };
  return silent;
}
22
+
23
/** Builds a run context: in-memory world over `data`, the single capability "*", and a no-op logger. */
function makeCtx(data: Record<string, unknown[]> = {}) {
  const world = makeWorld(data);
  const caps = new Set(["*"]);
  const logger = makeLogger();
  return { world, caps, logger };
}
30
+
31
describe("Dry-Run Mode", () => {
  type Ctx = ReturnType<typeof makeCtx>;

  /** Runs `script` against `ctx` in dry-run mode. */
  const dryRun = (script: string, ctx: Ctx) => runStructured(script, ctx, { mode: "dry-run" });

  /** Runs `script` against `ctx` in live mode. */
  const liveRun = (script: string, ctx: Ctx) => runStructured(script, ctx, { mode: "live" });

  // ── Happy Path ─────────────────────────────────────────

  it("dry-run find → reads world normally", async () => {
    const ctx = makeCtx({ "/users": [{ name: "Alice" }] });

    const outcome = await dryRun('job "test" { find /users }', ctx);

    expect(isAshRunSuccess(outcome)).toBe(true);
  });

  it("dry-run save → does NOT write to world", async () => {
    const ctx = makeCtx({ "/users": [{ name: "Alice" }] });

    await dryRun('job "test" { find /users | save /output }', ctx);

    expect(ctx.world.written["/output"]).toBeUndefined();
  });

  it("dry-run publish → does NOT publish", async () => {
    const ctx = makeCtx({ "/data": [{ x: 1 }] });

    await dryRun('job "test" { find /data | publish /events }', ctx);

    expect(ctx.world.published["/events"]).toBeUndefined();
  });

  it("dry-run returns same AshRunResult shape as live", async () => {
    const script = 'job "test" { find /data }';
    const ctx = makeCtx({ "/data": [{ x: 1 }] });

    const dryResult = await dryRun(script, ctx);
    const liveResult = await liveRun(script, ctx);

    // NOTE(review): only `status` is compared, not the full result shape.
    expect(dryResult.status).toBe(liveResult.status);
  });

  // ── Bad Path ───────────────────────────────────────────

  it("dry-run with invalid script → same parse error as live", async () => {
    const ctx = makeCtx();

    const outcome = await dryRun('job "test" { find }', ctx);

    // NOTE(review): the live counterpart is not executed here; only the dry-run failure is asserted.
    expect(isAshRunFailure(outcome)).toBe(true);
  });

  // ── Security ───────────────────────────────────────────

  it("dry-run MUST NOT write to world (INV-1)", async () => {
    const ctx = makeCtx({ "/users": [{ name: "Alice" }] });

    await dryRun('job "test" { find /users | save /backup }', ctx);

    expect(Object.keys(ctx.world.written)).toHaveLength(0);
  });

  it("dry-run MUST NOT publish events", async () => {
    const ctx = makeCtx({ "/data": [{ x: 1 }] });

    await dryRun('job "test" { find /data | publish /events }', ctx);

    expect(Object.keys(ctx.world.published)).toHaveLength(0);
  });

  // ── Data Damage ────────────────────────────────────────

  it("verify world state unchanged after dry-run of save+publish pipeline", async () => {
    const data = { "/users": [{ name: "Alice" }, { name: "Bob" }] };
    const ctx = makeCtx(data);

    // NOTE(review): the title mentions publish, but this pipeline only saves.
    await dryRun('job "test" { find /users | save /archive }', ctx);

    // World reads unchanged
    expect(ctx.world.read("/users")).toEqual([{ name: "Alice" }, { name: "Bob" }]);
    // No writes happened
    expect(ctx.world.written["/archive"]).toBeUndefined();
  });

  // ── Live mode comparison ───────────────────────────────

  it("switching from dry-run to live on same script → live actually writes", async () => {
    const script = 'job "test" { find /data | save /out }';
    const ctx = makeCtx({ "/data": [{ x: 1 }] });

    await dryRun(script, ctx);
    expect(ctx.world.written["/out"]).toBeUndefined();

    await liveRun(script, ctx);
    expect(ctx.world.written["/out"]).toBeDefined();
  });
});