@usezombie/zombiectl 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/README.md +76 -0
  2. package/bin/zombiectl.js +11 -0
  3. package/bun.lock +29 -0
  4. package/package.json +28 -0
  5. package/scripts/run-tests.mjs +38 -0
  6. package/src/cli.js +275 -0
  7. package/src/commands/admin.js +39 -0
  8. package/src/commands/agent.js +98 -0
  9. package/src/commands/agent_harness.js +43 -0
  10. package/src/commands/agent_improvement_report.js +42 -0
  11. package/src/commands/agent_profile.js +39 -0
  12. package/src/commands/agent_proposals.js +158 -0
  13. package/src/commands/agent_scores.js +44 -0
  14. package/src/commands/core-ops.js +108 -0
  15. package/src/commands/core.js +537 -0
  16. package/src/commands/harness.js +35 -0
  17. package/src/commands/harness_activate.js +53 -0
  18. package/src/commands/harness_active.js +32 -0
  19. package/src/commands/harness_compile.js +40 -0
  20. package/src/commands/harness_source.js +72 -0
  21. package/src/commands/run_preview.js +212 -0
  22. package/src/commands/run_preview_walk.js +1 -0
  23. package/src/commands/runs.js +35 -0
  24. package/src/commands/spec_init.js +287 -0
  25. package/src/commands/workspace_billing.js +26 -0
  26. package/src/constants/error-codes.js +1 -0
  27. package/src/lib/agent-loop.js +106 -0
  28. package/src/lib/analytics.js +114 -0
  29. package/src/lib/api-paths.js +2 -0
  30. package/src/lib/browser.js +96 -0
  31. package/src/lib/http.js +149 -0
  32. package/src/lib/sse-parser.js +50 -0
  33. package/src/lib/state.js +67 -0
  34. package/src/lib/tool-executors.js +110 -0
  35. package/src/lib/walk-dir.js +41 -0
  36. package/src/program/args.js +95 -0
  37. package/src/program/auth-guard.js +12 -0
  38. package/src/program/auth-token.js +44 -0
  39. package/src/program/banner.js +46 -0
  40. package/src/program/command-registry.js +17 -0
  41. package/src/program/http-client.js +38 -0
  42. package/src/program/io.js +83 -0
  43. package/src/program/routes.js +20 -0
  44. package/src/program/suggest.js +76 -0
  45. package/src/program/validate.js +24 -0
  46. package/src/ui-progress.js +59 -0
  47. package/src/ui-theme.js +62 -0
  48. package/test/admin_config.unit.test.js +25 -0
  49. package/test/agent-loop.unit.test.js +497 -0
  50. package/test/agent_harness.unit.test.js +52 -0
  51. package/test/agent_improvement_report.unit.test.js +74 -0
  52. package/test/agent_profile.unit.test.js +156 -0
  53. package/test/agent_proposals.unit.test.js +167 -0
  54. package/test/agent_scores.unit.test.js +220 -0
  55. package/test/analytics.unit.test.js +41 -0
  56. package/test/args.unit.test.js +69 -0
  57. package/test/auth-guard.test.js +33 -0
  58. package/test/auth-token.unit.test.js +112 -0
  59. package/test/banner.unit.test.js +442 -0
  60. package/test/browser.unit.test.js +16 -0
  61. package/test/cli-analytics.unit.test.js +296 -0
  62. package/test/did-you-mean.integration.test.js +76 -0
  63. package/test/doctor-json.test.js +81 -0
  64. package/test/error-codes.unit.test.js +7 -0
  65. package/test/harness-command.unit.test.js +180 -0
  66. package/test/harness-compile.test.js +81 -0
  67. package/test/harness-lifecycle.integration.test.js +339 -0
  68. package/test/harness-source-put.test.js +72 -0
  69. package/test/harness_activate.unit.test.js +48 -0
  70. package/test/harness_active.unit.test.js +53 -0
  71. package/test/harness_compile.unit.test.js +54 -0
  72. package/test/harness_source.unit.test.js +59 -0
  73. package/test/help.test.js +276 -0
  74. package/test/helpers-fs.js +32 -0
  75. package/test/helpers.js +31 -0
  76. package/test/io.unit.test.js +57 -0
  77. package/test/login.unit.test.js +115 -0
  78. package/test/logout.unit.test.js +65 -0
  79. package/test/parse.test.js +16 -0
  80. package/test/run-preview.edge.test.js +422 -0
  81. package/test/run-preview.integration.test.js +135 -0
  82. package/test/run-preview.security.test.js +246 -0
  83. package/test/run-preview.unit.test.js +131 -0
  84. package/test/run.unit.test.js +149 -0
  85. package/test/runs-cancel.unit.test.js +288 -0
  86. package/test/runs-list.unit.test.js +105 -0
  87. package/test/skill-secret.unit.test.js +94 -0
  88. package/test/spec-init.edge.test.js +232 -0
  89. package/test/spec-init.integration.test.js +128 -0
  90. package/test/spec-init.security.test.js +285 -0
  91. package/test/spec-init.unit.test.js +160 -0
  92. package/test/specs-sync.unit.test.js +164 -0
  93. package/test/sse-parser.unit.test.js +54 -0
  94. package/test/state.unit.test.js +34 -0
  95. package/test/streamfetch.unit.test.js +211 -0
  96. package/test/suggest.test.js +75 -0
  97. package/test/tool-executors.unit.test.js +165 -0
  98. package/test/validate.test.js +81 -0
  99. package/test/workspace-add.test.js +106 -0
  100. package/test/workspace.unit.test.js +230 -0
@@ -0,0 +1,497 @@
1
import { describe, test, expect, beforeEach, afterEach } from "bun:test";
import { mkdirSync, mkdtempSync, writeFileSync, rmSync } from "node:fs";
import { join } from "node:path";
import os from "node:os";
import { agentLoop } from "../src/lib/agent-loop.js";
import { ApiError } from "../src/lib/http.js";
7
+
8
+ // ── Test helpers ─────────────────────────────────────────────────────────────
9
+
10
/**
 * Create a fresh scratch "repo" under the OS temp directory.
 *
 * The directory is seeded with `README.md` ("# Test Repo") and
 * `src/main.go` ("package main") so tool calls have something to list and read.
 *
 * @returns {string} Path of the newly created directory. The caller is
 *   responsible for removing it (the suites do so in `afterEach`).
 */
function makeTmp() {
  // mkdtempSync appends its own random suffix and creates the directory,
  // failing rather than reusing an existing one. A hand-built name passed to
  // mkdirSync({ recursive: true }) would silently reuse a colliding directory.
  const dir = mkdtempSync(join(os.tmpdir(), "agent-loop-test-"));
  writeFileSync(join(dir, "README.md"), "# Test Repo");
  mkdirSync(join(dir, "src"), { recursive: true });
  writeFileSync(join(dir, "src", "main.go"), "package main");
  return dir;
}
18
+
19
/**
 * Fake a successful streaming fetch response whose body is the given SSE text.
 *
 * Each reader obtained from `body.getReader()` yields the UTF-8 encoded text
 * as a single chunk on its first `read()` and `{ done: true }` on every
 * subsequent `read()`.
 *
 * @param {string} sseBody - Raw SSE payload to deliver.
 * @returns {{ok: boolean, status: number, body: {getReader: Function}}}
 */
function sseResponse(sseBody) {
  const utf8 = new TextEncoder();

  // Every reader tracks its own "already delivered" flag.
  const makeReader = () => {
    let pending = true;
    const read = () => {
      if (pending) {
        pending = false;
        return Promise.resolve({ done: false, value: utf8.encode(sseBody) });
      }
      return Promise.resolve({ done: true });
    };
    return { read };
  };

  return {
    ok: true,
    status: 200,
    body: { getReader: makeReader },
  };
}
43
+
44
/**
 * Assemble the context object handed to `agentLoop` in these tests.
 *
 * @param {Function} fetchImpl - Fetch stand-in, stored on the context as-is.
 * @returns {{apiUrl: string, token: string, fetchImpl: Function}} A new
 *   context with a fixed API URL and token.
 */
function makeCtx(fetchImpl) {
  const ctx = { apiUrl: "https://api.test.com", token: "test-token" };
  ctx.fetchImpl = fetchImpl;
  return ctx;
}
51
+
52
+ // ── T1: Happy path ──────────────────────────────────────────────────────────
53
+
54
+ describe("agentLoop — happy path", () => {
55
+ let tmp;
56
+ beforeEach(() => { tmp = makeTmp(); });
57
+ afterEach(() => { rmSync(tmp, { recursive: true, force: true }); });
58
+
59
+ test("completes with text when server returns text_delta + done", async () => {
60
+ const fetchImpl = async () => sseResponse(
61
+ 'event: text_delta\ndata: {"text":"# Spec content"}\n\nevent: done\ndata: {"usage":{"input_tokens":100,"output_tokens":50,"total_tokens":150}}\n\n'
62
+ );
63
+ const ctx = makeCtx(fetchImpl);
64
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "Generate spec", tmp, ctx);
65
+ expect(result.text).toBe("# Spec content");
66
+ expect(result.usage.total_tokens).toBe(150);
67
+ expect(result.toolCalls).toBe(0);
68
+ });
69
+
70
+ test("executes tool calls locally and accumulates messages", async () => {
71
+ let callCount = 0;
72
+ const fetchImpl = async () => {
73
+ callCount++;
74
+ if (callCount === 1) {
75
+ return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"list_dir","input":{"path":"."}}\n\n');
76
+ }
77
+ return sseResponse(
78
+ 'event: text_delta\ndata: {"text":"Found files"}\n\nevent: done\ndata: {"usage":{"total_tokens":200}}\n\n'
79
+ );
80
+ };
81
+ const ctx = makeCtx(fetchImpl);
82
+ const toolCalls = [];
83
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "Explore repo", tmp, ctx, {
84
+ onToolCall: (tc) => toolCalls.push(tc),
85
+ });
86
+ expect(result.toolCalls).toBe(1);
87
+ expect(toolCalls).toHaveLength(1);
88
+ expect(toolCalls[0].name).toBe("list_dir");
89
+ expect(result.text).toBe("Found files");
90
+ });
91
+
92
+ test("executes multiple tool calls across round trips", async () => {
93
+ let callCount = 0;
94
+ const fetchImpl = async () => {
95
+ callCount++;
96
+ if (callCount === 1) {
97
+ return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"list_dir","input":{"path":"."}}\n\n');
98
+ }
99
+ if (callCount === 2) {
100
+ return sseResponse('event: tool_use\ndata: {"id":"tu_02","name":"read_file","input":{"path":"README.md"}}\n\n');
101
+ }
102
+ return sseResponse('event: text_delta\ndata: {"text":"Done"}\n\nevent: done\ndata: {"usage":{"total_tokens":300}}\n\n');
103
+ };
104
+ const ctx = makeCtx(fetchImpl);
105
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "Read repo", tmp, ctx);
106
+ expect(result.toolCalls).toBe(2);
107
+ expect(result.text).toBe("Done");
108
+ });
109
+
110
+ test("fires onText callback for each text_delta", async () => {
111
+ const fetchImpl = async () => sseResponse(
112
+ 'event: text_delta\ndata: {"text":"chunk1"}\n\nevent: text_delta\ndata: {"text":"chunk2"}\n\nevent: done\ndata: {}\n\n'
113
+ );
114
+ const ctx = makeCtx(fetchImpl);
115
+ const chunks = [];
116
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx, {
117
+ onText: (t) => chunks.push(t),
118
+ });
119
+ expect(chunks).toEqual(["chunk1", "chunk2"]);
120
+ expect(result.text).toBe("chunk1chunk2");
121
+ });
122
+
123
+ test("fires onDone callback with usage data", async () => {
124
+ const fetchImpl = async () => sseResponse(
125
+ 'event: done\ndata: {"usage":{"total_tokens":42},"provider":"anthropic"}\n\n'
126
+ );
127
+ const ctx = makeCtx(fetchImpl);
128
+ let doneData = null;
129
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx, {
130
+ onDone: (d) => { doneData = d; },
131
+ });
132
+ expect(doneData.usage.total_tokens).toBe(42);
133
+ expect(doneData.provider).toBe("anthropic");
134
+ });
135
+ });
136
+
137
+ // ── T2: Edge cases ──────────────────────────────────────────────────────────
138
+
139
+ describe("agentLoop — edge cases", () => {
140
+ let tmp;
141
+ beforeEach(() => { tmp = makeTmp(); });
142
+ afterEach(() => { rmSync(tmp, { recursive: true, force: true }); });
143
+
144
+ test("handles empty text_delta gracefully", async () => {
145
+ const fetchImpl = async () => sseResponse(
146
+ 'event: text_delta\ndata: {"text":""}\n\nevent: text_delta\ndata: {}\n\nevent: done\ndata: {}\n\n'
147
+ );
148
+ const ctx = makeCtx(fetchImpl);
149
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
150
+ expect(result.text).toBe("");
151
+ });
152
+
153
+ test("handles done event with no usage field", async () => {
154
+ const fetchImpl = async () => sseResponse('event: done\ndata: {}\n\n');
155
+ const ctx = makeCtx(fetchImpl);
156
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
157
+ expect(result.usage).toBeNull();
158
+ });
159
+
160
+ test("tool call for missing file returns error to LLM, loop continues", async () => {
161
+ let callCount = 0;
162
+ const fetchImpl = async () => {
163
+ callCount++;
164
+ if (callCount === 1) {
165
+ return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"read_file","input":{"path":"nonexistent.txt"}}\n\n');
166
+ }
167
+ return sseResponse('event: text_delta\ndata: {"text":"Handled missing file"}\n\nevent: done\ndata: {}\n\n');
168
+ };
169
+ const ctx = makeCtx(fetchImpl);
170
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
171
+ expect(result.text).toBe("Handled missing file");
172
+ expect(result.toolCalls).toBe(1);
173
+ });
174
+
175
// A traversal attempt must not abort the loop: the rejection is posted back
// to the server on the next round trip and the conversation carries on.
test("path traversal in tool call is rejected, loop continues", async () => {
  let callCount = 0;
  let secondBody = null;
  const fetchImpl = async (_url, opts) => {
    callCount++;
    if (callCount === 1) {
      return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"read_file","input":{"path":"../../etc/passwd"}}\n\n');
    }
    // Second call: capture what is sent back so the rejection can be checked.
    secondBody = opts.body;
    return sseResponse('event: text_delta\ndata: {"text":"recovered"}\n\nevent: done\ndata: {}\n\n');
  };
  const ctx = makeCtx(fetchImpl);
  const result = await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
  expect(result.text).toBe("recovered");
  expect(callCount).toBe(2);
  // Verify the second POST contains the error message
  expect(secondBody).toContain("path outside repo root");
});
191
+
192
+ test("unknown tool name returns error string to LLM", async () => {
193
+ let callCount = 0;
194
+ let secondBody = null;
195
+ const fetchImpl = async (url, opts) => {
196
+ callCount++;
197
+ if (callCount === 1) {
198
+ return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"write_file","input":{"path":"x"}}\n\n');
199
+ }
200
+ secondBody = opts.body;
201
+ return sseResponse('event: done\ndata: {}\n\n');
202
+ };
203
+ const ctx = makeCtx(fetchImpl);
204
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
205
+ expect(secondBody).toContain("unknown tool");
206
+ });
207
+ });
208
+
209
+ // ── T3: Error paths ─────────────────────────────────────────────────────────
210
+
211
+ describe("agentLoop — error paths", () => {
212
+ let tmp;
213
+ beforeEach(() => { tmp = makeTmp(); });
214
+ afterEach(() => { rmSync(tmp, { recursive: true, force: true }); });
215
+
216
+ test("SSE error event fires onError callback", async () => {
217
+ const fetchImpl = async () => sseResponse(
218
+ 'event: error\ndata: {"message":"provider timeout after 30s"}\n\n'
219
+ );
220
+ const ctx = makeCtx(fetchImpl);
221
+ const errors = [];
222
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx, {
223
+ onError: (e) => errors.push(e),
224
+ });
225
+ expect(errors).toContain("provider timeout after 30s");
226
+ });
227
+
228
// A non-ok 401 response must surface as an ApiError carrying the HTTP status.
test("HTTP 401 throws ApiError", async () => {
  const fetchImpl = async () => ({
    ok: false,
    status: 401,
    statusText: "Unauthorized",
    text: async () => JSON.stringify({ error: { code: "AUTH_REQUIRED", message: "not authenticated" } }),
  });
  const ctx = makeCtx(fetchImpl);
  // Capture the rejection first and assert outside the try block, so a missing
  // throw is reported as such instead of being swallowed by the catch.
  let err = null;
  try {
    await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
  } catch (caught) {
    err = caught;
  }
  expect(err).toBeInstanceOf(ApiError);
  expect(err.status).toBe(401);
});
244
+
245
// The error code from the server's JSON error body must be exposed on the
// thrown error.
test("HTTP 500 throws ApiError with server error code", async () => {
  const fetchImpl = async () => ({
    ok: false,
    status: 500,
    statusText: "Internal Server Error",
    text: async () => JSON.stringify({ error: { code: "UZ-INTERNAL-003", message: "provider init failed" } }),
  });
  const ctx = makeCtx(fetchImpl);
  // Capture the rejection first and assert outside the try block, so a missing
  // throw is reported as such instead of being swallowed by the catch.
  let err = null;
  try {
    await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
  } catch (caught) {
    err = caught;
  }
  expect(err).not.toBeNull();
  expect(err.code).toBe("UZ-INTERNAL-003");
});
260
+
261
// A reader whose read() rejects simulates the connection dropping after the
// response headers arrived; the failure must reach the caller.
test("network error mid-stream propagates", async () => {
  const fetchImpl = async () => ({
    ok: true,
    status: 200,
    body: {
      getReader() {
        return {
          read() {
            return Promise.reject(new Error("connection reset"));
          },
        };
      },
    },
  });
  const ctx = makeCtx(fetchImpl);
  // Capture the rejection first and assert outside the try block, so a missing
  // throw is reported as such instead of being swallowed by the catch.
  let err = null;
  try {
    await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
  } catch (caught) {
    err = caught;
  }
  expect(err).not.toBeNull();
  expect(err.message).toContain("connection reset");
});
283
+
284
+ test("SSE error event with no message uses fallback", async () => {
285
+ const fetchImpl = async () => sseResponse('event: error\ndata: {}\n\n');
286
+ const ctx = makeCtx(fetchImpl);
287
+ const errors = [];
288
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx, {
289
+ onError: (e) => errors.push(e),
290
+ });
291
+ expect(errors).toContain("unknown error");
292
+ });
293
+ });
294
+
295
+ // ── T5: Guardrails (max tool calls, timeout) ────────────────────────────────
296
+
297
+ describe("agentLoop — guardrails", () => {
298
+ let tmp;
299
+ beforeEach(() => { tmp = makeTmp(); });
300
+ afterEach(() => { rmSync(tmp, { recursive: true, force: true }); });
301
+
302
+ test("stops after MAX_TOOL_CALLS (10) and fires onError", async () => {
303
+ let callCount = 0;
304
+ const fetchImpl = async () => {
305
+ callCount++;
306
+ return sseResponse(`event: tool_use\ndata: {"id":"tu_${callCount}","name":"list_dir","input":{"path":"."}}\n\n`);
307
+ };
308
+ const ctx = makeCtx(fetchImpl);
309
+ const errors = [];
310
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx, {
311
+ onError: (e) => errors.push(e),
312
+ });
313
+ expect(result.toolCalls).toBe(10);
314
+ expect(errors.some((e) => e.includes("max tool calls"))).toBe(true);
315
+ });
316
+
317
+ test("returns partial text when max tool calls reached mid-conversation", async () => {
318
+ let callCount = 0;
319
+ const fetchImpl = async () => {
320
+ callCount++;
321
+ if (callCount <= 10) {
322
+ return sseResponse(`event: tool_use\ndata: {"id":"tu_${callCount}","name":"list_dir","input":{"path":"."}}\n\n`);
323
+ }
324
+ return sseResponse('event: done\ndata: {}\n\n');
325
+ };
326
+ const ctx = makeCtx(fetchImpl);
327
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
328
+ expect(result.toolCalls).toBe(10);
329
+ // callCount should be 10 (stopped before 11th fetch)
330
+ expect(callCount).toBe(10);
331
+ });
332
+ });
333
+
334
+ // ── T6: Integration — tool execution round-trip fidelity ────────────────────
335
+
336
+ describe("agentLoop — round-trip fidelity", () => {
337
+ let tmp;
338
+ beforeEach(() => { tmp = makeTmp(); });
339
+ afterEach(() => { rmSync(tmp, { recursive: true, force: true }); });
340
+
341
+ test("tool result for read_file contains actual file content", async () => {
342
+ let secondPayload = null;
343
+ let callCount = 0;
344
+ const fetchImpl = async (url, opts) => {
345
+ callCount++;
346
+ if (callCount === 1) {
347
+ return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"read_file","input":{"path":"README.md"}}\n\n');
348
+ }
349
+ secondPayload = JSON.parse(opts.body);
350
+ return sseResponse('event: done\ndata: {}\n\n');
351
+ };
352
+ const ctx = makeCtx(fetchImpl);
353
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
354
+ // Second POST should contain the file content as tool_result
355
+ const lastMsg = secondPayload.messages[secondPayload.messages.length - 1];
356
+ const parsed = JSON.parse(lastMsg.content);
357
+ expect(parsed[0].type).toBe("tool_result");
358
+ expect(parsed[0].content).toBe("# Test Repo");
359
+ });
360
+
361
+ test("tool result for list_dir contains directory entries", async () => {
362
+ let secondPayload = null;
363
+ let callCount = 0;
364
+ const fetchImpl = async (url, opts) => {
365
+ callCount++;
366
+ if (callCount === 1) {
367
+ return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"list_dir","input":{"path":"."}}\n\n');
368
+ }
369
+ secondPayload = JSON.parse(opts.body);
370
+ return sseResponse('event: done\ndata: {}\n\n');
371
+ };
372
+ const ctx = makeCtx(fetchImpl);
373
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
374
+ const lastMsg = secondPayload.messages[secondPayload.messages.length - 1];
375
+ const parsed = JSON.parse(lastMsg.content);
376
+ expect(parsed[0].content).toContain("README.md");
377
+ expect(parsed[0].content).toContain("src/");
378
+ });
379
+
380
+ test("messages accumulate correctly across round trips", async () => {
381
+ let thirdPayload = null;
382
+ let callCount = 0;
383
+ const fetchImpl = async (url, opts) => {
384
+ callCount++;
385
+ if (callCount === 1) {
386
+ return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"list_dir","input":{"path":"."}}\n\n');
387
+ }
388
+ if (callCount === 2) {
389
+ return sseResponse('event: tool_use\ndata: {"id":"tu_02","name":"read_file","input":{"path":"README.md"}}\n\n');
390
+ }
391
+ thirdPayload = JSON.parse(opts.body);
392
+ return sseResponse('event: done\ndata: {}\n\n');
393
+ };
394
+ const ctx = makeCtx(fetchImpl);
395
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
396
+ // 1 user + 2 assistant + 2 tool_result = 5 messages
397
+ expect(thirdPayload.messages).toHaveLength(5);
398
+ expect(thirdPayload.messages[0].role).toBe("user");
399
+ expect(thirdPayload.messages[1].role).toBe("assistant");
400
+ expect(thirdPayload.messages[2].role).toBe("user");
401
+ });
402
+
403
+ test("sends correct Authorization header", async () => {
404
+ let capturedHeaders = null;
405
+ const fetchImpl = async (url, opts) => {
406
+ capturedHeaders = opts.headers;
407
+ return sseResponse('event: done\ndata: {}\n\n');
408
+ };
409
+ const ctx = makeCtx(fetchImpl);
410
+ ctx.token = "my-jwt-token";
411
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
412
+ expect(capturedHeaders.Authorization).toBe("Bearer my-jwt-token");
413
+ });
414
+
415
+ test("sends tool definitions in payload", async () => {
416
+ let capturedPayload = null;
417
+ const fetchImpl = async (url, opts) => {
418
+ capturedPayload = JSON.parse(opts.body);
419
+ return sseResponse('event: done\ndata: {}\n\n');
420
+ };
421
+ const ctx = makeCtx(fetchImpl);
422
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
423
+ expect(capturedPayload.tools).toHaveLength(3);
424
+ expect(capturedPayload.tools.map((t) => t.name)).toEqual(["read_file", "list_dir", "glob"]);
425
+ });
426
+ });
427
+
428
+ // ── T8: Security — path traversal through tool calls ────────────────────────
429
+
430
+ describe("agentLoop — security", () => {
431
+ let tmp;
432
+ beforeEach(() => { tmp = makeTmp(); });
433
+ afterEach(() => { rmSync(tmp, { recursive: true, force: true }); });
434
+
435
+ test("read_file with ../../etc/passwd sends error back to LLM, never reads file", async () => {
436
+ let secondPayload = null;
437
+ let callCount = 0;
438
+ const fetchImpl = async (url, opts) => {
439
+ callCount++;
440
+ if (callCount === 1) {
441
+ return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"read_file","input":{"path":"../../etc/passwd"}}\n\n');
442
+ }
443
+ secondPayload = JSON.parse(opts.body);
444
+ return sseResponse('event: done\ndata: {}\n\n');
445
+ };
446
+ const ctx = makeCtx(fetchImpl);
447
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
448
+ const lastMsg = secondPayload.messages[secondPayload.messages.length - 1];
449
+ const parsed = JSON.parse(lastMsg.content);
450
+ expect(parsed[0].content).toContain("error");
451
+ expect(parsed[0].content).toContain("path outside repo root");
452
+ expect(parsed[0].content).not.toContain("root:");
453
+ });
454
+
455
+ test("list_dir with /etc sends error back to LLM", async () => {
456
+ let secondPayload = null;
457
+ let callCount = 0;
458
+ const fetchImpl = async (url, opts) => {
459
+ callCount++;
460
+ if (callCount === 1) {
461
+ return sseResponse('event: tool_use\ndata: {"id":"tu_01","name":"list_dir","input":{"path":"/etc"}}\n\n');
462
+ }
463
+ secondPayload = JSON.parse(opts.body);
464
+ return sseResponse('event: done\ndata: {}\n\n');
465
+ };
466
+ const ctx = makeCtx(fetchImpl);
467
+ await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
468
+ const lastMsg = secondPayload.messages[secondPayload.messages.length - 1];
469
+ const parsed = JSON.parse(lastMsg.content);
470
+ expect(parsed[0].content).toContain("error");
471
+ });
472
+ });
473
+
474
+ // ── T9: Callbacks are optional (no crash when omitted) ──────────────────────
475
+
476
+ describe("agentLoop — optional callbacks", () => {
477
+ let tmp;
478
+ beforeEach(() => { tmp = makeTmp(); });
479
+ afterEach(() => { rmSync(tmp, { recursive: true, force: true }); });
480
+
481
+ test("works with no callbacks object", async () => {
482
+ const fetchImpl = async () => sseResponse(
483
+ 'event: text_delta\ndata: {"text":"ok"}\n\nevent: done\ndata: {}\n\n'
484
+ );
485
+ const ctx = makeCtx(fetchImpl);
486
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx);
487
+ expect(result.text).toBe("ok");
488
+ });
489
+
490
+ test("works with empty callbacks object", async () => {
491
+ const fetchImpl = async () => sseResponse('event: error\ndata: {"message":"oops"}\n\n');
492
+ const ctx = makeCtx(fetchImpl);
493
+ // Should not throw even though onError is not provided
494
+ const result = await agentLoop("/v1/workspaces/ws1/spec/template", "msg", tmp, ctx, {});
495
+ expect(result.text).toBe("");
496
+ });
497
+ });
@@ -0,0 +1,52 @@
1
+ import { test } from "bun:test";
2
+ import assert from "node:assert/strict";
3
+ import { commandAgentHarness } from "../src/commands/agent_harness.js";
4
+ import {
5
+ AGENT_ID,
6
+ makeBufferStream,
7
+ makeNoop,
8
+ ui,
9
+ } from "./helpers.js";
10
+
11
+ const CHANGE_ID = "0195b4ba-8d3a-7f13-8abc-000000000092";
12
+
13
+ test("commandAgentHarness revert posts to the revert endpoint", async () => {
14
+ let called = null;
15
+ const deps = {
16
+ request: async (_ctx, url, init) => {
17
+ called = { url, init };
18
+ return {
19
+ change_id: "0195b4ba-8d3a-7f13-8abc-000000000093",
20
+ reverted_from: CHANGE_ID,
21
+ };
22
+ },
23
+ apiHeaders: () => ({ authorization: "Bearer t" }),
24
+ printJson: () => {},
25
+ ui,
26
+ writeLine: () => {},
27
+ };
28
+
29
+ const parsed = { options: { "to-change": CHANGE_ID }, positionals: ["revert", AGENT_ID] };
30
+ const code = await commandAgentHarness({ stdout: makeNoop(), stderr: makeNoop(), jsonMode: false }, parsed, AGENT_ID, deps);
31
+ assert.equal(code, 0);
32
+ assert.match(called.url, /\/harness\/changes\/.*:revert$/);
33
+ assert.equal(called.init.method, "POST");
34
+ });
35
+
36
+ test("commandAgentHarness revert requires --to-change", async () => {
37
+ const stderr = makeBufferStream();
38
+ const deps = {
39
+ request: async () => {
40
+ throw new Error("should not be called");
41
+ },
42
+ apiHeaders: () => ({}),
43
+ printJson: () => {},
44
+ ui,
45
+ writeLine: (stream, line) => stream.write(`${line}\n`),
46
+ };
47
+
48
+ const parsed = { options: {}, positionals: ["revert", AGENT_ID] };
49
+ const code = await commandAgentHarness({ stdout: makeNoop(), stderr: stderr.stream, jsonMode: false }, parsed, AGENT_ID, deps);
50
+ assert.equal(code, 2);
51
+ assert.match(stderr.read(), /requires --to-change/);
52
+ });
@@ -0,0 +1,74 @@
1
+ import { test } from "bun:test";
2
+ import assert from "node:assert/strict";
3
+ import { commandAgentImprovementReport } from "../src/commands/agent_improvement_report.js";
4
+ import {
5
+ makeNoop,
6
+ ui, ApiError,
7
+ AGENT_ID,
8
+ } from "./helpers.js";
9
+
10
+ const SAMPLE_REPORT = {
11
+ agent_id: AGENT_ID,
12
+ trust_level: "UNEARNED",
13
+ improvement_stalled_warning: true,
14
+ proposals_generated: 4,
15
+ proposals_approved: 1,
16
+ proposals_vetoed: 1,
17
+ proposals_rejected: 1,
18
+ proposals_applied: 2,
19
+ avg_score_delta_per_applied_change: -7.5,
20
+ current_tier: "Silver",
21
+ baseline_tier: "Gold",
22
+ };
23
+
24
+ test("commandAgentImprovementReport calls GET /v1/agents/{agent_id}/improvement-report", async () => {
25
+ let calledUrl = null;
26
+ const deps = {
27
+ request: async (_ctx, url) => { calledUrl = url; return SAMPLE_REPORT; },
28
+ apiHeaders: () => ({}),
29
+ printJson: () => {},
30
+ printKeyValue: () => {},
31
+ };
32
+ const code = await commandAgentImprovementReport({ stdout: makeNoop(), stderr: makeNoop(), jsonMode: false }, { options: {}, positionals: [] }, AGENT_ID, deps);
33
+ assert.equal(code, 0);
34
+ assert.match(calledUrl, new RegExp(`${AGENT_ID}/improvement-report$`));
35
+ });
36
+
37
+ test("commandAgentImprovementReport human mode prints report fields", async () => {
38
+ let kvData = null;
39
+ const deps = {
40
+ request: async () => SAMPLE_REPORT,
41
+ apiHeaders: () => ({}),
42
+ printJson: () => {},
43
+ printKeyValue: (_stream, value) => { kvData = value; },
44
+ };
45
+ await commandAgentImprovementReport({ stdout: makeNoop(), stderr: makeNoop(), jsonMode: false }, { options: {}, positionals: [] }, AGENT_ID, deps);
46
+ assert.equal(kvData.improvement_stalled_warning, true);
47
+ assert.equal(kvData.proposals_applied, 2);
48
+ assert.equal(kvData.current_tier, "Silver");
49
+ });
50
+
51
+ test("commandAgentImprovementReport json mode outputs raw response", async () => {
52
+ let printed = null;
53
+ const deps = {
54
+ request: async () => SAMPLE_REPORT,
55
+ apiHeaders: () => ({}),
56
+ printJson: (_stream, value) => { printed = value; },
57
+ printKeyValue: () => {},
58
+ };
59
+ await commandAgentImprovementReport({ stdout: makeNoop(), stderr: makeNoop(), jsonMode: true }, { options: {}, positionals: [] }, AGENT_ID, deps);
60
+ assert.deepEqual(printed, SAMPLE_REPORT);
61
+ });
62
+
63
+ test("commandAgentImprovementReport propagates ApiError", async () => {
64
+ const deps = {
65
+ request: async () => { throw new ApiError("not found", { status: 404, code: "UZ-AGENT-001" }); },
66
+ apiHeaders: () => ({}),
67
+ printJson: () => {},
68
+ printKeyValue: () => {},
69
+ };
70
+ await assert.rejects(
71
+ () => commandAgentImprovementReport({ stdout: makeNoop(), stderr: makeNoop(), jsonMode: false }, { options: {}, positionals: [] }, AGENT_ID, deps),
72
+ (err) => err instanceof ApiError && err.status === 404,
73
+ );
74
+ });