gsd-pi 2.33.1-dev.ee47f1b → 2.34.0-dev.bbb5216

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/dist/bundled-resource-path.d.ts +8 -0
  2. package/dist/bundled-resource-path.js +14 -0
  3. package/dist/headless-query.js +6 -6
  4. package/dist/resources/extensions/gsd/auto/session.js +27 -32
  5. package/dist/resources/extensions/gsd/auto-dashboard.js +29 -109
  6. package/dist/resources/extensions/gsd/auto-direct-dispatch.js +6 -1
  7. package/dist/resources/extensions/gsd/auto-dispatch.js +52 -81
  8. package/dist/resources/extensions/gsd/auto-loop.js +956 -0
  9. package/dist/resources/extensions/gsd/auto-observability.js +4 -2
  10. package/dist/resources/extensions/gsd/auto-post-unit.js +75 -185
  11. package/dist/resources/extensions/gsd/auto-prompts.js +133 -101
  12. package/dist/resources/extensions/gsd/auto-recovery.js +59 -97
  13. package/dist/resources/extensions/gsd/auto-start.js +330 -309
  14. package/dist/resources/extensions/gsd/auto-supervisor.js +5 -11
  15. package/dist/resources/extensions/gsd/auto-timeout-recovery.js +7 -7
  16. package/dist/resources/extensions/gsd/auto-timers.js +3 -4
  17. package/dist/resources/extensions/gsd/auto-verification.js +35 -73
  18. package/dist/resources/extensions/gsd/auto-worktree-sync.js +167 -0
  19. package/dist/resources/extensions/gsd/auto-worktree.js +291 -126
  20. package/dist/resources/extensions/gsd/auto.js +283 -1013
  21. package/dist/resources/extensions/gsd/captures.js +10 -4
  22. package/dist/resources/extensions/gsd/dispatch-guard.js +7 -8
  23. package/dist/resources/extensions/gsd/docs/preferences-reference.md +25 -18
  24. package/dist/resources/extensions/gsd/doctor-checks.js +3 -4
  25. package/dist/resources/extensions/gsd/git-service.js +1 -1
  26. package/dist/resources/extensions/gsd/gsd-db.js +296 -151
  27. package/dist/resources/extensions/gsd/index.js +92 -228
  28. package/dist/resources/extensions/gsd/post-unit-hooks.js +13 -13
  29. package/dist/resources/extensions/gsd/progress-score.js +61 -156
  30. package/dist/resources/extensions/gsd/quick.js +98 -122
  31. package/dist/resources/extensions/gsd/session-lock.js +13 -0
  32. package/dist/resources/extensions/gsd/templates/preferences.md +1 -0
  33. package/dist/resources/extensions/gsd/undo.js +43 -48
  34. package/dist/resources/extensions/gsd/unit-runtime.js +16 -15
  35. package/dist/resources/extensions/gsd/verification-evidence.js +0 -1
  36. package/dist/resources/extensions/gsd/verification-gate.js +6 -35
  37. package/dist/resources/extensions/gsd/worktree-command.js +30 -24
  38. package/dist/resources/extensions/gsd/worktree-manager.js +2 -3
  39. package/dist/resources/extensions/gsd/worktree-resolver.js +344 -0
  40. package/dist/resources/extensions/gsd/worktree.js +7 -44
  41. package/dist/tool-bootstrap.js +59 -11
  42. package/dist/worktree-cli.js +7 -7
  43. package/package.json +1 -1
  44. package/packages/pi-ai/dist/models.generated.d.ts +3630 -5483
  45. package/packages/pi-ai/dist/models.generated.d.ts.map +1 -1
  46. package/packages/pi-ai/dist/models.generated.js +735 -2588
  47. package/packages/pi-ai/dist/models.generated.js.map +1 -1
  48. package/packages/pi-ai/src/models.generated.ts +1039 -2892
  49. package/packages/pi-coding-agent/package.json +1 -1
  50. package/pkg/package.json +1 -1
  51. package/src/resources/extensions/gsd/auto/session.ts +47 -30
  52. package/src/resources/extensions/gsd/auto-dashboard.ts +28 -131
  53. package/src/resources/extensions/gsd/auto-direct-dispatch.ts +6 -1
  54. package/src/resources/extensions/gsd/auto-dispatch.ts +135 -91
  55. package/src/resources/extensions/gsd/auto-loop.ts +1665 -0
  56. package/src/resources/extensions/gsd/auto-observability.ts +4 -2
  57. package/src/resources/extensions/gsd/auto-post-unit.ts +85 -228
  58. package/src/resources/extensions/gsd/auto-prompts.ts +138 -109
  59. package/src/resources/extensions/gsd/auto-recovery.ts +124 -118
  60. package/src/resources/extensions/gsd/auto-start.ts +440 -354
  61. package/src/resources/extensions/gsd/auto-supervisor.ts +5 -12
  62. package/src/resources/extensions/gsd/auto-timeout-recovery.ts +8 -8
  63. package/src/resources/extensions/gsd/auto-timers.ts +3 -4
  64. package/src/resources/extensions/gsd/auto-verification.ts +76 -90
  65. package/src/resources/extensions/gsd/auto-worktree-sync.ts +204 -0
  66. package/src/resources/extensions/gsd/auto-worktree.ts +389 -141
  67. package/src/resources/extensions/gsd/auto.ts +515 -1199
  68. package/src/resources/extensions/gsd/captures.ts +10 -4
  69. package/src/resources/extensions/gsd/dispatch-guard.ts +13 -9
  70. package/src/resources/extensions/gsd/docs/preferences-reference.md +25 -18
  71. package/src/resources/extensions/gsd/doctor-checks.ts +3 -4
  72. package/src/resources/extensions/gsd/git-service.ts +8 -1
  73. package/src/resources/extensions/gsd/gitignore.ts +4 -2
  74. package/src/resources/extensions/gsd/gsd-db.ts +375 -180
  75. package/src/resources/extensions/gsd/index.ts +104 -263
  76. package/src/resources/extensions/gsd/post-unit-hooks.ts +13 -13
  77. package/src/resources/extensions/gsd/progress-score.ts +65 -200
  78. package/src/resources/extensions/gsd/quick.ts +121 -125
  79. package/src/resources/extensions/gsd/session-lock.ts +11 -0
  80. package/src/resources/extensions/gsd/templates/preferences.md +1 -0
  81. package/src/resources/extensions/gsd/tests/agent-end-retry.test.ts +32 -59
  82. package/src/resources/extensions/gsd/tests/all-milestones-complete-merge.test.ts +75 -27
  83. package/src/resources/extensions/gsd/tests/auto-budget-alerts.test.ts +1 -1
  84. package/src/resources/extensions/gsd/tests/auto-lock-creation.test.ts +37 -0
  85. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +1458 -0
  86. package/src/resources/extensions/gsd/tests/auto-recovery.test.ts +8 -162
  87. package/src/resources/extensions/gsd/tests/auto-secrets-gate.test.ts +2 -108
  88. package/src/resources/extensions/gsd/tests/auto-session-encapsulation.test.ts +1 -3
  89. package/src/resources/extensions/gsd/tests/auto-worktree-milestone-merge.test.ts +0 -3
  90. package/src/resources/extensions/gsd/tests/auto-worktree.test.ts +58 -0
  91. package/src/resources/extensions/gsd/tests/dispatch-guard.test.ts +0 -55
  92. package/src/resources/extensions/gsd/tests/headless-query.test.ts +22 -0
  93. package/src/resources/extensions/gsd/tests/milestone-transition-worktree.test.ts +8 -11
  94. package/src/resources/extensions/gsd/tests/provider-errors.test.ts +4 -6
  95. package/src/resources/extensions/gsd/tests/run-uat.test.ts +3 -3
  96. package/src/resources/extensions/gsd/tests/session-lock-regression.test.ts +64 -0
  97. package/src/resources/extensions/gsd/tests/sidecar-queue.test.ts +181 -0
  98. package/src/resources/extensions/gsd/tests/stale-worktree-cwd.test.ts +0 -3
  99. package/src/resources/extensions/gsd/tests/token-profile.test.ts +6 -6
  100. package/src/resources/extensions/gsd/tests/triage-dispatch.test.ts +6 -6
  101. package/src/resources/extensions/gsd/tests/undo.test.ts +6 -0
  102. package/src/resources/extensions/gsd/tests/verification-evidence.test.ts +24 -26
  103. package/src/resources/extensions/gsd/tests/verification-gate.test.ts +7 -201
  104. package/src/resources/extensions/gsd/tests/worktree-db-integration.test.ts +205 -0
  105. package/src/resources/extensions/gsd/tests/worktree-db.test.ts +442 -0
  106. package/src/resources/extensions/gsd/tests/worktree-e2e.test.ts +0 -3
  107. package/src/resources/extensions/gsd/tests/worktree-resolver.test.ts +705 -0
  108. package/src/resources/extensions/gsd/tests/worktree-sync-milestones.test.ts +57 -106
  109. package/src/resources/extensions/gsd/tests/worktree.test.ts +5 -1
  110. package/src/resources/extensions/gsd/tests/write-gate.test.ts +43 -132
  111. package/src/resources/extensions/gsd/types.ts +90 -81
  112. package/src/resources/extensions/gsd/undo.ts +42 -46
  113. package/src/resources/extensions/gsd/unit-runtime.ts +14 -18
  114. package/src/resources/extensions/gsd/verification-evidence.ts +1 -3
  115. package/src/resources/extensions/gsd/verification-gate.ts +6 -39
  116. package/src/resources/extensions/gsd/worktree-command.ts +36 -24
  117. package/src/resources/extensions/gsd/worktree-manager.ts +2 -3
  118. package/src/resources/extensions/gsd/worktree-resolver.ts +485 -0
  119. package/src/resources/extensions/gsd/worktree.ts +7 -44
  120. package/dist/resources/extensions/gsd/auto-constants.js +0 -5
  121. package/dist/resources/extensions/gsd/auto-idempotency.js +0 -106
  122. package/dist/resources/extensions/gsd/auto-stuck-detection.js +0 -165
  123. package/dist/resources/extensions/gsd/mechanical-completion.js +0 -351
  124. package/src/resources/extensions/gsd/auto-constants.ts +0 -6
  125. package/src/resources/extensions/gsd/auto-idempotency.ts +0 -151
  126. package/src/resources/extensions/gsd/auto-stuck-detection.ts +0 -221
  127. package/src/resources/extensions/gsd/mechanical-completion.ts +0 -430
  128. package/src/resources/extensions/gsd/tests/auto-dispatch-loop.test.ts +0 -691
  129. package/src/resources/extensions/gsd/tests/auto-reentrancy-guard.test.ts +0 -127
  130. package/src/resources/extensions/gsd/tests/auto-skip-loop.test.ts +0 -123
  131. package/src/resources/extensions/gsd/tests/dispatch-stall-guard.test.ts +0 -126
  132. package/src/resources/extensions/gsd/tests/loop-regression.test.ts +0 -874
  133. package/src/resources/extensions/gsd/tests/mechanical-completion.test.ts +0 -356
  134. package/src/resources/extensions/gsd/tests/progress-score.test.ts +0 -206
  135. package/src/resources/extensions/gsd/tests/session-lock.test.ts +0 -434
@@ -0,0 +1,1458 @@
1
+ import test from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { readFileSync } from "node:fs";
4
+ import { resolve } from "node:path";
5
+
6
+ import {
7
+ resolveAgentEnd,
8
+ runUnit,
9
+ autoLoop,
10
+ _resetPendingResolve,
11
+ _setActiveSession,
12
+ isSessionSwitchInFlight,
13
+ type UnitResult,
14
+ type AgentEndEvent,
15
+ type LoopDeps,
16
+ } from "../auto-loop.js";
17
+
18
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
19
+
20
/**
 * Create an AgentEndEvent fixture wrapping the given messages.
 * When no messages are supplied, a single assistant-role message is used.
 */
function makeEvent(
  messages: unknown[] = [{ role: "assistant" }],
): AgentEndEvent {
  const event: AgentEndEvent = { messages };
  return event;
}
25
+
26
/**
 * Create a stub AutoSession whose `cmdCtx.newSession` outcome is scripted
 * through `opts`:
 *
 * - `newSessionThrows`: newSession rejects with an Error carrying this message.
 * - `newSessionResult`: value newSession resolves with
 *   (defaults to `{ cancelled: false }`).
 * - `newSessionDelayMs`: when greater than 0, resolution is deferred with
 *   setTimeout; otherwise the promise is already resolved on return.
 * - `onNewSessionStart` / `onNewSessionSettle`: hooks called with the session
 *   when newSession is entered and right before it resolves, respectively.
 */
function makeMockSession(opts?: {
  newSessionResult?: { cancelled: boolean };
  newSessionThrows?: string;
  newSessionDelayMs?: number;
  onNewSessionStart?: (session: any) => void;
  onNewSessionSettle?: (session: any) => void;
}) {
  const session: any = {
    active: true,
    verbose: false,
    sessionSwitchInFlight: false,
    pendingResolve: null,
    pendingAgentEndQueue: [],
    cmdCtx: {},
    clearTimers: () => {},
  };

  session.cmdCtx.newSession = () => {
    opts?.onNewSessionStart?.(session);

    if (opts?.newSessionThrows) {
      return Promise.reject(new Error(opts.newSessionThrows));
    }

    const outcome = opts?.newSessionResult ?? { cancelled: false };
    const delayMs = opts?.newSessionDelayMs ?? 0;

    // Fires the settle hook, then hands back the scripted result.
    const settle = () => {
      opts?.onNewSessionSettle?.(session);
      return outcome;
    };

    if (!(delayMs > 0)) {
      return Promise.resolve(settle());
    }
    return new Promise<{ cancelled: boolean }>((done) => {
      setTimeout(() => done(settle()), delayMs);
    });
  };

  return session;
}
66
+
67
/**
 * Create a stub ExtensionContext: a `ui` whose `notify` does nothing and a
 * `model` with the fixed id "test-model".
 */
function makeMockCtx() {
  const ui = { notify: () => {} };
  const model = { id: "test-model" };
  return { ui, model } as any;
}
76
+
77
/**
 * Create a stub ExtensionAPI. Every `sendMessage` invocation appends its
 * argument list to the exposed `calls` array so tests can inspect it.
 */
function makeMockPi() {
  const calls: unknown[] = [];
  const sendMessage = (...args: unknown[]) => {
    calls.push(args);
  };
  return { sendMessage, calls } as any;
}
89
+
90
+ // ─── Tests ───────────────────────────────────────────────────────────────────
91
+
92
test("resolveAgentEnd resolves a pending runUnit promise", async () => {
  _resetPendingResolve();

  const session = makeMockSession();
  _setActiveSession(session);
  const agentEnd = makeEvent();

  // Launch runUnit without awaiting it; this test expects it to stay pending
  // until an agent_end event is delivered through resolveAgentEnd.
  const pendingUnit = runUnit(
    makeMockCtx(),
    makeMockPi(),
    session,
    "task",
    "T01",
    "do stuff",
    undefined,
  );

  // Yield to the event loop so runUnit can reach its wait point.
  await new Promise((wake) => setTimeout(wake, 10));

  // Deliver the agent_end event.
  resolveAgentEnd(agentEnd);

  const { status, event } = await pendingUnit;
  assert.equal(status, "completed");
  assert.deepEqual(event, agentEnd);
});
123
+
124
test("resolveAgentEnd queues event when no promise is pending", () => {
  _resetPendingResolve();
  const session = makeMockSession();
  _setActiveSession(session);

  // No runUnit is in flight here: the call must not throw, and the event is
  // expected to land on the session's queue.
  assert.doesNotThrow(() => resolveAgentEnd(makeEvent()));
  assert.equal(
    session.pendingAgentEndQueue.length,
    1,
    "event should be queued",
  );
});
135
+
136
test("double resolveAgentEnd only resolves once (second is queued)", async () => {
  _resetPendingResolve();

  const session = makeMockSession();
  _setActiveSession(session);
  const firstEvent = makeEvent([{ id: 1 }]);
  const secondEvent = makeEvent([{ id: 2 }]);

  const pendingUnit = runUnit(
    makeMockCtx(),
    makeMockPi(),
    session,
    "task",
    "T01",
    "prompt",
    undefined,
  );

  await new Promise((wake) => setTimeout(wake, 10));

  // The first event is expected to settle the waiting unit.
  resolveAgentEnd(firstEvent);

  // Nothing is pending any more, so the second event should be queued.
  assert.doesNotThrow(() => resolveAgentEnd(secondEvent));
  assert.equal(
    session.pendingAgentEndQueue.length,
    1,
    "second event should be queued",
  );

  const outcome = await pendingUnit;
  assert.equal(outcome.status, "completed");
  // The unit must carry the first event, never the second.
  assert.deepEqual(outcome.event, firstEvent);
});
168
+
169
test("runUnit returns cancelled when session creation fails", async () => {
  _resetPendingResolve();

  const pi = makeMockPi();
  // newSession rejects with this message.
  const session = makeMockSession({ newSessionThrows: "connection refused" });

  const { status, event } = await runUnit(
    makeMockCtx(),
    pi,
    session,
    "task",
    "T01",
    "prompt",
    undefined,
  );

  assert.equal(status, "cancelled");
  assert.equal(event, undefined);
  // No message may be sent when the session could not be created.
  assert.equal(pi.calls.length, 0);
});
183
+
184
test("runUnit returns cancelled when session creation times out", async () => {
  _resetPendingResolve();

  const pi = makeMockPi();
  // newSession resolving with cancelled: true stands in for the outcome of
  // the timeout race.
  const session = makeMockSession({ newSessionResult: { cancelled: true } });

  const { status, event } = await runUnit(
    makeMockCtx(),
    pi,
    session,
    "task",
    "T01",
    "prompt",
    undefined,
  );

  assert.equal(status, "cancelled");
  assert.equal(event, undefined);
  assert.equal(pi.calls.length, 0);
});
198
+
199
test("runUnit returns cancelled when s.active is false before sendMessage", async () => {
  _resetPendingResolve();

  const pi = makeMockPi();
  const session = makeMockSession();
  // Deactivate the session before runUnit gets a chance to send anything.
  session.active = false;

  const outcome = await runUnit(
    makeMockCtx(),
    pi,
    session,
    "task",
    "T01",
    "prompt",
    undefined,
  );

  assert.equal(outcome.status, "cancelled");
  assert.equal(pi.calls.length, 0);
});
212
+
213
test("runUnit only arms pendingResolve after newSession completes", async () => {
  _resetPendingResolve();

  // Session state captured at the instant newSession is entered.
  const observed: { switchFlag: boolean; pendingResolve: unknown } = {
    switchFlag: false,
    pendingResolve: "unset",
  };

  const pi = makeMockPi();
  const session = makeMockSession({
    newSessionDelayMs: 20,
    onNewSessionStart: (live) => {
      observed.switchFlag = live.sessionSwitchInFlight;
      observed.pendingResolve = live.pendingResolve;
    },
  });
  _setActiveSession(session);

  const pendingUnit = runUnit(
    makeMockCtx(),
    pi,
    session,
    "task",
    "T01",
    "prompt",
    undefined,
  );

  // Wait longer than the 20 ms newSession delay so it has settled.
  await new Promise((wake) => setTimeout(wake, 30));

  assert.equal(
    observed.switchFlag,
    true,
    "session switch guard should be active during newSession",
  );
  assert.equal(
    observed.pendingResolve,
    null,
    "pendingResolve should not be armed before newSession completes",
  );
  assert.equal(
    isSessionSwitchInFlight(),
    false,
    "session switch guard should clear after newSession settles",
  );

  resolveAgentEnd(makeEvent());

  const outcome = await pendingUnit;
  assert.equal(outcome.status, "completed");
  assert.equal(pi.calls.length, 1);
});
244
+
245
+ // ─── Structural assertions ───────────────────────────────────────────────────
246
+
247
test("auto-loop.ts exports autoLoop, runUnit, resolveAgentEnd", async () => {
  const autoLoopModule = await import("../auto-loop.js");

  // Each of these names must be exported as a callable.
  for (const name of ["autoLoop", "runUnit", "resolveAgentEnd"] as const) {
    assert.equal(
      typeof autoLoopModule[name],
      "function",
      `${name} should be exported as a function`,
    );
  }
});
265
+
266
test("auto-loop.ts contains a while keyword", () => {
  // Read the TypeScript source that sits one directory above this test file.
  const sourcePath = resolve(import.meta.dirname, "..", "auto-loop.ts");
  const sourceText = readFileSync(sourcePath, "utf-8");

  // Plain substring check: any occurrence of "while" satisfies it.
  const mentionsWhile = sourceText.includes("while");
  assert.ok(
    mentionsWhile,
    "auto-loop.ts should contain a while keyword (loop or placeholder)",
  );
});
276
+
277
test("auto-loop.ts one-shot pattern: pendingResolve is nulled before calling resolver", () => {
  const src = readFileSync(
    resolve(import.meta.dirname, "..", "auto-loop.ts"),
    "utf-8",
  );

  // Locate the function first. Without this guard a missing marker yields
  // indexOf() === -1, and slice(-1, 599) would inspect an unrelated window,
  // producing a failure message that blames pendingResolve instead.
  const marker = "export function resolveAgentEnd";
  const start = src.indexOf(marker);
  assert.ok(
    start !== -1,
    "auto-loop.ts should define `export function resolveAgentEnd`",
  );

  // The one-shot pattern requires: save ref, null the variable, then call.
  // Only the first 600 characters of the function are inspected.
  const resolveBlock = src.slice(start, start + 600);
  const nullIdx = resolveBlock.indexOf("pendingResolve = null");
  const callIdx = resolveBlock.indexOf("r({");

  assert.ok(nullIdx !== -1, "should null pendingResolve in resolveAgentEnd");
  assert.ok(callIdx !== -1, "should call resolver in resolveAgentEnd");
  assert.ok(
    nullIdx < callIdx,
    "pendingResolve should be nulled before calling the resolver (one-shot)",
  );
});
297
+
298
+ // ─── autoLoop tests (T02) ─────────────────────────────────────────────────
299
+
300
/**
 * Create a LoopDeps stub for autoLoop tests.
 *
 * A subset of the stubbed functions append their own name to `callLog` so
 * tests can assert on call order. Any entry in `overrides` replaces the
 * default of the same name. Note that an override does not log
 * automatically; it must push onto `callLog` itself if the test needs that.
 *
 * @returns the merged deps object, with the shared `callLog` array attached.
 */
function makeMockDeps(
  overrides?: Partial<LoopDeps>,
): LoopDeps & { callLog: string[] } {
  const callLog: string[] = [];
  // Appends a stub's name to the shared call log.
  const record = (name: string): void => {
    callLog.push(name);
  };
  const noop = () => {};
  const projectDir = "/tmp/project";

  const baseDeps: LoopDeps = {
    lockBase: () => "/tmp/test-lock",
    buildSnapshotOpts: () => ({}),
    stopAuto: async () => {
      record("stopAuto");
    },
    pauseAuto: async () => {
      record("pauseAuto");
    },
    clearUnitTimeout: noop,
    updateProgressWidget: noop,
    invalidateAllCaches: () => {
      record("invalidateAllCaches");
    },
    deriveState: async () => {
      record("deriveState");
      const state = {
        phase: "executing",
        activeMilestone: {
          id: "M001",
          title: "Test Milestone",
          status: "active",
        },
        activeSlice: { id: "S01", title: "Test Slice" },
        activeTask: { id: "T01" },
        registry: [{ id: "M001", status: "active" }],
        blockers: [],
      };
      return state as any;
    },
    loadEffectiveGSDPreferences: () => ({ preferences: {} }),
    preDispatchHealthGate: async () => ({ proceed: true, fixesApplied: [] }),
    syncProjectRootToWorktree: noop,
    checkResourcesStale: () => null,
    validateSessionLock: () => true,
    updateSessionLock: () => {
      record("updateSessionLock");
    },
    handleLostSessionLock: () => {
      record("handleLostSessionLock");
    },
    sendDesktopNotification: noop,
    setActiveMilestoneId: noop,
    pruneQueueOrder: noop,
    isInAutoWorktree: () => false,
    shouldUseWorktreeIsolation: () => false,
    mergeMilestoneToMain: () => ({ pushed: false }),
    teardownAutoWorktree: noop,
    createAutoWorktree: () => "/tmp/wt",
    captureIntegrationBranch: noop,
    getIsolationMode: () => "none",
    getCurrentBranch: () => "main",
    autoWorktreeBranch: () => "auto/M001",
    resolveMilestoneFile: () => null,
    reconcileMergeState: () => false,
    getLedger: () => null,
    getProjectTotals: () => ({ cost: 0 }),
    formatCost: (c: number) => `$${c.toFixed(2)}`,
    getBudgetAlertLevel: () => 0,
    getNewBudgetAlertLevel: () => 0,
    getBudgetEnforcementAction: () => "none",
    getManifestStatus: async () => null,
    collectSecretsFromManifest: async () => null,
    resolveDispatch: async () => {
      record("resolveDispatch");
      return {
        action: "dispatch" as const,
        unitType: "execute-task",
        unitId: "M001/S01/T01",
        prompt: "do the thing",
      };
    },
    runPreDispatchHooks: () => ({ firedHooks: [], action: "proceed" }),
    getPriorSliceCompletionBlocker: () => null,
    getMainBranch: () => "main",
    collectObservabilityWarnings: async () => [],
    buildObservabilityRepairBlock: () => null,
    closeoutUnit: async () => {},
    verifyExpectedArtifact: () => true,
    clearUnitRuntimeRecord: noop,
    writeUnitRuntimeRecord: noop,
    recordOutcome: noop,
    writeLock: noop,
    captureAvailableSkills: noop,
    ensurePreconditions: noop,
    updateSliceProgressCache: noop,
    selectAndApplyModel: async () => ({ routing: null }),
    startUnitSupervision: noop,
    getDeepDiagnostic: () => null,
    isDbAvailable: () => false,
    reorderForCaching: (p: string) => p,
    existsSync: () => false,
    readFileSync: () => "",
    atomicWriteSync: noop,
    GitServiceImpl: class {} as any,
    resolver: {
      // All three paths resolve to the same fake project directory.
      get workPath() {
        return projectDir;
      },
      get projectRoot() {
        return projectDir;
      },
      get lockPath() {
        return projectDir;
      },
      enterMilestone: noop,
      exitMilestone: noop,
      mergeAndExit: noop,
      mergeAndEnterNext: noop,
    } as any,
    postUnitPreVerification: async () => {
      record("postUnitPreVerification");
      return "continue" as const;
    },
    runPostUnitVerification: async () => {
      record("runPostUnitVerification");
      return "continue" as const;
    },
    postUnitPostVerification: async () => {
      record("postUnitPostVerification");
      return "continue" as const;
    },
    getSessionFile: () => "/tmp/session.json",
  };

  // Overrides win over defaults; callLog is attached last.
  return { ...baseDeps, ...overrides, callLog };
}
436
+
437
/**
 * Create a session stub for autoLoop tests. It carries more state than the
 * runUnit mock: dispatch/recovery counters, milestone fields, queues, and a
 * `cmdCtx` whose `newSession` always resolves with `{ cancelled: false }`.
 *
 * Fresh Map instances and a fresh `autoStartTime` are created on every call.
 * Entries in `overrides` replace the defaults of the same name.
 */
function makeLoopSession(overrides?: Partial<Record<string, unknown>>) {
  const defaults = {
    active: true,
    verbose: false,
    stepMode: false,
    paused: false,
    basePath: "/tmp/project",
    originalBasePath: "",
    currentMilestoneId: "M001",
    currentUnit: null,
    currentUnitRouting: null,
    completedUnits: [],
    resourceVersionOnStart: null,
    lastPromptCharCount: undefined,
    lastBaselineCharCount: undefined,
    lastBudgetAlertLevel: 0,
    pendingVerificationRetry: null,
    pendingCrashRecovery: null,
    pendingQuickTasks: [],
    sidecarQueue: [],
    autoModeStartModel: null,
    pendingResolve: null,
    pendingAgentEndQueue: [],
    unitDispatchCount: new Map<string, number>(),
    unitLifetimeDispatches: new Map<string, number>(),
    unitRecoveryCount: new Map<string, number>(),
    verificationRetryCount: new Map<string, number>(),
    gitService: null,
    autoStartTime: Date.now(),
    cmdCtx: {
      newSession: () => Promise.resolve({ cancelled: false }),
      getContextUsage: () => ({ percent: 10, tokens: 1000, limit: 10000 }),
    },
    clearTimers: () => {},
  };

  // Caller-supplied fields take precedence over the defaults.
  return { ...defaults, ...overrides } as any;
}
478
+
479
test("autoLoop exits when s.active is set to false", async (t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  const deps = makeMockDeps();

  // The session starts out inactive, so the loop should return immediately.
  await autoLoop(ctx, makeMockPi(), makeLoopSession({ active: false }), deps);

  // deriveState is the marker of a loop iteration: it must never appear.
  const iterated = deps.callLog.includes("deriveState");
  assert.ok(!iterated, "loop should not have iterated");
});
496
+
497
test("autoLoop exits on terminal complete state", async (t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};

  // Builds a fresh "complete" project state for every deriveState call.
  const completeState = () =>
    ({
      phase: "complete",
      activeMilestone: { id: "M001", title: "Test", status: "complete" },
      activeSlice: null,
      activeTask: null,
      registry: [{ id: "M001", status: "complete" }],
      blockers: [],
    }) as any;

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return completeState();
    },
  });

  await autoLoop(ctx, makeMockPi(), makeLoopSession(), deps);

  const logged = (name: string) => deps.callLog.includes(name);
  assert.ok(logged("deriveState"), "should have derived state");
  assert.ok(
    logged("stopAuto"),
    "should have called stopAuto for complete state",
  );
  // A completed project must not dispatch any unit.
  assert.ok(!logged("resolveDispatch"), "should not dispatch when complete");
});
532
+
533
test("autoLoop exits on terminal blocked state", async (t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};

  // Builds a fresh "blocked" project state for every deriveState call.
  const blockedState = () =>
    ({
      phase: "blocked",
      activeMilestone: { id: "M001", title: "Test", status: "active" },
      activeSlice: null,
      activeTask: null,
      registry: [{ id: "M001", status: "active" }],
      blockers: ["Missing API key"],
    }) as any;

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return blockedState();
    },
  });

  await autoLoop(ctx, makeMockPi(), makeLoopSession(), deps);

  const logged = (name: string) => deps.callLog.includes(name);
  assert.ok(logged("deriveState"), "should have derived state");
  assert.ok(
    logged("stopAuto"),
    "should have called stopAuto for blocked state",
  );
  assert.ok(!logged("resolveDispatch"), "should not dispatch when blocked");
});
567
+
568
test("autoLoop calls deriveState → resolveDispatch → runUnit in sequence", async (t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
  const pi = makeMockPi();

  let iterations = 0;
  const session = makeLoopSession();

  // Builds a fresh "executing" project state for every deriveState call.
  const executingState = () =>
    ({
      phase: "executing",
      activeMilestone: { id: "M001", title: "Test", status: "active" },
      activeSlice: { id: "S01", title: "Slice 1" },
      activeTask: { id: "T01" },
      registry: [{ id: "M001", status: "active" }],
      blockers: [],
    }) as any;

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return executingState();
    },
    resolveDispatch: async () => {
      deps.callLog.push("resolveDispatch");
      return {
        action: "dispatch" as const,
        unitType: "execute-task",
        unitId: "M001/S01/T01",
        prompt: "do the thing",
      };
    },
    postUnitPostVerification: async () => {
      deps.callLog.push("postUnitPostVerification");
      iterations += 1;
      // One full iteration is enough: switch the session off to end the loop.
      if (iterations >= 1) {
        session.active = false;
      }
      return "continue" as const;
    },
  });

  // autoLoop is started without awaiting; the unit it dispatches is settled
  // from outside via resolveAgentEnd.
  const running = autoLoop(ctx, pi, session, deps);

  // Leave the loop time to get as far as waiting on the unit.
  await new Promise((wake) => setTimeout(wake, 50));

  // Settle the first (and only) unit.
  resolveAgentEnd(makeEvent());

  await running;

  // Position of the first occurrence of each step in the call log.
  const firstCall = (name: string) => deps.callLog.indexOf(name);
  const deriveIdx = firstCall("deriveState");
  const dispatchIdx = firstCall("resolveDispatch");
  const preVerIdx = firstCall("postUnitPreVerification");
  const verIdx = firstCall("runPostUnitVerification");
  const postVerIdx = firstCall("postUnitPostVerification");

  // Expected order: deriveState, resolveDispatch, then the finalize callbacks.
  assert.ok(deriveIdx >= 0, "deriveState should have been called");
  assert.ok(
    dispatchIdx > deriveIdx,
    "resolveDispatch should come after deriveState",
  );
  assert.ok(
    preVerIdx > dispatchIdx,
    "postUnitPreVerification should come after resolveDispatch",
  );
  assert.ok(
    verIdx > preVerIdx,
    "runPostUnitVerification should come after pre-verification",
  );
  assert.ok(
    postVerIdx > verIdx,
    "postUnitPostVerification should come after verification",
  );
});
648
+
649
test("autoLoop handles verification retry by continuing loop", async () => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
  const pi = makeMockPi();

  // Number of times the verification stub has run. deriveState calls are
  // counted from deps.callLog below, so no separate counter is kept for them.
  let verifyCallCount = 0;
  const s = makeLoopSession();

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return {
        phase: "executing",
        activeMilestone: { id: "M001", title: "Test", status: "active" },
        activeSlice: { id: "S01", title: "Slice 1" },
        activeTask: { id: "T01" },
        registry: [{ id: "M001", status: "active" }],
        blockers: [],
      } as any;
    },
    runPostUnitVerification: async () => {
      verifyCallCount++;
      deps.callLog.push("runPostUnitVerification");
      if (verifyCallCount === 1) {
        // First call: simulate retry — set pendingVerificationRetry on session
        s.pendingVerificationRetry = {
          unitId: "M001/S01/T01",
          failureContext: "test failed: expected X got Y",
          attempt: 1,
        };
        return "retry" as const;
      }
      // Second call: pass
      return "continue" as const;
    },
    postUnitPostVerification: async () => {
      deps.callLog.push("postUnitPostVerification");
      // After the retry cycle completes, deactivate
      s.active = false;
      return "continue" as const;
    },
  });

  const loopPromise = autoLoop(ctx, pi, s, deps);

  // First iteration: runUnit → verification returns "retry" → loop continues
  await new Promise((r) => setTimeout(r, 50));
  resolveAgentEnd(makeEvent()); // resolve first unit

  // Second iteration: runUnit → verification returns "continue"
  await new Promise((r) => setTimeout(r, 50));
  resolveAgentEnd(makeEvent()); // resolve retry unit

  await loopPromise;

  // Two iterations mean deriveState must have been logged at least twice
  const deriveCount = deps.callLog.filter((c) => c === "deriveState").length;
  assert.ok(
    deriveCount >= 2,
    `deriveState should be called at least 2 times (got ${deriveCount})`,
  );

  // Verify verification was called twice
  assert.equal(
    verifyCallCount,
    2,
    "verification should have been called twice (once retry, once pass)",
  );
});
723
+
724
/**
 * Dispatch "stop": resolveDispatch is mocked to answer with a stop action.
 * The loop promise must settle without any agent_end being resolved, and the
 * call log must contain both "resolveDispatch" and "stopAuto".
 */
test("autoLoop handles dispatch stop action", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  const pi = makeMockPi();
  const s = makeLoopSession();

  const deps = makeMockDeps({
    resolveDispatch: async () => {
      deps.callLog.push("resolveDispatch");
      const stopAction = {
        action: "stop" as const,
        reason: "test-stop-reason",
        level: "info" as const,
      };
      return stopAction;
    },
  });

  await autoLoop(ctx, pi, s, deps);

  const expectedEntries = [
    ["resolveDispatch", "should have called resolveDispatch"],
    ["stopAuto", "should have stopped on dispatch stop action"],
  ] as const;
  for (const [entry, message] of expectedEntries) {
    assert.ok(deps.callLog.includes(entry), message);
  }
});
754
+
755
/**
 * Dispatch "skip": the resolveDispatch mock answers "skip" once and "stop"
 * afterwards. The test expects exactly two resolveDispatch entries and at
 * least two deriveState entries in the call log.
 */
test("autoLoop handles dispatch skip action by continuing", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  const pi = makeMockPi();
  const s = makeLoopSession();

  let dispatchCallCount = 0;
  const deps = makeMockDeps({
    resolveDispatch: async () => {
      dispatchCallCount += 1;
      deps.callLog.push("resolveDispatch");
      const isFirstCall = dispatchCallCount === 1;
      // The first answer skips; every later answer stops so the loop exits.
      return isFirstCall
        ? { action: "skip" as const }
        : {
            action: "stop" as const,
            reason: "done",
            level: "info" as const,
          };
    },
  });

  await autoLoop(ctx, pi, s, deps);

  // skip → re-derive → stop
  const dispatchTotal = deps.callLog.filter(
    (entry) => entry === "resolveDispatch",
  ).length;
  assert.equal(
    dispatchTotal,
    2,
    "resolveDispatch should be called twice (skip then stop)",
  );

  const deriveTotal = deps.callLog.filter(
    (entry) => entry === "deriveState",
  ).length;
  assert.ok(
    deriveTotal >= 2,
    "deriveState should be called at least twice (one per iteration)",
  );
});
795
+
796
/**
 * Sidecar queue: the first postUnitPostVerification call pushes one hook item
 * onto s.sidecarQueue; the second call deactivates the session. Two agent_end
 * events are resolved (main unit, then sidecar unit) and the mock must have
 * been invoked exactly twice.
 */
test("autoLoop drains sidecar queue after postUnitPostVerification enqueues items", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
  const pi = makeMockPi();
  const s = makeLoopSession();

  let postVerCallCount = 0;
  const deps = makeMockDeps({
    postUnitPostVerification: async () => {
      postVerCallCount += 1;
      deps.callLog.push("postUnitPostVerification");
      if (postVerCallCount !== 1) {
        // Sidecar unit finished: nothing more to do, let the loop end.
        s.active = false;
        return "continue" as const;
      }
      // Main unit finished: queue one sidecar hook for the loop to pick up.
      const sidecarItem = {
        kind: "hook" as const,
        unitType: "hook/review",
        unitId: "M001/S01/T01/review",
        prompt: "review the code",
      };
      s.sidecarQueue.push(sidecarItem);
      return "continue" as const;
    },
  });

  const pause = (ms: number) => new Promise((r) => setTimeout(r, ms));
  const loopPromise = autoLoop(ctx, pi, s, deps);

  // Let the main unit reach its await point, then complete it.
  await pause(50);
  resolveAgentEnd(makeEvent());

  // Same again for the sidecar unit.
  await pause(50);
  resolveAgentEnd(makeEvent());

  await loopPromise;

  // One call for the main unit plus one for the sidecar unit.
  assert.equal(
    postVerCallCount,
    2,
    "postUnitPostVerification should be called twice (main + sidecar)",
  );
});
845
+
846
/**
 * No active milestone: the session starts with currentMilestoneId null and
 * deriveState reports no milestone, slice, or task and an empty registry.
 * The loop must settle on its own and "stopAuto" must be in the call log.
 */
test("autoLoop exits when no active milestone found", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  const pi = makeMockPi();
  const s = makeLoopSession({ currentMilestoneId: null });

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      const emptyState = {
        phase: "executing",
        activeMilestone: null,
        activeSlice: null,
        activeTask: null,
        registry: [],
        blockers: [],
      };
      return emptyState as any;
    },
  });

  await autoLoop(ctx, pi, s, deps);

  const stopLogged = deps.callLog.includes("stopAuto");
  assert.ok(stopLogged, "should stop when no milestone and all complete");
});
875
+
876
// Structural check: the text of ../auto-loop.ts must declare the exported
// LoopDeps interface.
test("autoLoop exports LoopDeps type", async () => {
  const loopSourcePath = resolve(import.meta.dirname, "..", "auto-loop.ts");
  const loopSource = readFileSync(loopSourcePath, "utf-8");
  const declaresLoopDeps = loopSource.includes("export interface LoopDeps");
  assert.ok(declaresLoopDeps, "auto-loop.ts should export LoopDeps interface");
});
886
+
887
// Structural check: the text of ../auto-loop.ts must contain a
// `deps: LoopDeps` parameter declaration.
test("autoLoop signature accepts deps parameter", async () => {
  const loopSourcePath = resolve(import.meta.dirname, "..", "auto-loop.ts");
  const loopSource = readFileSync(loopSourcePath, "utf-8");
  const takesDeps = loopSource.includes("deps: LoopDeps");
  assert.ok(takesDeps, "autoLoop should accept a deps: LoopDeps parameter");
});
897
+
898
// Structural check: the text of ../auto-loop.ts must contain the literal
// `while (s.active)` loop header.
test("autoLoop contains while (s.active) loop", () => {
  const loopSourcePath = resolve(import.meta.dirname, "..", "auto-loop.ts");
  const loopSource = readFileSync(loopSourcePath, "utf-8");
  const hasActiveLoop = loopSource.includes("while (s.active)");
  assert.ok(hasActiveLoop, "autoLoop should contain a while (s.active) loop");
});
908
+
909
+ // ── T03: End-to-end wiring structural assertions ─────────────────────────────
910
+
911
// Structural check: ../auto-loop.ts must contain the three export
// declarations listed below, checked in order.
test("auto-loop.ts exports autoLoop, runUnit, and resolveAgentEnd", () => {
  const loopSource = readFileSync(
    resolve(import.meta.dirname, "..", "auto-loop.ts"),
    "utf-8",
  );

  const requiredExports = [
    ["export async function autoLoop", "must export autoLoop"],
    ["export async function runUnit", "must export runUnit"],
    ["export function resolveAgentEnd", "must export resolveAgentEnd"],
  ] as const;

  for (const [declaration, message] of requiredExports) {
    assert.ok(loopSource.includes(declaration), message);
  }
});
929
+
930
// Structural check: the startAuto function in ../auto.ts must contain a call
// to autoLoop(. The function body is approximated as the text from its export
// line up to the next "// ─── " banner comment, or 5000 characters when no
// banner follows.
test("auto.ts startAuto calls autoLoop (not dispatchNextUnit as first dispatch)", () => {
  const autoSource = readFileSync(
    resolve(import.meta.dirname, "..", "auto.ts"),
    "utf-8",
  );

  const start = autoSource.indexOf("export async function startAuto");
  assert.ok(start > -1, "startAuto must exist in auto.ts");

  // Search for the banner a little past the declaration itself.
  const nextBanner = autoSource.indexOf("\n// ─── ", start + 100);
  const stop = nextBanner > -1 ? nextBanner : start + 5000;
  const startAutoBody = autoSource.slice(start, stop);

  assert.ok(
    startAutoBody.includes("autoLoop("),
    "startAuto must call autoLoop() instead of dispatchNextUnit()",
  );
});
946
+
947
// Structural check: within the 10000 characters following the
// pi.on("agent_end" registration in ../index.ts, the handler must call
// resolveAgentEnd(event) and consult isSessionSwitchInFlight().
test("index.ts agent_end handler calls resolveAgentEnd (not handleAgentEnd)", () => {
  const indexSource = readFileSync(
    resolve(import.meta.dirname, "..", "index.ts"),
    "utf-8",
  );

  const handlerStart = indexSource.indexOf('pi.on("agent_end"');
  assert.ok(handlerStart > -1, "index.ts must have an agent_end handler");

  const handlerText = indexSource.slice(handlerStart, handlerStart + 10000);
  const requiredCalls = [
    [
      "resolveAgentEnd(event)",
      "agent_end success path must call resolveAgentEnd(event) instead of handleAgentEnd(ctx, pi)",
    ],
    [
      "isSessionSwitchInFlight()",
      "agent_end handler must ignore session-switch agent_end events from cmdCtx.newSession()",
    ],
  ] as const;

  for (const [call, message] of requiredCalls) {
    assert.ok(handlerText.includes(call), message);
  }
});
965
+
966
// Structural check: the signature of runPostUnitVerification in
// ../auto-verification.ts (text from its export line up to the
// "): Promise<VerificationResult>" return-type marker) must not mention the
// dispatchNextUnit or startDispatchGapWatchdog callbacks.
test("auto-verification.ts runPostUnitVerification does not take dispatchNextUnit callback", () => {
  const src = readFileSync(
    resolve(import.meta.dirname, "..", "auto-verification.ts"),
    "utf-8",
  );
  const fnIdx = src.indexOf("export async function runPostUnitVerification");
  assert.ok(fnIdx > -1, "runPostUnitVerification must exist");

  const sigEnd = src.indexOf("): Promise<VerificationResult>", fnIdx);
  // Guard: if the marker is missing, indexOf returns -1 and slice(fnIdx, -1)
  // would cover nearly the rest of the file, so the negative checks below
  // would be inspecting far more than the signature.
  assert.ok(
    sigEnd > -1,
    "runPostUnitVerification signature must end with '): Promise<VerificationResult>'",
  );

  const signature = src.slice(fnIdx, sigEnd);
  assert.ok(
    !signature.includes("dispatchNextUnit"),
    "runPostUnitVerification must not take a dispatchNextUnit callback parameter",
  );
  assert.ok(
    !signature.includes("startDispatchGapWatchdog"),
    "runPostUnitVerification must not take a startDispatchGapWatchdog callback parameter",
  );
});
984
+
985
// Structural check: ../auto-timeout-recovery.ts must not contain the text
// "await dispatchNextUnit" and must contain a resolveAgentEnd( call.
test("auto-timeout-recovery.ts calls resolveAgentEnd instead of dispatchNextUnit", () => {
  const recoverySource = readFileSync(
    resolve(import.meta.dirname, "..", "auto-timeout-recovery.ts"),
    "utf-8",
  );

  const awaitsDispatch = recoverySource.includes("await dispatchNextUnit");
  assert.ok(
    !awaitsDispatch,
    "auto-timeout-recovery.ts must not call dispatchNextUnit",
  );

  const resolvesAgentEnd = recoverySource.includes("resolveAgentEnd(");
  assert.ok(
    resolvesAgentEnd,
    "auto-timeout-recovery.ts must call resolveAgentEnd to re-iterate the loop on timeout recovery",
  );
});
999
+
1000
// Structural check: handleAgentEnd in ../auto.ts must call resolveAgentEnd(
// and must not mention dispatchNextUnit or either post-unit verification
// step. The function body is approximated as the text from its export line
// to the next "// ─── " banner, or 1000 characters when no banner follows.
test("handleAgentEnd in auto.ts is a thin wrapper calling resolveAgentEnd", () => {
  const autoSource = readFileSync(
    resolve(import.meta.dirname, "..", "auto.ts"),
    "utf-8",
  );

  const start = autoSource.indexOf("export async function handleAgentEnd");
  assert.ok(start > -1, "handleAgentEnd must exist");

  const nextBanner = autoSource.indexOf("\n// ─── ", start + 100);
  const stop = nextBanner > -1 ? nextBanner : start + 1000;
  const wrapperBody = autoSource.slice(start, stop);

  assert.ok(
    wrapperBody.includes("resolveAgentEnd("),
    "handleAgentEnd must call resolveAgentEnd",
  );

  // A thin wrapper has no dispatching of its own …
  assert.ok(
    !wrapperBody.includes("dispatchNextUnit"),
    "handleAgentEnd must not call dispatchNextUnit (it's now a thin wrapper)",
  );

  // … and no verification steps either.
  const mentionsVerification =
    wrapperBody.includes("postUnitPreVerification") ||
    wrapperBody.includes("postUnitPostVerification");
  assert.ok(
    !mentionsVerification,
    "handleAgentEnd must not contain verification logic (moved to autoLoop)",
  );
});
1025
+
1026
+ // ── Stuck counter tests ──────────────────────────────────────────────────────
1027
+
1028
/**
 * Stuck detection: deriveState and resolveDispatch always describe the same
 * unit (execute-task, M001/S01/T01). The stopAuto mock captures the reason it
 * is given; the test expects that reason to mention "Stuck", the unit type,
 * and the unit id.
 */
test("stuck counter: stops when deriveState returns same unit 5 consecutive times", async () => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.ui.notify = () => {};
  const pi = makeMockPi();
  const s = makeLoopSession();

  let stopReason = "";
  const deps = makeMockDeps({
    deriveState: async () => {
      const sameState = {
        phase: "executing",
        activeMilestone: { id: "M001", title: "Test", status: "active" },
        activeSlice: { id: "S01", title: "Slice 1" },
        activeTask: { id: "T01" },
        registry: [{ id: "M001", status: "active" }],
        blockers: [],
      };
      return sameState as any;
    },
    resolveDispatch: async () => {
      return {
        action: "dispatch" as const,
        unitType: "execute-task",
        unitId: "M001/S01/T01",
        prompt: "do the thing",
      };
    },
    stopAuto: async (_ctx?: any, _pi?: any, reason?: string) => {
      deps.callLog.push("stopAuto");
      stopReason = reason ?? "";
      s.active = false;
    },
  });

  const loopPromise = autoLoop(ctx, pi, s, deps);

  // Expected trace (as described by the test's author; autoLoop itself is not
  // shown here):
  //   iteration 1      records the unit key, sameUnitCount = 0
  //   iterations 2–5   same key again, sameUnitCount = 1 … 4
  //   iteration 6      sameUnitCount = 5 → stopAuto, before any unit runs
  // Iterations 1–5 each run a unit, so five agent_end events are resolved.
  const unitsToFinish = 5;
  for (let finished = 0; finished < unitsToFinish; finished += 1) {
    await new Promise((r) => setTimeout(r, 30));
    resolveAgentEnd(makeEvent());
  }

  await loopPromise;

  assert.ok(
    deps.callLog.includes("stopAuto"),
    "stopAuto should have been called",
  );
  assert.ok(
    stopReason.includes("Stuck"),
    `stop reason should mention 'Stuck', got: ${stopReason}`,
  );
  assert.ok(
    stopReason.includes("execute-task"),
    "stop reason should include unitType",
  );
  assert.ok(
    stopReason.includes("M001/S01/T01"),
    "stop reason should include unitId",
  );
});
1100
+
1101
/**
 * Stuck counter reset: the first three deriveState calls report task T01 and
 * every later call reports T02; resolveDispatch mirrors that choice. The loop
 * is ended by postUnitPostVerification once deriveState has been called four
 * times. stopAuto must never be invoked.
 */
test("stuck counter: resets when deriveState returns a different unit", async () => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.ui.notify = () => {};
  const pi = makeMockPi();
  const s = makeLoopSession();

  let deriveCallCount = 0;
  let stopCalled = false;

  // Task id that goes with the current number of deriveState calls.
  const currentTaskId = () => (deriveCallCount <= 3 ? "T01" : "T02");

  const deps = makeMockDeps({
    deriveState: async () => {
      deriveCallCount += 1;
      deps.callLog.push("deriveState");
      return {
        phase: "executing",
        activeMilestone: { id: "M001", title: "Test", status: "active" },
        activeSlice: { id: "S01", title: "Slice 1" },
        activeTask: { id: currentTaskId() },
        registry: [{ id: "M001", status: "active" }],
        blockers: [],
      } as any;
    },
    resolveDispatch: async () => {
      deps.callLog.push("resolveDispatch");
      // Dispatch the unit that matches what deriveState last reported.
      return {
        action: "dispatch" as const,
        unitType: "execute-task",
        unitId: `M001/S01/${currentTaskId()}`,
        prompt: "do the thing",
      };
    },
    stopAuto: async (_ctx?: any, _pi?: any, _reason?: string) => {
      deps.callLog.push("stopAuto");
      stopCalled = true;
      s.active = false;
    },
    postUnitPostVerification: async () => {
      deps.callLog.push("postUnitPostVerification");
      // Once the unit has changed (4th derive onwards), let the loop end.
      const unitHasChanged = deriveCallCount >= 4;
      if (unitHasChanged) {
        s.active = false;
      }
      return "continue" as const;
    },
  });

  const loopPromise = autoLoop(ctx, pi, s, deps);

  // One agent_end per iteration, iterations 1–4.
  const unitsToFinish = 4;
  for (let finished = 0; finished < unitsToFinish; finished += 1) {
    await new Promise((r) => setTimeout(r, 30));
    resolveAgentEnd(makeEvent());
  }

  await loopPromise;

  // Deriving T02 is expected to reset the counter, so no stuck stop happens.
  assert.ok(
    !stopCalled,
    "stopAuto should NOT have been called — counter reset on unit change",
  );
  assert.ok(
    deriveCallCount >= 4,
    `deriveState should have been called at least 4 times (got ${deriveCallCount})`,
  );
});
1172
+
1173
/**
 * Stuck counter vs. verification retries: the same unit is derived and
 * dispatched every time, and the verification mock asks for a retry on its
 * first three calls before deactivating the session on the fourth. Any reason
 * passed to stopAuto must not mention "Stuck", and verification must have run
 * exactly four times.
 */
test("stuck counter: does not increment during verification retry", async () => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.ui.notify = () => {};
  const pi = makeMockPi();
  const s = makeLoopSession();

  let verifyCallCount = 0;
  let stopReason = "";

  const deps = makeMockDeps({
    deriveState: async () => {
      const sameState = {
        phase: "executing",
        activeMilestone: { id: "M001", title: "Test", status: "active" },
        activeSlice: { id: "S01", title: "Slice 1" },
        activeTask: { id: "T01" },
        registry: [{ id: "M001", status: "active" }],
        blockers: [],
      };
      return sameState as any;
    },
    resolveDispatch: async () => {
      return {
        action: "dispatch" as const,
        unitType: "execute-task",
        unitId: "M001/S01/T01",
        prompt: "do the thing",
      };
    },
    runPostUnitVerification: async () => {
      verifyCallCount += 1;
      deps.callLog.push("runPostUnitVerification");
      if (verifyCallCount > 3) {
        // Fourth call: the three retries are used up, finish gracefully.
        s.active = false;
        return "continue" as const;
      }
      // Calls 1–3: flag a pending retry on the session; this is what is
      // expected to keep the stuck counter from incrementing.
      s.pendingVerificationRetry = {
        unitId: "M001/S01/T01",
        failureContext: "test failed",
        attempt: verifyCallCount,
      };
      return "retry" as const;
    },
    stopAuto: async (_ctx?: any, _pi?: any, reason?: string) => {
      deps.callLog.push("stopAuto");
      stopReason = reason ?? "";
      s.active = false;
    },
  });

  const loopPromise = autoLoop(ctx, pi, s, deps);

  // Four units run in total: the initial attempt plus three retries.
  const unitsToFinish = 4;
  for (let finished = 0; finished < unitsToFinish; finished += 1) {
    await new Promise((r) => setTimeout(r, 30));
    resolveAgentEnd(makeEvent());
  }

  await loopPromise;

  // The same unit was derived four times, but retries should not count
  // towards the stuck threshold.
  assert.ok(
    !stopReason.includes("Stuck"),
    `stuck counter should not fire during verification retries, got: ${stopReason}`,
  );
  assert.equal(
    verifyCallCount,
    4,
    "verification should have been called 4 times (1 initial + 3 retries)",
  );
});
1246
+
1247
// Structural check: ../auto-loop.ts must contain the two quoted debug phase
// names and the sameUnitCount identifier used for stuck detection.
test("stuck counter: logs debug output with stuck-detected phase", () => {
  const loopSource = readFileSync(
    resolve(import.meta.dirname, "..", "auto-loop.ts"),
    "utf-8",
  );

  const requiredSnippets = [
    [
      '"stuck-detected"',
      "auto-loop.ts must log phase: 'stuck-detected' when stuck counter fires",
    ],
    [
      '"stuck-counter-reset"',
      "auto-loop.ts must log phase: 'stuck-counter-reset' when counter resets on new unit",
    ],
    [
      "sameUnitCount",
      "auto-loop.ts must track sameUnitCount for stuck detection",
    ],
  ] as const;

  for (const [snippet, message] of requiredSnippets) {
    assert.ok(loopSource.includes(snippet), message);
  }
});
1267
+
1268
+ // ── Lifecycle test (S05/T02) ─────────────────────────────────────────────────
1269
+
1270
/**
 * Lifecycle walk-through: five consecutive deriveState calls report the
 * phases researching → planning → executing → verifying → completing, and
 * resolveDispatch hands out the matching unit for each. The sixth deriveState
 * call deactivates the session.
 *
 * Assertions cover: call counts, the exact order of dispatched unit types,
 * and that every resolveDispatch entry in the call log has a deriveState
 * entry since the previous resolveDispatch.
 */
test("autoLoop lifecycle: advances through research → plan → execute → verify → complete across iterations", async () => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.ui.notify = () => {};
  ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
  const pi = makeMockPi();
  const s = makeLoopSession();

  let deriveCallCount = 0;
  let dispatchCallCount = 0;
  const dispatchedUnitTypes: string[] = [];

  // One entry per deriveState call, in order.
  const phases = [
    // Call 1: researching → dispatches research-slice
    {
      phase: "researching",
      activeSlice: { id: "S01", title: "Research Slice" },
      activeTask: null,
    },
    // Call 2: planning → dispatches plan-slice
    {
      phase: "planning",
      activeSlice: { id: "S01", title: "Plan Slice" },
      activeTask: null,
    },
    // Call 3: executing → dispatches execute-task
    {
      phase: "executing",
      activeSlice: { id: "S01", title: "Execute Slice" },
      activeTask: { id: "T01" },
    },
    // Call 4: verifying → dispatches verify-slice
    {
      phase: "verifying",
      activeSlice: { id: "S01", title: "Verify Slice" },
      activeTask: null,
    },
    // Call 5: completing → dispatches complete-slice
    {
      phase: "completing",
      activeSlice: { id: "S01", title: "Complete Slice" },
      activeTask: null,
    },
  ];

  // One entry per resolveDispatch call, in order.
  const dispatches = [
    { unitType: "research-slice", unitId: "M001/S01", prompt: "research" },
    { unitType: "plan-slice", unitId: "M001/S01", prompt: "plan" },
    { unitType: "execute-task", unitId: "M001/S01/T01", prompt: "execute" },
    { unitType: "verify-slice", unitId: "M001/S01", prompt: "verify" },
    { unitType: "complete-slice", unitId: "M001/S01", prompt: "complete" },
  ];

  const deps = makeMockDeps({
    deriveState: async () => {
      deriveCallCount++;
      deps.callLog.push("deriveState");

      if (deriveCallCount > phases.length) {
        // 6th+ call: deactivate to exit the loop
        s.active = false;
        return {
          phase: "complete",
          activeMilestone: { id: "M001", title: "Test", status: "complete" },
          activeSlice: null,
          activeTask: null,
          registry: [{ id: "M001", status: "complete" }],
          blockers: [],
        } as any;
      }

      const p = phases[deriveCallCount - 1];
      return {
        phase: p.phase,
        activeMilestone: { id: "M001", title: "Test", status: "active" },
        activeSlice: p.activeSlice,
        activeTask: p.activeTask,
        registry: [{ id: "M001", status: "active" }],
        blockers: [],
      } as any;
    },
    resolveDispatch: async () => {
      dispatchCallCount++;
      deps.callLog.push("resolveDispatch");

      if (dispatchCallCount > dispatches.length) {
        // Safety net: not expected to be reached, but stop rather than
        // index past the end of the table.
        return {
          action: "stop" as const,
          reason: "done",
          level: "info" as const,
        };
      }

      const d = dispatches[dispatchCallCount - 1];
      dispatchedUnitTypes.push(d.unitType);
      return {
        action: "dispatch" as const,
        unitType: d.unitType,
        unitId: d.unitId,
        prompt: d.prompt,
      };
    },
    postUnitPostVerification: async () => {
      deps.callLog.push("postUnitPostVerification");
      return "continue" as const;
    },
  });

  const loopPromise = autoLoop(ctx, pi, s, deps);

  // Resolve each iteration's agent_end — 5 iterations, each dispatches a unit
  for (let i = 0; i < 5; i++) {
    await new Promise((r) => setTimeout(r, 30));
    resolveAgentEnd(makeEvent());
  }

  await loopPromise;

  // deriveState runs at least once per iteration.
  assert.ok(
    deriveCallCount >= 5,
    `deriveState should be called at least 5 times (got ${deriveCallCount})`,
  );

  // Every lifecycle unit type was dispatched (individual checks give a
  // pointed message before the exact-sequence check at the end).
  assert.ok(
    dispatchedUnitTypes.includes("research-slice"),
    `should have dispatched research-slice, got: ${dispatchedUnitTypes.join(", ")}`,
  );
  assert.ok(
    dispatchedUnitTypes.includes("plan-slice"),
    `should have dispatched plan-slice, got: ${dispatchedUnitTypes.join(", ")}`,
  );
  assert.ok(
    dispatchedUnitTypes.includes("execute-task"),
    `should have dispatched execute-task, got: ${dispatchedUnitTypes.join(", ")}`,
  );
  assert.ok(
    dispatchedUnitTypes.includes("verify-slice"),
    `should have dispatched verify-slice, got: ${dispatchedUnitTypes.join(", ")}`,
  );
  assert.ok(
    dispatchedUnitTypes.includes("complete-slice"),
    `should have dispatched complete-slice, got: ${dispatchedUnitTypes.join(", ")}`,
  );

  // Call-log totals for the two mocked steps.
  const deriveEntries = deps.callLog.filter((c) => c === "deriveState");
  const dispatchEntries = deps.callLog.filter((c) => c === "resolveDispatch");
  assert.ok(
    deriveEntries.length >= 5,
    `callLog should have at least 5 deriveState entries (got ${deriveEntries.length})`,
  );
  assert.ok(
    dispatchEntries.length >= 5,
    `callLog should have at least 5 resolveDispatch entries (got ${dispatchEntries.length})`,
  );

  // Interleaving: each resolveDispatch must be preceded by a deriveState that
  // happened after the previous resolveDispatch. The earlier version of this
  // check only counted dispatches and could not fail on a bad ordering.
  let derivedSinceLastDispatch = false;
  let dispatchSeen = 0;
  for (const entry of deps.callLog) {
    if (entry === "deriveState") {
      derivedSinceLastDispatch = true;
    } else if (entry === "resolveDispatch") {
      dispatchSeen++;
      assert.ok(
        derivedSinceLastDispatch,
        `resolveDispatch call #${dispatchSeen} should follow a deriveState call`,
      );
      derivedSinceLastDispatch = false;
    }
  }
  assert.ok(dispatchSeen > 0, "resolveDispatch should appear in callLog");

  // Exact order of dispatched unit types.
  assert.deepEqual(
    dispatchedUnitTypes,
    [
      "research-slice",
      "plan-slice",
      "execute-task",
      "verify-slice",
      "complete-slice",
    ],
    "dispatched unit types should follow the full lifecycle sequence",
  );
});