gsd-pi 2.23.0 → 2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. package/dist/cli.js +12 -3
  2. package/dist/headless.d.ts +4 -0
  3. package/dist/headless.js +118 -10
  4. package/dist/help-text.js +22 -7
  5. package/dist/resource-loader.js +64 -9
  6. package/dist/resources/extensions/gsd/auto-dispatch.ts +51 -2
  7. package/dist/resources/extensions/gsd/auto-prompts.ts +73 -0
  8. package/dist/resources/extensions/gsd/auto-recovery.ts +41 -2
  9. package/dist/resources/extensions/gsd/auto-worktree.ts +15 -3
  10. package/dist/resources/extensions/gsd/auto.ts +123 -41
  11. package/dist/resources/extensions/gsd/commands.ts +176 -10
  12. package/dist/resources/extensions/gsd/complexity.ts +1 -0
  13. package/dist/resources/extensions/gsd/dashboard-overlay.ts +38 -0
  14. package/dist/resources/extensions/gsd/doctor.ts +56 -11
  15. package/dist/resources/extensions/gsd/exit-command.ts +2 -2
  16. package/dist/resources/extensions/gsd/gitignore.ts +1 -0
  17. package/dist/resources/extensions/gsd/guided-flow.ts +75 -0
  18. package/dist/resources/extensions/gsd/index.ts +34 -1
  19. package/dist/resources/extensions/gsd/parallel-eligibility.ts +233 -0
  20. package/dist/resources/extensions/gsd/parallel-merge.ts +156 -0
  21. package/dist/resources/extensions/gsd/parallel-orchestrator.ts +496 -0
  22. package/dist/resources/extensions/gsd/preferences.ts +65 -1
  23. package/dist/resources/extensions/gsd/prompts/discuss-headless.md +86 -0
  24. package/dist/resources/extensions/gsd/prompts/research-slice.md +1 -1
  25. package/dist/resources/extensions/gsd/prompts/validate-milestone.md +40 -61
  26. package/dist/resources/extensions/gsd/provider-error-pause.ts +29 -2
  27. package/dist/resources/extensions/gsd/session-status-io.ts +197 -0
  28. package/dist/resources/extensions/gsd/state.ts +72 -30
  29. package/dist/resources/extensions/gsd/tests/agent-end-provider-error.test.ts +81 -0
  30. package/dist/resources/extensions/gsd/tests/auto-budget-alerts.test.ts +20 -3
  31. package/dist/resources/extensions/gsd/tests/auto-preflight.test.ts +1 -0
  32. package/dist/resources/extensions/gsd/tests/auto-recovery.test.ts +202 -2
  33. package/dist/resources/extensions/gsd/tests/auto-worktree-milestone-merge.test.ts +34 -0
  34. package/dist/resources/extensions/gsd/tests/complete-milestone.test.ts +8 -1
  35. package/dist/resources/extensions/gsd/tests/derive-state-db.test.ts +9 -15
  36. package/dist/resources/extensions/gsd/tests/derive-state-deps.test.ts +9 -0
  37. package/dist/resources/extensions/gsd/tests/derive-state-draft.test.ts +8 -0
  38. package/dist/resources/extensions/gsd/tests/derive-state.test.ts +14 -0
  39. package/dist/resources/extensions/gsd/tests/integration-mixed-milestones.test.ts +8 -0
  40. package/dist/resources/extensions/gsd/tests/migrate-writer-integration.test.ts +5 -5
  41. package/dist/resources/extensions/gsd/tests/parallel-orchestration.test.ts +656 -0
  42. package/dist/resources/extensions/gsd/tests/parallel-workers-multi-milestone-e2e.test.ts +354 -0
  43. package/dist/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts +1 -0
  44. package/dist/resources/extensions/gsd/tests/validate-milestone.test.ts +316 -0
  45. package/dist/resources/extensions/gsd/tests/worker-registry.test.ts +148 -0
  46. package/dist/resources/extensions/gsd/types.ts +15 -1
  47. package/dist/resources/extensions/subagent/index.ts +5 -0
  48. package/dist/resources/extensions/subagent/worker-registry.ts +99 -0
  49. package/dist/update-check.d.ts +9 -0
  50. package/dist/update-check.js +97 -0
  51. package/package.json +6 -1
  52. package/packages/pi-ai/dist/providers/anthropic.d.ts.map +1 -1
  53. package/packages/pi-ai/dist/providers/anthropic.js +16 -7
  54. package/packages/pi-ai/dist/providers/anthropic.js.map +1 -1
  55. package/packages/pi-ai/dist/providers/azure-openai-responses.d.ts.map +1 -1
  56. package/packages/pi-ai/dist/providers/azure-openai-responses.js +12 -4
  57. package/packages/pi-ai/dist/providers/azure-openai-responses.js.map +1 -1
  58. package/packages/pi-ai/dist/providers/google-vertex.d.ts.map +1 -1
  59. package/packages/pi-ai/dist/providers/google-vertex.js +21 -9
  60. package/packages/pi-ai/dist/providers/google-vertex.js.map +1 -1
  61. package/packages/pi-ai/dist/providers/openai-completions.d.ts.map +1 -1
  62. package/packages/pi-ai/dist/providers/openai-completions.js +12 -4
  63. package/packages/pi-ai/dist/providers/openai-completions.js.map +1 -1
  64. package/packages/pi-ai/dist/providers/openai-responses.d.ts.map +1 -1
  65. package/packages/pi-ai/dist/providers/openai-responses.js +12 -4
  66. package/packages/pi-ai/dist/providers/openai-responses.js.map +1 -1
  67. package/packages/pi-ai/src/providers/anthropic.ts +21 -8
  68. package/packages/pi-ai/src/providers/azure-openai-responses.ts +16 -4
  69. package/packages/pi-ai/src/providers/google-vertex.ts +32 -17
  70. package/packages/pi-ai/src/providers/openai-completions.ts +16 -4
  71. package/packages/pi-ai/src/providers/openai-responses.ts +16 -4
  72. package/packages/pi-coding-agent/dist/core/agent-session.js +1 -1
  73. package/packages/pi-coding-agent/dist/core/agent-session.js.map +1 -1
  74. package/packages/pi-coding-agent/dist/core/settings-manager.js +1 -1
  75. package/packages/pi-coding-agent/dist/core/settings-manager.js.map +1 -1
  76. package/packages/pi-coding-agent/src/core/agent-session.ts +1 -1
  77. package/packages/pi-coding-agent/src/core/settings-manager.ts +2 -2
  78. package/scripts/postinstall.js +7 -109
  79. package/src/resources/extensions/gsd/auto-dispatch.ts +51 -2
  80. package/src/resources/extensions/gsd/auto-prompts.ts +73 -0
  81. package/src/resources/extensions/gsd/auto-recovery.ts +41 -2
  82. package/src/resources/extensions/gsd/auto-worktree.ts +15 -3
  83. package/src/resources/extensions/gsd/auto.ts +123 -41
  84. package/src/resources/extensions/gsd/commands.ts +176 -10
  85. package/src/resources/extensions/gsd/complexity.ts +1 -0
  86. package/src/resources/extensions/gsd/dashboard-overlay.ts +38 -0
  87. package/src/resources/extensions/gsd/doctor.ts +56 -11
  88. package/src/resources/extensions/gsd/exit-command.ts +2 -2
  89. package/src/resources/extensions/gsd/gitignore.ts +1 -0
  90. package/src/resources/extensions/gsd/guided-flow.ts +75 -0
  91. package/src/resources/extensions/gsd/index.ts +34 -1
  92. package/src/resources/extensions/gsd/parallel-eligibility.ts +233 -0
  93. package/src/resources/extensions/gsd/parallel-merge.ts +156 -0
  94. package/src/resources/extensions/gsd/parallel-orchestrator.ts +496 -0
  95. package/src/resources/extensions/gsd/preferences.ts +65 -1
  96. package/src/resources/extensions/gsd/prompts/discuss-headless.md +86 -0
  97. package/src/resources/extensions/gsd/prompts/research-slice.md +1 -1
  98. package/src/resources/extensions/gsd/prompts/validate-milestone.md +40 -61
  99. package/src/resources/extensions/gsd/provider-error-pause.ts +29 -2
  100. package/src/resources/extensions/gsd/session-status-io.ts +197 -0
  101. package/src/resources/extensions/gsd/state.ts +72 -30
  102. package/src/resources/extensions/gsd/tests/agent-end-provider-error.test.ts +81 -0
  103. package/src/resources/extensions/gsd/tests/auto-budget-alerts.test.ts +20 -3
  104. package/src/resources/extensions/gsd/tests/auto-preflight.test.ts +1 -0
  105. package/src/resources/extensions/gsd/tests/auto-recovery.test.ts +202 -2
  106. package/src/resources/extensions/gsd/tests/auto-worktree-milestone-merge.test.ts +34 -0
  107. package/src/resources/extensions/gsd/tests/complete-milestone.test.ts +8 -1
  108. package/src/resources/extensions/gsd/tests/derive-state-db.test.ts +9 -15
  109. package/src/resources/extensions/gsd/tests/derive-state-deps.test.ts +9 -0
  110. package/src/resources/extensions/gsd/tests/derive-state-draft.test.ts +8 -0
  111. package/src/resources/extensions/gsd/tests/derive-state.test.ts +14 -0
  112. package/src/resources/extensions/gsd/tests/integration-mixed-milestones.test.ts +8 -0
  113. package/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts +5 -5
  114. package/src/resources/extensions/gsd/tests/parallel-orchestration.test.ts +656 -0
  115. package/src/resources/extensions/gsd/tests/parallel-workers-multi-milestone-e2e.test.ts +354 -0
  116. package/src/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts +1 -0
  117. package/src/resources/extensions/gsd/tests/validate-milestone.test.ts +316 -0
  118. package/src/resources/extensions/gsd/tests/worker-registry.test.ts +148 -0
  119. package/src/resources/extensions/gsd/types.ts +15 -1
  120. package/src/resources/extensions/subagent/index.ts +5 -0
  121. package/src/resources/extensions/subagent/worker-registry.ts +99 -0
@@ -0,0 +1,354 @@
1
+ /**
2
+ * E2E test: Parallel workers across multiple milestones.
3
+ *
4
+ * Validates the full lifecycle of the worker registry + metrics + budget
5
+ * alerting across multiple milestone contexts. Uses real filesystem fixtures
6
+ * and the actual metrics/worker-registry modules (no mocking).
7
+ *
8
+ * Covers:
9
+ * - Worker registry tracking across parallel batches
10
+ * - Metrics ledger accumulation across milestones
11
+ * - Budget alert level transitions including the 80% threshold
12
+ * - Dashboard data aggregation with parallel worker context
13
+ * - Cost projection with budget ceiling awareness
14
+ */
15
+
16
+ import { mkdtempSync, mkdirSync, rmSync, writeFileSync, readFileSync } from 'node:fs';
17
+ import { join } from 'node:path';
18
+ import { tmpdir } from 'node:os';
19
+
20
+ import { createTestContext } from './test-helpers.ts';
21
+ import {
22
+ registerWorker,
23
+ updateWorker,
24
+ getActiveWorkers,
25
+ getWorkerBatches,
26
+ hasActiveWorkers,
27
+ resetWorkerRegistry,
28
+ } from '../../subagent/worker-registry.ts';
29
+ import {
30
+ getBudgetAlertLevel,
31
+ getNewBudgetAlertLevel,
32
+ getBudgetEnforcementAction,
33
+ } from '../auto.ts';
34
+ import {
35
+ type UnitMetrics,
36
+ type MetricsLedger,
37
+ getProjectTotals,
38
+ aggregateByPhase,
39
+ aggregateBySlice,
40
+ formatCost,
41
+ formatCostProjection,
42
+ getAverageCostPerUnitType,
43
+ predictRemainingCost,
44
+ } from '../metrics.ts';
45
+
46
+ const { assertEq, assertTrue, assertMatch, report } = createTestContext();
47
+
48
+ // ─── Fixture helpers ──────────────────────────────────────────────────────────
49
+
50
/**
 * Creates a fresh temporary project root for one scenario.
 *
 * The directory is created under the OS temp dir with the
 * `gsd-e2e-parallel-` prefix and is pre-populated with an empty
 * `.gsd/milestones` tree.
 *
 * @returns Path of the newly created fixture root.
 */
function createFixtureBase(): string {
  const fixtureRoot = mkdtempSync(join(tmpdir(), 'gsd-e2e-parallel-'));
  const milestonesDir = join(fixtureRoot, '.gsd', 'milestones');
  mkdirSync(milestonesDir, { recursive: true });
  return fixtureRoot;
}
55
+
56
/**
 * Serializes a metrics ledger to `<base>/.gsd/metrics.json` as
 * 2-space-indented JSON.
 *
 * @param base   Fixture root; its `.gsd` directory must already exist.
 * @param ledger Ledger object to persist.
 */
function writeMetricsLedger(base: string, ledger: MetricsLedger): void {
  const ledgerPath = join(base, '.gsd', 'metrics.json');
  const serialized = JSON.stringify(ledger, null, 2);
  writeFileSync(ledgerPath, serialized);
}
59
+
60
/**
 * Loads and parses the metrics ledger stored at `<base>/.gsd/metrics.json`.
 *
 * @param base Fixture root containing the ledger file.
 * @returns The parsed JSON content (not validated against the ledger type).
 */
function readMetricsLedger(base: string): MetricsLedger {
  const ledgerPath = join(base, '.gsd', 'metrics.json');
  const raw = readFileSync(ledgerPath, 'utf-8');
  return JSON.parse(raw);
}
63
+
64
/**
 * Builds a unit-metrics record for tests.
 *
 * Starts from a fixed "execute-task" template (timestamps are taken from the
 * current clock, with the start five seconds before the finish sample) and
 * lets the caller replace any field.
 *
 * @param overrides Fields that should replace the template values.
 * @returns A new record; override values win over the template.
 */
function makeUnit(overrides: Partial<UnitMetrics> = {}): UnitMetrics {
  const template: UnitMetrics = {
    type: "execute-task",
    id: "M001/S01/T01",
    model: "claude-sonnet-4-20250514",
    startedAt: Date.now() - 5000,
    finishedAt: Date.now(),
    tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100, total: 1800 },
    cost: 0.05,
    toolCalls: 3,
    assistantMessages: 2,
    userMessages: 1,
  };
  return { ...template, ...overrides };
}
79
+
80
/**
 * Removes a fixture directory tree.
 *
 * `force: true` makes this a no-op when the path no longer exists.
 *
 * @param base Fixture root to delete.
 */
function cleanup(base: string): void {
  const removalOptions = { recursive: true, force: true };
  rmSync(base, removalOptions);
}
83
+
84
+ // ─── E2E: Parallel workers across M001 and M002 ──────────────────────────────
85
+
86
console.log("\n=== E2E: Parallel workers across milestones ===");

// Scenario: one batch of three workers is registered and completed for M001,
// then a second batch of two workers is registered for M002 where one worker
// ends as "failed". Registry queries are checked after each step.
{
  resetWorkerRegistry();
  const base = createFixtureBase();

  // try/finally guarantees the temp fixture is removed even when a step throws.
  try {
    // Create milestone directories
    mkdirSync(join(base, '.gsd', 'milestones', 'M001'), { recursive: true });
    mkdirSync(join(base, '.gsd', 'milestones', 'M002'), { recursive: true });

    // Simulate M001 parallel workers (batch 1)
    const batch1Id = "batch-m001";
    const w1 = registerWorker("scout", "Explore M001 codebase", 0, 3, batch1Id);
    const w2 = registerWorker("researcher", "Research M001 APIs", 1, 3, batch1Id);
    const w3 = registerWorker("worker", "Implement M001 feature", 2, 3, batch1Id);

    assertEq(getActiveWorkers().length, 3, "M001: 3 parallel workers registered");
    assertTrue(hasActiveWorkers(), "M001: has active workers");

    const batches1 = getWorkerBatches();
    assertEq(batches1.size, 1, "M001: single batch");
    // Optional chaining: a missing batch is reported as a failed assertion
    // rather than aborting the script with a TypeError.
    assertEq(batches1.get(batch1Id)?.length, 3, "M001: batch has 3 workers");

    // Complete M001 workers
    updateWorker(w1, "completed");
    updateWorker(w2, "completed");
    updateWorker(w3, "completed");
    assertTrue(!hasActiveWorkers(), "M001: no active workers after completion");

    // Simulate M002 parallel workers (batch 2) — overlapping with M001 cleanup
    const batch2Id = "batch-m002";
    const w4 = registerWorker("scout", "Explore M002 codebase", 0, 2, batch2Id);
    const w5 = registerWorker("worker", "Implement M002 feature", 1, 2, batch2Id);

    assertTrue(hasActiveWorkers(), "M002: has active workers");
    const batches2 = getWorkerBatches();
    // M001 workers may still be in cleanup window (5s timeout), M002 workers are active
    assertTrue(batches2.has(batch2Id), "M002: batch exists");
    assertEq(batches2.get(batch2Id)?.length, 2, "M002: batch has 2 workers");

    // One worker fails in M002
    updateWorker(w4, "completed");
    updateWorker(w5, "failed");
    assertTrue(!hasActiveWorkers(), "M002: no active workers after all finish");

    // Verify worker statuses reflect correctly.
    // NOTE(review): these checks only run when finished M002 workers are still
    // returned by getActiveWorkers(); otherwise the block is skipped — confirm
    // that is the intended coverage.
    const allWorkers = getActiveWorkers();
    const m002Workers = allWorkers.filter(w => w.batchId === batch2Id);
    if (m002Workers.length > 0) {
      const failedWorker = m002Workers.find(w => w.status === "failed");
      assertTrue(failedWorker !== undefined, "M002: failed worker tracked");
      assertEq(failedWorker?.agent, "worker", "M002: failed worker is 'worker'");
    }
  } finally {
    cleanup(base);
  }
}
142
+
143
+ // ─── E2E: Metrics accumulation across milestones ──────────────────────────────
144
+
145
console.log("\n=== E2E: Metrics across milestones ===");

// Scenario: a 13-unit ledger spanning M001 and M002 is written to disk, read
// back, and fed through the totals / phase / slice aggregations and the cost
// projection formatter.
{
  const base = createFixtureBase();

  // try/finally guarantees the temp fixture is removed even when a step throws.
  try {
    // Build a ledger spanning two milestones
    const ledger: MetricsLedger = {
      version: 1,
      projectStartedAt: Date.now() - 60000,
      units: [
        // M001 units
        makeUnit({ type: "research-milestone", id: "M001", cost: 0.10 }),
        makeUnit({ type: "plan-milestone", id: "M001", cost: 0.08 }),
        makeUnit({ type: "plan-slice", id: "M001/S01", cost: 0.05 }),
        makeUnit({ type: "execute-task", id: "M001/S01/T01", cost: 0.12 }),
        makeUnit({ type: "execute-task", id: "M001/S01/T02", cost: 0.15 }),
        makeUnit({ type: "complete-slice", id: "M001/S01", cost: 0.03 }),
        makeUnit({ type: "plan-slice", id: "M001/S02", cost: 0.06 }),
        makeUnit({ type: "execute-task", id: "M001/S02/T01", cost: 0.20 }),
        makeUnit({ type: "complete-slice", id: "M001/S02", cost: 0.04 }),
        // M002 units
        makeUnit({ type: "research-milestone", id: "M002", cost: 0.12 }),
        makeUnit({ type: "plan-milestone", id: "M002", cost: 0.09 }),
        makeUnit({ type: "plan-slice", id: "M002/S01", cost: 0.07 }),
        makeUnit({ type: "execute-task", id: "M002/S01/T01", cost: 0.18 }),
      ],
    };

    // Round-trip through the filesystem so the aggregations see parsed JSON.
    writeMetricsLedger(base, ledger);
    const loaded = readMetricsLedger(base);

    // Verify totals
    const totals = getProjectTotals(loaded.units);
    assertEq(totals.units, 13, "metrics: 13 total units across M001+M002");
    const totalCost = loaded.units.reduce((sum, u) => sum + u.cost, 0);
    // Tolerance comparison: the costs are floating-point sums.
    assertTrue(Math.abs(totals.cost - totalCost) < 0.001, "metrics: total cost matches sum");

    // Verify phase aggregation.
    // Optional chaining below: a missing aggregate is reported by the
    // assertion instead of aborting the script with a TypeError.
    const phases = aggregateByPhase(loaded.units);
    const research = phases.find(p => p.phase === "research");
    assertTrue(research !== undefined, "metrics: research phase exists");
    assertEq(research?.units, 2, "metrics: 2 research units (M001 + M002)");

    const execution = phases.find(p => p.phase === "execution");
    assertTrue(execution !== undefined, "metrics: execution phase exists");
    assertEq(execution?.units, 4, "metrics: 4 execution units across both milestones");

    // Verify slice aggregation
    const slices = aggregateBySlice(loaded.units);
    assertTrue(slices.length >= 4, "metrics: at least 4 slice aggregates (M001/S01, M001/S02, M002/S01, milestone-level)");

    const m001s01 = slices.find(s => s.sliceId === "M001/S01");
    assertTrue(m001s01 !== undefined, "metrics: M001/S01 slice aggregate exists");
    // M001/S01 has: plan-slice + T01 + T02 + complete-slice = 4 units
    assertEq(m001s01?.units, 4, "metrics: M001/S01 has 4 units");

    // Cost projection
    const projLines = formatCostProjection(slices, 3, 2.0);
    assertTrue(projLines.length >= 1, "metrics: cost projection generated");
    assertMatch(projLines[0], /Projected remaining/, "metrics: projection line text");
  } finally {
    cleanup(base);
  }
}
208
+
209
+ // ─── E2E: Budget alert progression through all thresholds ─────────────────────
210
+
211
console.log("\n=== E2E: Budget alert progression 0→75→80→90→100 ===");

// Scenario: spending climbs against a fixed ceiling; each threshold should
// raise exactly one alert, and no alert should repeat in between or above 100%.
{
  // Simulate spending progression against a $10 budget ceiling
  const ceiling = 10.0;

  // Start: 50% spent
  let lastLevel = getBudgetAlertLevel(5.0 / ceiling);
  assertEq(lastLevel, 0, "budget: 50% → level 0");
  assertEq(getNewBudgetAlertLevel(0, 5.0 / ceiling), null, "budget: no alert at 50%");

  // Spend to 75%
  let newLevel = getNewBudgetAlertLevel(lastLevel, 7.5 / ceiling);
  assertEq(newLevel, 75, "budget: alert fires at 75%");
  lastLevel = newLevel!;

  // Spend to 78% — no alert (between 75 and 80)
  assertEq(getNewBudgetAlertLevel(lastLevel, 7.8 / ceiling), null, "budget: no alert at 78%");

  // Spend to 80% — 80% approach alert
  newLevel = getNewBudgetAlertLevel(lastLevel, 8.0 / ceiling);
  assertEq(newLevel, 80, "budget: approach alert fires at 80%");
  lastLevel = newLevel!;

  // Spend to 85% — no alert (still at 80 level)
  assertEq(getNewBudgetAlertLevel(lastLevel, 8.5 / ceiling), null, "budget: no alert at 85%");

  // Spend to 90%
  newLevel = getNewBudgetAlertLevel(lastLevel, 9.0 / ceiling);
  assertEq(newLevel, 90, "budget: alert fires at 90%");
  lastLevel = newLevel!;

  // Spend to 100%
  newLevel = getNewBudgetAlertLevel(lastLevel, 10.0 / ceiling);
  assertEq(newLevel, 100, "budget: alert fires at 100%");
  lastLevel = newLevel!;

  // Over budget — no re-emission
  assertEq(getNewBudgetAlertLevel(lastLevel, 12.0 / ceiling), null, "budget: no re-alert over 100%");

  // Enforcement at 80% — still "none" (enforcement only at 100%).
  // Each mode gets its own label so a failing assertion identifies the mode.
  for (const mode of ["pause", "halt", "warn"] as const) {
    assertEq(getBudgetEnforcementAction(mode, 0.80), "none", `budget: no enforcement at 80% (${mode})`);
  }
}
256
+
257
+ // ─── E2E: Budget prediction with multi-milestone cost data ────────────────────
258
+
259
console.log("\n=== E2E: Budget prediction across milestones ===");

// Scenario: per-unit-type averages are derived from five finished units in two
// milestones and then used to price a list of still-pending unit types.
{
  const finishedUnits: UnitMetrics[] = [
    makeUnit({ type: "execute-task", id: "M001/S01/T01", cost: 0.10 }),
    makeUnit({ type: "execute-task", id: "M001/S01/T02", cost: 0.15 }),
    makeUnit({ type: "plan-slice", id: "M001/S01", cost: 0.05 }),
    makeUnit({ type: "execute-task", id: "M002/S01/T01", cost: 0.20 }),
    makeUnit({ type: "plan-slice", id: "M002/S01", cost: 0.08 }),
  ];

  const averages = getAverageCostPerUnitType(finishedUnits);
  assertTrue(averages.has("execute-task"), "prediction: has execute-task average");
  assertTrue(averages.has("plan-slice"), "prediction: has plan-slice average");

  // execute-task: (0.10 + 0.15 + 0.20) / 3 = 0.15
  const executeAverage = averages.get("execute-task")!;
  const executeDelta = Math.abs(executeAverage - 0.15);
  assertTrue(executeDelta < 0.001, `prediction: execute-task avg is $0.15 (got ${executeAverage})`);

  // plan-slice: (0.05 + 0.08) / 2 = 0.065
  const planAverage = averages.get("plan-slice")!;
  const planDelta = Math.abs(planAverage - 0.065);
  assertTrue(planDelta < 0.001, `prediction: plan-slice avg is $0.065 (got ${planAverage})`);

  // Three more execute-tasks plus one plan-slice: 3 * 0.15 + 1 * 0.065 = 0.515
  const projected = predictRemainingCost(averages, [
    "execute-task", "execute-task", "execute-task", "plan-slice",
  ]);
  const projectedDelta = Math.abs(projected - 0.515);
  assertTrue(projectedDelta < 0.001, `prediction: remaining cost ~$0.515 (got ${projected})`);
}
289
+
290
+ // ─── E2E: Parallel workers + budget alerts combined scenario ──────────────────
291
+
292
console.log("\n=== E2E: Combined parallel workers + budget monitoring ===");

// Scenario: three workers of one batch finish one after another while the
// spent fraction climbs 78% → 80% → 88% → 90%; alerts are expected only at the
// 80 and 90 thresholds.
{
  resetWorkerRegistry();

  const combinedBatch = "batch-combined";
  const scoutId = registerWorker("scout", "Research APIs", 0, 3, combinedBatch);
  const implementerId = registerWorker("worker", "Implement feature", 1, 3, combinedBatch);
  const testerId = registerWorker("worker", "Write tests", 2, 3, combinedBatch);

  const ceiling = 10.0;
  // The 75% alert is treated as already delivered before the scenario starts.
  let lastLevel: ReturnType<typeof getBudgetAlertLevel> = 75;

  // 78% sits between the 75 and 80 thresholds, so nothing new should fire.
  const alertAt78 = getNewBudgetAlertLevel(lastLevel, 7.8 / ceiling);
  assertEq(alertAt78, null, "combined: no alert at 78% with workers running");
  assertTrue(hasActiveWorkers(), "combined: workers running during budget check");

  // Worker 1 done → 80%
  updateWorker(scoutId, "completed");
  const alertAt80 = getNewBudgetAlertLevel(lastLevel, 8.0 / ceiling);
  assertEq(alertAt80, 80, "combined: 80% approach alert fires after worker completes");
  lastLevel = alertAt80!;

  // Worker 2 done → 88%
  updateWorker(implementerId, "completed");
  const alertAt88 = getNewBudgetAlertLevel(lastLevel, 8.8 / ceiling);
  assertEq(alertAt88, null, "combined: no alert at 88%");

  // Worker 3 done → 90%
  updateWorker(testerId, "completed");
  const alertAt90 = getNewBudgetAlertLevel(lastLevel, 9.0 / ceiling);
  assertEq(alertAt90, 90, "combined: 90% alert fires after all workers complete");

  assertTrue(!hasActiveWorkers(), "combined: no active workers at end");

  // Reset the registry again once the scenario is finished.
  resetWorkerRegistry();
}
328
+
329
+ // ─── E2E: formatCostProjection with budget ceiling warnings ───────────────────
330
+
331
console.log("\n=== E2E: Cost projection ceiling warnings ===");

// Scenario: the same three slice aggregates (total cost 12.0) are projected
// against a ceiling above the spend and a ceiling below it.
{
  // Each call yields a fresh all-zero token record so slices share no object.
  const noTokens = () => ({ input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 });
  const slices = [
    { sliceId: "M001/S01", units: 4, tokens: noTokens(), cost: 3.0, duration: 10000 },
    { sliceId: "M001/S02", units: 3, tokens: noTokens(), cost: 4.0, duration: 8000 },
    { sliceId: "M002/S01", units: 3, tokens: noTokens(), cost: 5.0, duration: 12000 },
  ];

  // Ceiling 20.0 is above the 12.0 already spent: projection line only.
  const underCeiling = formatCostProjection(slices, 2, 20.0);
  assertTrue(underCeiling.length >= 1, "projection: has projection line");
  assertMatch(underCeiling[0], /Projected remaining/, "projection: shows projection");
  assertTrue(underCeiling.length === 1, "projection: no ceiling warning when under budget");

  // Ceiling 10.0 is below the 12.0 already spent: a second, warning line is expected.
  const overCeiling = formatCostProjection(slices, 2, 10.0);
  assertTrue(overCeiling.length >= 2, "projection: has ceiling warning when over budget");
  assertMatch(overCeiling[1], /ceiling/, "projection: ceiling warning text");
}

// ─── Summary ──────────────────────────────────────────────────────────────────

report();
@@ -58,6 +58,7 @@ function writeCompleteMilestone(base: string, mid: string): void {
58
58
  - [x] **S01: Done** \`risk:low\` \`depends:[]\`
59
59
  > After this: Done.
60
60
  `);
61
+ writeFileSync(join(dir, `${mid}-VALIDATION.md`), `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`);
61
62
  writeFileSync(join(dir, `${mid}-SUMMARY.md`), `# ${mid} Summary\n\nComplete.`);
62
63
  }
63
64
 
@@ -0,0 +1,316 @@
1
+ import test from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { mkdirSync, writeFileSync, existsSync, rmSync } from "node:fs";
4
+ import { join } from "node:path";
5
+ import { tmpdir } from "node:os";
6
+ import { randomUUID } from "node:crypto";
7
+
8
+ import { deriveState, isValidationTerminal } from "../state.ts";
9
+ import { resolveExpectedArtifactPath, verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.ts";
10
+ import { resolveDispatch, type DispatchContext } from "../auto-dispatch.ts";
11
+ import type { GSDState } from "../types.ts";
12
+ import { clearPathCache } from "../paths.ts";
13
+ import { clearParseCache } from "../files.ts";
14
+
15
+ // ─── Helpers ──────────────────────────────────────────────────────────────
16
+
17
/**
 * Creates a uniquely named project root under the OS temp dir, containing an
 * empty `.gsd/milestones` tree.
 *
 * @returns Path of the new project root.
 */
function makeTmpBase(): string {
  const uniqueName = `gsd-val-test-${randomUUID()}`;
  const base = join(tmpdir(), uniqueName);
  const milestonesDir = join(base, ".gsd", "milestones");
  mkdirSync(milestonesDir, { recursive: true });
  return base;
}
22
+
23
/**
 * Tears down a test project: clears the path and parse caches, then removes
 * the directory tree.
 *
 * @param base Project root to delete.
 */
function cleanup(base: string): void {
  clearPathCache();
  clearParseCache();

  const removalOptions = { recursive: true, force: true };
  try {
    rmSync(base, removalOptions);
  } catch {
    // Best-effort removal: a failure to delete the temp dir is ignored.
  }
}
28
+
29
/**
 * Writes `<mid>-ROADMAP.md` into the milestone's directory, creating the
 * directory first if needed.
 *
 * @param base    Project root.
 * @param mid     Milestone id; used for both the directory and file name.
 * @param content File content to write.
 */
function writeRoadmap(base: string, mid: string, content: string): void {
  const milestoneDir = join(base, ".gsd", "milestones", mid);
  mkdirSync(milestoneDir, { recursive: true });

  const roadmapPath = join(milestoneDir, `${mid}-ROADMAP.md`);
  writeFileSync(roadmapPath, content);
}
34
+
35
/**
 * Writes `<mid>-SUMMARY.md` into the milestone's directory, creating the
 * directory first if needed.
 *
 * @param base    Project root.
 * @param mid     Milestone id; used for both the directory and file name.
 * @param content File content to write.
 */
function writeMilestoneSummary(base: string, mid: string, content: string): void {
  const milestoneDir = join(base, ".gsd", "milestones", mid);
  mkdirSync(milestoneDir, { recursive: true });

  const summaryPath = join(milestoneDir, `${mid}-SUMMARY.md`);
  writeFileSync(summaryPath, content);
}
40
+
41
/**
 * Writes `<mid>-VALIDATION.md` into the milestone's directory, creating the
 * directory first if needed.
 *
 * @param base    Project root.
 * @param mid     Milestone id; used for both the directory and file name.
 * @param content File content to write.
 */
function writeValidation(base: string, mid: string, content: string): void {
  const milestoneDir = join(base, ".gsd", "milestones", mid);
  mkdirSync(milestoneDir, { recursive: true });

  const validationPath = join(milestoneDir, `${mid}-VALIDATION.md`);
  writeFileSync(validationPath, content);
}
46
+
47
/**
 * Writes `<sid>-PLAN.md` into the slice directory of a milestone.
 *
 * The slice directory and an empty `tasks` subdirectory inside it are created
 * first if they do not exist.
 *
 * @param base    Project root.
 * @param mid     Milestone id.
 * @param sid     Slice id; used for both the directory and file name.
 * @param content File content to write.
 */
function writeSlicePlan(base: string, mid: string, sid: string, content: string): void {
  const sliceDir = join(base, ".gsd", "milestones", mid, "slices", sid);
  const tasksDir = join(sliceDir, "tasks");
  mkdirSync(tasksDir, { recursive: true });

  const planPath = join(sliceDir, `${sid}-PLAN.md`);
  writeFileSync(planPath, content);
}
52
+
53
/**
 * Writes `<sid>-SUMMARY.md` into the slice directory of a milestone, creating
 * the directory first if needed.
 *
 * @param base    Project root.
 * @param mid     Milestone id.
 * @param sid     Slice id; used for both the directory and file name.
 * @param content File content to write.
 */
function writeSliceSummary(base: string, mid: string, sid: string, content: string): void {
  const sliceDir = join(base, ".gsd", "milestones", mid, "slices", sid);
  mkdirSync(sliceDir, { recursive: true });

  const summaryPath = join(sliceDir, `${sid}-SUMMARY.md`);
  writeFileSync(summaryPath, content);
}
58
+
59
+ const ALL_DONE_ROADMAP = `# M001: Test Milestone
60
+
61
+ ## Vision
62
+ Test
63
+
64
+ ## Success Criteria
65
+ - It works
66
+
67
+ ## Slices
68
+
69
+ - [x] **S01: First slice** \`risk:low\` \`depends:[]\`
70
+ > After this: it works
71
+
72
+ ## Boundary Map
73
+
74
+ | From | To | Produces | Consumes |
75
+ |------|-----|----------|----------|
76
+ | S01 | terminal | output | nothing |
77
+ `;
78
+
79
+ const CONTEXT_FILE = `---
80
+ id: M001
81
+ title: Test Milestone
82
+ ---
83
+
84
+ # Context
85
+ Test context.
86
+ `;
87
+
88
+ // ─── isValidationTerminal ─────────────────────────────────────────────────
89
+
90
// Table of [test title, VALIDATION file content, expected result]. One test is
// registered per row, in the order listed.
for (const [title, content, expected] of [
  [
    "isValidationTerminal returns true for verdict: pass",
    "---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation",
    true,
  ],
  [
    "isValidationTerminal returns true for verdict: needs-attention",
    "---\nverdict: needs-attention\nremediation_round: 0\n---\n\n# Validation",
    true,
  ],
  [
    "isValidationTerminal returns false for verdict: needs-remediation",
    "---\nverdict: needs-remediation\nremediation_round: 0\n---\n\n# Validation",
    false,
  ],
  [
    "isValidationTerminal returns false for missing frontmatter",
    "# Validation\nNo frontmatter here.",
    false,
  ],
  [
    "isValidationTerminal returns false for missing verdict field",
    "---\nremediation_round: 0\n---\n\n# Validation",
    false,
  ],
] as const) {
  test(title, () => {
    assert.equal(isValidationTerminal(content), expected);
  });
}
114
+
115
+ // ─── deriveState: validating-milestone ────────────────────────────────────
116
+
117
// All roadmap slices are checked off and no VALIDATION file is written, so the
// derived phase is expected to be "validating-milestone" with no active slice.
test("deriveState returns validating-milestone when all slices done and no VALIDATION file", async () => {
  const base = makeTmpBase();
  try {
    writeRoadmap(base, "M001", ALL_DONE_ROADMAP);
    // Write CONTEXT so milestone has a title
    const contextPath = join(base, ".gsd", "milestones", "M001", "M001-CONTEXT.md");
    writeFileSync(contextPath, CONTEXT_FILE);

    const derived = await deriveState(base);
    assert.equal(derived.phase, "validating-milestone");
    assert.equal(derived.activeMilestone?.id, "M001");
    assert.equal(derived.activeSlice, null);
  } finally {
    cleanup(base);
  }
});
133
+
134
// A VALIDATION file with verdict "pass" and no SUMMARY: the derived phase is
// expected to be "completing-milestone".
test("deriveState returns completing-milestone when VALIDATION exists with terminal verdict", async () => {
  const base = makeTmpBase();
  try {
    const passingValidation = "---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nAll good.";
    writeRoadmap(base, "M001", ALL_DONE_ROADMAP);
    writeValidation(base, "M001", passingValidation);

    const derived = await deriveState(base);
    assert.equal(derived.phase, "completing-milestone");
    assert.equal(derived.activeMilestone?.id, "M001");
  } finally {
    cleanup(base);
  }
});
147
+
148
// A VALIDATION file with verdict "needs-remediation": the derived phase is
// expected to remain "validating-milestone".
test("deriveState returns validating-milestone when VALIDATION exists with needs-remediation verdict", async () => {
  const base = makeTmpBase();
  try {
    const remediationValidation = "---\nverdict: needs-remediation\nremediation_round: 0\n---\n\n# Validation\nNeeds fixes.";
    writeRoadmap(base, "M001", ALL_DONE_ROADMAP);
    writeValidation(base, "M001", remediationValidation);

    const derived = await deriveState(base);
    assert.equal(derived.phase, "validating-milestone");
    assert.equal(derived.activeMilestone?.id, "M001");
  } finally {
    cleanup(base);
  }
});
161
+
162
// Both a passing VALIDATION file and a SUMMARY file are present: the derived
// phase is expected to be "complete".
test("deriveState returns complete when both VALIDATION and SUMMARY exist", async () => {
  const base = makeTmpBase();
  try {
    const passingValidation = "---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.";
    writeRoadmap(base, "M001", ALL_DONE_ROADMAP);
    writeValidation(base, "M001", passingValidation);
    writeMilestoneSummary(base, "M001", "# Summary\nDone.");

    const derived = await deriveState(base);
    assert.equal(derived.phase, "complete");
  } finally {
    cleanup(base);
  }
});
175
+
176
+ // ─── Dispatch rule ────────────────────────────────────────────────────────
177
+
178
+ test("dispatch rule matches validating-milestone phase", async () => {
179
+ const state: GSDState = {
180
+ activeMilestone: { id: "M001", title: "Test" },
181
+ activeSlice: null,
182
+ activeTask: null,
183
+ phase: "validating-milestone",
184
+ recentDecisions: [],
185
+ blockers: [],
186
+ nextAction: "Validate milestone M001.",
187
+ registry: [{ id: "M001", title: "Test", status: "active" }],
188
+ progress: { milestones: { done: 0, total: 1 } },
189
+ };
190
+
191
+ const base = makeTmpBase();
192
+ try {
193
+ // Set up minimal milestone structure for the prompt builder
194
+ writeRoadmap(base, "M001", ALL_DONE_ROADMAP);
195
+
196
+ const ctx: DispatchContext = {
197
+ basePath: base,
198
+ mid: "M001",
199
+ midTitle: "Test",
200
+ state,
201
+ prefs: undefined,
202
+ };
203
+ const result = await resolveDispatch(ctx);
204
+ assert.equal(result.action, "dispatch");
205
+ if (result.action === "dispatch") {
206
+ assert.equal(result.unitType, "validate-milestone");
207
+ assert.equal(result.unitId, "M001");
208
+ }
209
+ } finally {
210
+ cleanup(base);
211
+ }
212
+ });
213
+
214
+ test("dispatch rule skips when skip_milestone_validation preference is set", async () => {
215
+ const state: GSDState = {
216
+ activeMilestone: { id: "M001", title: "Test" },
217
+ activeSlice: null,
218
+ activeTask: null,
219
+ phase: "validating-milestone",
220
+ recentDecisions: [],
221
+ blockers: [],
222
+ nextAction: "Validate milestone M001.",
223
+ registry: [{ id: "M001", title: "Test", status: "active" }],
224
+ progress: { milestones: { done: 0, total: 1 } },
225
+ };
226
+
227
+ const base = makeTmpBase();
228
+ try {
229
+ writeRoadmap(base, "M001", ALL_DONE_ROADMAP);
230
+
231
+ const ctx: DispatchContext = {
232
+ basePath: base,
233
+ mid: "M001",
234
+ midTitle: "Test",
235
+ state,
236
+ prefs: { phases: { skip_milestone_validation: true } },
237
+ };
238
+ const result = await resolveDispatch(ctx);
239
+ assert.equal(result.action, "skip");
240
+
241
+ // Verify the VALIDATION file was written
242
+ const validationPath = join(base, ".gsd", "milestones", "M001", "M001-VALIDATION.md");
243
+ assert.ok(existsSync(validationPath), "VALIDATION file should be written on skip");
244
+ } finally {
245
+ cleanup(base);
246
+ }
247
+ });
248
+
249
+ // ─── Artifact resolution & verification ───────────────────────────────────
250
+
251
+ test("resolveExpectedArtifactPath returns VALIDATION path for validate-milestone", () => {
252
+ const base = makeTmpBase();
253
+ try {
254
+ mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true });
255
+ const result = resolveExpectedArtifactPath("validate-milestone", "M001", base);
256
+ assert.ok(result);
257
+ assert.ok(result!.includes("VALIDATION"));
258
+ } finally {
259
+ cleanup(base);
260
+ }
261
+ });
262
+
263
+ test("verifyExpectedArtifact passes when VALIDATION.md exists", () => {
264
+ const base = makeTmpBase();
265
+ try {
266
+ writeValidation(base, "M001", "---\nverdict: pass\n---\n# Val");
267
+ clearPathCache();
268
+ clearParseCache();
269
+ const result = verifyExpectedArtifact("validate-milestone", "M001", base);
270
+ assert.equal(result, true);
271
+ } finally {
272
+ cleanup(base);
273
+ }
274
+ });
275
+
276
+ test("verifyExpectedArtifact fails when VALIDATION.md is missing", () => {
277
+ const base = makeTmpBase();
278
+ try {
279
+ mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true });
280
+ clearPathCache();
281
+ clearParseCache();
282
+ const result = verifyExpectedArtifact("validate-milestone", "M001", base);
283
+ assert.equal(result, false);
284
+ } finally {
285
+ cleanup(base);
286
+ }
287
+ });
288
+
289
+ // ─── diagnoseExpectedArtifact ─────────────────────────────────────────────
290
+
291
+ test("diagnoseExpectedArtifact returns validation path for validate-milestone", () => {
292
+ const base = makeTmpBase();
293
+ try {
294
+ const result = diagnoseExpectedArtifact("validate-milestone", "M001", base);
295
+ assert.ok(result);
296
+ assert.ok(result!.includes("VALIDATION"));
297
+ assert.ok(result!.includes("milestone validation report"));
298
+ } finally {
299
+ cleanup(base);
300
+ }
301
+ });
302
+
303
+ // ─── buildLoopRemediationSteps ────────────────────────────────────────────
304
+
305
+ test("buildLoopRemediationSteps returns steps for validate-milestone", () => {
306
+ const base = makeTmpBase();
307
+ try {
308
+ const result = buildLoopRemediationSteps("validate-milestone", "M001", base);
309
+ assert.ok(result);
310
+ assert.ok(result!.includes("VALIDATION"));
311
+ assert.ok(result!.includes("verdict: pass"));
312
+ assert.ok(result!.includes("gsd doctor"));
313
+ } finally {
314
+ cleanup(base);
315
+ }
316
+ });