taskplane 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/LICENSE +21 -0
  2. package/README.md +2 -20
  3. package/bin/taskplane.mjs +706 -0
  4. package/dashboard/public/app.js +900 -0
  5. package/dashboard/public/index.html +92 -0
  6. package/dashboard/public/style.css +924 -0
  7. package/dashboard/server.cjs +531 -0
  8. package/extensions/task-orchestrator.ts +28 -0
  9. package/extensions/task-runner.ts +1923 -0
  10. package/extensions/taskplane/abort.ts +466 -0
  11. package/extensions/taskplane/config.ts +102 -0
  12. package/extensions/taskplane/discovery.ts +988 -0
  13. package/extensions/taskplane/engine.ts +758 -0
  14. package/extensions/taskplane/execution.ts +1752 -0
  15. package/extensions/taskplane/extension.ts +577 -0
  16. package/extensions/taskplane/formatting.ts +718 -0
  17. package/extensions/taskplane/git.ts +38 -0
  18. package/extensions/taskplane/index.ts +22 -0
  19. package/extensions/taskplane/merge.ts +795 -0
  20. package/extensions/taskplane/messages.ts +134 -0
  21. package/extensions/taskplane/persistence.ts +1121 -0
  22. package/extensions/taskplane/resume.ts +1092 -0
  23. package/extensions/taskplane/sessions.ts +92 -0
  24. package/extensions/taskplane/types.ts +1514 -0
  25. package/extensions/taskplane/waves.ts +900 -0
  26. package/extensions/taskplane/worktree.ts +1624 -0
  27. package/package.json +48 -3
  28. package/skills/create-taskplane-task/SKILL.md +326 -0
  29. package/skills/create-taskplane-task/references/context-template.md +78 -0
  30. package/skills/create-taskplane-task/references/prompt-template.md +246 -0
  31. package/templates/agents/task-merger.md +256 -0
  32. package/templates/agents/task-reviewer.md +81 -0
  33. package/templates/agents/task-worker.md +140 -0
  34. package/templates/config/task-orchestrator.yaml +89 -0
  35. package/templates/config/task-runner.yaml +99 -0
  36. package/templates/tasks/CONTEXT.md +31 -0
  37. package/templates/tasks/EXAMPLE-001-hello-world/PROMPT.md +90 -0
  38. package/templates/tasks/EXAMPLE-001-hello-world/STATUS.md +73 -0
@@ -0,0 +1,1923 @@
1
+ /**
2
+ * Task Runner — Autonomous task execution with live dashboard
3
+ *
4
+ * Replaces the Ralph Wiggum bash loop with a Pi extension. Workers are
5
+ * fresh-context subprocesses; STATUS.md is persistent memory. Supports
6
+ * cross-model review (reviewer uses a different model than the worker).
7
+ *
8
+ * Commands:
9
+ * /task <path/to/PROMPT.md> — Start executing a task
10
+ * /task-status — Re-read and display STATUS.md progress
11
+ * /task-pause — Pause after current worker finishes
12
+ * /task-resume — Resume a paused task
13
+ *
14
+ * Configuration: .pi/task-runner.yaml (project-specific settings)
15
+ * Agents: .pi/agents/task-worker.md, .pi/agents/task-reviewer.md
16
+ *
17
+ * Usage: pi -e extensions/task-runner.ts
18
+ */
19
+
20
+ import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
21
+ import { DynamicBorder } from "@mariozechner/pi-coding-agent";
22
+ import { Container, Text, truncateToWidth, visibleWidth } from "@mariozechner/pi-tui";
23
+ import { spawn, spawnSync } from "child_process";
24
+ import {
25
+ readFileSync, writeFileSync, appendFileSync, existsSync, mkdirSync, unlinkSync,
26
+ } from "fs";
27
+ import { tmpdir } from "os";
28
+ import { join, dirname, basename, resolve } from "path";
29
+ import { parse as yamlParse } from "yaml";
30
+
31
+
32
+ // ── Types ────────────────────────────────────────────────────────────
33
+
34
/**
 * Effective task-runner configuration: the contents of `.pi/task-runner.yaml`
 * layered on top of `DEFAULT_CONFIG`.
 */
interface TaskConfig {
	/** Project identity injected into worker/reviewer prompts. */
	project: {
		name: string;
		description: string;
	};
	/** Repository-relative locations. */
	paths: {
		tasks: string;
		architecture?: string;
	};
	/** Named test commands (name → shell command) listed in the project context. */
	testing: {
		commands: Record<string, string>;
	};
	/** Global standards applied when no area override matches. */
	standards: {
		docs: string[];
		rules: string[];
	};
	/** Per-area replacements for `standards`, keyed by task-area name. */
	standards_overrides: Record<string, { docs?: string[]; rules?: string[] }>;
	/** Task areas keyed by name; `path` is matched against the task folder. */
	task_areas: Record<string, { path: string; [key: string]: any }>;
	/** Worker agent settings. */
	worker: {
		model: string;
		tools: string;
		thinking: string;
		/** How agents are launched; resolved by `getSpawnMode`. */
		spawn_mode?: "subprocess" | "tmux";
	};
	/** Reviewer agent settings. */
	reviewer: {
		model: string;
		tools: string;
		thinking: string;
	};
	/** Context-window and iteration limits. */
	context: {
		worker_context_window: number;
		warn_percent: number;
		kill_percent: number;
		max_worker_iterations: number;
		max_review_cycles: number;
		no_progress_limit: number;
		/** Wall-clock limit for TMUX workers; resolved by `getMaxWorkerMinutes`. */
		max_worker_minutes?: number;
	};
}
58
+
59
/** One `### Step N: name` section as parsed from PROMPT.md or STATUS.md. */
interface StepInfo {
	/** Step number taken from the heading. */
	number: number;
	/** Step title taken from the heading. */
	name: string;
	/** Progress state of the step. */
	status: "not-started" | "in-progress" | "complete";
	/** Markdown task-list items found in the step section. */
	checkboxes: { text: string; checked: boolean }[];
	/** Count of `checkboxes` entries with `checked === true`. */
	totalChecked: number;
	/** Length of `checkboxes`. */
	totalItems: number;
}
67
+
68
/** Result of parsing a task's PROMPT.md (see `parsePromptMd`). */
interface ParsedTask {
	/** Task identifier from the title, or the task folder name as fallback. */
	taskId: string;
	/** Human-readable task title. */
	taskName: string;
	/** Value of the `## Review Level` heading (0 when absent). */
	reviewLevel: number;
	/** Value of the `**Size:**` field ("M" when absent). */
	size: string;
	/** Steps in document order. */
	steps: StepInfo[];
	/** File paths listed under "Context to Read First". */
	contextDocs: string[];
	/** Absolute directory containing PROMPT.md. */
	taskFolder: string;
	/** Path to PROMPT.md as supplied by the caller. */
	promptPath: string;
}
78
+
79
/** Lifecycle phase of the task runner. */
type TaskPhase =
	| "idle"
	| "running"
	| "paused"
	| "complete"
	| "error";
80
+
81
/** Mutable runtime state of the task runner (see `freshState` for initial values). */
interface TaskState {
	// ── Task ──
	phase: TaskPhase;
	task: ParsedTask | null;
	config: TaskConfig | null;
	currentStep: number;

	// ── Worker ──
	workerIteration: number;
	workerStatus: "idle" | "running" | "done" | "error" | "killed";
	workerElapsed: number;
	workerContextPct: number;
	workerLastTool: string;
	workerToolCount: number;
	workerInputTokens: number;
	workerOutputTokens: number;
	workerCacheReadTokens: number;
	workerCacheWriteTokens: number;
	workerCostUsd: number;
	/** Untyped handle for the running worker (null when none). */
	workerProc: any;
	/** Untyped timer handle associated with the worker (null when none). */
	workerTimer: any;

	// ── Reviewer ──
	reviewerStatus: "idle" | "running" | "done" | "error";
	reviewerType: string;
	reviewerElapsed: number;
	reviewerLastTool: string;
	/** Untyped handle for the running reviewer (null when none). */
	reviewerProc: any;
	/** Untyped timer handle associated with the reviewer (null when none). */
	reviewerTimer: any;

	// ── Counters ──
	reviewCounter: number;
	totalIterations: number;
	/** Latest parsed step info, keyed by step number. */
	stepStatuses: Map<number, StepInfo>;
}
109
+
110
/**
 * Builds a brand-new idle `TaskState`: no task or config loaded, all counters
 * zeroed, no process/timer handles, and an empty step-status map.
 */
function freshState(): TaskState {
	const initial: TaskState = {
		// Task
		phase: "idle",
		task: null,
		config: null,
		currentStep: 0,
		// Worker
		workerIteration: 0,
		workerStatus: "idle",
		workerElapsed: 0,
		workerContextPct: 0,
		workerLastTool: "",
		workerToolCount: 0,
		workerInputTokens: 0,
		workerOutputTokens: 0,
		workerCacheReadTokens: 0,
		workerCacheWriteTokens: 0,
		workerCostUsd: 0,
		workerProc: null,
		workerTimer: null,
		// Reviewer
		reviewerStatus: "idle",
		reviewerType: "",
		reviewerElapsed: 0,
		reviewerLastTool: "",
		reviewerProc: null,
		reviewerTimer: null,
		// Counters
		reviewCounter: 0,
		totalIterations: 0,
		stepStatuses: new Map(),
	};
	return initial;
}
122
+
123
+ // ── Config ───────────────────────────────────────────────────────────
124
+
125
/** Built-in configuration used when `.pi/task-runner.yaml` is absent or omits a key. */
const DEFAULT_CONFIG: TaskConfig = {
	project: {
		name: "Project",
		description: "",
	},
	paths: {
		tasks: "docs/task-management",
	},
	testing: {
		commands: {},
	},
	standards: {
		docs: [],
		rules: [],
	},
	standards_overrides: {},
	task_areas: {},
	worker: {
		model: "",
		tools: "read,write,edit,bash,grep,find,ls",
		thinking: "off",
	},
	reviewer: {
		model: "openai/gpt-5.3-codex",
		tools: "read,bash,grep,find,ls",
		thinking: "off",
	},
	context: {
		worker_context_window: 200000,
		warn_percent: 70,
		kill_percent: 85,
		max_worker_iterations: 20,
		max_review_cycles: 2,
		no_progress_limit: 3,
	},
};
139
+
140
/**
 * Loads `<cwd>/.pi/task-runner.yaml` and merges it over `DEFAULT_CONFIG`.
 *
 * Section objects (`project`, `paths`, `worker`, `reviewer`, `context`,
 * `testing.commands`) are shallow-merged key by key. List-valued settings are
 * only accepted when they are real arrays; otherwise the default is used, so a
 * mistyped scalar in YAML cannot crash later `.map()` calls.
 *
 * @param cwd Project root that contains the `.pi` directory.
 * @returns The merged configuration. Falls back to a shallow copy of
 *          `DEFAULT_CONFIG` when the file is missing, unreadable or invalid
 *          (deliberately best-effort: a broken config must not stop the runner).
 */
function loadConfig(cwd: string): TaskConfig {
	const configPath = join(cwd, ".pi", "task-runner.yaml");
	if (!existsSync(configPath)) return { ...DEFAULT_CONFIG };
	try {
		const raw = readFileSync(configPath, "utf-8");
		const loaded = yamlParse(raw) as any;

		// standards_overrides: Record<areaName, { docs?, rules? }>
		const rawOverrides = loaded?.standards_overrides || {};
		const parsedOverrides: Record<string, { docs?: string[]; rules?: string[] }> = {};
		for (const [key, val] of Object.entries(rawOverrides)) {
			if (val && typeof val === "object") {
				const v = val as any;
				parsedOverrides[key] = {
					docs: Array.isArray(v.docs) ? v.docs : undefined,
					rules: Array.isArray(v.rules) ? v.rules : undefined,
				};
			}
		}

		// task_areas: only `path` is needed (for standards resolution). Require a
		// non-empty string so later string operations on it are safe.
		const rawAreas = loaded?.task_areas || {};
		const parsedAreas: Record<string, { path: string }> = {};
		for (const [key, val] of Object.entries(rawAreas)) {
			if (!val || typeof val !== "object") continue;
			const areaPath = (val as any).path;
			if (typeof areaPath === "string" && areaPath) {
				parsedAreas[key] = { path: areaPath };
			}
		}

		// Global standards must be arrays, same rule as the overrides above.
		const globalDocs = loaded?.standards?.docs;
		const globalRules = loaded?.standards?.rules;

		return {
			project: { ...DEFAULT_CONFIG.project, ...loaded?.project },
			paths: { ...DEFAULT_CONFIG.paths, ...loaded?.paths },
			testing: { commands: { ...DEFAULT_CONFIG.testing.commands, ...loaded?.testing?.commands } },
			standards: {
				docs: Array.isArray(globalDocs) ? globalDocs : DEFAULT_CONFIG.standards.docs,
				rules: Array.isArray(globalRules) ? globalRules : DEFAULT_CONFIG.standards.rules,
			},
			standards_overrides: parsedOverrides,
			task_areas: parsedAreas,
			worker: { ...DEFAULT_CONFIG.worker, ...loaded?.worker },
			reviewer: { ...DEFAULT_CONFIG.reviewer, ...loaded?.reviewer },
			context: { ...DEFAULT_CONFIG.context, ...loaded?.context },
		};
	} catch {
		// Unreadable file or invalid YAML: run with defaults.
		return { ...DEFAULT_CONFIG };
	}
}
186
+
187
+ // ── Spawn Mode Resolution ────────────────────────────────────────────
188
+
189
/**
 * Picks how workers/reviewers are launched: as headless subprocesses or as
 * TMUX sessions.
 *
 * Precedence: `TASK_RUNNER_SPAWN_MODE` env var, then `config.worker.spawn_mode`,
 * then `"subprocess"`. Values other than `"tmux"`/`"subprocess"` are ignored at
 * each level.
 */
function getSpawnMode(config: TaskConfig): "subprocess" | "tmux" {
	const isSpawnMode = (value: unknown): value is "subprocess" | "tmux" =>
		value === "tmux" || value === "subprocess";

	const fromEnv = process.env.TASK_RUNNER_SPAWN_MODE;
	if (isSpawnMode(fromEnv)) return fromEnv;

	const fromConfig = config.worker.spawn_mode;
	return isSpawnMode(fromConfig) ? fromConfig : "subprocess";
}
204
+
205
/**
 * Session-name prefix for TMUX worker/reviewer sessions.
 * Reads `TASK_RUNNER_TMUX_PREFIX` (e.g. "orch-lane-1"); defaults to "task"
 * when the variable is unset or empty.
 */
function getTmuxPrefix(): string {
	const fromEnv = process.env.TASK_RUNNER_TMUX_PREFIX;
	return fromEnv ? fromEnv : "task";
}
213
+
214
/**
 * Detects whether this task runner is executing inside the parallel orchestrator.
 *
 * The only signal is `TASK_RUNNER_TMUX_PREFIX` starting with "orch-"
 * (the orchestrator's "orch-lane-N" naming). `TASK_RUNNER_SPAWN_MODE` is
 * intentionally NOT consulted: both "tmux" and "subprocess" are valid spawn
 * modes under the orchestrator.
 *
 * When true, certain worker behaviors are suppressed — most notably, workers
 * must NOT archive task folders because the orchestrator polls for .DONE files
 * at the original path.
 *
 * @returns `true` when the prefix env var is set and begins with "orch-".
 */
function isOrchestratedMode(): boolean {
	const prefix = process.env.TASK_RUNNER_TMUX_PREFIX;
	return typeof prefix === "string" && prefix.startsWith("orch-");
}
230
+
231
/**
 * Wall-clock timeout, in minutes, for TMUX worker sessions.
 *
 * Precedence: `TASK_RUNNER_MAX_WORKER_MINUTES` (parsed as a base-10 integer,
 * must be > 0), then `config.context.max_worker_minutes` (number > 0), then 30.
 */
function getMaxWorkerMinutes(config: TaskConfig): number {
	const fromEnv = process.env.TASK_RUNNER_MAX_WORKER_MINUTES;
	// NaN fails the `> 0` comparison, so unparseable values fall through.
	const envMinutes = fromEnv ? parseInt(fromEnv, 10) : NaN;
	if (envMinutes > 0) return envMinutes;

	const fromConfig = config.context.max_worker_minutes;
	return typeof fromConfig === "number" && fromConfig > 0 ? fromConfig : 30;
}
248
+
249
+ // ── Orchestrator Sidecar Files ────────────────────────────────────────
250
+
251
/**
 * Resolves the `.pi` directory that holds sidecar files (lane state,
 * conversation logs).
 *
 * 1. If `ORCH_SIDECAR_DIR` is set, that directory is used (created if missing).
 * 2. Otherwise the nearest existing `.pi` directory is searched for, starting
 *    at the current working directory and walking up at most 10 levels.
 * 3. Failing that, `<cwd>/.pi` is created and returned.
 */
function getSidecarDir(): string {
	const ensureDir = (target: string): string => {
		if (!existsSync(target)) mkdirSync(target, { recursive: true });
		return target;
	};

	const fromOrchestrator = process.env.ORCH_SIDECAR_DIR;
	if (fromOrchestrator) return ensureDir(fromOrchestrator);

	let current = process.cwd();
	let hops = 0;
	while (hops < 10) {
		const candidate = join(current, ".pi");
		if (existsSync(candidate)) return candidate;
		const up = dirname(current);
		if (up === current) break; // reached the filesystem root
		current = up;
		hops += 1;
	}

	return ensureDir(join(process.cwd(), ".pi"));
}
276
+
277
/**
 * Writes a JSON snapshot of the runner state to
 * `<sidecar>/lane-state-<prefix>.json` for the web dashboard.
 *
 * No-op outside orchestrated mode. Strictly best-effort: any failure —
 * including failure to resolve or create the sidecar directory — is swallowed
 * so that dashboard bookkeeping can never crash the runner.
 *
 * @param state Current runner state; only scalar progress/usage fields are
 *              serialized (no process or timer handles).
 */
function writeLaneState(state: TaskState): void {
	if (!isOrchestratedMode()) return;
	try {
		const prefix = getTmuxPrefix(); // e.g., "orch-lane-1"
		// Resolved inside the try: getSidecarDir() may throw while creating the directory.
		const filePath = join(getSidecarDir(), `lane-state-${prefix}.json`);
		const data = {
			prefix,
			taskId: state.task?.taskId || null,
			phase: state.phase,
			currentStep: state.currentStep,
			totalIterations: state.totalIterations,
			workerIteration: state.workerIteration,
			workerStatus: state.workerStatus,
			workerElapsed: state.workerElapsed,
			workerContextPct: state.workerContextPct,
			workerLastTool: state.workerLastTool,
			workerToolCount: state.workerToolCount,
			workerInputTokens: state.workerInputTokens,
			workerOutputTokens: state.workerOutputTokens,
			workerCacheReadTokens: state.workerCacheReadTokens,
			workerCacheWriteTokens: state.workerCacheWriteTokens,
			workerCostUsd: state.workerCostUsd,
			reviewerStatus: state.reviewerStatus || "idle",
			timestamp: Date.now(),
		};
		writeFileSync(filePath, JSON.stringify(data) + "\n");
	} catch {
		// Best effort — don't crash the runner
	}
}
311
+
312
/**
 * Appends one event as a JSON line to
 * `<sidecar>/worker-conversation-<prefix>.jsonl`.
 *
 * Best-effort: failures to resolve the sidecar directory, serialize the event
 * or write the file are swallowed so logging cannot interrupt the worker.
 *
 * @param prefix Lane/session prefix used in the file name.
 * @param event  JSON-serializable event object.
 */
function appendConversationEvent(prefix: string, event: Record<string, unknown>): void {
	try {
		// Resolved inside the try: getSidecarDir() may throw while creating the directory.
		const filePath = join(getSidecarDir(), `worker-conversation-${prefix}.jsonl`);
		appendFileSync(filePath, JSON.stringify(event) + "\n");
	} catch {
		// Best effort
	}
}
324
+
325
/**
 * Truncates `<sidecar>/worker-conversation-<prefix>.jsonl` to an empty file
 * (creating it if needed), intended for the start of a new worker iteration.
 *
 * Best-effort: failures to resolve the sidecar directory or write the file
 * are swallowed.
 *
 * @param prefix Lane/session prefix used in the file name.
 */
function clearConversationLog(prefix: string): void {
	try {
		// Resolved inside the try: getSidecarDir() may throw while creating the directory.
		const filePath = join(getSidecarDir(), `worker-conversation-${prefix}.jsonl`);
		writeFileSync(filePath, "");
	} catch {
		// Best effort
	}
}
336
+
337
+ // ── Agent Loader ─────────────────────────────────────────────────────
338
+
339
/**
 * Loads an agent definition (markdown with a `---` front-matter header).
 *
 * Looks for `<cwd>/.pi/agents/<name>.md`, then `<cwd>/agents/<name>.md`, and
 * uses the first file that exists and has a parseable header. Front matter is
 * read as simple `key: value` lines (not full YAML).
 *
 * Tolerates CRLF line endings, a leading UTF-8 BOM, and a closing `---` that
 * is the last line of the file (no body / no trailing newline).
 *
 * @param cwd  Project root.
 * @param name Agent file base name, without `.md`.
 * @returns The body as `systemPrompt`, plus `tools` (default
 *          "read,grep,find,ls") and `model` (default ""); `null` when no
 *          usable file is found.
 */
function loadAgentDef(cwd: string, name: string): { systemPrompt: string; tools: string; model: string } | null {
	const candidates = [join(cwd, ".pi", "agents", `${name}.md`), join(cwd, "agents", `${name}.md`)];
	for (const candidate of candidates) {
		if (!existsSync(candidate)) continue;
		const raw = readFileSync(candidate, "utf-8")
			.replace(/^\uFEFF/, "") // a BOM would defeat the ^--- anchor below
			.replace(/\r\n/g, "\n");
		// Body group is optional so "---\n...\n---" at EOF is still a valid header.
		const match = raw.match(/^---\n([\s\S]*?)\n---(?:\n([\s\S]*))?$/);
		if (!match) continue;
		const fm: Record<string, string> = {};
		for (const line of match[1].split("\n")) {
			const idx = line.indexOf(":");
			if (idx > 0) fm[line.slice(0, idx).trim()] = line.slice(idx + 1).trim();
		}
		return {
			systemPrompt: (match[2] ?? "").trim(),
			tools: fm.tools || "read,grep,find,ls",
			model: fm.model || "",
		};
	}
	return null;
}
355
+
356
+ // ── PROMPT.md Parser ─────────────────────────────────────────────────
357
+
358
/**
 * Parses a task PROMPT.md into a `ParsedTask`.
 *
 * Extracted fields:
 * - Title `# [Task: ]<ID-123> - <name>`; falls back to the task folder name
 *   for both id and name.
 * - `## Review Level: <digit>` (default 0) and `**Size:** <word>` (default "M").
 * - Steps: every line-initial `### Step <n>: <name>` heading. A step's section
 *   ends at the next step heading or the next level-2 (`## `) heading,
 *   whichever comes first, so checkboxes belonging to later sections are not
 *   attributed to the last step.
 * - Context docs: back-ticked paths with a known extension listed under
 *   `## Context to Read First`.
 *
 * @param content    Raw PROMPT.md text (CRLF is normalized).
 * @param promptPath Path of the PROMPT.md file; its directory becomes `taskFolder`.
 */
function parsePromptMd(content: string, promptPath: string): ParsedTask {
	const text = content.replace(/\r\n/g, "\n");
	const taskFolder = dirname(resolve(promptPath));

	// Task ID and name
	let taskId = basename(taskFolder);
	let taskName = taskId;
	const titleMatch = text.match(/^#\s+(?:Task:\s*)?(\S+-\d+)\s*[-–:]\s*(.+)/m);
	if (titleMatch) {
		taskId = titleMatch[1];
		taskName = titleMatch[2].trim();
	}

	// Review level
	const rlMatch = text.match(/##\s+Review Level[:\s]*(\d)/);
	const reviewLevel = rlMatch ? parseInt(rlMatch[1], 10) : 0;

	// Size
	const sizeMatch = text.match(/\*\*Size:\*\*\s*(\w+)/);
	const size = sizeMatch ? sizeMatch[1] : "M";

	// Steps — headings are anchored to line start, matching parseStatusMd.
	const stepRegex = /^###\s+Step\s+(\d+):\s*(.+)/gm;
	const positions: { number: number; name: string; start: number }[] = [];
	let m: RegExpExecArray | null;
	while ((m = stepRegex.exec(text)) !== null) {
		positions.push({ number: parseInt(m[1], 10), name: m[2].trim(), start: m.index });
	}

	const steps: StepInfo[] = [];
	for (let i = 0; i < positions.length; i++) {
		const nextStepStart = i + 1 < positions.length ? positions[i + 1].start : text.length;
		const untilNextStep = text.slice(positions[i].start, nextStepStart);
		// Stop at the next level-2 heading so trailing sections are excluded.
		const sectionBreak = untilNextStep.search(/\n##\s/);
		const section = sectionBreak === -1 ? untilNextStep : untilNextStep.slice(0, sectionBreak);

		const checkboxes: { text: string; checked: boolean }[] = [];
		const cbRegex = /^\s*-\s*\[([ xX])\]\s*(.*)/gm;
		let cb: RegExpExecArray | null;
		while ((cb = cbRegex.exec(section)) !== null) {
			checkboxes.push({ text: cb[2].trim(), checked: cb[1].toLowerCase() === "x" });
		}
		steps.push({
			number: positions[i].number,
			name: positions[i].name,
			status: "not-started",
			checkboxes,
			totalChecked: checkboxes.filter(c => c.checked).length,
			totalItems: checkboxes.length,
		});
	}

	// Context docs
	const contextDocs: string[] = [];
	const ctxMatch = text.match(/##\s+Context to Read First\s*\n+([\s\S]*?)(?=\n##\s|$)/);
	if (ctxMatch) {
		const pathRegex = /`([^\s`]+\.(?:md|yaml|json|go|ts|js))`/g;
		let pm: RegExpExecArray | null;
		while ((pm = pathRegex.exec(ctxMatch[1])) !== null) contextDocs.push(pm[1]);
	}

	return { taskId, taskName, reviewLevel, size, steps, contextDocs, taskFolder, promptPath };
}
413
+
414
+ // ── STATUS.md Parser ─────────────────────────────────────────────────
415
+
416
/**
 * Parses STATUS.md into per-step progress plus the review/iteration counters.
 *
 * - `**Review Counter:** n` and `**Iteration:** n` are read from any line.
 * - Each line-initial `### Step <n>: <name>` opens a step; its `**Status:**`
 *   line and `- [ ]` / `- [x]` items are collected until the next step heading
 *   or the next level-2 (`## `) heading. Closing on `## ` keeps content of
 *   trailing sections (Reviews, Notes, …) from being attributed to the last step.
 * - Step status is "complete" when the status text contains ✅ or "complete",
 *   "in-progress" when it contains 🟨 or "progress", otherwise "not-started".
 *
 * @param content Raw STATUS.md text (CRLF is normalized).
 */
function parseStatusMd(content: string): { steps: StepInfo[]; reviewCounter: number; iteration: number } {
	const text = content.replace(/\r\n/g, "\n");
	const steps: StepInfo[] = [];
	let currentStep: StepInfo | null = null;
	let reviewCounter = 0, iteration = 0;

	// Finalizes the open step (if any) and records it.
	const closeStep = (): void => {
		if (!currentStep) return;
		currentStep.totalChecked = currentStep.checkboxes.filter(c => c.checked).length;
		currentStep.totalItems = currentStep.checkboxes.length;
		steps.push(currentStep);
		currentStep = null;
	};

	for (const line of text.split("\n")) {
		const rcMatch = line.match(/\*\*Review Counter:\*\*\s*(\d+)/);
		if (rcMatch) reviewCounter = parseInt(rcMatch[1], 10);
		const itMatch = line.match(/\*\*Iteration:\*\*\s*(\d+)/);
		if (itMatch) iteration = parseInt(itMatch[1], 10);

		const stepMatch = line.match(/^###\s+Step\s+(\d+):\s*(.+)/);
		if (stepMatch) {
			closeStep();
			currentStep = {
				number: parseInt(stepMatch[1], 10), name: stepMatch[2].trim(),
				status: "not-started", checkboxes: [], totalChecked: 0, totalItems: 0,
			};
			continue;
		}
		// A level-2 heading starts a non-step section: the open step ends here.
		if (/^##\s/.test(line)) {
			closeStep();
			continue;
		}
		// Re-bind to a const so TypeScript keeps the narrowing (closeStep reassigns currentStep).
		const open: StepInfo | null = currentStep;
		if (open) {
			const ss = line.match(/\*\*Status:\*\*\s*(.*)/);
			if (ss) {
				const s = ss[1];
				if (s.includes("✅") || s.toLowerCase().includes("complete")) open.status = "complete";
				else if (s.includes("🟨") || s.toLowerCase().includes("progress")) open.status = "in-progress";
			}
			const cb = line.match(/^\s*-\s*\[([ xX])\]\s*(.*)/);
			if (cb) open.checkboxes.push({ text: cb[2].trim(), checked: cb[1].toLowerCase() === "x" });
		}
	}
	closeStep();
	return { steps, reviewCounter, iteration };
}
456
+
457
+ // ── STATUS.md Generator ──────────────────────────────────────────────
458
+
459
/**
 * Renders the initial STATUS.md for a task: header fields, one unchecked
 * section per step, then empty Reviews / Discoveries tables, an Execution Log
 * with a single "Task staged" row, and Blockers / Notes placeholders.
 *
 * Dates are today's date in UTC (`YYYY-MM-DD`). Every checkbox is emitted
 * unchecked regardless of its state in the parsed task.
 */
function generateStatusMd(task: ParsedTask): string {
	const today = new Date().toISOString().slice(0, 10);

	const header: string[] = [
		`# ${task.taskId}: ${task.taskName} — Status`,
		"",
		`**Current Step:** Not Started`,
		`**Status:** 🔵 Ready for Execution`,
		`**Last Updated:** ${today}`,
		`**Review Level:** ${task.reviewLevel}`,
		`**Review Counter:** 0`,
		`**Iteration:** 0`,
		`**Size:** ${task.size}`,
		"",
		"---",
		"",
	];

	const stepSections: string[] = [];
	for (const { number, name, checkboxes } of task.steps) {
		stepSections.push(`### Step ${number}: ${name}`, `**Status:** ⬜ Not Started`, "");
		stepSections.push(...checkboxes.map(box => `- [ ] ${box.text}`));
		stepSections.push("", "---", "");
	}

	const footer: string[] = [
		"## Reviews",
		"",
		"| # | Type | Step | Verdict | File |",
		"|---|------|------|---------|------|",
		"",
		"---",
		"",
		"## Discoveries",
		"",
		"| Discovery | Disposition | Location |",
		"|-----------|-------------|----------|",
		"",
		"---",
		"",
		"## Execution Log",
		"",
		"| Timestamp | Action | Outcome |",
		"|-----------|--------|---------|",
		`| ${today} | Task staged | STATUS.md auto-generated by task-runner |`,
		"",
		"---",
		"",
		"## Blockers",
		"",
		"*None*",
		"",
		"---",
		"",
		"## Notes",
		"",
		"*Reserved for execution notes*",
	];

	return [...header, ...stepSections, ...footer].join("\n");
}
485
+
486
+ // ── STATUS.md Updaters ───────────────────────────────────────────────
487
+
488
/**
 * Sets a `**Field:** value` header line in STATUS.md.
 *
 * If the field exists, the first occurrence's value is replaced. The match is
 * confined to a single line, so a field with an empty value no longer causes
 * the following line to be overwritten. If the field does not exist, a new
 * `**Field:** value` line is inserted directly after the first existing
 * `**…:**` line; when the file has no such line, it is rewritten unchanged
 * apart from CRLF normalization.
 *
 * `value` is always written literally: `$` sequences such as `$&` or `$1`
 * are not treated as replacement patterns.
 *
 * @param statusPath Path to STATUS.md (read and rewritten in place).
 * @param field      Field label without the surrounding `**` and `:`.
 * @param value      New value text.
 */
function updateStatusField(statusPath: string, field: string, value: string): void {
	const content = readFileSync(statusPath, "utf-8").replace(/\r\n/g, "\n");
	const escapedField = field.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	// [ \t]* and .* keep the match on one line (\s would swallow the newline).
	const existing = new RegExp(`(\\*\\*${escapedField}:\\*\\*[ \\t]*).*`);

	let updated: string;
	if (existing.test(content)) {
		updated = content.replace(existing, (_whole: string, label: string) => {
			// Ensure one separating space when the old value was empty.
			const prefix = /[ \t]$/.test(label) ? label : `${label} `;
			return `${prefix}${value}`;
		});
	} else {
		// Insert after the first existing **Field:** line.
		updated = content.replace(
			/(\*\*[^*]+:\*\*[ \t]*.*\n)/,
			(line: string) => `${line}**${field}:** ${value}\n`,
		);
	}
	writeFileSync(statusPath, updated);
}
499
+
500
/**
 * Rewrites the `**Status:**` line of one step section in STATUS.md.
 *
 * Finds the `### Step <stepNum>:` heading and replaces the first line-initial
 * `**Status:**` line that follows it, before any other step heading. The file
 * is always rewritten (with LF line endings), even when no line matched.
 */
function updateStepStatus(statusPath: string, stepNum: number, status: "not-started" | "in-progress" | "complete"): void {
	let label: string;
	switch (status) {
		case "complete":
			label = "✅ Complete";
			break;
		case "in-progress":
			label = "🟨 In Progress";
			break;
		default:
			label = "⬜ Not Started";
	}

	const rows = readFileSync(statusPath, "utf-8").replace(/\r\n/g, "\n").split("\n");

	let insideTarget = false;
	const hit = rows.findIndex(row => {
		const heading = /^###\s+Step\s+(\d+):/.exec(row);
		if (heading) insideTarget = parseInt(heading[1]) === stepNum;
		return insideTarget && /^\*\*Status:\*\*/.test(row);
	});
	if (hit !== -1) rows[hit] = `**Status:** ${label}`;

	writeFileSync(statusPath, rows.join("\n"));
}
515
+
516
/**
 * Appends a markdown table row to the table inside a `## <sectionName>`
 * section of STATUS.md.
 *
 * The row is placed directly after the last table line of the section —
 * including the `|---|` separator, so the first row added to a header-only
 * table lands below the separator rather than between header and separator.
 * A section ends at the next `## ` heading or `---` rule. If the section has
 * no table, the row goes where the section ends; if the section is not found,
 * the row is appended at the end of the file.
 *
 * @param statusPath  Path to STATUS.md (read and rewritten in place, LF endings).
 * @param sectionName Heading text to look for (matched as a literal prefix).
 * @param row         Complete markdown row, e.g. `| a | b |`.
 */
function appendTableRow(statusPath: string, sectionName: string, row: string): void {
	const content = readFileSync(statusPath, "utf-8").replace(/\r\n/g, "\n");
	const lines = content.split("\n");
	// Escape so section names are matched literally, not as regex syntax.
	const heading = new RegExp(`^##\\s+${sectionName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`);

	let insertIdx = -1, inSection = false, lastTableLine = -1;
	for (let i = 0; i < lines.length; i++) {
		if (heading.test(lines[i])) {
			inSection = true;
			continue;
		}
		if (!inSection) continue;
		// End of section — hit another ## heading or ---
		if (/^##\s/.test(lines[i]) || lines[i].trim() === "---") {
			insertIdx = lastTableLine >= 0 ? lastTableLine + 1 : i;
			break;
		}
		// Track the last line of the table (header, separator or data row).
		if (lines[i].startsWith("|")) lastTableLine = i;
	}
	if (insertIdx === -1) {
		// Section ran to end of file, or was never found.
		insertIdx = lastTableLine >= 0 ? lastTableLine + 1 : lines.length;
	}
	lines.splice(insertIdx, 0, row);
	writeFileSync(statusPath, lines.join("\n"));
}
543
+
544
/**
 * Adds a row to the "Execution Log" table in STATUS.md, stamped with the
 * current UTC time as `YYYY-MM-DD HH:MM`.
 */
function logExecution(statusPath: string, action: string, outcome: string): void {
	const isoMinute = new Date().toISOString().substring(0, 16);
	const stamp = isoMinute.split("T").join(" ");
	const row = `| ${stamp} | ${action} | ${outcome} |`;
	appendTableRow(statusPath, "Execution Log", row);
}
548
+
549
/** Adds a row (`# | type | Step N | verdict | file`) to the "Reviews" table in STATUS.md. */
function logReview(statusPath: string, num: string, type: string, stepNum: number, verdict: string, file: string): void {
	const cells = [num, type, `Step ${stepNum}`, verdict, file];
	appendTableRow(statusPath, "Reviews", `| ${cells.join(" | ")} |`);
}
552
+
553
+ // ── Project Context Builder ──────────────────────────────────────────
554
+
555
/**
 * Builds the markdown "project context" text for a task: project name and
 * description, the standards that apply to the task's area (rules and
 * reference docs), the configured testing commands, and the task folder.
 * Sections with no entries are omitted.
 */
function buildProjectContext(config: TaskConfig, taskFolder: string): string {
	const { rules, docs } = resolveStandards(config, taskFolder);
	const out: string[] = [`## Project: ${config.project.name}`];
	if (config.project.description) out.push(config.project.description);
	out.push("");

	// Emits "title, items…, blank line" only when there is something to list.
	const addSection = (title: string, items: string[]): void => {
		if (items.length === 0) return;
		out.push(title, ...items, "");
	};

	addSection("## Code Standards", rules.map(rule => `- ${rule}`));
	addSection("## Reference Documentation", docs.map(doc => `- ${doc}`));
	addSection(
		"## Testing Commands",
		Object.entries(config.testing.commands).map(([name, cmd]) => `- **${name}:** \`${cmd}\``),
	);

	out.push(`## Task Folder\n${taskFolder}`);
	return out.join("\n");
}
578
+
579
+ // ── Git Helpers ──────────────────────────────────────────────────────
580
+
581
/**
 * Short SHA of the current HEAD commit, via `git rev-parse --short HEAD`
 * (5-second timeout). Captured before a step starts so code reviews can diff
 * the whole step rather than only uncommitted changes.
 *
 * @returns The trimmed SHA, or "" when git exits non-zero or cannot be run.
 */
function getHeadCommitSha(): string {
	let sha = "";
	try {
		const { status, stdout } = spawnSync("git", ["rev-parse", "--short", "HEAD"], {
			encoding: "utf-8",
			timeout: 5000,
		});
		if (status === 0) sha = (stdout || "").trim();
	} catch {
		sha = "";
	}
	return sha;
}
597
+
598
+ // ── Standards Resolution ─────────────────────────────────────────────
599
+
600
/**
 * Resolve which standards apply to a task based on its area.
 *
 * The task folder is compared with each `task_areas[*].path`; the first area
 * whose path occurs in the folder on whole path-segment boundaries wins
 * (so area `docs/tasks` matches `/repo/docs/tasks/TP-1` but not
 * `/repo/docs/tasks-archive/TP-1`). Separators are normalized, and empty
 * or `.` segments (leading `./`, trailing `/`) are ignored.
 *
 * If the matched area has an entry in `standards_overrides`, its `docs` /
 * `rules` replace the global ones (each falling back to global when omitted).
 * Otherwise — or when no area matches — the global `standards` are returned.
 *
 * @param config     Loaded configuration.
 * @param taskFolder Task folder path (absolute or relative, `/` or `\`).
 */
function resolveStandards(config: TaskConfig, taskFolder: string): { docs: string[]; rules: string[] } {
	// "a\\b/./c/" → "a/b/c"
	const normalize = (p: string): string =>
		p.replace(/\\/g, "/").split("/").filter(seg => seg !== "" && seg !== ".").join("/");

	// Wrap in slashes so includes() can only match complete segments.
	const folder = `/${normalize(taskFolder)}/`;

	for (const [areaName, areaCfg] of Object.entries(config.task_areas)) {
		const areaPath = normalize(areaCfg.path);
		if (!areaPath) continue; // an empty path would match every folder
		if (!folder.includes(`/${areaPath}/`)) continue;

		const override = config.standards_overrides[areaName];
		if (override) {
			return {
				docs: override.docs ?? config.standards.docs,
				rules: override.rules ?? config.standards.rules,
			};
		}
		break; // Area found but no override — use global
	}

	return { docs: config.standards.docs, rules: config.standards.rules };
}
630
+
631
+ // ── Review Request Generator ─────────────────────────────────────────
632
+
633
/**
 * Builds the markdown prompt handed to the reviewer agent.
 *
 * Both review types share the same skeleton (title, intro, task context,
 * instructions, project standards, output path); they differ in the wording
 * and in the instruction list. Code reviews additionally tell the reviewer to
 * diff from `stepBaselineCommit` to HEAD when a baseline is given, because
 * workers commit via checkpoints and a plain `git diff` may show nothing.
 *
 * @param type               "plan" for a plan review; any other value yields a code review.
 * @param stepNum            Step number under review.
 * @param stepName           Step title under review.
 * @param task               Parsed task (prompt path and folder are referenced).
 * @param config             Configuration (project name, standards).
 * @param outputPath         File the reviewer is asked to write its review to.
 * @param stepBaselineCommit Commit captured before the step started (code reviews only).
 */
function generateReviewRequest(
	type: "plan" | "code", stepNum: number, stepName: string,
	task: ParsedTask, config: TaskConfig, outputPath: string,
	stepBaselineCommit?: string,
): string {
	const { docs, rules } = resolveStandards(config, task.taskFolder);
	const docBullets = docs.map(d => ` - ${d}`).join("\n");
	const ruleBullets = rules.map(r => `- ${r}`).join("\n");
	const isPlan = type === "plan";

	let title: string;
	let intro: string;
	let stepLine: string;
	let extraContext: string[] = [];
	let instructions: string[];

	if (isPlan) {
		title = `# Review Request: Plan Review`;
		intro = `You are reviewing an implementation plan for a ${config.project.name} task.`;
		stepLine = `- **Step being planned:** Step ${stepNum}: ${stepName}`;
		instructions = [
			`1. Read the PROMPT.md for full requirements`,
			`2. Read STATUS.md for progress so far`,
			`3. Check relevant source files for existing patterns:`,
		];
	} else {
		// With a baseline, diff the whole step; otherwise only uncommitted changes are visible.
		const range = stepBaselineCommit ? ` ${stepBaselineCommit}..HEAD` : "";
		const diffCmd = `git diff${range} --name-only`;
		const diffFullCmd = `git diff${range}`;

		title = `# Review Request: Code Review`;
		intro = `You are reviewing code changes for a ${config.project.name} task.`;
		stepLine = `- **Step reviewed:** Step ${stepNum}: ${stepName}`;
		if (stepBaselineCommit) extraContext = [`- **Step baseline commit:** ${stepBaselineCommit}`];
		instructions = [
			`1. Run \`${diffCmd}\` to see files changed in this step`,
			` Then \`${diffFullCmd}\` for the full diff`,
			` **Important:** The worker commits code via checkpoints, so plain \`git diff\` may show nothing.`,
			` Always use the baseline commit range above to see all step changes.`,
			`2. Read changed files in full for context`,
			`3. Check neighboring files for pattern consistency`,
			`4. Check standards:`,
		];
	}

	const sections: string[] = [
		title, "",
		intro,
		`You have full tool access — use \`read\` to examine files and \`bash\` to run commands.`, "",
		`## Task Context`, "",
		`- **Task PROMPT:** ${task.promptPath}`,
		`- **Task STATUS:** ${join(task.taskFolder, "STATUS.md")}`,
		stepLine,
		...extraContext,
		"",
		`## Instructions`, "",
		...instructions,
		docBullets, "",
		`## Project Standards`, "", ruleBullets, "",
		`## Output`, "",
		`Write your review to: \`${outputPath}\``,
	];
	return sections.join("\n");
}
696
+
697
/**
 * Extracts the reviewer's verdict from review markdown.
 *
 * Looks for a `## Verdict` / `### Verdict` heading followed — on the same
 * line or the next lines — by APPROVE, REVISE or RETHINK (case-insensitive).
 * Colons, whitespace and markdown emphasis/code markers (`*`, `_`, backtick)
 * between the heading and the keyword are skipped, so `### Verdict: **APPROVE**`
 * is recognized.
 *
 * @returns The verdict in upper case, or "UNKNOWN" when none is found.
 */
function extractVerdict(reviewContent: string): string {
	const match = reviewContent.match(/###?\s*Verdict[:\s*_`]*(APPROVE|REVISE|RETHINK)/i);
	return match ? match[1].toUpperCase() : "UNKNOWN";
}
701
+
702
+ // ── Subagent Spawner ─────────────────────────────────────────────────
703
+
704
/**
 * Spawns a headless `pi` subprocess in JSON mode and consumes its event stream.
 *
 * The system prompt and the user prompt are written to temp files (passed via
 * `--append-system-prompt <file>` and `@<file>`) so markdown content never has
 * to be shell-escaped. stdout is read as newline-delimited JSON:
 *   - `message_update` text deltas are accumulated into the returned `output`
 *   - `tool_execution_start` is forwarded to `onToolCall`
 *   - `message_end` usage is forwarded to `onTokenUpdate`; when `contextWindow`
 *     is set, a context percentage is reported via `onContextPct`, the wrap-up
 *     file is written once `warnPct` is reached, and the process is sent
 *     SIGTERM once `killPct` is reached
 * Every parsed event is also passed to `onJsonEvent`.
 *
 * A final stdout line without a trailing newline goes through the same handler
 * as streamed lines, so callbacks see it too; only the wrap-up/kill
 * enforcement is skipped for it because the process has already exited.
 *
 * @param opts - Model/tool/thinking settings, prompt texts, optional context
 *   limits and optional event callbacks.
 * @returns `promise` resolving (never rejecting from process events) with the
 *   accumulated text, exit code (1 when none was reported), elapsed ms and
 *   whether the process was killed by this function; `kill` sends SIGTERM.
 */
function spawnAgent(opts: {
  model: string; tools: string; thinking: string;
  systemPrompt: string; prompt: string;
  contextWindow?: number; warnPct?: number; killPct?: number;
  wrapUpFile?: string;
  onToolCall?: (toolName: string, args: any) => void;
  onContextPct?: (pct: number) => void;
  onTokenUpdate?: (tokens: { input: number; output: number; cacheRead: number; cacheWrite: number; cost: number }) => void;
  onJsonEvent?: (event: Record<string, unknown>) => void;
}): { promise: Promise<{ output: string; exitCode: number; elapsed: number; killed: boolean }>; kill: () => void } {
  let killFn: () => void = () => {};

  const promise = new Promise<{ output: string; exitCode: number; elapsed: number; killed: boolean }>((resolve) => {
    // Write system prompt and user prompt to temp files to avoid
    // shell escaping issues (backticks, quotes, etc. in markdown)
    const id = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    const sysTmpFile = join(tmpdir(), `pi-task-sys-${id}.txt`);
    const promptTmpFile = join(tmpdir(), `pi-task-prompt-${id}.txt`);
    writeFileSync(sysTmpFile, opts.systemPrompt);
    writeFileSync(promptTmpFile, opts.prompt);

    const args = [
      "-p", "--mode", "json",
      "--no-session", "--no-extensions", "--no-skills",
      "--model", opts.model,
      "--tools", opts.tools,
      "--thinking", opts.thinking,
      "--append-system-prompt", sysTmpFile,
      `@${promptTmpFile}`,
    ];

    const proc = spawn("pi", args, {
      stdio: ["ignore", "pipe", "pipe"],
      env: { ...process.env },
      shell: true,
    });

    // Temp files are removed one second after the process finishes.
    const cleanupTmp = () => {
      setTimeout(() => {
        try { unlinkSync(sysTmpFile); } catch {}
        try { unlinkSync(promptTmpFile); } catch {}
      }, 1000);
    };

    let killed = false;
    const startTime = Date.now();
    const textChunks: string[] = [];
    let buffer = "";

    killFn = () => { killed = true; proc.kill("SIGTERM"); };

    /**
     * Parses one stdout line and dispatches the event.
     * `processRunning` is false for the leftover line handled on close; in
     * that case wrap-up/kill enforcement is skipped (nothing left to stop).
     */
    const handleLine = (line: string, processRunning: boolean): void => {
      if (!line.trim()) return;
      try {
        const event = JSON.parse(line);
        // Tee all events to JSONL log if callback provided
        opts.onJsonEvent?.(event);

        if (event.type === "message_update") {
          const delta = event.assistantMessageEvent;
          if (delta?.type === "text_delta" && delta.delta) {
            textChunks.push(delta.delta);
          }
          return;
        }

        if (event.type === "tool_execution_start") {
          opts.onToolCall?.(event.toolName, event.args);
          return;
        }

        if (event.type !== "message_end") return;
        const usage = event.message?.usage;
        if (!usage) return;

        // Report per-turn token counts to caller (caller accumulates).
        // Anthropic `input` = uncached new tokens only; `cacheRead`
        // holds bulk of input. `cost.total` = exact dollar cost for turn.
        opts.onTokenUpdate?.({
          input: usage.input || 0,
          output: usage.output || 0,
          cacheRead: usage.cacheRead || 0,
          cacheWrite: usage.cacheWrite || 0,
          cost: usage.cost?.total || 0,
        });

        if (!opts.contextWindow) return;
        // Use totalTokens (cumulative) — works across providers.
        // Falls back to input + output; a NaN sum collapses to 0 via `||`.
        const tokens = usage.totalTokens || (usage.input + usage.output) || 0;
        if (tokens <= 0) return;

        const pct = (tokens / opts.contextWindow) * 100;
        opts.onContextPct?.(pct);
        if (!processRunning) return;

        if (opts.warnPct && pct >= opts.warnPct && opts.wrapUpFile && !existsSync(opts.wrapUpFile)) {
          writeFileSync(opts.wrapUpFile, `Wrap up at ${new Date().toISOString()}`);
        }
        if (opts.killPct && pct >= opts.killPct && !killed) {
          killed = true;
          proc.kill("SIGTERM");
        }
      } catch {
        // Best effort: non-JSON lines and callback failures are ignored so
        // one bad line cannot take down the stream reader.
      }
    };

    proc.stdout!.setEncoding("utf-8");
    proc.stdout!.on("data", (chunk: string) => {
      buffer += chunk;
      const lines = buffer.split("\n");
      // The last element is an incomplete line (or "") — keep it for later.
      buffer = lines.pop() || "";
      for (const line of lines) handleLine(line, true);
    });

    // stderr is drained and discarded so the pipe cannot fill up.
    proc.stderr?.setEncoding("utf-8");
    proc.stderr?.on("data", () => {});

    proc.on("close", (code) => {
      cleanupTmp();
      // Flush a final line that arrived without a trailing newline.
      handleLine(buffer, false);
      buffer = "";
      resolve({ output: textChunks.join(""), exitCode: code ?? 1, elapsed: Date.now() - startTime, killed });
    });

    proc.on("error", (err) => {
      cleanupTmp();
      resolve({ output: `Error: ${err.message}`, exitCode: 1, elapsed: Date.now() - startTime, killed: false });
    });
  });

  return { promise, kill: () => killFn() };
}
835
+
836
+ // ── TMUX Agent Spawner ───────────────────────────────────────────────
837
+
838
/**
 * Runs a Pi agent inside a detached, named tmux session instead of a piped
 * subprocess, exposing the same `{ promise, kill }` shape as `spawnAgent()`.
 *
 * What differs from subprocess mode:
 * - There is no JSON event stream, so no tool/context callbacks exist and the
 *   resolved `output` is "" on normal completion.
 * - Completion is detected by polling `tmux has-session` every 2 seconds.
 * - `kill` runs `tmux kill-session` and sets `killed` in the result.
 * - `exitCode` is 0 once the session disappears and 1 if polling itself fails;
 *   the agent's real exit status is not observed here.
 *
 * Temp prompt files are removed synchronously when the session ends, when
 * polling fails, when `kill` is called, and when session creation fails.
 * The tmux preflight runs before the files are written.
 *
 * @param opts.sessionName - tmux session name; an existing session with this
 *   name is killed first.
 * @param opts.cwd - Working directory the agent command `cd`s into.
 * @param opts.systemPrompt - System prompt text (written to a temp file).
 * @param opts.prompt - User prompt text (written to a temp file).
 * @param opts.model - Model identifier passed to `--model`.
 * @param opts.tools - Tool list passed to `--tools`.
 * @param opts.thinking - Value passed to `--thinking`.
 * @throws Error when `tmux -V` exits non-zero or the session cannot be created.
 */
function spawnAgentTmux(opts: {
  sessionName: string;
  cwd: string;
  systemPrompt: string;
  prompt: string;
  model: string;
  tools: string;
  thinking: string;
}): { promise: Promise<{ output: string; exitCode: number; elapsed: number; killed: boolean }>; kill: () => void } {
  const target = opts.sessionName;

  // ── Preflight: tmux must be runnable ─────────────────────────────
  // A null status (no exit code reported) is deliberately let through.
  const versionProbe = spawnSync("tmux", ["-V"], { shell: true });
  if (versionProbe.status !== null && versionProbe.status !== 0) {
    throw new Error(
      "tmux is not installed or not in PATH. " +
      "Install tmux to use TMUX spawn mode, or set TASK_RUNNER_SPAWN_MODE=subprocess. " +
      `(tmux -V exited with code ${versionProbe.status})`
    );
  }

  // ── Prompts go to temp files (no shell escaping of markdown) ─────
  const id = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const sysTmpFile = join(tmpdir(), `pi-task-sys-${id}.txt`);
  const promptTmpFile = join(tmpdir(), `pi-task-prompt-${id}.txt`);
  writeFileSync(sysTmpFile, opts.systemPrompt);
  writeFileSync(promptTmpFile, opts.prompt);

  const removeTempFiles = (): void => {
    for (const file of [sysTmpFile, promptTmpFile]) {
      try { unlinkSync(file); } catch {}
    }
  };

  // ── Build the command line handed to tmux as one string ─────────
  // Values containing whitespace or shell metacharacters are wrapped in
  // single quotes; embedded single quotes become '\'' (close, escaped
  // quote, reopen). Plain values are passed through untouched.
  const needsQuoting = /[\s"'`$\\!&|;()<>{}#*?~]/;
  const shellQuote = (value: string): string => {
    if (!needsQuoting.test(value)) return value;
    return `'${value.replace(/'/g, "'\\''")}'`;
  };

  const piCommand = [
    "pi",
    "-p", // Non-interactive: process prompt and exit
    "--no-session", "--no-extensions", "--no-skills",
    "--model", shellQuote(opts.model),
    "--tools", shellQuote(opts.tools),
    "--thinking", shellQuote(opts.thinking),
    "--append-system-prompt", shellQuote(sysTmpFile),
    `@${shellQuote(promptTmpFile)}`,
  ].join(" ");

  // ── Replace any leftover session with the same name ─────────────
  const sessionExists = (): boolean =>
    spawnSync("tmux", ["has-session", "-t", target]).status === 0;

  if (sessionExists()) {
    console.error(`[task-runner] tmux: killing stale session '${target}'`);
    spawnSync("tmux", ["kill-session", "-t", target]);
  }

  // ── Create the session ──────────────────────────────────────────
  // The working directory is set with a `cd … &&` prefix rather than tmux
  // `-c` (per the original author: `-c` with Windows paths silently fails
  // under MSYS2/Git Bash tmux), and TERM is forced to xterm-256color
  // (per the original author: Pi's TUI hangs with tmux-256color).
  // A leading drive letter "C:\" is rewritten to "/c/" and remaining
  // backslashes become forward slashes.
  const posixCwd = opts.cwd
    .replace(/^([A-Za-z]):\\/, (_, d: string) => `/${d.toLowerCase()}/`)
    .replace(/\\/g, "/");
  const wrappedCommand = `cd ${shellQuote(posixCwd)} && TERM=xterm-256color ${piCommand}`;

  const created = spawnSync("tmux", ["new-session", "-d", "-s", target, wrappedCommand]);
  if (created.status !== 0) {
    removeTempFiles();
    const stderr = created.stderr?.toString().trim() || "unknown error";
    console.error(`[task-runner] tmux: session '${target}' creation failed: ${stderr}`);
    throw new Error(
      `Failed to create TMUX session '${target}': ${stderr}. ` +
      `Verify tmux is running and the session name is valid.`
    );
  }

  console.error(`[task-runner] tmux: session '${target}' created (cwd: ${opts.cwd})`);

  // ── Wait for the session to disappear ───────────────────────────
  let killed = false;
  const startTime = Date.now();
  const pause = (ms: number) => new Promise(r => setTimeout(r, ms));

  const waitForExit = async (): Promise<{ output: string; exitCode: number; elapsed: number; killed: boolean }> => {
    try {
      // Sleep first, then probe; stop as soon as has-session is non-zero.
      do {
        await pause(2000);
      } while (sessionExists());
    } catch (pollErr: any) {
      const reason = pollErr?.message || pollErr;
      console.error(`[task-runner] tmux: polling error for '${target}': ${reason}`);
      removeTempFiles();
      console.error(`[task-runner] tmux: cleanup done for '${target}' (poll-fail)`);
      return {
        output: `Polling error: ${reason}`,
        exitCode: 1,
        elapsed: Date.now() - startTime,
        killed: false,
      };
    }

    const elapsed = Date.now() - startTime;
    console.error(`[task-runner] tmux: session '${target}' ended after ${Math.round(elapsed / 1000)}s${killed ? " (killed)" : ""}`);
    removeTempFiles();
    console.error(`[task-runner] tmux: cleanup done for '${target}'`);
    // No output is captured and the session's disappearance is reported as 0.
    return { output: "", exitCode: 0, elapsed, killed };
  };

  const promise = waitForExit();

  // ── Kill function ───────────────────────────────────────────────
  const kill = (): void => {
    killed = true;
    console.error(`[task-runner] tmux: killing session '${target}'`);
    const outcome = spawnSync("tmux", ["kill-session", "-t", target]);
    if (outcome.status !== 0) {
      // A failed kill is treated as "session already gone", not an error.
      console.error(`[task-runner] tmux: session '${target}' already exited (kill was no-op)`);
    }
    removeTempFiles();
    console.error(`[task-runner] tmux: cleanup done for '${target}' (killed)`);
  };

  return { promise, kill };
}
1021
+
1022
+ // ── Display Helpers ──────────────────────────────────────────────────
1023
+
1024
/**
 * Converts a hyphenated identifier into a spaced, capitalized label,
 * e.g. "task-worker" → "Task Worker". Only the first character of each
 * hyphen-separated part is upper-cased; the rest is left as-is.
 */
function displayName(name: string): string {
  const words: string[] = [];
  for (const part of name.split("-")) {
    words.push(part.charAt(0).toUpperCase() + part.slice(1));
  }
  return words.join(" ");
}
1027
+
1028
+ // ── Extension ────────────────────────────────────────────────────────
1029
+
1030
+ export default function (pi: ExtensionAPI) {
1031
+ let state = freshState();
1032
+ let widgetCtx: ExtensionContext | undefined;
1033
+
1034
+ // ── Widget Rendering ─────────────────────────────────────────────
1035
+
1036
/**
 * Renders one step as a six-line bordered card: top border, step number,
 * status icon + name, checkbox progress, an optional activity line, and
 * bottom border.
 *
 * @param step - Step to draw (number, name, status and checkbox counts are read).
 * @param colWidth - Total card width including the two border columns.
 * @param theme - Theme object providing `fg(color, text)` and `bold(text)`.
 * @returns The card's lines, top to bottom.
 */
function renderStepCard(step: StepInfo, colWidth: number, theme: any): string[] {
  const inner = colWidth - 2;
  const clip = (text: string, limit: number) => {
    if (text.length <= limit) return text;
    return text.slice(0, limit - 3) + "...";
  };

  // The activity line is only shown for the step currently being executed.
  const active = state.phase === "running" && state.currentStep === step.number;

  let statusColor = "dim";
  let statusIcon = "○";
  if (step.status === "complete") {
    statusColor = "success";
    statusIcon = "✓";
  } else if (step.status === "in-progress") {
    statusColor = "accent";
    statusIcon = "●";
  }

  // Each row tracks its visible length separately because themed strings
  // are not measured here; padding is computed from the plain text.
  const title = `Step ${step.number}`;
  const titleCell = theme.fg("accent", theme.bold(clip(title, inner)));
  const titleLen = Math.min(title.length, inner);

  const statusText = `${statusIcon} ${clip(step.name, inner - 4)}`;
  const statusCell = theme.fg(statusColor, statusText);
  const statusLen = Math.min(statusText.length, inner);

  const progressText = `${step.totalChecked}/${step.totalItems} ✓`;
  const allChecked = step.totalItems > 0 && step.totalChecked === step.totalItems;
  const progressCell = theme.fg(allChecked ? "success" : "muted", progressText);
  const progressLen = progressText.length;

  let activityCell = "";
  let activityLen = 0;
  if (active && state.workerStatus === "running") {
    const iterText = `iter ${state.workerIteration}`;
    const ctxText = ` ctx:${Math.round(state.workerContextPct)}%`;
    activityCell = theme.fg("accent", iterText) + theme.fg("dim", ctxText);
    activityLen = (iterText + ctxText).length;
  } else if (active && state.reviewerStatus === "running") {
    activityCell = theme.fg("warning", `reviewing...`);
    activityLen = "reviewing...".length;
  }

  const horizontal = "─".repeat(inner);
  const row = (content: string, visibleLen: number) => {
    const padding = " ".repeat(Math.max(0, inner - visibleLen));
    return theme.fg("dim", "│") + content + padding + theme.fg("dim", "│");
  };

  const lines: string[] = [];
  lines.push(theme.fg("dim", "┌" + horizontal + "┐"));
  lines.push(row(" " + titleCell, 1 + titleLen));
  lines.push(row(" " + statusCell, 1 + statusLen));
  lines.push(row(" " + progressCell, 1 + progressLen));
  lines.push(activityCell ? row(" " + activityCell, activityLen ? 1 + activityLen : 0) : row("", activityLen ? 1 + activityLen : 0));
  lines.push(theme.fg("dim", "└" + horizontal + "┘"));
  return lines;
}
1081
+
1082
/**
 * Publishes the current runner state: writes the lane sidecar state, refreshes
 * cached step statuses from STATUS.md (best effort), and (re)registers the
 * "task-runner" widget whose `render(width)` draws the header, overall
 * progress bar, step cards and the active worker/reviewer status line.
 *
 * Rendering is safe for very narrow widths: the progress bar width is clamped
 * to zero or more (a negative count would make `String.prototype.repeat`
 * throw), and the column count is kept at one or more.
 */
function updateWidgets() {
  // Write sidecar state for web dashboard (orchestrated mode)
  writeLaneState(state);

  if (!widgetCtx) return;
  const ctx = widgetCtx;

  // Refresh step statuses from STATUS.md if task is active
  if (state.task) {
    const statusPath = join(state.task.taskFolder, "STATUS.md");
    if (existsSync(statusPath)) {
      try {
        const parsed = parseStatusMd(readFileSync(statusPath, "utf-8"));
        for (const s of parsed.steps) state.stepStatuses.set(s.number, s);
      } catch {
        // Best effort: an unreadable STATUS.md leaves the cached statuses as-is.
      }
    }
  }

  ctx.ui.setWidget("task-runner", (_tui: any, theme: any) => {
    return {
      render(width: number): string[] {
        if (!state.task) {
          return ["", theme.fg("dim", " No task loaded. Use /task <path/to/PROMPT.md> to start.")];
        }

        const task = state.task;
        const lines: string[] = [""];

        // Header
        const phaseIcon = state.phase === "running" ? "●"
          : state.phase === "paused" ? "⏸"
          : state.phase === "complete" ? "✓"
          : state.phase === "error" ? "✗" : "○";
        const phaseColor = state.phase === "running" ? "accent"
          : state.phase === "complete" ? "success"
          : state.phase === "error" ? "error" : "dim";

        const header =
          theme.fg(phaseColor, ` ${phaseIcon} `) +
          theme.fg("accent", theme.bold(task.taskId)) +
          theme.fg("dim", ": ") +
          theme.fg("muted", task.taskName) +
          theme.fg("dim", " ") +
          theme.fg("warning", `L${task.reviewLevel}`) +
          theme.fg("dim", " · ") +
          theme.fg("muted", task.size) +
          theme.fg("dim", " · ") +
          theme.fg("success", `iter ${state.totalIterations}`);
        lines.push(truncateToWidth(header, width));

        // Progress bar — prefer the refreshed status for each step.
        const allSteps = task.steps.map(s => state.stepStatuses.get(s.number) || s);
        const totalCb = allSteps.reduce((a, s) => a + s.totalItems, 0);
        const doneCb = allSteps.reduce((a, s) => a + s.totalChecked, 0);
        const pct = totalCb > 0 ? Math.round((doneCb / totalCb) * 100) : 0;
        // Clamp: width < 20 would otherwise yield a negative repeat count.
        const barWidth = Math.max(0, Math.min(30, width - 20));
        const filled = Math.min(barWidth, Math.max(0, Math.round((pct / 100) * barWidth)));
        const progressBar =
          theme.fg("dim", " ") +
          theme.fg("warning", "[") +
          theme.fg("success", "█".repeat(filled)) +
          theme.fg("dim", "░".repeat(barWidth - filled)) +
          theme.fg("warning", "]") +
          theme.fg("dim", " ") +
          theme.fg("accent", `${doneCb}/${totalCb}`) +
          theme.fg("dim", ` (${pct}%)`);
        lines.push(truncateToWidth(progressBar, width));
        lines.push("");

        // Step cards — fit as many as the terminal allows, wrap to rows
        const steps = allSteps;
        const arrowWidth = 3;
        const minCardWidth = 16;
        const maxCols = Math.max(1, Math.floor((width + arrowWidth) / (minCardWidth + arrowWidth)));
        // At least one column so the width division below is never by zero.
        const cols = Math.max(1, Math.min(steps.length, maxCols));
        const colWidth = Math.max(minCardWidth, Math.floor((width - arrowWidth * (cols - 1)) / cols));
        // Blank separator has the same width as the arrow so borders line up.
        const gap = " ".repeat(arrowWidth);

        // Render in rows of `cols` cards
        for (let rowStart = 0; rowStart < steps.length; rowStart += cols) {
          const rowSteps = steps.slice(rowStart, rowStart + cols);
          const cards = rowSteps.map(s => renderStepCard(s, colWidth, theme));

          if (cards.length > 0) {
            const cardHeight = cards[0].length;
            const arrowRow = 2; // the arrow is drawn on the status line of the card
            for (let line = 0; line < cardHeight; line++) {
              let row = cards[0][line];
              for (let c = 1; c < cards.length; c++) {
                row += line === arrowRow ? theme.fg("dim", " → ") : gap;
                row += cards[c][line];
              }
              lines.push(truncateToWidth(row, width));
            }
          }
        }

        // Worker status line (reviewer line is shown only when no worker runs)
        if (state.workerStatus === "running") {
          lines.push("");
          lines.push(truncateToWidth(
            theme.fg("accent", " ● Worker: ") +
            theme.fg("dim", `${Math.round(state.workerElapsed / 1000)}s · `) +
            theme.fg("dim", `🔧${state.workerToolCount}`) +
            (state.workerLastTool
              ? theme.fg("dim", " · ") + theme.fg("muted", state.workerLastTool)
              : ""),
            width,
          ));
        } else if (state.reviewerStatus === "running") {
          lines.push("");
          lines.push(truncateToWidth(
            theme.fg("warning", " ◉ Reviewer: ") +
            theme.fg("dim", `${state.reviewerType} · ${Math.round(state.reviewerElapsed / 1000)}s`) +
            (state.reviewerLastTool
              ? theme.fg("dim", " · ") + theme.fg("muted", state.reviewerLastTool)
              : ""),
            width,
          ));
        }

        return lines;
      },
      invalidate() {},
    };
  });
}
1209
+
1210
+ // ── Execution Engine ─────────────────────────────────────────────
1211
+
1212
/**
 * Drives the loaded task from its first incomplete step to completion.
 *
 * Marks STATUS.md as in progress, skips the leading run of steps already
 * marked complete, then executes the remaining steps in order. Returns early
 * if the runner is paused or enters the error phase. After the last step it
 * writes the `.DONE` marker, marks STATUS.md complete and, outside
 * orchestrated mode, moves the task folder into a sibling `archive/` folder.
 *
 * @param ctx - Extension context used for user notifications.
 */
async function executeTask(ctx: ExtensionContext): Promise<void> {
  const task = state.task;
  if (!task || !state.config) return;

  const statusPath = join(task.taskFolder, "STATUS.md");

  updateStatusField(statusPath, "Status", "🟡 In Progress");
  updateStatusField(statusPath, "Last Updated", new Date().toISOString().slice(0, 10));
  logExecution(statusPath, "Task started", "Extension-driven execution");

  // Resume point: one past the last step in the leading run of completed steps.
  const parsedStatus = parseStatusMd(readFileSync(statusPath, "utf-8"));
  let resumeFrom = 0;
  for (const entry of parsedStatus.steps) {
    if (entry.status !== "complete") break;
    resumeFrom = entry.number + 1;
  }

  for (const step of task.steps) {
    if (step.number < resumeFrom) continue;

    if (state.phase === "paused") {
      logExecution(statusPath, "Paused", `User paused at Step ${step.number}`);
      ctx.ui.notify(`Task paused at Step ${step.number}`, "info");
      return;
    }

    state.currentStep = step.number;
    updateWidgets();

    await executeStep(step, ctx);

    // A step that ended in error or pause stops the whole task here.
    if (state.phase === "error" || state.phase === "paused") return;
  }

  // Every step ran: drop the completion marker and update STATUS.md.
  writeFileSync(
    join(task.taskFolder, ".DONE"),
    `Completed: ${new Date().toISOString()}\nTask: ${task.taskId}\n`,
  );
  updateStatusField(statusPath, "Status", "✅ Complete");
  logExecution(statusPath, "Task complete", ".DONE created");

  // Auto-archive only for standalone runs. In orchestrated runs the
  // orchestrator polls .DONE at the original path and archives after merge.
  if (isOrchestratedMode()) {
    ctx.ui.notify("ℹ️ Orchestrated run: skipping auto-archive (orchestrator handles archival)", "info");
  } else {
    const archiveDir = join(dirname(task.taskFolder), "archive");
    const archiveDest = join(archiveDir, basename(task.taskFolder));
    try {
      if (!existsSync(archiveDir)) mkdirSync(archiveDir, { recursive: true });
      const { renameSync } = require("fs");
      renameSync(task.taskFolder, archiveDest);
      // STATUS.md has moved with the folder, so log against the new location.
      logExecution(join(archiveDest, "STATUS.md"), "Archived", `Moved to ${archiveDest}`);
      ctx.ui.notify(`📦 Archived to ${archiveDest}`, "info");
    } catch (err: any) {
      // Archival is a convenience; failure is reported but does not fail the task.
      ctx.ui.notify(`Archive failed (move manually): ${err?.message}`, "warning");
    }
  }

  state.phase = "complete";
  updateWidgets();
  ctx.ui.notify(`✅ Task ${task.taskId} complete!`, "success");
}
1278
+
1279
/**
 * Executes a single step: optional plan review, the worker iteration loop,
 * optional code review, then marks the step complete.
 *
 * The worker loop re-reads STATUS.md before and after every worker run. A step
 * counts as finished when STATUS.md marks it complete or all of its (non-zero)
 * checkboxes are checked. The step is reported as blocked, and the runner
 * moves to the "error" phase, when either
 *   - no additional checkbox was checked for `no_progress_limit` consecutive
 *     iterations, or
 *   - `max_worker_iterations` is exhausted and the step is still unfinished
 *     (previously this fell through and the step was marked complete anyway).
 *
 * @param step - The step to execute.
 * @param ctx - Extension context used for notifications and passed to
 *   reviews and worker runs.
 */
async function executeStep(step: StepInfo, ctx: ExtensionContext): Promise<void> {
  if (!state.task || !state.config) return;

  const task = state.task;
  const config = state.config;
  const statusPath = join(task.taskFolder, "STATUS.md");

  // Reads this step's current entry from STATUS.md (undefined if absent).
  const readStep = () =>
    parseStatusMd(readFileSync(statusPath, "utf-8")).steps.find(s => s.number === step.number);
  // Finished = explicitly complete, or every checkbox (at least one) checked.
  const isFinished = (s: ReturnType<typeof readStep>): boolean =>
    s?.status === "complete" || (!!s && s.totalChecked === s.totalItems && s.totalItems > 0);

  // Capture git HEAD before the step starts so code reviewers can
  // diff the full step's changes (workers commit via checkpoints).
  const stepBaselineCommit = getHeadCommitSha();

  updateStepStatus(statusPath, step.number, "in-progress");
  updateStatusField(statusPath, "Current Step", `Step ${step.number}: ${step.name}`);
  logExecution(statusPath, `Step ${step.number} started`, step.name);
  updateWidgets();

  // Plan review (level ≥ 1). RETHINK is advisory: it only warns.
  if (task.reviewLevel >= 1) {
    const verdict = await doReview("plan", step, ctx, stepBaselineCommit);
    if (verdict === "RETHINK") {
      ctx.ui.notify(`Reviewer: RETHINK on Step ${step.number} plan. Proceeding with caution.`, "warning");
    }
  }

  // Worker loop
  const maxIterations = config.context.max_worker_iterations;
  let noProgressCount = 0;
  let stepFinished = false;
  for (let iter = 0; iter < maxIterations; iter++) {
    if (state.phase === "paused") return;

    // Re-read STATUS.md — the step may already be done.
    const stepStatus = readStep();
    if (isFinished(stepStatus)) {
      updateStepStatus(statusPath, step.number, "complete");
      stepFinished = true;
      break;
    }

    const prevChecked = stepStatus?.totalChecked || 0;
    state.workerIteration = iter + 1;
    state.totalIterations++;
    updateStatusField(statusPath, "Iteration", `${state.totalIterations}`);
    updateWidgets();

    await runWorker(step, ctx);

    // Check progress: did this run check any additional boxes?
    const afterStep = readStep();
    const afterChecked = afterStep?.totalChecked || 0;

    if (afterChecked <= prevChecked) {
      noProgressCount++;
      if (noProgressCount >= config.context.no_progress_limit) {
        logExecution(statusPath, `Step ${step.number} blocked`, `No progress after ${noProgressCount} iterations`);
        ctx.ui.notify(`⚠️ Step ${step.number} blocked — no progress after ${noProgressCount} iterations`, "error");
        state.phase = "error";
        return;
      }
    } else {
      noProgressCount = 0;
    }

    if (isFinished(afterStep)) {
      updateStepStatus(statusPath, step.number, "complete");
      stepFinished = true;
      break;
    }
  }

  // Iteration budget exhausted without the loop seeing a finished step.
  // Re-check STATUS.md once; if still unfinished, stop instead of silently
  // marking the step complete. A pause/error phase is left untouched.
  if (!stepFinished && state.phase === "running") {
    if (isFinished(readStep())) {
      updateStepStatus(statusPath, step.number, "complete");
    } else {
      logExecution(statusPath, `Step ${step.number} blocked`, `Not complete after ${maxIterations} worker iterations`);
      ctx.ui.notify(`⚠️ Step ${step.number} blocked — not complete after ${maxIterations} worker iterations`, "error");
      state.phase = "error";
      return;
    }
  }

  // Code review (level ≥ 2)
  if (task.reviewLevel >= 2 && state.phase === "running") {
    const verdict = await doReview("code", step, ctx, stepBaselineCommit);
    if (verdict === "REVISE") {
      ctx.ui.notify(`Reviewer: REVISE on Step ${step.number}. Running worker to fix...`, "warning");
      await runWorker(step, ctx); // One more pass to address issues
    }
  }

  if (state.phase === "running") {
    updateStepStatus(statusPath, step.number, "complete");
    logExecution(statusPath, `Step ${step.number} complete`, step.name);
    // Update local cache
    const refreshed = parseStatusMd(readFileSync(statusPath, "utf-8"));
    for (const s of refreshed.steps) state.stepStatuses.set(s.number, s);
    updateWidgets();
  }
}
1365
+
1366
+ // ── Worker ───────────────────────────────────────────────────────
1367
+
1368
+ async function runWorker(step: StepInfo, ctx: ExtensionContext): Promise<void> {
1369
+ if (!state.task || !state.config) return;
1370
+
1371
+ const task = state.task;
1372
+ const config = state.config;
1373
+ const statusPath = join(task.taskFolder, "STATUS.md");
1374
+ const wrapUpFile = join(task.taskFolder, ".task-wrap-up");
1375
+ const legacyWrapUpFile = join(task.taskFolder, ".wiggum-wrap-up");
1376
+
1377
+ const clearWrapUpSignals = () => {
1378
+ if (existsSync(wrapUpFile)) try { unlinkSync(wrapUpFile); } catch {}
1379
+ if (existsSync(legacyWrapUpFile)) try { unlinkSync(legacyWrapUpFile); } catch {}
1380
+ };
1381
+
1382
+ const writeWrapUpSignal = (reason: string) => {
1383
+ const msg = `${reason} at ${new Date().toISOString()}`;
1384
+ if (!existsSync(wrapUpFile)) writeFileSync(wrapUpFile, msg);
1385
+ // Backward compatibility: write legacy signal too until all workers migrate.
1386
+ if (!existsSync(legacyWrapUpFile)) writeFileSync(legacyWrapUpFile, msg);
1387
+ };
1388
+
1389
+ clearWrapUpSignals();
1390
+
1391
+ const workerDef = loadAgentDef(ctx.cwd, "task-worker");
1392
+ const basePrompt = workerDef?.systemPrompt || "You are a task execution agent. Read STATUS.md first, find unchecked items, work on them, checkpoint after each.";
1393
+ const systemPrompt = basePrompt + "\n\n" + buildProjectContext(config, task.taskFolder);
1394
+
1395
+ const model = config.worker.model
1396
+ || workerDef?.model
1397
+ || (ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : "anthropic/claude-sonnet-4-20250514");
1398
+
1399
+ const contextDocsList = task.contextDocs.length > 0
1400
+ ? "\n\nContext docs to read if needed:\n" + task.contextDocs.map(d => `- ${d}`).join("\n")
1401
+ : "";
1402
+
1403
+ // When running under the parallel orchestrator, workers must NOT
1404
+ // archive or move the task folder — the orchestrator polls for .DONE
1405
+ // at the original path and handles post-merge archival itself.
1406
+ const archiveSuppression = isOrchestratedMode()
1407
+ ? "\n\n⚠️ ORCHESTRATED RUN: Do NOT archive or move the task folder. " +
1408
+ "Do NOT rename, relocate, or reorganize the task folder path. " +
1409
+ "The orchestrator handles post-merge archival. " +
1410
+ "Just create the .DONE file in the task folder when complete."
1411
+ : "";
1412
+
1413
+ const prompt = [
1414
+ `Execute Step ${step.number}: ${step.name}`,
1415
+ ``,
1416
+ `Task: ${task.taskId} — ${task.taskName}`,
1417
+ `Task folder: ${task.taskFolder}/`,
1418
+ `PROMPT: ${task.promptPath}`,
1419
+ `STATUS: ${statusPath}`,
1420
+ ``,
1421
+ `This is iteration ${state.totalIterations}.`,
1422
+ `Read STATUS.md FIRST to find where you left off.`,
1423
+ `Work ONLY on Step ${step.number}. Do not proceed to other steps.`,
1424
+ ``,
1425
+ `Wrap-up signal files: ${wrapUpFile} (primary), ${legacyWrapUpFile} (legacy)`,
1426
+ `Check for either file after each checkpoint. If one exists, stop.`,
1427
+ archiveSuppression,
1428
+ contextDocsList,
1429
+ ].join("\n");
1430
+
1431
+ state.workerStatus = "running";
1432
+ state.workerElapsed = 0;
1433
+ state.workerContextPct = 0;
1434
+ state.workerLastTool = "";
1435
+ state.workerToolCount = 0;
1436
+ updateWidgets();
1437
+
1438
+ const startTime = Date.now();
1439
+ state.workerTimer = setInterval(() => {
1440
+ state.workerElapsed = Date.now() - startTime;
1441
+ updateWidgets();
1442
+ }, 1000);
1443
+
1444
+ const spawnMode = getSpawnMode(config);
1445
+ let promise: Promise<{ output: string; exitCode: number; elapsed: number; killed: boolean }>;
1446
+ let kill: () => void;
1447
+ let wallClockWarnTimer: ReturnType<typeof setTimeout> | null = null;
1448
+ let wallClockKillTimer: ReturnType<typeof setTimeout> | null = null;
1449
+
1450
+ if (spawnMode === "tmux") {
1451
+ // ── TMUX mode ────────────────────────────────────────
1452
+ // No JSON stream → no onToolCall/onContextPct callbacks.
1453
+ // Kill via wall-clock timeout instead of context-%.
1454
+ const sessionName = `${getTmuxPrefix()}-worker`;
1455
+ const spawned = spawnAgentTmux({
1456
+ sessionName,
1457
+ cwd: ctx.cwd,
1458
+ systemPrompt,
1459
+ prompt,
1460
+ model,
1461
+ tools: config.worker.tools || workerDef?.tools || "read,write,edit,bash,grep,find,ls",
1462
+ thinking: config.worker.thinking || "off",
1463
+ });
1464
+ promise = spawned.promise;
1465
+ kill = spawned.kill;
1466
+
1467
+ // Wall-clock timeout: write wrap-up file at 80% of limit,
1468
+ // hard kill at 100%. No context telemetry in TMUX mode.
1469
+ const maxMinutes = getMaxWorkerMinutes(config);
1470
+ const warnMs = Math.round(maxMinutes * 0.8 * 60_000);
1471
+ const killMs = maxMinutes * 60_000;
1472
+ const iterationMarker = state.totalIterations;
1473
+
1474
+ // Wrap-up warning at 80% of wall-clock limit
1475
+ wallClockWarnTimer = setTimeout(() => {
1476
+ if (
1477
+ state.workerStatus === "running" &&
1478
+ state.totalIterations === iterationMarker
1479
+ ) {
1480
+ writeWrapUpSignal(`Wrap up (wall-clock ${maxMinutes}min limit)`);
1481
+ }
1482
+ }, warnMs);
1483
+
1484
+ // Hard kill at 100% of wall-clock limit
1485
+ wallClockKillTimer = setTimeout(() => {
1486
+ if (state.workerStatus === "running" && state.totalIterations === iterationMarker) {
1487
+ console.error(`[task-runner] tmux worker: wall-clock timeout (${maxMinutes}min) — killing session '${sessionName}'`);
1488
+ kill();
1489
+ }
1490
+ }, killMs);
1491
+ } else {
1492
+ // ── Subprocess mode (default, unchanged) ─────────────
1493
+ // In orchestrated mode, tee conversation events to JSONL for web dashboard
1494
+ const conversationPrefix = isOrchestratedMode() ? getTmuxPrefix() : null;
1495
+ if (conversationPrefix) clearConversationLog(conversationPrefix);
1496
+
1497
+ const spawned = spawnAgent({
1498
+ model,
1499
+ tools: config.worker.tools || workerDef?.tools || "read,write,edit,bash,grep,find,ls",
1500
+ thinking: config.worker.thinking || "off",
1501
+ systemPrompt,
1502
+ prompt,
1503
+ contextWindow: config.context.worker_context_window,
1504
+ warnPct: config.context.warn_percent,
1505
+ killPct: config.context.kill_percent,
1506
+ wrapUpFile,
1507
+ onToolCall: (toolName, args) => {
1508
+ state.workerToolCount++;
1509
+ // Build a short summary of what the tool is doing
1510
+ const path = args?.path || args?.command || "";
1511
+ const shortPath = typeof path === "string" && path.length > 80
1512
+ ? "..." + path.slice(-77) : path;
1513
+ state.workerLastTool = `${toolName} ${shortPath}`.trim();
1514
+ if (conversationPrefix) {
1515
+ appendConversationEvent(conversationPrefix, {
1516
+ type: "tool_call", toolName, args, timestamp: Date.now(),
1517
+ });
1518
+ }
1519
+ updateWidgets();
1520
+ },
1521
+ onTokenUpdate: (tokens) => {
1522
+ // Accumulate across turns — each message_end reports per-turn values.
1523
+ // Anthropic's `input` is only uncached new tokens; cacheRead holds
1524
+ // the bulk of input processing. We sum all four independently so the
1525
+ // dashboard can show the full picture.
1526
+ state.workerInputTokens += tokens.input;
1527
+ state.workerOutputTokens += tokens.output;
1528
+ state.workerCacheReadTokens += tokens.cacheRead;
1529
+ state.workerCacheWriteTokens += tokens.cacheWrite;
1530
+ state.workerCostUsd += tokens.cost;
1531
+ updateWidgets();
1532
+ },
1533
+ onContextPct: (pct) => {
1534
+ state.workerContextPct = pct;
1535
+ if (pct >= config.context.warn_percent) {
1536
+ writeWrapUpSignal(`Wrap up (context ${Math.round(pct)}%)`);
1537
+ }
1538
+ updateWidgets();
1539
+ },
1540
+ onJsonEvent: conversationPrefix
1541
+ ? (event: Record<string, unknown>) => appendConversationEvent(conversationPrefix, event)
1542
+ : undefined,
1543
+ });
1544
+ promise = spawned.promise;
1545
+ kill = spawned.kill;
1546
+ }
1547
+
1548
+ state.workerProc = { kill };
1549
+
1550
+ const result = await promise;
1551
+
1552
+ // Clean up wall-clock timers if they haven't fired yet
1553
+ if (wallClockWarnTimer) clearTimeout(wallClockWarnTimer);
1554
+ if (wallClockKillTimer) clearTimeout(wallClockKillTimer);
1555
+
1556
+ clearInterval(state.workerTimer);
1557
+ state.workerElapsed = Date.now() - startTime;
1558
+ state.workerStatus = result.killed ? "killed" : (result.exitCode === 0 ? "done" : "error");
1559
+ state.workerProc = null;
1560
+
1561
+ clearWrapUpSignals();
1562
+
1563
+ // Log with mode-appropriate detail: subprocess has context%, TMUX does not
1564
+ const killedMsg = spawnMode === "tmux" ? "killed (wall-clock timeout)" : "killed (context limit)";
1565
+ const statusMsg = result.killed ? killedMsg : (result.exitCode === 0 ? "done" : `error (code ${result.exitCode})`);
1566
+ const ctxDetail = spawnMode === "tmux" ? "" : `, ctx: ${Math.round(state.workerContextPct)}%`;
1567
+ logExecution(statusPath, `Worker iter ${state.totalIterations}`,
1568
+ `${statusMsg} in ${Math.round(state.workerElapsed / 1000)}s${ctxDetail}, tools: ${state.workerToolCount}`);
1569
+
1570
+ updateWidgets();
1571
+ }
1572
+
1573
+ // ── Reviewer ─────────────────────────────────────────────────────
1574
+
1575
/**
 * Run one reviewer pass for a step and return the verdict it produced.
 *
 * Writes a review request file under `<taskFolder>/.reviews/`, spawns the
 * reviewer agent (tmux session or subprocess, chosen by `getSpawnMode`),
 * waits for it to finish, then reads the verdict from the reviewer's
 * output file and records it in STATUS.md.
 *
 * @param type  Which kind of review to request.
 * @param step  Step under review; its number and name go into the request.
 * @param ctx   Extension context; supplies `cwd` and the notification UI.
 * @param stepBaselineCommit  Optional commit, passed through to the request generator.
 * @returns "UNKNOWN" when no task/config is loaded, "UNAVAILABLE" when the
 *          reviewer wrote no output file, otherwise the result of `extractVerdict`.
 */
async function doReview(type: "plan" | "code", step: StepInfo, ctx: ExtensionContext, stepBaselineCommit?: string): Promise<string> {
	if (!state.task || !state.config) return "UNKNOWN";

	const task = state.task;
	const config = state.config;
	const statusPath = join(task.taskFolder, "STATUS.md");
	const reviewsDir = join(task.taskFolder, ".reviews");
	if (!existsSync(reviewsDir)) mkdirSync(reviewsDir, { recursive: true });

	state.reviewCounter++;
	const num = String(state.reviewCounter).padStart(3, "0");
	const reviewFileName = `R${num}-${type}-step${step.number}.md`;
	const requestPath = join(reviewsDir, `request-R${num}.md`);
	const outputPath = join(reviewsDir, reviewFileName);

	// The request is persisted for the record; the same text is used as the
	// reviewer prompt, so there is no need to read the file back.
	const request = generateReviewRequest(type, step.number, step.name, task, config, outputPath, stepBaselineCommit);
	writeFileSync(requestPath, request);

	const reviewerDef = loadAgentDef(ctx.cwd, "task-reviewer");
	const reviewerModel = config.reviewer.model || reviewerDef?.model || "openai/gpt-5.3-codex";
	const reviewerPrompt = reviewerDef?.systemPrompt || "You are a code reviewer. Read the request and write your review to the specified output file.";
	const systemPrompt = reviewerPrompt + "\n\n" + buildProjectContext(config, task.taskFolder);
	const tools = config.reviewer.tools || reviewerDef?.tools || "read,write,bash,grep,find,ls";
	const thinking = config.reviewer.thinking || "off";

	state.reviewerStatus = "running";
	state.reviewerType = `${type} review`;
	state.reviewerElapsed = 0;
	state.reviewerLastTool = "";
	updateWidgets();

	const startTime = Date.now();
	// Keep the handle locally so exactly this interval is cleared below,
	// whatever happens to `state.reviewerTimer` in the meantime.
	const elapsedTimer = setInterval(() => {
		state.reviewerElapsed = Date.now() - startTime;
		updateWidgets();
	}, 1000);
	state.reviewerTimer = elapsedTimer;

	let result: { output: string; exitCode: number; elapsed: number; killed: boolean } | undefined;
	try {
		let reviewPromise: Promise<{ output: string; exitCode: number; elapsed: number; killed: boolean }>;

		if (getSpawnMode(config) === "tmux") {
			// ── TMUX mode ────────────────────────────────────────
			// No JSON stream → no onToolCall callback.
			// No timeout — reviewer runs to session completion.
			const spawned = spawnAgentTmux({
				sessionName: `${getTmuxPrefix()}-reviewer`,
				cwd: ctx.cwd,
				systemPrompt,
				prompt: request,
				model: reviewerModel,
				tools,
				thinking,
			});
			reviewPromise = spawned.promise;
			state.reviewerProc = { kill: spawned.kill };
		} else {
			// ── Subprocess mode (default) ────────────────────────
			const spawned = spawnAgent({
				model: reviewerModel,
				tools,
				thinking,
				systemPrompt,
				prompt: request,
				onToolCall: (toolName, args) => {
					// Show the tail of long paths/commands so the widget stays short.
					const path = args?.path || args?.command || "";
					const shortPath = typeof path === "string" && path.length > 40
						? "..." + path.slice(-37) : path;
					state.reviewerLastTool = `${toolName} ${shortPath}`.trim();
					updateWidgets();
				},
			});
			reviewPromise = spawned.promise;
			state.reviewerProc = { kill: spawned.kill };
		}

		result = await reviewPromise;
	} finally {
		// Runs on success and on failure: without this a throwing spawn or a
		// rejected promise would leave the interval ticking and the widget
		// stuck on "running".
		clearInterval(elapsedTimer);
		state.reviewerElapsed = Date.now() - startTime;
		state.reviewerStatus = result?.exitCode === 0 ? "done" : "error";
		state.reviewerProc = null;
		updateWidgets();
	}

	// Read verdict
	let verdict: string;
	if (existsSync(outputPath)) {
		verdict = extractVerdict(readFileSync(outputPath, "utf-8"));
	} else {
		verdict = "UNAVAILABLE";
		logExecution(statusPath, `Reviewer R${num}`, `${type} review — reviewer did not produce output`);
	}

	logReview(statusPath, `R${num}`, type, step.number, verdict, `.reviews/${reviewFileName}`);
	logExecution(statusPath, `Review R${num}`, `${type} Step ${step.number}: ${verdict}`);
	updateStatusField(statusPath, "Review Counter", `${state.reviewCounter}`);

	ctx.ui.notify(`Review R${num} (${type} Step ${step.number}): ${verdict}`, verdict === "APPROVE" ? "success" : "warning");

	return verdict;
}
1677
+
1678
+ // ── Commands ─────────────────────────────────────────────────────
1679
+
1680
+ // ── Shared Task Initialization ───────────────────────────────────
1681
+ //
1682
+ // Extracts the core init logic used by both the `/task` command and
1683
+ // TASK_AUTOSTART so that they share a single code path. Returns true
1684
+ // if the task was started successfully.
1685
+
1686
/**
 * Load the PROMPT.md at `fullPath`, reset runner state for it and kick off
 * execution in the background.
 *
 * @param ctx       Extension context used for notifications, `cwd` and widgets.
 * @param fullPath  Path of the PROMPT.md file to read and parse.
 * @returns `false` when a task is already running or the prompt cannot be
 *          read/parsed; `true` once execution has been launched.
 */
function startTaskFromPath(ctx: ExtensionContext, fullPath: string): boolean {
	const alreadyRunning = state.phase === "running";
	if (alreadyRunning) {
		ctx.ui.notify("A task is already running. Use /task-pause first.", "warning");
		return false;
	}

	// Read and parse the prompt; any failure is reported and aborts the start.
	let parsed: ParsedTask;
	try {
		parsed = parsePromptMd(readFileSync(fullPath, "utf-8"), fullPath);
	} catch (err: any) {
		ctx.ui.notify(`Failed to parse PROMPT.md: ${err?.message || err}`, "error");
		return false;
	}

	// NOTE(review): `state` is replaced wholesale here. If a paused task can
	// still have a live worker/reviewer, its handles on the old state object
	// are dropped — confirm whether that case can occur.
	const cfg = loadConfig(ctx.cwd);
	state = freshState();
	state.task = parsed;
	state.config = cfg;
	state.phase = "running";
	widgetCtx = ctx;

	// Either adopt the counters recorded in an existing STATUS.md, or create
	// the file from the parsed prompt.
	const statusPath = join(parsed.taskFolder, "STATUS.md");
	if (existsSync(statusPath)) {
		const existing = parseStatusMd(readFileSync(statusPath, "utf-8"));
		state.reviewCounter = existing.reviewCounter;
		state.totalIterations = existing.iteration;
		existing.steps.forEach((stepStatus) => {
			state.stepStatuses.set(stepStatus.number, stepStatus);
		});
	} else {
		writeFileSync(statusPath, generateStatusMd(parsed));
		ctx.ui.notify("Generated STATUS.md from PROMPT.md", "info");
	}

	// Make sure the reviews directory exists.
	const reviewsDir = join(parsed.taskFolder, ".reviews");
	if (!existsSync(reviewsDir)) {
		mkdirSync(reviewsDir, { recursive: true });
	}

	updateWidgets();

	const banner = [
		`Starting: ${parsed.taskId} — ${parsed.taskName}`,
		`Review Level: ${parsed.reviewLevel} · Size: ${parsed.size} · Steps: ${parsed.steps.length}`,
		`Worker model: ${cfg.worker.model || "inherit"} · Reviewer: ${cfg.reviewer.model}`,
	].join("\n");
	ctx.ui.notify(banner, "info");

	// Not awaited: failures surface through the notification and phase change.
	executeTask(ctx).catch((err) => {
		state.phase = "error";
		ctx.ui.notify(`Task error: ${err?.message || err}`, "error");
		updateWidgets();
	});

	return true;
}
1742
+
1743
// `/task <path>`: resolve the given path against the session cwd and start
// the task through the shared `startTaskFromPath` entry point.
pi.registerCommand("task", {
	description: "Start executing a task: /task <path/to/PROMPT.md>",
	handler: async (args, ctx) => {
		widgetCtx = ctx;

		const requested = args?.trim();
		if (!requested) {
			ctx.ui.notify("Usage: /task <path/to/PROMPT.md>", "error");
			return;
		}

		const absolute = resolve(ctx.cwd, requested);
		if (existsSync(absolute)) {
			startTaskFromPath(ctx, absolute);
			return;
		}

		// Report the path as typed, not the resolved one.
		ctx.ui.notify(`File not found: ${requested}`, "error");
	},
});
1762
+
1763
// `/task-status`: re-read STATUS.md, show a per-step summary, and refresh
// the cached step statuses that feed the widgets.
pi.registerCommand("task-status", {
	description: "Show current task progress",
	handler: async (_args, ctx) => {
		widgetCtx = ctx;

		const task = state.task;
		if (!task) {
			ctx.ui.notify("No task loaded. Use /task <path/to/PROMPT.md>", "info");
			return;
		}

		const statusFile = join(task.taskFolder, "STATUS.md");
		if (!existsSync(statusFile)) {
			ctx.ui.notify("STATUS.md not found", "error");
			return;
		}

		const snapshot = parseStatusMd(readFileSync(statusFile, "utf-8"));

		// One line per step: icon, name and checked/total counts.
		const stepLines: string[] = [];
		for (const entry of snapshot.steps) {
			let icon = "⬜";
			if (entry.status === "complete") {
				icon = "✅";
			} else if (entry.status === "in-progress") {
				icon = "🟨";
			}
			stepLines.push(`${icon} Step ${entry.number}: ${entry.name} (${entry.totalChecked}/${entry.totalItems})`);
		}

		const header =
			`${task.taskId}: ${task.taskName}\n` +
			`Phase: ${state.phase} · Iteration: ${state.totalIterations} · Reviews: ${state.reviewCounter}\n\n`;
		ctx.ui.notify(header + stepLines.join("\n"), "info");

		// Refresh widget
		snapshot.steps.forEach((entry) => {
			state.stepStatuses.set(entry.number, entry);
		});
		updateWidgets();
	},
});
1796
+
1797
// `/task-pause`: flip the phase to "paused". Nothing is killed here; the
// user is told the pause takes effect once the current worker finishes.
pi.registerCommand("task-pause", {
	description: "Pause task after current worker finishes",
	handler: async (_args, ctx) => {
		widgetCtx = ctx;

		if (state.phase === "running") {
			state.phase = "paused";
			ctx.ui.notify("Task will pause after current worker finishes", "info");
			updateWidgets();
		} else {
			ctx.ui.notify("No task is running", "warning");
		}
	},
});
1810
+
1811
// `/task-resume`: only valid from the "paused" phase with a task loaded;
// switches back to "running" and relaunches `executeTask` in the background.
pi.registerCommand("task-resume", {
	description: "Resume a paused task",
	handler: async (_args, ctx) => {
		widgetCtx = ctx;

		const isPaused = state.phase === "paused";
		if (!isPaused) {
			ctx.ui.notify("Task is not paused", "warning");
			return;
		}

		const task = state.task;
		if (!task) {
			ctx.ui.notify("No task loaded", "error");
			return;
		}

		state.phase = "running";
		ctx.ui.notify(`Resuming ${task.taskId}...`, "info");
		updateWidgets();

		// Not awaited: a failure flips the phase to "error" and is reported.
		const reportFailure = (err: any): void => {
			state.phase = "error";
			ctx.ui.notify(`Task error: ${err?.message || err}`, "error");
			updateWidgets();
		};
		executeTask(ctx).catch(reportFailure);
	},
});
1835
+
1836
+ // ── Session Lifecycle ────────────────────────────────────────────
1837
+
1838
// Session start: stop anything left running, demote a loaded task to
// "paused" (re-syncing counters from STATUS.md), install the footer and
// status line, then either announce the paused task, honour TASK_AUTOSTART,
// or print the command help.
pi.on("session_start", async (_event, ctx) => {
	widgetCtx = ctx;

	// Kill any running subprocesses (best effort: a failing kill is ignored)
	// and stop the elapsed-time tickers.
	if (state.workerProc) try { state.workerProc.kill(); } catch {}
	if (state.reviewerProc) try { state.reviewerProc.kill(); } catch {}
	if (state.workerTimer) clearInterval(state.workerTimer);
	if (state.reviewerTimer) clearInterval(state.reviewerTimer);

	// Keep task state if resuming, but reset runtime state
	const hadTask = state.task;
	if (hadTask) {
		state.phase = "paused";
		state.workerStatus = "idle";
		state.reviewerStatus = "idle";
		state.workerProc = null;
		state.reviewerProc = null;
		// Refresh from STATUS.md
		const statusPath = join(hadTask.taskFolder, "STATUS.md");
		if (existsSync(statusPath)) {
			const parsed = parseStatusMd(readFileSync(statusPath, "utf-8"));
			state.reviewCounter = parsed.reviewCounter;
			state.totalIterations = parsed.iteration;
			for (const s of parsed.steps) state.stepStatuses.set(s.number, s);
		}
	}

	updateWidgets();

	// Footer: model and task label on the left, context-usage bar on the right.
	ctx.ui.setFooter((_tui, theme, _footerData) => ({
		dispose: () => {},
		invalidate() {},
		render(width: number): string[] {
			const model = ctx.model?.id || "no-model";
			const usage = ctx.getContextUsage();
			const rawPct = usage ? usage.percent : 0;
			// A missing or non-finite percentage is shown as 0.
			const pct = typeof rawPct === "number" && Number.isFinite(rawPct) ? rawPct : 0;
			// Clamp to the 10-cell bar: String.prototype.repeat throws a
			// RangeError on a negative count, which an out-of-range percent
			// (below 0 or above 100) would otherwise produce.
			const filled = Math.min(10, Math.max(0, Math.round(pct / 10)));
			const bar = "#".repeat(filled) + "-".repeat(10 - filled);

			const taskLabel = state.task
				? theme.fg("accent", state.task.taskId) +
					theme.fg("dim", ` ${state.phase}`) +
					(state.phase === "running" ? theme.fg("accent", ` Step ${state.currentStep}`) : "")
				: theme.fg("dim", "no task");

			const left = theme.fg("dim", ` ${model}`) +
				theme.fg("muted", " · ") +
				taskLabel;
			const right = theme.fg("dim", `[${bar}] ${Math.round(pct)}% `);
			// Always leave at least one space between the two halves.
			const pad = " ".repeat(Math.max(1, width - visibleWidth(left) - visibleWidth(right)));

			return [truncateToWidth(left + pad + right, width)];
		},
	}));

	const config = loadConfig(ctx.cwd);
	ctx.ui.setStatus("task-runner", `📋 ${config.project.name}`);

	if (hadTask) {
		ctx.ui.notify(`Task ${hadTask.taskId} loaded (paused). Use /task-resume to continue.`, "info");
	} else if (process.env.TASK_AUTOSTART) {
		// ── TASK_AUTOSTART ────────────────────────────────────────
		// When set, automatically start a task as if the user typed
		// `/task <path>`. Used by the parallel orchestrator to launch
		// workers in TMUX sessions without send-keys timing issues.
		const autoPath = process.env.TASK_AUTOSTART;
		const fullPath = resolve(ctx.cwd, autoPath);
		if (!existsSync(fullPath)) {
			ctx.ui.notify(`TASK_AUTOSTART: file not found — ${fullPath}`, "error");
		} else {
			ctx.ui.notify(`TASK_AUTOSTART: ${fullPath}`, "info");
			startTaskFromPath(ctx, fullPath);
		}
	} else {
		ctx.ui.notify(
			`Task Runner ready — ${config.project.name}\n\n` +
			`/task <path/to/PROMPT.md> Start a task\n` +
			`/task-status Show progress\n` +
			`/task-pause Pause execution\n` +
			`/task-resume Resume execution`,
			"info",
		);
	}
});
1923
+ }