@rohaquinlop/pi-subagents 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -14,8 +14,11 @@ import { Container, Markdown, Spacer, Text, visibleWidth } from "@earendil-works
14
14
  import { Type } from "@sinclair/typebox";
15
15
  import "./tools/safe-bash";
16
16
 
17
- import type { AgentConfig } from "./lib/types";
18
- import { discoverAgents, mergeAgents } from "./lib/helpers";
17
+ import type { AgentConfig, AgentUsage, PipelineStepResult, PipelineResult, LoopIterationResult, LoopResult } from "./lib/types";
18
+ import { discoverAgents, mergeAgents, substitutePlaceholders, formatConnectorContext } from "./lib/helpers";
19
+ import { zeroUsage, accumulateUsage, validateAgents, MAX_LOOP_CONTEXT, parseJudgeVerdict } from "./lib/pipeline-helpers";
20
+ import { buildSubagentErrorContent, buildPipelineErrorContent, buildLoopErrorContent } from "./lib/error-helpers";
21
+ import { detectCycle } from "./lib/loop-detector";
19
22
 
20
23
  interface ToolEvent {
21
24
  tool: string;
@@ -54,6 +57,7 @@ interface AgentProgress {
54
57
  durationMs: number;
55
58
  lastMessage: string;
56
59
  error?: string;
60
+ warning?: string;
57
61
  }
58
62
 
59
63
  interface AgentResult {
@@ -64,17 +68,21 @@ interface AgentResult {
64
68
  progress: AgentProgress;
65
69
  model?: string;
66
70
  contextWindow?: number;
67
- usage: { input: number; output: number; cacheRead: number; cacheWrite: number; cost: number; turns: number };
71
+ usage: AgentUsage;
68
72
  }
69
73
 
70
74
  interface Details {
71
- results: AgentResult[];
75
+ results?: AgentResult[];
76
+ pipelineResult?: PipelineResult & { currentStep?: number };
77
+ loopResult?: LoopResult & { currentIteration?: number };
72
78
  }
73
79
 
74
80
  // ── Config ─────────────────────────────────────────────────────────────
75
81
 
76
82
  interface ExtensionConfig {
77
83
  maxConcurrency?: number;
84
+ subagentTimeoutMs?: number; // wall-clock, default 600000 (10 min). 0 = disabled.
85
+ subagentIdleTimeoutMs?: number; // no-stdout watchdog, default 300000 (5 min). 0 = disabled.
78
86
  }
79
87
 
80
88
  const EXT_DIR = path.dirname(new URL(import.meta.url).pathname);
@@ -82,6 +90,10 @@ const AGENTS_DIR = path.join(EXT_DIR, "agents");
82
90
  const TOOLS_DIR = path.join(EXT_DIR, "tools");
83
91
  const CONFIG_PATH = path.join(EXT_DIR, "config.json");
84
92
  const DEFAULT_MAX_CONCURRENCY = 4;
93
+ const DEFAULT_SUBAGENT_TIMEOUT_MS = 600_000; // 10 minutes
94
+ const DEFAULT_SUBAGENT_IDLE_TIMEOUT_MS = 300_000; // 5 minutes
95
+
96
+ let extensionConfig: ExtensionConfig = {};
85
97
 
86
98
  function loadConfig(): ExtensionConfig {
87
99
  try {
@@ -196,6 +208,7 @@ const MODEL_EXTENSIONS: ModelExtension[] = buildModelExtensions();
196
208
  // ── Agent Discovery & Registration ────────────────────────────────────
197
209
 
198
210
  let agents: AgentConfig[] = [];
211
+ let semaphore: Semaphore;
199
212
 
200
213
  // Read once at module load. If we're a child subagent process whose parent
201
214
  // pinned an allowlist, we silently ignore any agent (built-in OR registered
@@ -475,6 +488,11 @@ function flatten(s: string): string {
475
488
  // doesn't need to read inline anyway.
476
489
  const MAX_ARG_PREVIEW = 4000;
477
490
 
491
+ // Hard cap on recentTools entries to prevent unbounded memory growth in
492
+ // long-running subagents. Generous for expanded-view history; matches the
493
+ // callHistory trim pattern.
494
+ const MAX_RECENT_TOOLS = 50;
495
+
478
496
  function extractToolArgsPreview(args: Record<string, unknown>): string {
479
497
  const cap = (s: string) => (s.length > MAX_ARG_PREVIEW ? s.slice(0, MAX_ARG_PREVIEW) + "…" : s);
480
498
  if (args.command) return cap(flatten(String(args.command)));
@@ -498,7 +516,7 @@ async function runSubagent(
498
516
  task: string,
499
517
  cwd: string,
500
518
  signal: AbortSignal | undefined,
501
- onUpdate?: (progress: AgentProgress, usage: AgentResult["usage"]) => void,
519
+ onUpdate?: (progress: AgentProgress, usage: AgentResult["usage"], finalExitCode?: number) => void,
502
520
  ): Promise<AgentResult> {
503
521
  const { args, tempDir, childEnv } = await buildPiArgs(agent, task, cwd);
504
522
  const command = args[0];
@@ -540,14 +558,73 @@ async function runSubagent(
540
558
 
541
559
  let buf = "";
542
560
  let stderrBuf = "";
561
+ let resolved = false;
562
+ let wallTimer: ReturnType<typeof setTimeout> | undefined;
563
+ let idleTimer: ReturnType<typeof setTimeout> | undefined;
564
+ let inFlightToolCount = 0;
565
+ let childClosed = false;
566
+ let sigkillTimer: ReturnType<typeof setTimeout> | undefined;
567
+ let callHistory: string[] = []; // sliding window of tool-call signatures for cycle detection
568
+
569
+ const safeResolve = (code: number) => {
570
+ if (resolved) return;
571
+ resolved = true;
572
+ clearTimeout(wallTimer);
573
+ clearTimeout(idleTimer);
574
+ clearTimeout(sigkillTimer);
575
+ if (signal) signal.removeEventListener("abort", abortKill);
576
+ resolve(code);
577
+ };
578
+
579
+ // Wall-clock timeout
580
+ const timeoutMs = extensionConfig.subagentTimeoutMs ?? DEFAULT_SUBAGENT_TIMEOUT_MS;
581
+ if (timeoutMs > 0) {
582
+ wallTimer = setTimeout(() => {
583
+ if (!progress.error) progress.error = `Subagent timed out after ${Math.round(timeoutMs / 1000)}s`;
584
+ proc.kill("SIGTERM");
585
+ clearTimeout(sigkillTimer);
586
+ sigkillTimer = setTimeout(() => {
587
+ if (!childClosed) proc.kill("SIGKILL");
588
+ }, 5000);
589
+ }, timeoutMs);
590
+ }
591
+
592
+ // Idle (no-stdout) watchdog
593
+ const idleMs = extensionConfig.subagentIdleTimeoutMs ?? DEFAULT_SUBAGENT_IDLE_TIMEOUT_MS;
594
+ const resetIdle = () => {
595
+ if (idleMs <= 0) return;
596
+ clearTimeout(idleTimer);
597
+ idleTimer = setTimeout(() => {
598
+ if (!progress.error) progress.error = `Subagent idle for ${Math.round(idleMs / 1000)}s — likely stuck`;
599
+ proc.kill("SIGTERM");
600
+ clearTimeout(sigkillTimer);
601
+ sigkillTimer = setTimeout(() => {
602
+ if (!childClosed) proc.kill("SIGKILL");
603
+ }, 5000);
604
+ }, idleMs);
605
+ };
606
+ resetIdle();
607
+
608
+ const pauseIdle = () => {
609
+ clearTimeout(idleTimer);
610
+ idleTimer = undefined;
611
+ };
612
+ const resumeIdle = () => {
613
+ if (inFlightToolCount === 0) resetIdle();
614
+ };
615
+
616
+ const MAX_STDERR_BYTES = 100_000;
543
617
 
544
618
  const processLine = (line: string) => {
619
+ if (inFlightToolCount === 0) resetIdle();
545
620
  if (!line.trim()) return;
546
621
  try {
547
622
  const evt = JSON.parse(line) as any;
548
623
  progress.durationMs = Date.now() - startTime;
549
624
 
550
625
  if (evt.type === "tool_execution_start") {
626
+ inFlightToolCount++;
627
+ pauseIdle();
551
628
  progress.toolCount++;
552
629
  progress.recentTools.push({
553
630
  tool: evt.toolName,
@@ -555,6 +632,37 @@ async function runSubagent(
555
632
  toolCallId: evt.toolCallId,
556
633
  status: "running",
557
634
  });
635
+ // Trim oldest completed entries, but never evict an in-flight tool —
636
+ // otherwise tool_execution_end's .find(toolCallId) would no-op and leave a
637
+ // permanently-"running" ghost.
638
+ while (progress.recentTools.length > MAX_RECENT_TOOLS) {
639
+ const idx = progress.recentTools.findIndex((t) => t.status !== "running");
640
+ if (idx === -1) break; // only running entries left — don't evict in-flight
641
+ progress.recentTools.splice(idx, 1);
642
+ }
643
+ // ── Cycle detection (parent-side, context-free) ──
644
+ // Signature = toolName + args preview. Two calls with different args
645
+ // (different file, or same file different content) → different sig.
646
+ const sig = `${evt.toolName}:${extractToolArgsPreview((evt.args || {}) as Record<string, unknown>)}`;
647
+ const cycleResult = detectCycle(callHistory, sig);
648
+ callHistory.push(sig);
649
+ if (callHistory.length > 24) callHistory = callHistory.slice(-24);
650
+
651
+ if (cycleResult.cycle) {
652
+ const toolNames = (cycleResult.pattern || []).map((s) => {
653
+ const colonIdx = s.indexOf(":");
654
+ return colonIdx >= 0 ? s.slice(0, colonIdx) : s;
655
+ });
656
+ const patternStr = toolNames.join("→");
657
+ if (!progress.error) {
658
+ progress.error = `Subagent stuck in a tool-call loop: repeating ${patternStr}`;
659
+ }
660
+ proc.kill("SIGTERM");
661
+ clearTimeout(sigkillTimer);
662
+ sigkillTimer = setTimeout(() => {
663
+ if (!childClosed) proc.kill("SIGKILL");
664
+ }, 5000);
665
+ }
558
666
  fireUpdate();
559
667
  }
560
668
 
@@ -589,6 +697,8 @@ async function runSubagent(
589
697
  hit.children = finalChildren as AgentResult[];
590
698
  }
591
699
  }
700
+ inFlightToolCount = Math.max(0, inFlightToolCount - 1);
701
+ resumeIdle();
592
702
  fireUpdate();
593
703
  }
594
704
 
@@ -655,26 +765,37 @@ async function runSubagent(
655
765
  });
656
766
 
657
767
  proc.stderr.on("data", (d: Buffer) => {
768
+ if (stderrBuf.length >= MAX_STDERR_BYTES) return;
658
769
  stderrBuf += d.toString();
770
+ if (stderrBuf.length >= MAX_STDERR_BYTES) {
771
+ stderrBuf = stderrBuf.slice(0, MAX_STDERR_BYTES) + "\n[stderr truncated]";
772
+ }
659
773
  });
660
774
 
661
775
  proc.on("close", (code) => {
776
+ childClosed = true;
662
777
  if (buf.trim()) processLine(buf);
663
778
  if (code !== 0 && stderrBuf.trim() && !progress.error) {
664
779
  progress.error = stderrBuf.trim();
780
+ } else if (code === 0 && stderrBuf.trim()) {
781
+ // Non-fatal: surface stderr (e.g. deprecation warnings) on a successful exit.
782
+ progress.warning = stderrBuf.trim().slice(0, 2000);
665
783
  }
666
- resolve(code ?? 1);
784
+ safeResolve(code ?? 1);
667
785
  });
668
786
 
669
- proc.on("error", () => resolve(1));
787
+ proc.on("error", () => safeResolve(1));
670
788
 
789
+ const abortKill = () => {
790
+ proc.kill("SIGTERM");
791
+ clearTimeout(sigkillTimer);
792
+ sigkillTimer = setTimeout(() => {
793
+ if (!childClosed) proc.kill("SIGKILL");
794
+ }, 3000);
795
+ };
671
796
  if (signal) {
672
- const kill = () => {
673
- proc.kill("SIGTERM");
674
- setTimeout(() => !proc.killed && proc.kill("SIGKILL"), 3000);
675
- };
676
- if (signal.aborted) kill();
677
- else signal.addEventListener("abort", kill, { once: true });
797
+ if (signal.aborted) abortKill();
798
+ else signal.addEventListener("abort", abortKill, { once: true });
678
799
  }
679
800
  });
680
801
 
@@ -686,6 +807,13 @@ async function runSubagent(
686
807
  result.exitCode = exitCode;
687
808
  progress.status = exitCode === 0 && !progress.error ? "completed" : "failed";
688
809
  progress.durationMs = Date.now() - startTime;
810
+
811
+ // Push the terminal status to the live renderer so the TUI doesn't keep
812
+ // showing "running" after the child has exited. Pass exitCode so callers
813
+ // that hold a live result object (the subagent tool) can sync its exitCode
814
+ // and render the correct ✓/✗ icon instead of the -1 placeholder.
815
+ onUpdate?.(progress, result.usage, exitCode);
816
+
689
817
  if (progress.error) result.output = result.output || `Error: ${progress.error}`;
690
818
 
691
819
  // Truncate output if very large
@@ -896,14 +1024,355 @@ function renderAgentProgress(
896
1024
  addLine(theme.fg("error", `Error: ${prog.error}`));
897
1025
  }
898
1026
 
1027
+ if (prog.warning) {
1028
+ addLine(theme.fg("warning", `Warning: ${prog.warning}`));
1029
+ }
1030
+
1031
+ return c;
1032
+ }
1033
+
1034
+ // ── Pipeline Execution ────────────────────────────────────────────────
1035
+
1036
/**
 * Runs `steps` strictly in order, handing each step's output to the next one.
 *
 * For every step the named agent is looked up in the module-level `agents`
 * list and executed via `semaphore.run(() => runSubagent(...))`. When a step's
 * task contains the literal `{previous}` and the preceding step produced
 * non-empty output, that output is passed through `formatConnectorContext`
 * (a step-level `connector` takes precedence over the agent's) and spliced
 * into the task with `substitutePlaceholders`.
 *
 * The pipeline stops at the first problem — abort, unknown agent, non-zero
 * exit code, or a `progress.error` — and every such exit returns `stoppedAt`,
 * `error`, and a `finalOutput` describing the failure, so the caller never
 * mistakes an earlier step's success text for the pipeline's result.
 *
 * @param steps    Ordered steps to execute.
 * @param cwd      Working directory forwarded to every subagent.
 * @param signal   Optional abort signal; checked before each step and
 *                 forwarded to `runSubagent`.
 * @param onUpdate Optional live-progress callback; receives the zero-based
 *                 index of the step that is currently running.
 * @returns Per-step results, accumulated usage, and total wall-clock duration.
 */
async function runPipeline(
  steps: Array<{ agent: string; task: string; connector?: string }>,
  cwd: string,
  signal: AbortSignal | undefined,
  onUpdate?: (stepIndex: number, progress: AgentProgress, usage: AgentUsage) => void,
): Promise<PipelineResult> {
  const results: PipelineStepResult[] = [];
  let previousOutput = "";
  let totalUsage = zeroUsage();
  const startTime = Date.now();

  // Single shape for every early exit. Reads `totalUsage` at call time, so it
  // always reflects the usage accumulated up to the point of failure.
  const stopped = (index: number, error: string, finalOutput: string): PipelineResult => ({
    steps: results,
    finalOutput,
    stoppedAt: index,
    error,
    totalUsage,
    totalDurationMs: Date.now() - startTime,
  });

  for (let i = 0; i < steps.length; i++) {
    const step = steps[i];

    // An abort between steps is a failure, not a completed pipeline: report
    // which step was never started instead of returning a success-shaped result.
    if (signal?.aborted) {
      const abortMsg = "Pipeline aborted";
      return stopped(i, abortMsg, `Error: ${abortMsg} before step ${i + 1} (${step.agent})`);
    }

    const agent = agents.find((a) => a.name === step.agent);
    if (!agent) {
      const errMsg = `Unknown agent: ${step.agent}`;
      results.push({
        agent: step.agent,
        task: step.task,
        output: `Error: ${errMsg}`,
        exitCode: 1,
        usage: zeroUsage(),
        durationMs: 0,
      });
      // Surface the failure itself as finalOutput, consistent with the
      // failing-step branch below.
      return stopped(i, errMsg, `Error: ${errMsg}`);
    }

    // Build the task, substituting {previous} only when there is something
    // to substitute. Step-level connector overrides the agent-level one.
    let taskWithContext = step.task;
    if (previousOutput && taskWithContext.includes("{previous}")) {
      const connector = step.connector ?? agent.connector;
      const formattedOutput = formatConnectorContext(previousOutput, connector);
      taskWithContext = substitutePlaceholders(step.task, formattedOutput);
    }

    const stepStart = Date.now();
    const result = await semaphore.run(() =>
      runSubagent(agent, taskWithContext, cwd, signal, (progress, usage) => {
        onUpdate?.(i, progress, usage);
      }),
    );

    results.push({
      agent: step.agent,
      task: step.task,
      output: result.output,
      exitCode: result.exitCode,
      usage: result.usage,
      durationMs: Date.now() - stepStart,
    });
    totalUsage = accumulateUsage(totalUsage, result.usage);
    previousOutput = result.output;

    // Stop on error — the pipeline tool returns finalOutput as content, so the
    // main LLM must see the actual failure, not the previous step's output.
    if (result.exitCode !== 0 || result.progress.error) {
      return stopped(
        i,
        result.progress.error || `Agent ${step.agent} exited with code ${result.exitCode}`,
        buildPipelineErrorContent(i, step.agent, result),
      );
    }
  }

  return {
    steps: results,
    finalOutput: previousOutput || "(no output)",
    totalUsage,
    totalDurationMs: Date.now() - startTime,
  };
}
1108
+
1109
+ // ── Loop Execution ─────────────────────────────────────────────────────
1110
+
1111
/**
 * Runs one agent repeatedly on the same task, giving each iteration the
 * outputs of the earlier ones, until a judge is satisfied, an iteration
 * fails, or `maxIterations` is reached.
 *
 * Context handling: prior iterations are appended under a
 * "## Prior iterations:" heading, newest-first selection within a
 * `MAX_LOOP_CONTEXT` character budget (oldest iterations are dropped first).
 * If the newest block alone exceeds the budget it is truncated rather than
 * dropped, so the agent never receives an empty context section.
 *
 * Judge handling: when `judge` is given, its agent evaluates every successful
 * iteration. A satisfied verdict (per `parseJudgeVerdict`) ends the loop. An
 * unsatisfied verdict from a cleanly finished judge run is appended to that
 * iteration's context block so the next iteration can act on the feedback.
 *
 * @param agentName     Name of the agent to iterate.
 * @param task          Base task text, repeated verbatim every iteration.
 * @param maxIterations Upper bound on iterations.
 * @param judge         Optional judge agent name and acceptance criteria.
 * @param cwd           Working directory forwarded to every subagent.
 * @param signal        Optional abort signal; checked before each iteration
 *                      and forwarded to `runSubagent`.
 * @param onUpdate      Optional live-progress callback; receives the
 *                      zero-based iteration index.
 * @returns Per-iteration results, the final output, the stop reason, and
 *          accumulated usage (worker and judge runs combined).
 * @throws Error when `agentName` or `judge.agent` is not a known agent.
 */
async function runLoop(
  agentName: string,
  task: string,
  maxIterations: number,
  judge: { agent: string; criteria: string } | undefined,
  cwd: string,
  signal: AbortSignal | undefined,
  onUpdate?: (iteration: number, progress: AgentProgress, usage: AgentUsage) => void,
): Promise<LoopResult> {
  const agent = agents.find((a) => a.name === agentName);
  if (!agent) throw new Error(`Unknown agent: ${agentName}`);

  // Fail fast on a misconfigured judge instead of silently running every
  // iteration unjudged.
  const judgeAgent = judge ? agents.find((a) => a.name === judge.agent) : undefined;
  if (judge && !judgeAgent) throw new Error(`Unknown agent: ${judge.agent}`);

  const TRUNCATION_MARKER = "\n[truncated]";

  const iterations: LoopIterationResult[] = [];
  // One context block per completed iteration: its output plus, when the judge
  // rejected it, the judge's feedback.
  const priorBlocks: string[] = [];
  let lastOutput = "";
  let totalUsage = zeroUsage();
  const startTime = Date.now();

  // Reads `totalUsage` at call time so the returned totals are always current.
  const finish = (stoppedBecause: LoopResult["stoppedBecause"], finalOutput: string): LoopResult => ({
    iterations,
    finalOutput,
    stoppedBecause,
    totalUsage,
    totalDurationMs: Date.now() - startTime,
  });

  // Builds the task for the next iteration from the base task plus as many of
  // the most recent context blocks as fit in MAX_LOOP_CONTEXT.
  const buildTask = (): string => {
    if (priorBlocks.length === 0) return task;

    const kept: string[] = [];
    let remaining = MAX_LOOP_CONTEXT;
    for (let j = priorBlocks.length - 1; j >= 0; j--) {
      const block = priorBlocks[j];
      if (block.length <= remaining) {
        kept.unshift(block);
        remaining -= block.length;
        continue;
      }
      // The newest block alone is over budget: keep its head rather than
      // sending an empty "Prior iterations" section.
      if (kept.length === 0) {
        const room = Math.max(0, remaining - TRUNCATION_MARKER.length);
        kept.unshift(block.slice(0, room) + TRUNCATION_MARKER);
      }
      break;
    }
    return `${task}\n\n## Prior iterations:\n${kept.join("\n\n")}`;
  };

  for (let i = 0; i < maxIterations; i++) {
    // NOTE(review): an abort between iterations falls through to the
    // "max_iterations" return below; the members of LoopResult["stoppedBecause"]
    // are not visible here, so no dedicated abort reason is introduced.
    if (signal?.aborted) break;

    const fullTask = buildTask();
    const iterStart = Date.now();
    const result = await semaphore.run(() =>
      runSubagent(agent, fullTask, cwd, signal, (progress, usage) => {
        onUpdate?.(i, progress, usage);
      }),
    );

    const iterResult: LoopIterationResult = {
      iteration: i + 1,
      output: result.output,
      exitCode: result.exitCode,
      usage: result.usage,
      durationMs: Date.now() - iterStart,
    };
    totalUsage = accumulateUsage(totalUsage, result.usage);
    lastOutput = result.output;

    // A failed iteration ends the loop; the judge is never consulted for it.
    if (result.exitCode !== 0 || result.progress.error) {
      iterations.push(iterResult);
      return finish("error", buildLoopErrorContent(i, agentName, result));
    }

    let contextBlock = `--- Iteration ${i + 1} output ---\n${result.output}`;

    if (judge && judgeAgent) {
      const judgePrompt = `Evaluate this output against the criteria below. Respond with YES if satisfied, or NO with specific feedback.\n\nCriteria: ${judge.criteria}\n\nOutput to evaluate:\n${result.output}`;
      const judgeResult = await semaphore.run(() =>
        runSubagent(judgeAgent, judgePrompt, cwd, signal),
      );
      totalUsage = accumulateUsage(totalUsage, judgeResult.usage);

      const satisfied = parseJudgeVerdict(judgeResult.output);
      iterResult.judgeVerdict = { satisfied, response: judgeResult.output };

      if (satisfied) {
        iterations.push(iterResult);
        return finish("judge_satisfied", result.output);
      }

      // Pass the judge's objections to the next iteration. Skipped when the
      // judge run itself failed, so error text is never presented as feedback.
      const judgeRanCleanly = judgeResult.exitCode === 0 && !judgeResult.progress.error;
      if (judgeRanCleanly && judgeResult.output.trim()) {
        contextBlock += `\n\n--- Judge feedback on iteration ${i + 1} ---\n${judgeResult.output}`;
      }
    }

    iterations.push(iterResult);
    priorBlocks.push(contextBlock);
  }

  return finish("max_iterations", lastOutput || "(no output)");
}
1210
+
1211
+ // ── Pipeline / Loop Rendering ─────────────────────────────────────────
1212
+
1213
/**
 * Renders a pipeline result (finished or still running) as a TUI container.
 *
 * Layout: a header with step count and total duration, one entry per
 * recorded step, an optional "running..." indicator, an optional error line,
 * and a token/cost usage summary.
 *
 * @param result   Pipeline result. The optional `currentStep` is the live-run
 *                 marker carried by `Details.pipelineResult`; the running
 *                 indicator is drawn when it points past the recorded steps.
 * @param theme    Theme used for all colouring.
 * @param expanded When true, each step also shows its duration, task and
 *                 markdown output; when false, steps are one line each.
 * @param w        Available width; used to truncate the task line.
 * @returns The populated container.
 */
function renderPipelineResult(
  result: PipelineResult & { currentStep?: number },
  theme: Theme,
  expanded: boolean,
  w: number,
): Container {
  const c = new Container();
  const stepCount = result.steps.length;
  // Connectors between steps are only drawn for pipelines that did not stop early.
  const notStopped = result.stoppedAt === undefined;

  // Header
  c.addChild(new Text(
    `${theme.fg("toolTitle", theme.bold("pipeline"))} — ${stepCount} steps · ${formatDuration(result.totalDurationMs)}`,
    0, 0,
  ));
  c.addChild(new Spacer(1));

  // Steps
  for (let i = 0; i < stepCount; i++) {
    const step = result.steps[i];
    const succeeded = step.exitCode === 0;
    const hasNext = i < stepCount - 1;
    const icon = succeeded ? theme.fg("success", "✓") : theme.fg("error", "✗");

    if (!expanded) {
      const arrow = hasNext && succeeded && notStopped ? theme.fg("dim", " → ") : "";
      c.addChild(new Text(` ${icon} ${theme.fg("accent", step.agent)}${arrow}`, 0, 0));
      continue;
    }

    c.addChild(new Text(
      ` ${icon} ${theme.fg("accent", step.agent)} — ${formatDuration(step.durationMs)}`,
      0, 0,
    ));
    c.addChild(new Text(
      ` ${theme.fg("dim", "Task:")} ${truncLine(step.task, w - 20)}`,
      0, 0,
    ));
    if (step.output) {
      c.addChild(new Spacer(1));
      c.addChild(new Markdown(step.output, 2, 0, getMarkdownTheme()));
    }
    if (hasNext && notStopped) {
      c.addChild(new Text(theme.fg("dim", " ↓"), 0, 0));
    }
  }

  // Running indicator (collapsed view only): shown while the live step index
  // points past the steps recorded so far.
  const stillRunning = result.currentStep !== undefined && result.currentStep >= stepCount;
  if (stillRunning && !expanded) {
    const lastCompletedOk = stepCount > 0 && result.steps[stepCount - 1].exitCode === 0;
    const arrow = lastCompletedOk ? theme.fg("dim", " → ") : "";
    c.addChild(new Text(
      ` ${arrow}${theme.fg("warning", "⟳")} ${theme.fg("dim", "running...")}`,
      0, 0,
    ));
  }

  // Error message if the pipeline failed (stoppedAt is zero-based; display is one-based)
  if (result.error) {
    c.addChild(new Spacer(1));
    c.addChild(new Text(
      theme.fg("error", `Stopped at step ${(result.stoppedAt ?? 0) + 1}: ${result.error}`),
      0, 0,
    ));
  }

  // Usage summary — zero-valued fields are omitted
  c.addChild(new Spacer(1));
  const usage = result.totalUsage;
  const usageParts: string[] = [];
  if (usage.input) usageParts.push(theme.fg("dim", `↑${formatTokens(usage.input)}`));
  if (usage.output) usageParts.push(theme.fg("dim", `↓${formatTokens(usage.output)}`));
  if (usage.cost) usageParts.push(theme.fg("dim", `$${usage.cost.toFixed(3)}`));
  if (usageParts.length) c.addChild(new Text(usageParts.join(" "), 0, 0));

  return c;
}
1292
+
1293
/**
 * Renders a loop result (finished or still running) as a TUI container.
 *
 * Layout: a header with iteration count, stop reason and total duration, one
 * entry per recorded iteration (with the judge's YES/NO verdict when present),
 * an optional "refining..." indicator, and a token/cost usage summary.
 *
 * @param result   Loop result. The optional `currentIteration` is the
 *                 live-run marker carried by `Details.loopResult`; the
 *                 refining indicator is drawn when it points past the
 *                 recorded iterations.
 * @param theme    Theme used for all colouring.
 * @param expanded When true, each iteration also shows its duration and
 *                 markdown output; when false, iterations are one line each.
 * @param w        Available width. Currently unused by this renderer; kept
 *                 for signature parity with the other result renderers.
 * @returns The populated container.
 */
function renderLoopResult(
  result: LoopResult & { currentIteration?: number },
  theme: Theme,
  expanded: boolean,
  w: number,
): Container {
  const c = new Container();
  const iterationCount = result.iterations.length;

  let stoppedLabel: string;
  if (result.stoppedBecause === "judge_satisfied") {
    stoppedLabel = theme.fg("success", "judge satisfied");
  } else if (result.stoppedBecause === "error") {
    stoppedLabel = theme.fg("error", "stopped (error)");
  } else {
    stoppedLabel = theme.fg("dim", `max ${iterationCount} iterations`);
  }

  // Header
  c.addChild(new Text(
    `${theme.fg("toolTitle", theme.bold("loop"))} — ${iterationCount} iterations · ${stoppedLabel} · ${formatDuration(result.totalDurationMs)}`,
    0, 0,
  ));
  c.addChild(new Spacer(1));

  // Iterations
  for (let idx = 0; idx < iterationCount; idx++) {
    const iter = result.iterations[idx];
    const icon = iter.exitCode === 0 ? theme.fg("success", "✓") : theme.fg("error", "✗");

    let verdictStr = "";
    if (iter.judgeVerdict) {
      verdictStr = iter.judgeVerdict.satisfied
        ? theme.fg("success", " (YES)")
        : theme.fg("warning", " (NO)");
    }

    const label = theme.fg("accent", `Iteration ${iter.iteration}`);

    if (!expanded) {
      const isLast = idx === iterationCount - 1;
      const arrow = isLast ? "" : theme.fg("dim", " → ");
      c.addChild(new Text(` ${icon} ${label}${verdictStr}${arrow}`, 0, 0));
      continue;
    }

    c.addChild(new Text(
      ` ${icon} ${label}${verdictStr} — ${formatDuration(iter.durationMs)}`,
      0, 0,
    ));
    if (iter.output) {
      c.addChild(new Markdown(iter.output, 2, 0, getMarkdownTheme()));
    }
    // A rejected iteration is followed by another attempt.
    if (iter.judgeVerdict && !iter.judgeVerdict.satisfied) {
      c.addChild(new Text(theme.fg("dim", " ↓ refine"), 0, 0));
    }
  }

  // Refining indicator (collapsed view only): shown while the live iteration
  // index points past the iterations recorded so far.
  const stillRunning =
    result.currentIteration !== undefined && result.currentIteration >= iterationCount;
  if (stillRunning && !expanded) {
    const arrow = iterationCount > 0 ? theme.fg("dim", " → ") : "";
    c.addChild(new Text(
      ` ${arrow}${theme.fg("warning", "⟳")} ${theme.fg("dim", "refining...")}`,
      0, 0,
    ));
  }

  // Usage summary — zero-valued fields are omitted
  c.addChild(new Spacer(1));
  const usage = result.totalUsage;
  const usageParts: string[] = [];
  if (usage.input) usageParts.push(theme.fg("dim", `↑${formatTokens(usage.input)}`));
  if (usage.output) usageParts.push(theme.fg("dim", `↓${formatTokens(usage.output)}`));
  if (usage.cost) usageParts.push(theme.fg("dim", `$${usage.cost.toFixed(3)}`));
  if (usageParts.length) c.addChild(new Text(usageParts.join(" "), 0, 0));

  return c;
}
901
1370
 
902
1371
  // ── Extension ─────────────────────────────────────────────────────────
903
1372
 
904
1373
  export default function (pi: ExtensionAPI) {
905
- const config = loadConfig();
906
- const semaphore = new Semaphore(config.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY);
1374
+ extensionConfig = loadConfig();
1375
+ semaphore = new Semaphore(extensionConfig.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY);
907
1376
  agents = loadAgents();
908
1377
 
909
1378
  // If spawned as a child by a parent subagent process, PI_SUBAGENT_ALLOWED
@@ -925,6 +1394,7 @@ export default function (pi: ExtensionAPI) {
925
1394
  "Use subagent to delegate *reasoning and decisions*: codebase exploration (scout), web research (researcher), or isolated code changes (worker)",
926
1395
  "For multiple independent subagent tasks, emit multiple `subagent` tool calls in the same turn — they run in parallel automatically.",
927
1396
  "Subagents have NO context from the current conversation — include ALL necessary context in the task description",
1397
+ "When a subagent returns an error, read it carefully. For transient failures (timeout, API/network), retry once with the same task plus 'Previous attempt failed with: {error}'. For structural failures (wrong approach, missing context), simplify the task or switch agents. If it persists after retry, report to the user with the specific error.",
928
1398
  ],
929
1399
  parameters: Type.Object({
930
1400
  agent: Type.String({ description: "Name of the agent to invoke" }),
@@ -959,9 +1429,10 @@ export default function (pi: ExtensionAPI) {
959
1429
  };
960
1430
 
961
1431
  const result = await semaphore.run(() =>
962
- runSubagent(agent, params.task!, params.cwd ?? cwd, signal, (progress, usage) => {
1432
+ runSubagent(agent, params.task!, params.cwd ?? cwd, signal, (progress, usage, finalExitCode) => {
963
1433
  liveResult.progress = progress;
964
1434
  liveResult.usage = { ...usage };
1435
+ if (finalExitCode !== undefined) liveResult.exitCode = finalExitCode;
965
1436
  onUpdate?.({
966
1437
  content: [{ type: "text", text: "(running...)" }],
967
1438
  details: { results: [liveResult] },
@@ -971,8 +1442,11 @@ export default function (pi: ExtensionAPI) {
971
1442
 
972
1443
  result.contextWindow = contextWindow;
973
1444
  const isError = result.exitCode !== 0 || !!result.progress.error;
1445
+ const contentText = isError
1446
+ ? buildSubagentErrorContent(result)
1447
+ : (result.output || "(no output)");
974
1448
  return {
975
- content: [{ type: "text", text: result.output || "(no output)" }],
1449
+ content: [{ type: "text", text: contentText }],
976
1450
  details: { results: [result] },
977
1451
  ...(isError ? { isError: true } : {}),
978
1452
  };
@@ -1022,7 +1496,7 @@ export default function (pi: ExtensionAPI) {
1022
1496
  // ── Render: result ──
1023
1497
  renderResult(result, options, theme, context) {
1024
1498
  const details = result.details as Details | undefined;
1025
- if (!details?.results?.length) {
1499
+ if (!details) {
1026
1500
  const t = result.content[0];
1027
1501
  const text = t?.type === "text" ? t.text : "(no output)";
1028
1502
  return new Text(text.slice(0, 200), 0, 0);
@@ -1030,8 +1504,264 @@ export default function (pi: ExtensionAPI) {
1030
1504
 
1031
1505
  const w = getTermWidth() - 4;
1032
1506
  const expanded = options.expanded;
1033
- const c = new Container();
1034
- c.addChild(renderAgentProgress(details.results[0], theme, expanded, w));
1507
+
1508
+ // Pipeline result
1509
+ if (details.pipelineResult) {
1510
+ return renderPipelineResult(details.pipelineResult, theme, expanded, w);
1511
+ }
1512
+
1513
+ // Loop result
1514
+ if (details.loopResult) {
1515
+ return renderLoopResult(details.loopResult, theme, expanded, w);
1516
+ }
1517
+
1518
+ // Single agent result (existing behavior)
1519
+ if (details.results?.length) {
1520
+ const c = new Container();
1521
+ c.addChild(renderAgentProgress(details.results[0], theme, expanded, w));
1522
+ return c;
1523
+ }
1524
+
1525
+ // Fallback
1526
+ const t = result.content[0];
1527
+ const text = t?.type === "text" ? t.text : "(no output)";
1528
+ return new Text(text.slice(0, 200), 0, 0);
1529
+ },
1530
+ });
1531
+
1532
+ // ── Pipeline Tool ────────────────────────────────────────────────────
1533
+
1534
/**
 * "pipeline" tool: runs the given steps in order through `runPipeline`.
 *
 * - `execute` validates the step list and agent names, mirrors per-step
 *   progress into a live `Details` object that is pushed through `onUpdate`,
 *   and returns the result of `runPipeline`. The result is flagged `isError`
 *   when `stoppedAt` is set on it.
 * - `renderCall` draws the invocation: a single "a → b → c" line when
 *   collapsed, a numbered list of steps with task previews when expanded.
 */
pi.registerTool({
  name: "pipeline",
  label: "Pipeline",
  description:
    "Run 2–5 agents in sequence. Each agent's output feeds as {previous} context into the next agent's task. Use for multi-stage workflows like scout → planner → worker.",
  promptSnippet: "Run sequential multi-agent pipelines",
  promptGuidelines: [
    "Use pipeline when a task naturally decomposes into sequential agent roles (e.g. explore → plan → implement → review).",
    "Each step receives the previous step's output automatically via {previous} placeholder substitution.",
    "Pipelines stop on first error. The finalOutput is the failing step's error detail.",
    "When a pipeline fails at a step, the error identifies which step and why. Retry the failing step with a simpler task, or re-scope the pipeline. Early-step (exploration) failures → retry the whole pipeline with a more focused scope.",
  ],
  parameters: Type.Object({
    steps: Type.Array(
      Type.Object({
        agent: Type.String({ description: "Agent name for this step" }),
        task: Type.String({ description: "Task description. Use {previous} to reference the prior step's output." }),
        connector: Type.Optional(Type.String({ description: "Override agent's default connector template for this step. Format: \"## Header\\n\\n{output}\"" })),
      }),
      { minItems: 2, maxItems: 5, description: "Sequential steps (2–5). Each step's agent output feeds into the next step's task via {previous}." },
    ),
    cwd: Type.Optional(Type.String({ description: "Working directory for all agent processes" })),
  }),

  async execute(toolCallId, params, signal, onUpdate, ctx) {
    const cwd = params.cwd ?? ctx.cwd;

    // Defensive re-check of the schema's lower bound.
    if (!params.steps || params.steps.length < 2) {
      throw new Error("pipeline requires at least 2 steps");
    }

    // Validate all agents exist before starting any step.
    const agentNames = params.steps.map((s: { agent: string }) => s.agent);
    const missing = validateAgents(agentNames, agents);
    if (missing) {
      const available = agents.map((a) => a.name).join(", ") || "none";
      throw new Error(`Unknown agent in pipeline: ${missing}. Available agents: ${available}`);
    }

    // Snapshot used only for live rendering; the final details come from runPipeline.
    const liveResult: Details = {
      pipelineResult: {
        steps: [],
        currentStep: 0,
        finalOutput: "",
        totalUsage: zeroUsage(),
        totalDurationMs: 0,
      },
    };

    const result = await runPipeline(
      params.steps,
      cwd,
      signal,
      (stepIndex, progress, usage) => {
        const live = liveResult.pipelineResult!;
        live.currentStep = stepIndex;

        const running = progress.status === "running";
        const finished = progress.status === "completed" || progress.status === "failed";

        if (running || finished) {
          // Make sure a slot exists for this step. Every filler is labelled with
          // ITS OWN step's agent/task (exitCode -1 is the "not yet done" sentinel).
          while (live.steps.length <= stepIndex) {
            const pending = params.steps[live.steps.length];
            live.steps.push({
              agent: pending.agent,
              task: pending.task,
              output: "",
              exitCode: -1,
              usage: zeroUsage(),
              durationMs: 0,
            });
          }

          const spec = params.steps[stepIndex];
          if (finished) {
            const stepResult: PipelineStepResult = {
              agent: spec.agent,
              task: spec.task,
              output: progress.lastMessage || "",
              exitCode: progress.status === "failed" ? 1 : 0,
              // Copy so later changes to the caller's usage object cannot alter this entry.
              usage: { ...usage },
              durationMs: progress.durationMs,
            };
            live.steps[stepIndex] = stepResult;
          } else if (live.steps[stepIndex].exitCode === -1) {
            // Still running: refresh the placeholder so the live view shows
            // current usage and elapsed time. A finished slot is never overwritten.
            const placeholder: PipelineStepResult = {
              agent: spec.agent,
              task: spec.task,
              output: "",
              exitCode: -1,
              usage: { ...usage },
              durationMs: progress.durationMs,
            };
            live.steps[stepIndex] = placeholder;
          }
        }

        onUpdate?.({
          content: [{ type: "text", text: `Pipeline: step ${stepIndex + 1}/${params.steps.length}` }],
          details: liveResult,
        });
      },
    );

    const isError = result.stoppedAt !== undefined;
    return {
      content: [{ type: "text", text: result.finalOutput || "(no output)" }],
      details: { pipelineResult: result },
      ...(isError ? { isError: true } : {}),
    };
  },

  renderCall(args, theme, context) {
    const title = theme.fg("toolTitle", theme.bold("pipeline"));

    if (!context.expanded) {
      if (!args.steps) {
        return new Text(title, 0, 0);
      }
      const stepNames = args.steps.map((s: { agent?: string }) => s?.agent || "?").join(" → ");
      return new Text(`${title} ${theme.fg("accent", stepNames)}`, 0, 0);
    }

    // Reuse the previous container when there is one, otherwise start fresh.
    const c = context.lastComponent instanceof Container
      ? (context.lastComponent.clear(), context.lastComponent)
      : new Container();
    const stepCount = args.steps?.length || 0;
    c.addChild(new Text(`${title} — ${stepCount} steps`, 0, 0));
    if (args.steps) {
      c.addChild(new Spacer(1));
      for (let i = 0; i < args.steps.length; i++) {
        // Entries may be missing or partial (the collapsed branch guards with `s?.agent`),
        // so guard here as well instead of dereferencing blindly.
        const step = args.steps[i] as { agent?: string; task?: string } | undefined;
        const agentLabel = step?.agent ? theme.fg("accent", step.agent) : "?";
        const taskPreview = step?.task ? truncLine(step.task, 60) : "";
        c.addChild(new Text(` ${theme.fg("dim", `${i + 1}.`)} ${agentLabel} ${theme.fg("dim", taskPreview)}`, 0, 0));
      }
    }
    return c;
  },
});
1663
+
1664
+ // ── Loop Tool ─────────────────────────────────────────────────────────
1665
+
1666
/**
 * "loop" tool: runs one agent repeatedly through `runLoop`, optionally with a
 * judge agent.
 *
 * - `execute` validates the agent (and judge) names, forwards the current
 *   iteration index to `onUpdate` while the loop runs, and returns the
 *   result of `runLoop`. The result is flagged `isError` when
 *   `stoppedBecause` is "error".
 * - `renderCall` draws the invocation: "loop <agent> × <n>" when collapsed,
 *   plus the task text and judge criteria when expanded.
 */
pi.registerTool({
  name: "loop",
  label: "Loop",
  description:
    "Run the same agent 2–5 times, passing prior iteration outputs as context. Optionally use a judge agent to evaluate quality and stop early.",
  promptSnippet: "Run iterative refinement loops with optional judge",
  promptGuidelines: [
    "Use loop for tasks that benefit from iterative refinement (e.g. drafting → reviewing → polishing).",
    "Configure a judge agent to stop early when quality is sufficient, avoiding wasted iterations.",
    "Each iteration receives all prior outputs as context, enabling progressive improvement.",
    "When a loop iteration fails, the error shows which iteration. Reduce max_iterations or simplify the task; if the judge consistently rejects, refine the criteria or switch judge agent.",
  ],
  parameters: Type.Object({
    agent: Type.String({ description: "Agent name to run in the loop" }),
    task: Type.String({ description: "Task description for each iteration" }),
    max_iterations: Type.Optional(Type.Number({ minimum: 2, maximum: 5, default: 3, description: "Maximum number of iterations (2–5, default 3)" })),
    judge: Type.Optional(Type.Object({
      agent: Type.String({ description: "Judge agent name" }),
      criteria: Type.String({ description: "Quality criteria. Judge responds YES if satisfied, NO otherwise." }),
    }, { description: "Optional judge agent to evaluate each iteration and stop early when quality is sufficient" })),
    cwd: Type.Optional(Type.String({ description: "Working directory for agent processes" })),
  }),

  async execute(toolCallId, params, signal, onUpdate, ctx) {
    const cwd = params.cwd ?? ctx.cwd;
    const maxIterations = params.max_iterations ?? 3;

    // Validate the loop agent and, when configured, the judge agent.
    const agentNames = [params.agent];
    if (params.judge) agentNames.push(params.judge.agent);
    const missing = validateAgents(agentNames, agents);
    if (missing) {
      const available = agents.map((a) => a.name).join(", ") || "none";
      throw new Error(`Unknown agent in loop: ${missing}. Available agents: ${available}`);
    }

    // Snapshot used only for live rendering; the final details come from runLoop.
    const liveResult: Details = {
      loopResult: {
        iterations: [],
        currentIteration: 0,
        finalOutput: "",
        stoppedBecause: "max_iterations",
        totalUsage: zeroUsage(),
        totalDurationMs: 0,
      },
    };

    const result = await runLoop(
      params.agent,
      params.task,
      maxIterations,
      params.judge,
      cwd,
      signal,
      // Only the iteration index is mirrored into the live snapshot; the
      // progress/usage arguments supplied by runLoop are not consumed here.
      (iteration) => {
        liveResult.loopResult!.currentIteration = iteration;
        onUpdate?.({
          content: [{ type: "text", text: `Loop: iteration ${iteration + 1}/${maxIterations}` }],
          details: liveResult,
        });
      },
    );

    const isError = result.stoppedBecause === "error";
    return {
      content: [{ type: "text", text: result.finalOutput || "(no output)" }],
      details: { loopResult: result },
      ...(isError ? { isError: true } : {}),
    };
  },

  renderCall(args, theme, context) {
    const title = theme.fg("toolTitle", theme.bold("loop"));
    // Same defaulting rule as execute(): only a missing value falls back to 3.
    const maxIter = args.max_iterations ?? 3;
    const judge = args.judge as { agent?: string; criteria?: string } | undefined;

    if (!context.expanded) {
      if (!args.agent) {
        return new Text(title, 0, 0);
      }
      const judgeStr = judge ? ` (judge: ${theme.fg("accent", judge.agent || "?")})` : "";
      return new Text(
        `${title} ${theme.fg("accent", args.agent)} × ${maxIter}${judgeStr}`,
        0, 0,
      );
    }

    // Reuse the previous container when there is one, otherwise start fresh.
    const c = context.lastComponent instanceof Container
      ? (context.lastComponent.clear(), context.lastComponent)
      : new Container();
    c.addChild(new Text(`${title} ${theme.fg("accent", args.agent || "?")} × ${maxIter}`, 0, 0));
    if (args.task) {
      c.addChild(new Spacer(1));
      c.addChild(new Text(theme.fg("text", args.task), 0, 0));
    }
    if (judge) {
      c.addChild(new Spacer(1));
      c.addChild(new Text(`${theme.fg("dim", "Judge:")} ${theme.fg("accent", judge.agent || "?")} — ${theme.fg("dim", judge.criteria || "")}`, 0, 0));
    }
    return c;
  },
});