npm - gsd-pi - Versions diffs - 2.36.0-dev.f887f4e → 2.37.0 - Mend

gsd-pi 2.36.0-dev.f887f4e → 2.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

package/src/resources/extensions/gsd/prompts/research-slice.md CHANGED Viewed

@@ -46,8 +46,9 @@ Research what this slice needs. Narrate key findings and surprises as you go —
 2. **Skill Discovery ({{skillDiscoveryMode}}):**{{skillDiscoveryInstructions}}
 3. Explore relevant code for this slice's scope. For targeted exploration, use `rg`, `find`, and reads. For broad or unfamiliar subsystems, use `scout` to map the relevant area first.
 4. Use `resolve_library` / `get_library_docs` for unfamiliar libraries — skip this for libraries already used in the codebase
-5. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
-6. Write `{{outputPath}}`
+5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — prefer `resolve_library` / `get_library_docs` for library documentation. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
+6. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
+7. Write `{{outputPath}}`
 The slice directory already exists at `{{slicePath}}/`. Do NOT mkdir — just write the file.

package/src/resources/extensions/gsd/templates/preferences.md CHANGED Viewed

@@ -57,6 +57,12 @@ notifications:
   on_budget:
   on_milestone:
   on_attention:
+cmux:
+  enabled:
+  notifications:
+  sidebar:
+  splits:
+  browser:
 remote_questions:
   channel:
   channel_id:

package/src/resources/extensions/gsd/tests/auto-loop.test.ts CHANGED Viewed

@@ -317,6 +317,8 @@ function makeMockDeps(
     },
     clearUnitTimeout: () => {},
     updateProgressWidget: () => {},
+    syncCmuxSidebar: () => {},
+    logCmuxEvent: () => {},
     invalidateAllCaches: () => {
       callLog.push("invalidateAllCaches");
     },

package/src/resources/extensions/gsd/tests/cmux.test.ts ADDED Viewed

@@ -0,0 +1,98 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import {
+  buildCmuxProgress,
+  buildCmuxStatusLabel,
+  detectCmuxEnvironment,
+  markCmuxPromptShown,
+  resetCmuxPromptState,
+  resolveCmuxConfig,
+  shouldPromptToEnableCmux,
+} from "../../cmux/index.ts";
+import type { GSDState } from "../types.ts";
+// Supplies all three CMUX_* variables, a path predicate that accepts only the
+// socket path, and an always-true third callback; both flags must come back true.
+test("detectCmuxEnvironment requires workspace, surface, and socket", () => {
+  const socketPath = "/tmp/cmux.sock";
+  const env = {
+    CMUX_WORKSPACE_ID: "workspace:1",
+    CMUX_SURFACE_ID: "surface:2",
+    CMUX_SOCKET_PATH: socketPath,
+  };
+  const onlySocketPath = (candidate: string) => candidate === socketPath;
+  const alwaysTrue = () => true;
+  const { available, cliAvailable } = detectCmuxEnvironment(env, onlySocketPath, alwaysTrue);
+  assert.equal(available, true);
+  assert.equal(cliAvailable, true);
+});
+// With every cmux preference switched on and a complete CMUX_* environment,
+// each of the four asserted config flags must resolve to true.
+test("resolveCmuxConfig enables only when preference and environment are both active", () => {
+  const preferences = {
+    cmux: { enabled: true, notifications: true, sidebar: true, splits: true },
+  };
+  const env = {
+    CMUX_WORKSPACE_ID: "workspace:1",
+    CMUX_SURFACE_ID: "surface:2",
+    CMUX_SOCKET_PATH: "/tmp/cmux.sock",
+  };
+  const alwaysTrue = () => true;
+  const config = resolveCmuxConfig(preferences, env, alwaysTrue, alwaysTrue);
+  for (const flag of ["enabled", "notifications", "sidebar", "splits"] as const) {
+    assert.equal(config[flag], true);
+  }
+});
+// The prompt should be offered once: not without a cmux environment, yes with
+// one, and no longer after markCmuxPromptShown() has been called.
+test("shouldPromptToEnableCmux only prompts once per session", () => {
+  const cmuxEnv = {
+    CMUX_WORKSPACE_ID: "workspace:1",
+    CMUX_SURFACE_ID: "surface:2",
+    CMUX_SOCKET_PATH: "/tmp/cmux.sock",
+  };
+  const alwaysTrue = () => true;
+  const alwaysFalse = () => false;
+  resetCmuxPromptState();
+  try {
+    assert.equal(shouldPromptToEnableCmux({}, {}, alwaysFalse, alwaysTrue), false);
+    assert.equal(shouldPromptToEnableCmux({}, cmuxEnv, alwaysTrue, alwaysTrue), true);
+    markCmuxPromptShown();
+    assert.equal(shouldPromptToEnableCmux({}, cmuxEnv, alwaysTrue, alwaysTrue), false);
+  } finally {
+    // Prompt state is module-level; restore it even when an assertion above
+    // throws so a failure here cannot leak into other tests.
+    resetCmuxPromptState();
+  }
+});
+// With milestone, slice and task all active, the label names all three ids
+// and the progress figure is taken from the task counters (2 of 5).
+test("buildCmuxStatusLabel and progress prefer deepest active unit", () => {
+  const progress = {
+    milestones: { done: 0, total: 1 },
+    slices: { done: 1, total: 3 },
+    tasks: { done: 2, total: 5 },
+  };
+  const state: GSDState = {
+    activeMilestone: { id: "M001", title: "Milestone" },
+    activeSlice: { id: "S02", title: "Slice" },
+    activeTask: { id: "T03", title: "Task" },
+    phase: "executing",
+    recentDecisions: [],
+    blockers: [],
+    nextAction: "Keep going",
+    registry: [],
+    progress,
+  };
+  const label = buildCmuxStatusLabel(state);
+  assert.equal(label, "M001 S02/T03 · executing");
+  const taskProgress = buildCmuxProgress(state);
+  assert.deepEqual(taskProgress, { value: 0.4, label: "2/5 tasks" });
+});

package/src/resources/extensions/gsd/tests/preferences.test.ts CHANGED Viewed

@@ -171,6 +171,29 @@ test("notification fields validate correctly", () => {
   assert.equal(preferences.notifications?.on_complete, false);
 });
+// A cmux section made only of boolean flags must validate without errors and
+// keep the asserted flag values.
+test("cmux fields validate correctly", () => {
+  const input = {
+    cmux: {
+      enabled: true,
+      notifications: true,
+      sidebar: false,
+      splits: true,
+      browser: false,
+    },
+  };
+  const result = validatePreferences(input);
+  assert.equal(result.errors.length, 0);
+  const cmux = result.preferences.cmux;
+  assert.equal(cmux?.enabled, true);
+  assert.equal(cmux?.sidebar, false);
+  assert.equal(cmux?.splits, true);
+});
+// An unrecognised key inside the cmux section must be reported as a warning.
+test("cmux unknown keys produce warnings", () => {
+  const result = validatePreferences({
+    cmux: { enabled: true, strange_mode: true } as any,
+  });
+  const expectedFragment = 'unknown cmux key "strange_mode"';
+  const reported = result.warnings.some((message) => message.includes(expectedFragment));
+  assert.ok(reported);
+});
 test("git fields comprehensive validation", () => {
   const { preferences, errors } = validatePreferences({
     git: {

package/src/resources/extensions/search-the-web/native-search.ts CHANGED Viewed

@@ -16,6 +16,16 @@ export const CUSTOM_SEARCH_TOOL_NAMES = ["search-the-web", "search_and_read", "g
 /** Thinking block types that require signature validation by the API */
 const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]);
+/**
+ * Maximum number of native web searches allowed per session (agent unit).
+ * The Anthropic API's `max_uses` is per-request — it resets on each API call.
+ * When `pause_turn` triggers a resubmit, the model gets a fresh budget.
+ * This session-level cap prevents unbounded search accumulation (#1309).
+ *
+ * 15 = 3 full turns of 5 searches each — generous for research, but bounded.
+ */
+export const MAX_NATIVE_SEARCHES_PER_SESSION = 15;
 /** When true, skip native web search injection and keep Brave/custom tools active on Anthropic. */
 export function preferBraveSearch(): boolean {
   // preferences.md takes priority over env var
@@ -74,6 +84,11 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
   let isAnthropicProvider = false;
   let modelSelectFired = false;
+  // Session-level native search counter (#1309).
+  // Tracks cumulative web_search_tool_result blocks across all turns in a session.
+  // Reset on session_start. Used to compute remaining budget for max_uses.
+  let sessionSearchCount = 0;
   // Track provider changes via model selection — also handles diagnostics
   // since model_select fires AFTER session_start and knows the provider.
   pi.on("model_select", async (event: any, ctx: any) => {
@@ -161,13 +176,41 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
     );
     payload.tools = tools;
+    // ── Session-level search budget (#1309) ──────────────────────────────
+    // Count web_search_tool_result blocks in the conversation history to
+    // determine how many native searches have already been used this session.
+    // The Anthropic API's max_uses resets per request, so without this guard,
+    // pause_turn → resubmit cycles allow unlimited total searches.
+    if (Array.isArray(messages)) {
+      let historySearchCount = 0;
+      for (const msg of messages) {
+        const content = msg.content;
+        if (!Array.isArray(content)) continue;
+        for (const block of content) {
+          if ((block as any)?.type === "web_search_tool_result") {
+            historySearchCount++;
+          }
+        }
+      }
+      // Sync counter from history (handles session restore / context replay)
+      sessionSearchCount = historySearchCount;
+    }
+    const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount);
+    if (remaining <= 0) {
+      // Budget exhausted — don't inject the search tool at all.
+      // The model will proceed without web search capability.
+      return payload;
+    }
     tools.push({
       type: "web_search_20250305",
       name: "web_search",
-      // Cap server-side searches per response to prevent the model from
-      // looping on web_search without synthesizing results (#817).
-      // 5 searches is generous — most queries need 1-2.
-      max_uses: 5,
+      // Cap per-request searches to the lesser of 5 (per-turn cap) or the
+      // remaining session budget (#1309). This prevents the model from
+      // consuming unlimited searches via pause_turn → resubmit cycles.
+      max_uses: Math.min(5, remaining),
     });
     return payload;
@@ -175,6 +218,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
   // Basic startup diagnostics — provider-specific info comes from model_select
   pi.on("session_start", async (_event: any, ctx: any) => {
+    // Reset session-level search budget (#1309)
+    sessionSearchCount = 0;
     const hasBrave = !!process.env.BRAVE_API_KEY;
     const hasJina = !!process.env.JINA_API_KEY;
     const hasAnswers = !!process.env.BRAVE_ANSWERS_KEY;

package/src/resources/extensions/shared/terminal.ts CHANGED Viewed

@@ -7,9 +7,14 @@
 const UNSUPPORTED_TERMS = ["apple_terminal", "warpterm"];
+/**
+ * Tells whether the given environment carries both cmux identifiers.
+ *
+ * @param env Environment to inspect; defaults to `process.env`.
+ * @returns `true` only when `CMUX_WORKSPACE_ID` and `CMUX_SURFACE_ID` are both non-empty.
+ */
+export function isCmuxTerminal(env: NodeJS.ProcessEnv = process.env): boolean {
+  const { CMUX_WORKSPACE_ID: workspaceId, CMUX_SURFACE_ID: surfaceId } = env;
+  return !!workspaceId && !!surfaceId;
+}
+/**
+ * Decides whether Ctrl+Alt shortcuts are reported as supported.
+ *
+ * @returns `true` inside a cmux terminal; otherwise `false` when `TERM_PROGRAM`
+ * contains one of `UNSUPPORTED_TERMS` (case-insensitive) or `TERMINAL_EMULATOR`
+ * contains "jetbrains", and `true` in every other case.
+ */
 export function supportsCtrlAltShortcuts(): boolean {
   const term = (process.env.TERM_PROGRAM || "").toLowerCase();
   const jetbrains = (process.env.TERMINAL_EMULATOR || "").toLowerCase().includes("jetbrains");
+  // A cmux session wins over the TERM_PROGRAM / TERMINAL_EMULATOR checks below.
+  if (isCmuxTerminal()) return true;
   return !UNSUPPORTED_TERMS.some((t) => term.includes(t)) && !jetbrains;
 }

package/src/resources/extensions/subagent/index.ts CHANGED Viewed

@@ -34,6 +34,8 @@ import {
 	readIsolationMode,
 } from "./isolation.js";
 import { registerWorker, updateWorker } from "./worker-registry.js";
+import { loadEffectiveGSDPreferences } from "../gsd/preferences.js";
+import { CmuxClient, shellEscape } from "../cmux/index.js";
 const MAX_PARALLEL_TASKS = 8;
 const MAX_CONCURRENCY = 4;
@@ -257,6 +259,70 @@ function writePromptToTempFile(agentName: string, prompt: string): { dir: string
 	return { dir: tmpDir, filePath };
 }
+/**
+ * Assembles the CLI argument list for one subagent invocation.
+ *
+ * @param agent         Agent whose optional `model` and `tools` become flags.
+ * @param task          Task text, passed last as `Task: <task>`.
+ * @param tmpPromptPath Path given to `--append-system-prompt`, or `null` to omit the flag.
+ * @returns Arguments in the order: fixed flags, model, tools, prompt file, task.
+ */
+function buildSubagentProcessArgs(
+	agent: AgentConfig,
+	task: string,
+	tmpPromptPath: string | null,
+): string[] {
+	const modelFlag = agent.model ? ["--model", agent.model] : [];
+	const hasTools = Boolean(agent.tools && agent.tools.length > 0);
+	const toolsFlag = hasTools ? ["--tools", agent.tools.join(",")] : [];
+	const promptFlag = tmpPromptPath ? ["--append-system-prompt", tmpPromptPath] : [];
+	return [
+		"--mode",
+		"json",
+		"-p",
+		"--no-session",
+		...modelFlag,
+		...toolsFlag,
+		...promptFlag,
+		`Task: ${task}`,
+	];
+}
+/**
+ * Folds one line of subagent JSONL output into `currentResult`.
+ *
+ * Blank lines, lines that are not valid JSON, and JSON values that are not
+ * objects (`null`, numbers, strings, ...) are ignored. A `message_end` event
+ * appends its message and, for assistant messages, accumulates usage and
+ * records model / stop reason / error message. A `tool_result_end` event
+ * appends its message. `emitUpdate` is called once per handled event.
+ *
+ * @param line          One raw line of subagent stdout.
+ * @param currentResult Result object that is mutated in place.
+ * @param emitUpdate    Callback invoked after each handled event.
+ */
+function processSubagentEventLine(
+	line: string,
+	currentResult: SingleResult,
+	emitUpdate: () => void,
+): void {
+	if (!line.trim()) return;
+	let event: any;
+	try {
+		event = JSON.parse(line);
+	} catch {
+		return;
+	}
+	// JSON.parse also accepts bare scalars and `null`; reading `.type` on
+	// `null` would throw inside the stdout handler, so only objects pass.
+	if (event === null || typeof event !== "object") return;
+	if (event.type === "message_end" && event.message) {
+		const msg = event.message as Message;
+		currentResult.messages.push(msg);
+		if (msg.role === "assistant") {
+			currentResult.usage.turns++;
+			const usage = msg.usage;
+			if (usage) {
+				currentResult.usage.input += usage.input || 0;
+				currentResult.usage.output += usage.output || 0;
+				currentResult.usage.cacheRead += usage.cacheRead || 0;
+				currentResult.usage.cacheWrite += usage.cacheWrite || 0;
+				currentResult.usage.cost += usage.cost?.total || 0;
+				// Overwritten rather than summed: holds the latest message's total.
+				currentResult.usage.contextTokens = usage.totalTokens || 0;
+			}
+			// The first reported model wins; later messages do not replace it.
+			if (!currentResult.model && msg.model) currentResult.model = msg.model;
+			if (msg.stopReason) currentResult.stopReason = msg.stopReason;
+			if (msg.errorMessage) currentResult.errorMessage = msg.errorMessage;
+		}
+		emitUpdate();
+	}
+	if (event.type === "tool_result_end" && event.message) {
+		currentResult.messages.push(event.message as Message);
+		emitUpdate();
+	}
+}
+/**
+ * Polls until `filePath` exists, the signal aborts, or the timeout elapses.
+ *
+ * The file is checked at least once, and once more after the final sleep, so
+ * a file that appears during the last poll interval is still detected.
+ *
+ * @param filePath  Path whose existence is awaited.
+ * @param signal    Optional abort signal; an aborted signal yields `false`.
+ * @param timeoutMs Maximum time to wait in milliseconds (default 30 minutes).
+ * @returns `true` when the file exists, `false` on abort or timeout.
+ */
+async function waitForFile(filePath: string, signal: AbortSignal | undefined, timeoutMs = 30 * 60 * 1000): Promise<boolean> {
+	const pollIntervalMs = 150;
+	const deadline = Date.now() + timeoutMs;
+	for (;;) {
+		// Abort takes precedence over an existing file.
+		if (signal?.aborted) return false;
+		if (fs.existsSync(filePath)) return true;
+		const remainingMs = deadline - Date.now();
+		if (remainingMs <= 0) return false;
+		// Never sleep past the deadline; the loop then does one last check.
+		await new Promise((resolve) => setTimeout(resolve, Math.min(pollIntervalMs, remainingMs)));
+	}
+}
 type OnUpdateCallback = (partial: AgentToolResult<SubagentDetails>) => void;
 async function runSingleAgent(
@@ -286,10 +352,6 @@ async function runSingleAgent(
 		};
 	}
-	const args: string[] = ["--mode", "json", "-p", "--no-session"];
-	if (agent.model) args.push("--model", agent.model);
-	if (agent.tools && agent.tools.length > 0) args.push("--tools", agent.tools.join(","));
 	let tmpPromptDir: string | null = null;
 	let tmpPromptPath: string | null = null;
@@ -319,10 +381,8 @@ async function runSingleAgent(
 			const tmp = writePromptToTempFile(agent.name, agent.systemPrompt);
 			tmpPromptDir = tmp.dir;
 			tmpPromptPath = tmp.filePath;
-			args.push("--append-system-prompt", tmpPromptPath);
 		}
-		args.push(`Task: ${task}`);
+		const args = buildSubagentProcessArgs(agent, task, tmpPromptPath);
 		let wasAborted = false;
 		const exitCode = await new Promise<number>((resolve) => {
@@ -336,48 +396,11 @@ async function runSingleAgent(
 			liveSubagentProcesses.add(proc);
 			let buffer = "";
-			const processLine = (line: string) => {
-				if (!line.trim()) return;
-				let event: any;
-				try {
-					event = JSON.parse(line);
-				} catch {
-					return;
-				}
-				if (event.type === "message_end" && event.message) {
-					const msg = event.message as Message;
-					currentResult.messages.push(msg);
-					if (msg.role === "assistant") {
-						currentResult.usage.turns++;
-						const usage = msg.usage;
-						if (usage) {
-							currentResult.usage.input += usage.input || 0;
-							currentResult.usage.output += usage.output || 0;
-							currentResult.usage.cacheRead += usage.cacheRead || 0;
-							currentResult.usage.cacheWrite += usage.cacheWrite || 0;
-							currentResult.usage.cost += usage.cost?.total || 0;
-							currentResult.usage.contextTokens = usage.totalTokens || 0;
-						}
-						if (!currentResult.model && msg.model) currentResult.model = msg.model;
-						if (msg.stopReason) currentResult.stopReason = msg.stopReason;
-						if (msg.errorMessage) currentResult.errorMessage = msg.errorMessage;
-					}
-					emitUpdate();
-				}
-				if (event.type === "tool_result_end" && event.message) {
-					currentResult.messages.push(event.message as Message);
-					emitUpdate();
-				}
-			};
 			proc.stdout.on("data", (data) => {
 				buffer += data.toString();
 				const lines = buffer.split("\n");
 				buffer = lines.pop() || "";
-				for (const line of lines) processLine(line);
+				for (const line of lines) processSubagentEventLine(line, currentResult, emitUpdate);
 			});
 			proc.stderr.on("data", (data) => {
@@ -386,7 +409,7 @@ async function runSingleAgent(
 			proc.on("close", (code) => {
 				liveSubagentProcesses.delete(proc);
-				if (buffer.trim()) processLine(buffer);
+				if (buffer.trim()) processSubagentEventLine(buffer, currentResult, emitUpdate);
 				resolve(code ?? 0);
 			});
@@ -427,6 +450,120 @@ async function runSingleAgent(
 	}
 }
+/**
+ * Runs one subagent inside a newly created cmux split and collects its result
+ * from files written by the command running in that split.
+ *
+ * The agent command is sent to the split as a `bash -lc` script that tees
+ * stdout and stderr into a private temp directory and finally publishes the
+ * agent's exit status as `exit.code`. This function polls for that file, then
+ * replays the captured stdout through `processSubagentEventLine`.
+ *
+ * Delegates to `runSingleAgent` when the agent name is unknown, when
+ * `GSD_BIN_PATH` is unset, when the split cannot be created, or when the
+ * command cannot be sent to the split.
+ *
+ * @param cmuxClient  Client used to create the split and send the command.
+ * @param direction   Split direction passed to `createSplit`.
+ * @param defaultCwd  Working directory used when `cwd` is undefined.
+ * @param agents      Known agent configurations.
+ * @param agentName   Name of the agent to run.
+ * @param task        Task text handed to the agent.
+ * @param cwd         Optional working directory override.
+ * @param step        Optional step number recorded on the result.
+ * @param signal      Optional abort signal; stops the wait for the exit file.
+ * @param onUpdate    Optional callback receiving partial results.
+ * @param makeDetails Builds the details payload for `onUpdate`.
+ * @returns The collected result. `exitCode` is 1 when the wait timed out or
+ * was aborted, or when the exit file does not contain an integer.
+ *
+ * NOTE(review): on abort or timeout the command already sent to the split is
+ * not stopped here — confirm whether CmuxClient can close the surface.
+ */
+async function runSingleAgentInCmuxSplit(
+	cmuxClient: CmuxClient,
+	direction: "right" | "down",
+	defaultCwd: string,
+	agents: AgentConfig[],
+	agentName: string,
+	task: string,
+	cwd: string | undefined,
+	step: number | undefined,
+	signal: AbortSignal | undefined,
+	onUpdate: OnUpdateCallback | undefined,
+	makeDetails: (results: SingleResult[]) => SubagentDetails,
+): Promise<SingleResult> {
+	// Every "cannot use a split" path delegates to the regular runner.
+	const runWithoutSplit = () =>
+		runSingleAgent(defaultCwd, agents, agentName, task, cwd, step, signal, onUpdate, makeDetails);
+	const agent = agents.find((a) => a.name === agentName);
+	if (!agent) return runWithoutSplit();
+	// Without the CLI entry point there is no command to send to the split.
+	const gsdBinPath = process.env.GSD_BIN_PATH;
+	if (!gsdBinPath) return runWithoutSplit();
+	let tmpPromptDir: string | null = null;
+	let tmpPromptPath: string | null = null;
+	let tmpOutputDir: string | null = null;
+	const currentResult: SingleResult = {
+		agent: agentName,
+		agentSource: agent.source,
+		task,
+		exitCode: 0,
+		messages: [],
+		stderr: "",
+		usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, cost: 0, contextTokens: 0, turns: 0 },
+		model: agent.model,
+		step,
+	};
+	const emitUpdate = () => {
+		if (onUpdate) {
+			onUpdate({
+				content: [{ type: "text", text: getFinalOutput(currentResult.messages) || "(running...)" }],
+				details: makeDetails([currentResult]),
+			});
+		}
+	};
+	try {
+		if (agent.systemPrompt.trim()) {
+			const tmp = writePromptToTempFile(agent.name, agent.systemPrompt);
+			tmpPromptDir = tmp.dir;
+			tmpPromptPath = tmp.filePath;
+		}
+		tmpOutputDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-subagent-cmux-"));
+		const stdoutPath = path.join(tmpOutputDir, "stdout.jsonl");
+		const stderrPath = path.join(tmpOutputDir, "stderr.log");
+		const exitPath = path.join(tmpOutputDir, "exit.code");
+		const exitTmpPath = `${exitPath}.tmp`;
+		const cmuxSurfaceId = await cmuxClient.createSplit(direction);
+		if (!cmuxSurfaceId) return runWithoutSplit();
+		const bundledPaths = (process.env.GSD_BUNDLED_EXTENSION_PATHS ?? "").split(path.delimiter).map((s) => s.trim()).filter(Boolean);
+		const extensionArgs = bundledPaths.flatMap((p) => ["--extension", p]);
+		const processArgs = [gsdBinPath, ...extensionArgs, ...buildSubagentProcessArgs(agent, task, tmpPromptPath)];
+		const innerScript = [
+			`cd ${shellEscape(cwd ?? defaultCwd)}`,
+			"set -o pipefail",
+			`${shellEscape(process.execPath)} ${processArgs.map(shellEscape).join(" ")} 2> >(tee ${shellEscape(stderrPath)} >&2) | tee ${shellEscape(stdoutPath)}`,
+			// PIPESTATUS[0] is the agent's own status, not tee's.
+			"status=${PIPESTATUS[0]}",
+			// Redirection creates the file before printf fills it, so write to
+			// a temp name and rename: the poller must never see an empty file.
+			`printf '%s' "$status" > ${shellEscape(exitTmpPath)}`,
+			`mv ${shellEscape(exitTmpPath)} ${shellEscape(exitPath)}`,
+		].join("; ");
+		const sent = await cmuxClient.sendSurface(cmuxSurfaceId, `bash -lc ${shellEscape(innerScript)}`);
+		if (!sent) return runWithoutSplit();
+		const finished = await waitForFile(exitPath, signal);
+		if (!finished) {
+			currentResult.exitCode = 1;
+			currentResult.stderr = "cmux split execution timed out or was aborted";
+			return currentResult;
+		}
+		if (fs.existsSync(stdoutPath)) {
+			const stdout = fs.readFileSync(stdoutPath, "utf-8");
+			for (const line of stdout.split("\n")) {
+				processSubagentEventLine(line, currentResult, emitUpdate);
+			}
+		}
+		if (fs.existsSync(stderrPath)) {
+			currentResult.stderr = fs.readFileSync(stderrPath, "utf-8");
+		}
+		// An empty or non-numeric exit file counts as failure, never as success.
+		const recordedExit = Number.parseInt(fs.readFileSync(exitPath, "utf-8").trim(), 10);
+		currentResult.exitCode = Number.isNaN(recordedExit) ? 1 : recordedExit;
+		return currentResult;
+	} finally {
+		// Best-effort cleanup of the temp prompt and output files.
+		if (tmpPromptPath)
+			try {
+				fs.unlinkSync(tmpPromptPath);
+			} catch {
+				/* ignore */
+			}
+		if (tmpPromptDir)
+			try {
+				fs.rmdirSync(tmpPromptDir);
+			} catch {
+				/* ignore */
+			}
+		if (tmpOutputDir)
+			try {
+				fs.rmSync(tmpOutputDir, { recursive: true, force: true });
+			} catch {
+				/* ignore */
+			}
+	}
+}
 const TaskItem = Type.Object({
 	agent: Type.String({ description: "Name of the agent to invoke" }),
 	task: Type.String({ description: "Task to delegate to the agent" }),
@@ -511,6 +648,8 @@ export default function (pi: ExtensionAPI) {
 			const discovery = discoverAgents(ctx.cwd, agentScope);
 			const agents = discovery.agents;
 			const confirmProjectAgents = params.confirmProjectAgents ?? false;
+			const cmuxClient = CmuxClient.fromPreferences(loadEffectiveGSDPreferences()?.preferences);
+			const cmuxSplitsEnabled = cmuxClient.getConfig().splits;
 			// Resolve isolation mode
 			const isolationMode = readIsolationMode();
@@ -669,28 +808,26 @@ export default function (pi: ExtensionAPI) {
 				const batchSize = params.tasks.length;
 				const results = await mapWithConcurrencyLimit(params.tasks, MAX_CONCURRENCY, async (t, index) => {
 					const workerId = registerWorker(t.agent, t.task, index, batchSize, batchId);
-					let result = await runSingleAgent(
-						ctx.cwd,
-						agents,
-						t.agent,
-						t.task,
-						t.cwd,
-						undefined,
-						signal,
-						// Per-task update callback
-						(partial) => {
-							if (partial.details?.results[0]) {
-								allResults[index] = partial.details.results[0];
-								emitParallelUpdate();
-							}
-						},
-						makeDetails("parallel"),
-					);
-					// Auto-retry failed tasks (likely API rate limit or transient error)
-					const isFailed = result.exitCode !== 0 || (result.messages.length === 0 && !signal?.aborted);
-					if (isFailed && MAX_RETRIES > 0 && !signal?.aborted) {
-						result = await runSingleAgent(
+					const runTask = () => cmuxSplitsEnabled
+						? runSingleAgentInCmuxSplit(
+							cmuxClient,
+							index % 2 === 0 ? "right" : "down",
+							ctx.cwd,
+							agents,
+							t.agent,
+							t.task,
+							t.cwd,
+							undefined,
+							signal,
+							(partial) => {
+								if (partial.details?.results[0]) {
+									allResults[index] = partial.details.results[0];
+									emitParallelUpdate();
+								}
+							},
+							makeDetails("parallel"),
+						)
+						: runSingleAgent(
 							ctx.cwd,
 							agents,
 							t.agent,
@@ -706,6 +843,12 @@ export default function (pi: ExtensionAPI) {
 							},
 							makeDetails("parallel"),
 						);
+					let result = await runTask();
+					// Auto-retry failed tasks (likely API rate limit or transient error)
+					const isFailed = result.exitCode !== 0 || (result.messages.length === 0 && !signal?.aborted);
+					if (isFailed && MAX_RETRIES > 0 && !signal?.aborted) {
+						result = await runTask();
 					}
 					updateWorker(workerId, result.exitCode === 0 ? "completed" : "failed");
@@ -744,17 +887,31 @@ export default function (pi: ExtensionAPI) {
 						isolation = await createIsolation(effectiveCwd, taskId, isolationMode);
 					}
-					const result = await runSingleAgent(
-						ctx.cwd,
-						agents,
-						params.agent,
-						params.task,
-						isolation ? isolation.workDir : params.cwd,
-						undefined,
-						signal,
-						onUpdate,
-						makeDetails("single"),
-					);
+					const result = cmuxSplitsEnabled
+						? await runSingleAgentInCmuxSplit(
+							cmuxClient,
+							"right",
+							ctx.cwd,
+							agents,
+							params.agent,
+							params.task,
+							isolation ? isolation.workDir : params.cwd,
+							undefined,
+							signal,
+							onUpdate,
+							makeDetails("single"),
+						)
+						: await runSingleAgent(
+							ctx.cwd,
+							agents,
+							params.agent,
+							params.task,
+							isolation ? isolation.workDir : params.cwd,
+							undefined,
+							signal,
+							onUpdate,
+							makeDetails("single"),
+						);
 					// Capture and merge delta if isolated
 					if (isolation) {