gsd-pi 2.36.0-dev.f887f4e → 2.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/dist/resources/extensions/cmux/index.js +321 -0
  2. package/dist/resources/extensions/gsd/auto-dashboard.js +334 -104
  3. package/dist/resources/extensions/gsd/auto-loop.js +11 -0
  4. package/dist/resources/extensions/gsd/auto.js +16 -0
  5. package/dist/resources/extensions/gsd/commands-cmux.js +120 -0
  6. package/dist/resources/extensions/gsd/commands-prefs-wizard.js +1 -1
  7. package/dist/resources/extensions/gsd/commands.js +51 -1
  8. package/dist/resources/extensions/gsd/docs/preferences-reference.md +25 -0
  9. package/dist/resources/extensions/gsd/index.js +5 -0
  10. package/dist/resources/extensions/gsd/notifications.js +10 -1
  11. package/dist/resources/extensions/gsd/preferences-types.js +2 -0
  12. package/dist/resources/extensions/gsd/preferences-validation.js +29 -0
  13. package/dist/resources/extensions/gsd/preferences.js +3 -0
  14. package/dist/resources/extensions/gsd/prompts/research-milestone.md +4 -3
  15. package/dist/resources/extensions/gsd/prompts/research-slice.md +3 -2
  16. package/dist/resources/extensions/gsd/templates/preferences.md +6 -0
  17. package/dist/resources/extensions/search-the-web/native-search.js +45 -4
  18. package/dist/resources/extensions/shared/terminal.js +5 -0
  19. package/dist/resources/extensions/subagent/index.js +180 -60
  20. package/package.json +1 -1
  21. package/packages/pi-coding-agent/package.json +1 -1
  22. package/packages/pi-tui/dist/terminal-image.d.ts.map +1 -1
  23. package/packages/pi-tui/dist/terminal-image.js +4 -0
  24. package/packages/pi-tui/dist/terminal-image.js.map +1 -1
  25. package/packages/pi-tui/src/terminal-image.ts +5 -0
  26. package/pkg/package.json +1 -1
  27. package/src/resources/extensions/cmux/index.ts +384 -0
  28. package/src/resources/extensions/gsd/auto-dashboard.ts +363 -116
  29. package/src/resources/extensions/gsd/auto-loop.ts +42 -0
  30. package/src/resources/extensions/gsd/auto.ts +21 -0
  31. package/src/resources/extensions/gsd/commands-cmux.ts +143 -0
  32. package/src/resources/extensions/gsd/commands-prefs-wizard.ts +1 -1
  33. package/src/resources/extensions/gsd/commands.ts +54 -1
  34. package/src/resources/extensions/gsd/docs/preferences-reference.md +25 -0
  35. package/src/resources/extensions/gsd/index.ts +8 -0
  36. package/src/resources/extensions/gsd/notifications.ts +10 -1
  37. package/src/resources/extensions/gsd/preferences-types.ts +13 -0
  38. package/src/resources/extensions/gsd/preferences-validation.ts +26 -0
  39. package/src/resources/extensions/gsd/preferences.ts +4 -0
  40. package/src/resources/extensions/gsd/prompts/research-milestone.md +4 -3
  41. package/src/resources/extensions/gsd/prompts/research-slice.md +3 -2
  42. package/src/resources/extensions/gsd/templates/preferences.md +6 -0
  43. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +2 -0
  44. package/src/resources/extensions/gsd/tests/cmux.test.ts +98 -0
  45. package/src/resources/extensions/gsd/tests/preferences.test.ts +23 -0
  46. package/src/resources/extensions/search-the-web/native-search.ts +50 -4
  47. package/src/resources/extensions/shared/terminal.ts +5 -0
  48. package/src/resources/extensions/subagent/index.ts +236 -79
@@ -46,8 +46,9 @@ Research what this slice needs. Narrate key findings and surprises as you go —
46
46
  2. **Skill Discovery ({{skillDiscoveryMode}}):**{{skillDiscoveryInstructions}}
47
47
  3. Explore relevant code for this slice's scope. For targeted exploration, use `rg`, `find`, and reads. For broad or unfamiliar subsystems, use `scout` to map the relevant area first.
48
48
  4. Use `resolve_library` / `get_library_docs` for unfamiliar libraries — skip this for libraries already used in the codebase
49
- 5. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
50
- 6. Write `{{outputPath}}`
49
+ 5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — prefer `resolve_library` / `get_library_docs` for library documentation. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
50
+ 6. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
51
+ 7. Write `{{outputPath}}`
51
52
 
52
53
  The slice directory already exists at `{{slicePath}}/`. Do NOT mkdir — just write the file.
53
54
 
@@ -57,6 +57,12 @@ notifications:
57
57
  on_budget:
58
58
  on_milestone:
59
59
  on_attention:
60
+ cmux:
61
+ enabled:
62
+ notifications:
63
+ sidebar:
64
+ splits:
65
+ browser:
60
66
  remote_questions:
61
67
  channel:
62
68
  channel_id:
@@ -317,6 +317,8 @@ function makeMockDeps(
317
317
  },
318
318
  clearUnitTimeout: () => {},
319
319
  updateProgressWidget: () => {},
320
+ syncCmuxSidebar: () => {},
321
+ logCmuxEvent: () => {},
320
322
  invalidateAllCaches: () => {
321
323
  callLog.push("invalidateAllCaches");
322
324
  },
@@ -0,0 +1,98 @@
1
+ import test from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import {
4
+ buildCmuxProgress,
5
+ buildCmuxStatusLabel,
6
+ detectCmuxEnvironment,
7
+ markCmuxPromptShown,
8
+ resetCmuxPromptState,
9
+ resolveCmuxConfig,
10
+ shouldPromptToEnableCmux,
11
+ } from "../../cmux/index.ts";
12
+ import type { GSDState } from "../types.ts";
13
+
14
+ test("detectCmuxEnvironment requires workspace, surface, and socket", () => {
15
+ const detected = detectCmuxEnvironment(
16
+ {
17
+ CMUX_WORKSPACE_ID: "workspace:1",
18
+ CMUX_SURFACE_ID: "surface:2",
19
+ CMUX_SOCKET_PATH: "/tmp/cmux.sock",
20
+ },
21
+ (path) => path === "/tmp/cmux.sock",
22
+ () => true,
23
+ );
24
+ assert.equal(detected.available, true);
25
+ assert.equal(detected.cliAvailable, true);
26
+ });
27
+
28
+ test("resolveCmuxConfig enables only when preference and environment are both active", () => {
29
+ const config = resolveCmuxConfig(
30
+ { cmux: { enabled: true, notifications: true, sidebar: true, splits: true } },
31
+ {
32
+ CMUX_WORKSPACE_ID: "workspace:1",
33
+ CMUX_SURFACE_ID: "surface:2",
34
+ CMUX_SOCKET_PATH: "/tmp/cmux.sock",
35
+ },
36
+ () => true,
37
+ () => true,
38
+ );
39
+ assert.equal(config.enabled, true);
40
+ assert.equal(config.notifications, true);
41
+ assert.equal(config.sidebar, true);
42
+ assert.equal(config.splits, true);
43
+ });
44
+
45
+ test("shouldPromptToEnableCmux only prompts once per session", () => {
46
+ resetCmuxPromptState();
47
+ assert.equal(shouldPromptToEnableCmux({}, {}, () => false, () => true), false);
48
+
49
+ assert.equal(
50
+ shouldPromptToEnableCmux(
51
+ {},
52
+ {
53
+ CMUX_WORKSPACE_ID: "workspace:1",
54
+ CMUX_SURFACE_ID: "surface:2",
55
+ CMUX_SOCKET_PATH: "/tmp/cmux.sock",
56
+ },
57
+ () => true,
58
+ () => true,
59
+ ),
60
+ true,
61
+ );
62
+ markCmuxPromptShown();
63
+ assert.equal(
64
+ shouldPromptToEnableCmux(
65
+ {},
66
+ {
67
+ CMUX_WORKSPACE_ID: "workspace:1",
68
+ CMUX_SURFACE_ID: "surface:2",
69
+ CMUX_SOCKET_PATH: "/tmp/cmux.sock",
70
+ },
71
+ () => true,
72
+ () => true,
73
+ ),
74
+ false,
75
+ );
76
+ resetCmuxPromptState();
77
+ });
78
+
79
+ test("buildCmuxStatusLabel and progress prefer deepest active unit", () => {
80
+ const state: GSDState = {
81
+ activeMilestone: { id: "M001", title: "Milestone" },
82
+ activeSlice: { id: "S02", title: "Slice" },
83
+ activeTask: { id: "T03", title: "Task" },
84
+ phase: "executing",
85
+ recentDecisions: [],
86
+ blockers: [],
87
+ nextAction: "Keep going",
88
+ registry: [],
89
+ progress: {
90
+ milestones: { done: 0, total: 1 },
91
+ slices: { done: 1, total: 3 },
92
+ tasks: { done: 2, total: 5 },
93
+ },
94
+ };
95
+
96
+ assert.equal(buildCmuxStatusLabel(state), "M001 S02/T03 · executing");
97
+ assert.deepEqual(buildCmuxProgress(state), { value: 0.4, label: "2/5 tasks" });
98
+ });
@@ -171,6 +171,29 @@ test("notification fields validate correctly", () => {
171
171
  assert.equal(preferences.notifications?.on_complete, false);
172
172
  });
173
173
 
174
+ test("cmux fields validate correctly", () => {
175
+ const { preferences, errors } = validatePreferences({
176
+ cmux: {
177
+ enabled: true,
178
+ notifications: true,
179
+ sidebar: false,
180
+ splits: true,
181
+ browser: false,
182
+ },
183
+ });
184
+ assert.equal(errors.length, 0);
185
+ assert.equal(preferences.cmux?.enabled, true);
186
+ assert.equal(preferences.cmux?.sidebar, false);
187
+ assert.equal(preferences.cmux?.splits, true);
188
+ });
189
+
190
+ test("cmux unknown keys produce warnings", () => {
191
+ const { warnings } = validatePreferences({
192
+ cmux: { enabled: true, strange_mode: true } as any,
193
+ });
194
+ assert.ok(warnings.some((warning) => warning.includes('unknown cmux key "strange_mode"')));
195
+ });
196
+
174
197
  test("git fields comprehensive validation", () => {
175
198
  const { preferences, errors } = validatePreferences({
176
199
  git: {
@@ -16,6 +16,16 @@ export const CUSTOM_SEARCH_TOOL_NAMES = ["search-the-web", "search_and_read", "g
16
16
  /** Thinking block types that require signature validation by the API */
17
17
  const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]);
18
18
 
19
+ /**
20
+ * Maximum number of native web searches allowed per session (agent unit).
21
+ * The Anthropic API's `max_uses` is per-request — it resets on each API call.
22
+ * When `pause_turn` triggers a resubmit, the model gets a fresh budget.
23
+ * This session-level cap prevents unbounded search accumulation (#1309).
24
+ *
25
+ * 15 = 3 full turns of 5 searches each — generous for research, but bounded.
26
+ */
27
+ export const MAX_NATIVE_SEARCHES_PER_SESSION = 15;
28
+
19
29
  /** When true, skip native web search injection and keep Brave/custom tools active on Anthropic. */
20
30
  export function preferBraveSearch(): boolean {
21
31
  // preferences.md takes priority over env var
@@ -74,6 +84,11 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
74
84
  let isAnthropicProvider = false;
75
85
  let modelSelectFired = false;
76
86
 
87
+ // Session-level native search counter (#1309).
88
+ // Tracks cumulative web_search_tool_result blocks across all turns in a session.
89
+ // Reset on session_start. Used to compute remaining budget for max_uses.
90
+ let sessionSearchCount = 0;
91
+
77
92
  // Track provider changes via model selection — also handles diagnostics
78
93
  // since model_select fires AFTER session_start and knows the provider.
79
94
  pi.on("model_select", async (event: any, ctx: any) => {
@@ -161,13 +176,41 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
161
176
  );
162
177
  payload.tools = tools;
163
178
 
179
+ // ── Session-level search budget (#1309) ──────────────────────────────
180
+ // Count web_search_tool_result blocks in the conversation history to
181
+ // determine how many native searches have already been used this session.
182
+ // The Anthropic API's max_uses resets per request, so without this guard,
183
+ // pause_turn → resubmit cycles allow unlimited total searches.
184
+ if (Array.isArray(messages)) {
185
+ let historySearchCount = 0;
186
+ for (const msg of messages) {
187
+ const content = msg.content;
188
+ if (!Array.isArray(content)) continue;
189
+ for (const block of content) {
190
+ if ((block as any)?.type === "web_search_tool_result") {
191
+ historySearchCount++;
192
+ }
193
+ }
194
+ }
195
+ // Sync counter from history (handles session restore / context replay)
196
+ sessionSearchCount = historySearchCount;
197
+ }
198
+
199
+ const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount);
200
+
201
+ if (remaining <= 0) {
202
+ // Budget exhausted — don't inject the search tool at all.
203
+ // The model will proceed without web search capability.
204
+ return payload;
205
+ }
206
+
164
207
  tools.push({
165
208
  type: "web_search_20250305",
166
209
  name: "web_search",
167
- // Cap server-side searches per response to prevent the model from
168
- // looping on web_search without synthesizing results (#817).
169
- // 5 searches is generous — most queries need 1-2.
170
- max_uses: 5,
210
+ // Cap per-request searches to the lesser of 5 (per-turn cap) or the
211
+ // remaining session budget (#1309). This prevents the model from
212
+ // consuming unlimited searches via pause_turn resubmit cycles.
213
+ max_uses: Math.min(5, remaining),
171
214
  });
172
215
 
173
216
  return payload;
@@ -175,6 +218,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
175
218
 
176
219
  // Basic startup diagnostics — provider-specific info comes from model_select
177
220
  pi.on("session_start", async (_event: any, ctx: any) => {
221
+ // Reset session-level search budget (#1309)
222
+ sessionSearchCount = 0;
223
+
178
224
  const hasBrave = !!process.env.BRAVE_API_KEY;
179
225
  const hasJina = !!process.env.JINA_API_KEY;
180
226
  const hasAnswers = !!process.env.BRAVE_ANSWERS_KEY;
@@ -7,9 +7,14 @@
7
7
 
8
8
  const UNSUPPORTED_TERMS = ["apple_terminal", "warpterm"];
9
9
 
10
+ export function isCmuxTerminal(env: NodeJS.ProcessEnv = process.env): boolean {
11
+ return Boolean(env.CMUX_WORKSPACE_ID && env.CMUX_SURFACE_ID);
12
+ }
13
+
10
14
  export function supportsCtrlAltShortcuts(): boolean {
11
15
  const term = (process.env.TERM_PROGRAM || "").toLowerCase();
12
16
  const jetbrains = (process.env.TERMINAL_EMULATOR || "").toLowerCase().includes("jetbrains");
17
+ if (isCmuxTerminal()) return true;
13
18
  return !UNSUPPORTED_TERMS.some((t) => term.includes(t)) && !jetbrains;
14
19
  }
15
20
 
@@ -34,6 +34,8 @@ import {
34
34
  readIsolationMode,
35
35
  } from "./isolation.js";
36
36
  import { registerWorker, updateWorker } from "./worker-registry.js";
37
+ import { loadEffectiveGSDPreferences } from "../gsd/preferences.js";
38
+ import { CmuxClient, shellEscape } from "../cmux/index.js";
37
39
 
38
40
  const MAX_PARALLEL_TASKS = 8;
39
41
  const MAX_CONCURRENCY = 4;
@@ -257,6 +259,70 @@ function writePromptToTempFile(agentName: string, prompt: string): { dir: string
257
259
  return { dir: tmpDir, filePath };
258
260
  }
259
261
 
262
+ function buildSubagentProcessArgs(
263
+ agent: AgentConfig,
264
+ task: string,
265
+ tmpPromptPath: string | null,
266
+ ): string[] {
267
+ const args: string[] = ["--mode", "json", "-p", "--no-session"];
268
+ if (agent.model) args.push("--model", agent.model);
269
+ if (agent.tools && agent.tools.length > 0) args.push("--tools", agent.tools.join(","));
270
+ if (tmpPromptPath) args.push("--append-system-prompt", tmpPromptPath);
271
+ args.push(`Task: ${task}`);
272
+ return args;
273
+ }
274
+
275
+ function processSubagentEventLine(
276
+ line: string,
277
+ currentResult: SingleResult,
278
+ emitUpdate: () => void,
279
+ ): void {
280
+ if (!line.trim()) return;
281
+ let event: any;
282
+ try {
283
+ event = JSON.parse(line);
284
+ } catch {
285
+ return;
286
+ }
287
+
288
+ if (event.type === "message_end" && event.message) {
289
+ const msg = event.message as Message;
290
+ currentResult.messages.push(msg);
291
+
292
+ if (msg.role === "assistant") {
293
+ currentResult.usage.turns++;
294
+ const usage = msg.usage;
295
+ if (usage) {
296
+ currentResult.usage.input += usage.input || 0;
297
+ currentResult.usage.output += usage.output || 0;
298
+ currentResult.usage.cacheRead += usage.cacheRead || 0;
299
+ currentResult.usage.cacheWrite += usage.cacheWrite || 0;
300
+ currentResult.usage.cost += usage.cost?.total || 0;
301
+ currentResult.usage.contextTokens = usage.totalTokens || 0;
302
+ }
303
+ if (!currentResult.model && msg.model) currentResult.model = msg.model;
304
+ if (msg.stopReason) currentResult.stopReason = msg.stopReason;
305
+ if (msg.errorMessage) currentResult.errorMessage = msg.errorMessage;
306
+ }
307
+ emitUpdate();
308
+ }
309
+
310
+ if (event.type === "tool_result_end" && event.message) {
311
+ currentResult.messages.push(event.message as Message);
312
+ emitUpdate();
313
+ }
314
+ }
315
+
316
+ async function waitForFile(filePath: string, signal: AbortSignal | undefined, timeoutMs = 30 * 60 * 1000): Promise<boolean> {
317
+ const started = Date.now();
318
+ while (Date.now() - started < timeoutMs) {
319
+ if (signal?.aborted) return false;
320
+ if (fs.existsSync(filePath)) return true;
321
+ await new Promise((resolve) => setTimeout(resolve, 150));
322
+ }
323
+ return false;
324
+ }
325
+
260
326
  type OnUpdateCallback = (partial: AgentToolResult<SubagentDetails>) => void;
261
327
 
262
328
  async function runSingleAgent(
@@ -286,10 +352,6 @@ async function runSingleAgent(
286
352
  };
287
353
  }
288
354
 
289
- const args: string[] = ["--mode", "json", "-p", "--no-session"];
290
- if (agent.model) args.push("--model", agent.model);
291
- if (agent.tools && agent.tools.length > 0) args.push("--tools", agent.tools.join(","));
292
-
293
355
  let tmpPromptDir: string | null = null;
294
356
  let tmpPromptPath: string | null = null;
295
357
 
@@ -319,10 +381,8 @@ async function runSingleAgent(
319
381
  const tmp = writePromptToTempFile(agent.name, agent.systemPrompt);
320
382
  tmpPromptDir = tmp.dir;
321
383
  tmpPromptPath = tmp.filePath;
322
- args.push("--append-system-prompt", tmpPromptPath);
323
384
  }
324
-
325
- args.push(`Task: ${task}`);
385
+ const args = buildSubagentProcessArgs(agent, task, tmpPromptPath);
326
386
  let wasAborted = false;
327
387
 
328
388
  const exitCode = await new Promise<number>((resolve) => {
@@ -336,48 +396,11 @@ async function runSingleAgent(
336
396
  liveSubagentProcesses.add(proc);
337
397
  let buffer = "";
338
398
 
339
- const processLine = (line: string) => {
340
- if (!line.trim()) return;
341
- let event: any;
342
- try {
343
- event = JSON.parse(line);
344
- } catch {
345
- return;
346
- }
347
-
348
- if (event.type === "message_end" && event.message) {
349
- const msg = event.message as Message;
350
- currentResult.messages.push(msg);
351
-
352
- if (msg.role === "assistant") {
353
- currentResult.usage.turns++;
354
- const usage = msg.usage;
355
- if (usage) {
356
- currentResult.usage.input += usage.input || 0;
357
- currentResult.usage.output += usage.output || 0;
358
- currentResult.usage.cacheRead += usage.cacheRead || 0;
359
- currentResult.usage.cacheWrite += usage.cacheWrite || 0;
360
- currentResult.usage.cost += usage.cost?.total || 0;
361
- currentResult.usage.contextTokens = usage.totalTokens || 0;
362
- }
363
- if (!currentResult.model && msg.model) currentResult.model = msg.model;
364
- if (msg.stopReason) currentResult.stopReason = msg.stopReason;
365
- if (msg.errorMessage) currentResult.errorMessage = msg.errorMessage;
366
- }
367
- emitUpdate();
368
- }
369
-
370
- if (event.type === "tool_result_end" && event.message) {
371
- currentResult.messages.push(event.message as Message);
372
- emitUpdate();
373
- }
374
- };
375
-
376
399
  proc.stdout.on("data", (data) => {
377
400
  buffer += data.toString();
378
401
  const lines = buffer.split("\n");
379
402
  buffer = lines.pop() || "";
380
- for (const line of lines) processLine(line);
403
+ for (const line of lines) processSubagentEventLine(line, currentResult, emitUpdate);
381
404
  });
382
405
 
383
406
  proc.stderr.on("data", (data) => {
@@ -386,7 +409,7 @@ async function runSingleAgent(
386
409
 
387
410
  proc.on("close", (code) => {
388
411
  liveSubagentProcesses.delete(proc);
389
- if (buffer.trim()) processLine(buffer);
412
+ if (buffer.trim()) processSubagentEventLine(buffer, currentResult, emitUpdate);
390
413
  resolve(code ?? 0);
391
414
  });
392
415
 
@@ -427,6 +450,120 @@ async function runSingleAgent(
427
450
  }
428
451
  }
429
452
 
453
+ async function runSingleAgentInCmuxSplit(
454
+ cmuxClient: CmuxClient,
455
+ direction: "right" | "down",
456
+ defaultCwd: string,
457
+ agents: AgentConfig[],
458
+ agentName: string,
459
+ task: string,
460
+ cwd: string | undefined,
461
+ step: number | undefined,
462
+ signal: AbortSignal | undefined,
463
+ onUpdate: OnUpdateCallback | undefined,
464
+ makeDetails: (results: SingleResult[]) => SubagentDetails,
465
+ ): Promise<SingleResult> {
466
+ const agent = agents.find((a) => a.name === agentName);
467
+ if (!agent) {
468
+ return runSingleAgent(defaultCwd, agents, agentName, task, cwd, step, signal, onUpdate, makeDetails);
469
+ }
470
+
471
+ let tmpPromptDir: string | null = null;
472
+ let tmpPromptPath: string | null = null;
473
+ let tmpOutputDir: string | null = null;
474
+
475
+ const currentResult: SingleResult = {
476
+ agent: agentName,
477
+ agentSource: agent.source,
478
+ task,
479
+ exitCode: 0,
480
+ messages: [],
481
+ stderr: "",
482
+ usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, cost: 0, contextTokens: 0, turns: 0 },
483
+ model: agent.model,
484
+ step,
485
+ };
486
+
487
+ const emitUpdate = () => {
488
+ if (onUpdate) {
489
+ onUpdate({
490
+ content: [{ type: "text", text: getFinalOutput(currentResult.messages) || "(running...)" }],
491
+ details: makeDetails([currentResult]),
492
+ });
493
+ }
494
+ };
495
+
496
+ try {
497
+ if (agent.systemPrompt.trim()) {
498
+ const tmp = writePromptToTempFile(agent.name, agent.systemPrompt);
499
+ tmpPromptDir = tmp.dir;
500
+ tmpPromptPath = tmp.filePath;
501
+ }
502
+ tmpOutputDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-subagent-cmux-"));
503
+ const stdoutPath = path.join(tmpOutputDir, "stdout.jsonl");
504
+ const stderrPath = path.join(tmpOutputDir, "stderr.log");
505
+ const exitPath = path.join(tmpOutputDir, "exit.code");
506
+ const cmuxSurfaceId = await cmuxClient.createSplit(direction);
507
+ if (!cmuxSurfaceId) {
508
+ return runSingleAgent(defaultCwd, agents, agentName, task, cwd, step, signal, onUpdate, makeDetails);
509
+ }
510
+
511
+ const bundledPaths = (process.env.GSD_BUNDLED_EXTENSION_PATHS ?? "").split(path.delimiter).map((s) => s.trim()).filter(Boolean);
512
+ const extensionArgs = bundledPaths.flatMap((p) => ["--extension", p]);
513
+ const processArgs = [process.env.GSD_BIN_PATH!, ...extensionArgs, ...buildSubagentProcessArgs(agent, task, tmpPromptPath)];
514
+ const innerScript = [
515
+ `cd ${shellEscape(cwd ?? defaultCwd)}`,
516
+ "set -o pipefail",
517
+ `${shellEscape(process.execPath)} ${processArgs.map(shellEscape).join(" ")} 2> >(tee ${shellEscape(stderrPath)} >&2) | tee ${shellEscape(stdoutPath)}`,
518
+ "status=${PIPESTATUS[0]}",
519
+ `printf '%s' "$status" > ${shellEscape(exitPath)}`,
520
+ ].join("; ");
521
+
522
+ const sent = await cmuxClient.sendSurface(cmuxSurfaceId, `bash -lc ${shellEscape(innerScript)}`);
523
+ if (!sent) {
524
+ return runSingleAgent(defaultCwd, agents, agentName, task, cwd, step, signal, onUpdate, makeDetails);
525
+ }
526
+
527
+ const finished = await waitForFile(exitPath, signal);
528
+ if (!finished) {
529
+ currentResult.exitCode = 1;
530
+ currentResult.stderr = "cmux split execution timed out or was aborted";
531
+ return currentResult;
532
+ }
533
+
534
+ if (fs.existsSync(stdoutPath)) {
535
+ const stdout = fs.readFileSync(stdoutPath, "utf-8");
536
+ for (const line of stdout.split("\n")) {
537
+ processSubagentEventLine(line, currentResult, emitUpdate);
538
+ }
539
+ }
540
+ if (fs.existsSync(stderrPath)) {
541
+ currentResult.stderr = fs.readFileSync(stderrPath, "utf-8");
542
+ }
543
+ currentResult.exitCode = Number.parseInt(fs.readFileSync(exitPath, "utf-8").trim() || "1", 10) || 0;
544
+ return currentResult;
545
+ } finally {
546
+ if (tmpPromptPath)
547
+ try {
548
+ fs.unlinkSync(tmpPromptPath);
549
+ } catch {
550
+ /* ignore */
551
+ }
552
+ if (tmpPromptDir)
553
+ try {
554
+ fs.rmdirSync(tmpPromptDir);
555
+ } catch {
556
+ /* ignore */
557
+ }
558
+ if (tmpOutputDir)
559
+ try {
560
+ fs.rmSync(tmpOutputDir, { recursive: true, force: true });
561
+ } catch {
562
+ /* ignore */
563
+ }
564
+ }
565
+ }
566
+
430
567
  const TaskItem = Type.Object({
431
568
  agent: Type.String({ description: "Name of the agent to invoke" }),
432
569
  task: Type.String({ description: "Task to delegate to the agent" }),
@@ -511,6 +648,8 @@ export default function (pi: ExtensionAPI) {
511
648
  const discovery = discoverAgents(ctx.cwd, agentScope);
512
649
  const agents = discovery.agents;
513
650
  const confirmProjectAgents = params.confirmProjectAgents ?? false;
651
+ const cmuxClient = CmuxClient.fromPreferences(loadEffectiveGSDPreferences()?.preferences);
652
+ const cmuxSplitsEnabled = cmuxClient.getConfig().splits;
514
653
 
515
654
  // Resolve isolation mode
516
655
  const isolationMode = readIsolationMode();
@@ -669,28 +808,26 @@ export default function (pi: ExtensionAPI) {
669
808
  const batchSize = params.tasks.length;
670
809
  const results = await mapWithConcurrencyLimit(params.tasks, MAX_CONCURRENCY, async (t, index) => {
671
810
  const workerId = registerWorker(t.agent, t.task, index, batchSize, batchId);
672
- let result = await runSingleAgent(
673
- ctx.cwd,
674
- agents,
675
- t.agent,
676
- t.task,
677
- t.cwd,
678
- undefined,
679
- signal,
680
- // Per-task update callback
681
- (partial) => {
682
- if (partial.details?.results[0]) {
683
- allResults[index] = partial.details.results[0];
684
- emitParallelUpdate();
685
- }
686
- },
687
- makeDetails("parallel"),
688
- );
689
-
690
- // Auto-retry failed tasks (likely API rate limit or transient error)
691
- const isFailed = result.exitCode !== 0 || (result.messages.length === 0 && !signal?.aborted);
692
- if (isFailed && MAX_RETRIES > 0 && !signal?.aborted) {
693
- result = await runSingleAgent(
811
+ const runTask = () => cmuxSplitsEnabled
812
+ ? runSingleAgentInCmuxSplit(
813
+ cmuxClient,
814
+ index % 2 === 0 ? "right" : "down",
815
+ ctx.cwd,
816
+ agents,
817
+ t.agent,
818
+ t.task,
819
+ t.cwd,
820
+ undefined,
821
+ signal,
822
+ (partial) => {
823
+ if (partial.details?.results[0]) {
824
+ allResults[index] = partial.details.results[0];
825
+ emitParallelUpdate();
826
+ }
827
+ },
828
+ makeDetails("parallel"),
829
+ )
830
+ : runSingleAgent(
694
831
  ctx.cwd,
695
832
  agents,
696
833
  t.agent,
@@ -706,6 +843,12 @@ export default function (pi: ExtensionAPI) {
706
843
  },
707
844
  makeDetails("parallel"),
708
845
  );
846
+ let result = await runTask();
847
+
848
+ // Auto-retry failed tasks (likely API rate limit or transient error)
849
+ const isFailed = result.exitCode !== 0 || (result.messages.length === 0 && !signal?.aborted);
850
+ if (isFailed && MAX_RETRIES > 0 && !signal?.aborted) {
851
+ result = await runTask();
709
852
  }
710
853
 
711
854
  updateWorker(workerId, result.exitCode === 0 ? "completed" : "failed");
@@ -744,17 +887,31 @@ export default function (pi: ExtensionAPI) {
744
887
  isolation = await createIsolation(effectiveCwd, taskId, isolationMode);
745
888
  }
746
889
 
747
- const result = await runSingleAgent(
748
- ctx.cwd,
749
- agents,
750
- params.agent,
751
- params.task,
752
- isolation ? isolation.workDir : params.cwd,
753
- undefined,
754
- signal,
755
- onUpdate,
756
- makeDetails("single"),
757
- );
890
+ const result = cmuxSplitsEnabled
891
+ ? await runSingleAgentInCmuxSplit(
892
+ cmuxClient,
893
+ "right",
894
+ ctx.cwd,
895
+ agents,
896
+ params.agent,
897
+ params.task,
898
+ isolation ? isolation.workDir : params.cwd,
899
+ undefined,
900
+ signal,
901
+ onUpdate,
902
+ makeDetails("single"),
903
+ )
904
+ : await runSingleAgent(
905
+ ctx.cwd,
906
+ agents,
907
+ params.agent,
908
+ params.task,
909
+ isolation ? isolation.workDir : params.cwd,
910
+ undefined,
911
+ signal,
912
+ onUpdate,
913
+ makeDetails("single"),
914
+ );
758
915
 
759
916
  // Capture and merge delta if isolated
760
917
  if (isolation) {