gsd-pi 2.36.0-dev.f887f4e → 2.37.0-dev.3186675

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/dist/resources/extensions/cmux/index.js +321 -0
  2. package/dist/resources/extensions/cmux/package.json +7 -0
  3. package/dist/resources/extensions/gsd/auto-dashboard.js +334 -104
  4. package/dist/resources/extensions/gsd/auto-loop.js +29 -4
  5. package/dist/resources/extensions/gsd/auto.js +35 -5
  6. package/dist/resources/extensions/gsd/commands-cmux.js +120 -0
  7. package/dist/resources/extensions/gsd/commands-prefs-wizard.js +1 -1
  8. package/dist/resources/extensions/gsd/commands.js +51 -1
  9. package/dist/resources/extensions/gsd/docs/preferences-reference.md +25 -0
  10. package/dist/resources/extensions/gsd/git-service.js +9 -1
  11. package/dist/resources/extensions/gsd/history.js +2 -1
  12. package/dist/resources/extensions/gsd/index.js +5 -0
  13. package/dist/resources/extensions/gsd/metrics.js +4 -2
  14. package/dist/resources/extensions/gsd/notifications.js +10 -1
  15. package/dist/resources/extensions/gsd/preferences-types.js +2 -0
  16. package/dist/resources/extensions/gsd/preferences-validation.js +29 -0
  17. package/dist/resources/extensions/gsd/preferences.js +3 -0
  18. package/dist/resources/extensions/gsd/prompts/research-milestone.md +4 -3
  19. package/dist/resources/extensions/gsd/prompts/research-slice.md +3 -2
  20. package/dist/resources/extensions/gsd/session-lock.js +26 -6
  21. package/dist/resources/extensions/gsd/templates/preferences.md +6 -0
  22. package/dist/resources/extensions/search-the-web/native-search.js +45 -4
  23. package/dist/resources/extensions/shared/format-utils.js +5 -41
  24. package/dist/resources/extensions/shared/layout-utils.js +46 -0
  25. package/dist/resources/extensions/shared/mod.js +2 -1
  26. package/dist/resources/extensions/shared/terminal.js +5 -0
  27. package/dist/resources/extensions/subagent/index.js +180 -60
  28. package/package.json +1 -1
  29. package/packages/pi-coding-agent/dist/core/extensions/loader.d.ts.map +1 -1
  30. package/packages/pi-coding-agent/dist/core/extensions/loader.js +8 -4
  31. package/packages/pi-coding-agent/dist/core/extensions/loader.js.map +1 -1
  32. package/packages/pi-coding-agent/package.json +1 -1
  33. package/packages/pi-coding-agent/src/core/extensions/loader.ts +8 -4
  34. package/packages/pi-tui/dist/terminal-image.d.ts.map +1 -1
  35. package/packages/pi-tui/dist/terminal-image.js +4 -0
  36. package/packages/pi-tui/dist/terminal-image.js.map +1 -1
  37. package/packages/pi-tui/src/terminal-image.ts +5 -0
  38. package/pkg/package.json +1 -1
  39. package/src/resources/extensions/cmux/index.ts +384 -0
  40. package/src/resources/extensions/cmux/package.json +7 -0
  41. package/src/resources/extensions/gsd/auto-dashboard.ts +363 -116
  42. package/src/resources/extensions/gsd/auto-loop.ts +66 -6
  43. package/src/resources/extensions/gsd/auto.ts +45 -5
  44. package/src/resources/extensions/gsd/commands-cmux.ts +143 -0
  45. package/src/resources/extensions/gsd/commands-prefs-wizard.ts +1 -1
  46. package/src/resources/extensions/gsd/commands.ts +54 -1
  47. package/src/resources/extensions/gsd/docs/preferences-reference.md +25 -0
  48. package/src/resources/extensions/gsd/git-service.ts +12 -1
  49. package/src/resources/extensions/gsd/history.ts +2 -1
  50. package/src/resources/extensions/gsd/index.ts +8 -0
  51. package/src/resources/extensions/gsd/metrics.ts +4 -2
  52. package/src/resources/extensions/gsd/notifications.ts +10 -1
  53. package/src/resources/extensions/gsd/preferences-types.ts +13 -0
  54. package/src/resources/extensions/gsd/preferences-validation.ts +26 -0
  55. package/src/resources/extensions/gsd/preferences.ts +4 -0
  56. package/src/resources/extensions/gsd/prompts/research-milestone.md +4 -3
  57. package/src/resources/extensions/gsd/prompts/research-slice.md +3 -2
  58. package/src/resources/extensions/gsd/session-lock.ts +41 -6
  59. package/src/resources/extensions/gsd/templates/preferences.md +6 -0
  60. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +39 -1
  61. package/src/resources/extensions/gsd/tests/auto-worktree.test.ts +19 -0
  62. package/src/resources/extensions/gsd/tests/cmux.test.ts +122 -0
  63. package/src/resources/extensions/gsd/tests/preferences.test.ts +23 -0
  64. package/src/resources/extensions/gsd/tests/session-lock-regression.test.ts +45 -0
  65. package/src/resources/extensions/search-the-web/native-search.ts +50 -4
  66. package/src/resources/extensions/shared/format-utils.ts +5 -44
  67. package/src/resources/extensions/shared/layout-utils.ts +49 -0
  68. package/src/resources/extensions/shared/mod.ts +7 -4
  69. package/src/resources/extensions/shared/terminal.ts +5 -0
  70. package/src/resources/extensions/shared/tests/format-utils.test.ts +5 -3
  71. package/src/resources/extensions/subagent/index.ts +236 -79
@@ -20,8 +20,10 @@ import { getAndClearSkills } from "./skill-telemetry.js";
20
20
  import { loadJsonFile, loadJsonFileOrNull, saveJsonFile } from "./json-persistence.js";
21
21
  import { parseUnitId } from "./unit-id.js";
22
22
 
23
- // Re-export from shared — canonical implementation lives in format-utils.
24
- export { formatTokenCount } from "../shared/mod.js";
23
+ // Re-export from shared — import directly from format-utils to avoid pulling
24
+ // in the full barrel (mod.js → ui.js → @gsd/pi-tui) which breaks when loaded
25
+ // outside jiti's alias resolution (e.g. dynamic import in auto-loop reports).
26
+ export { formatTokenCount } from "../shared/format-utils.js";
25
27
 
26
28
  // ─── Types ────────────────────────────────────────────────────────────────────
27
29
 
@@ -4,6 +4,7 @@
4
4
  import { execFileSync } from "node:child_process";
5
5
  import type { NotificationPreferences } from "./types.js";
6
6
  import { loadEffectiveGSDPreferences } from "./preferences.js";
7
+ import { CmuxClient, emitOsc777Notification, resolveCmuxConfig } from "../cmux/index.js";
7
8
 
8
9
  export type NotifyLevel = "info" | "success" | "warning" | "error";
9
10
  export type NotificationKind = "complete" | "error" | "budget" | "milestone" | "attention";
@@ -23,7 +24,15 @@ export function sendDesktopNotification(
23
24
  level: NotifyLevel = "info",
24
25
  kind: NotificationKind = "complete",
25
26
  ): void {
26
- if (!shouldSendDesktopNotification(kind)) return;
27
+ const loaded = loadEffectiveGSDPreferences()?.preferences;
28
+ if (!shouldSendDesktopNotification(kind, loaded?.notifications)) return;
29
+
30
+ const cmux = resolveCmuxConfig(loaded);
31
+ if (cmux.notifications) {
32
+ const delivered = CmuxClient.fromPreferences(loaded).notify(title, message);
33
+ if (delivered) return;
34
+ emitOsc777Notification(title, message);
35
+ }
27
36
 
28
37
  try {
29
38
  const command = buildDesktopNotificationCommand(process.platform, title, message, level);
@@ -68,6 +68,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
68
68
  "budget_enforcement",
69
69
  "context_pause_threshold",
70
70
  "notifications",
71
+ "cmux",
71
72
  "remote_questions",
72
73
  "git",
73
74
  "post_unit_hooks",
@@ -84,6 +85,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
84
85
  "search_provider",
85
86
  "compression_strategy",
86
87
  "context_selection",
88
+ "widget_mode",
87
89
  ]);
88
90
 
89
91
  /** Canonical list of all dispatch unit types. */
@@ -164,6 +166,14 @@ export interface RemoteQuestionsConfig {
164
166
  poll_interval_seconds?: number; // clamped to 2-30
165
167
  }
166
168
 
169
+ export interface CmuxPreferences {
170
+ enabled?: boolean;
171
+ notifications?: boolean;
172
+ sidebar?: boolean;
173
+ splits?: boolean;
174
+ browser?: boolean;
175
+ }
176
+
167
177
  export interface GSDPreferences {
168
178
  version?: number;
169
179
  mode?: WorkflowMode;
@@ -182,6 +192,7 @@ export interface GSDPreferences {
182
192
  budget_enforcement?: BudgetEnforcementMode;
183
193
  context_pause_threshold?: number;
184
194
  notifications?: NotificationPreferences;
195
+ cmux?: CmuxPreferences;
185
196
  remote_questions?: RemoteQuestionsConfig;
186
197
  git?: GitPreferences;
187
198
  post_unit_hooks?: PostUnitHookConfig[];
@@ -202,6 +213,8 @@ export interface GSDPreferences {
202
213
  compression_strategy?: CompressionStrategy;
203
214
  /** Context selection mode for file inlining. "full" inlines entire files, "smart" uses semantic chunking. Default derived from token profile. */
204
215
  context_selection?: ContextSelectionMode;
216
+ /** Default widget display mode for auto-mode dashboard. "full" | "small" | "min" | "off". Default: "full". */
217
+ widget_mode?: "full" | "small" | "min" | "off";
205
218
  }
206
219
 
207
220
  export interface LoadedGSDPreferences {
@@ -242,6 +242,32 @@ export function validatePreferences(preferences: GSDPreferences): {
242
242
  }
243
243
  }
244
244
 
245
+ // ─── Cmux ───────────────────────────────────────────────────────────────
246
+ if (preferences.cmux !== undefined) {
247
+ if (preferences.cmux && typeof preferences.cmux === "object") {
248
+ const cmux = preferences.cmux as Record<string, unknown>;
249
+ const validatedCmux: NonNullable<GSDPreferences["cmux"]> = {};
250
+ if (cmux.enabled !== undefined) validatedCmux.enabled = !!cmux.enabled;
251
+ if (cmux.notifications !== undefined) validatedCmux.notifications = !!cmux.notifications;
252
+ if (cmux.sidebar !== undefined) validatedCmux.sidebar = !!cmux.sidebar;
253
+ if (cmux.splits !== undefined) validatedCmux.splits = !!cmux.splits;
254
+ if (cmux.browser !== undefined) validatedCmux.browser = !!cmux.browser;
255
+
256
+ const knownCmuxKeys = new Set(["enabled", "notifications", "sidebar", "splits", "browser"]);
257
+ for (const key of Object.keys(cmux)) {
258
+ if (!knownCmuxKeys.has(key)) {
259
+ warnings.push(`unknown cmux key "${key}" — ignored`);
260
+ }
261
+ }
262
+
263
+ if (Object.keys(validatedCmux).length > 0) {
264
+ validated.cmux = validatedCmux;
265
+ }
266
+ } else {
267
+ errors.push("cmux must be an object");
268
+ }
269
+ }
270
+
245
271
  // ─── Remote Questions ───────────────────────────────────────────────
246
272
  if (preferences.remote_questions !== undefined) {
247
273
  if (preferences.remote_questions && typeof preferences.remote_questions === "object") {
@@ -45,6 +45,7 @@ export type {
45
45
  SkillDiscoveryMode,
46
46
  AutoSupervisorConfig,
47
47
  RemoteQuestionsConfig,
48
+ CmuxPreferences,
48
49
  GSDPreferences,
49
50
  LoadedGSDPreferences,
50
51
  SkillResolution,
@@ -241,6 +242,9 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPr
241
242
  notifications: (base.notifications || override.notifications)
242
243
  ? { ...(base.notifications ?? {}), ...(override.notifications ?? {}) }
243
244
  : undefined,
245
+ cmux: (base.cmux || override.cmux)
246
+ ? { ...(base.cmux ?? {}), ...(override.cmux ?? {}) }
247
+ : undefined,
244
248
  remote_questions: override.remote_questions
245
249
  ? { ...(base.remote_questions ?? {}), ...override.remote_questions }
246
250
  : base.remote_questions,
@@ -25,9 +25,10 @@ Then research the codebase and relevant technologies. Narrate key findings and s
25
25
  2. **Skill Discovery ({{skillDiscoveryMode}}):**{{skillDiscoveryInstructions}}
26
26
  3. Explore relevant code. For small/familiar codebases, use `rg`, `find`, and targeted reads. For large or unfamiliar codebases, use `scout` to build a broad map efficiently before diving in.
27
27
  4. Use `resolve_library` / `get_library_docs` for unfamiliar libraries — skip this for libraries already used in the codebase
28
- 5. Use the **Research** output template from the inlined context above — include only sections that have real content
29
- 6. If `.gsd/REQUIREMENTS.md` exists, research against it. Identify which Active requirements are table stakes, likely omissions, overbuilt risks, or domain-standard behaviors the user may or may not want.
30
- 7. Write `{{outputPath}}`
28
+ 5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — prefer `resolve_library` / `get_library_docs` for library documentation. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
29
+ 6. Use the **Research** output template from the inlined context above — include only sections that have real content
30
+ 7. If `.gsd/REQUIREMENTS.md` exists, research against it. Identify which Active requirements are table stakes, likely omissions, overbuilt risks, or domain-standard behaviors the user may or may not want.
31
+ 8. Write `{{outputPath}}`
31
32
 
32
33
  ## Strategic Questions to Answer
33
34
 
@@ -46,8 +46,9 @@ Research what this slice needs. Narrate key findings and surprises as you go —
46
46
  2. **Skill Discovery ({{skillDiscoveryMode}}):**{{skillDiscoveryInstructions}}
47
47
  3. Explore relevant code for this slice's scope. For targeted exploration, use `rg`, `find`, and reads. For broad or unfamiliar subsystems, use `scout` to map the relevant area first.
48
48
  4. Use `resolve_library` / `get_library_docs` for unfamiliar libraries — skip this for libraries already used in the codebase
49
- 5. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
50
- 6. Write `{{outputPath}}`
49
+ 5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — prefer `resolve_library` / `get_library_docs` for library documentation. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
50
+ 6. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
51
+ 7. Write `{{outputPath}}`
51
52
 
52
53
  The slice directory already exists at `{{slicePath}}/`. Do NOT mkdir — just write the file.
53
54
 
@@ -40,6 +40,19 @@ export type SessionLockResult =
40
40
  | { acquired: true }
41
41
  | { acquired: false; reason: string; existingPid?: number };
42
42
 
43
+ export type SessionLockFailureReason =
44
+ | "compromised"
45
+ | "missing-metadata"
46
+ | "pid-mismatch";
47
+
48
+ export interface SessionLockStatus {
49
+ valid: boolean;
50
+ failureReason?: SessionLockFailureReason;
51
+ existingPid?: number;
52
+ expectedPid?: number;
53
+ recovered?: boolean;
54
+ }
55
+
43
56
  // ─── Module State ───────────────────────────────────────────────────────────
44
57
 
45
58
  /** Release function from proper-lockfile — calling it releases the OS lock. */
@@ -368,7 +381,7 @@ export function updateSessionLock(
368
381
  *
369
382
  * This is called periodically during the dispatch loop.
370
383
  */
371
- export function validateSessionLock(basePath: string): boolean {
384
+ export function getSessionLockStatus(basePath: string): SessionLockStatus {
372
385
  // Lock was compromised by proper-lockfile (mtime drift from sleep, stall, etc.)
373
386
  if (_lockCompromised) {
374
387
  // Recovery gate (#1512): Before declaring the lock lost, check if the lock
@@ -385,18 +398,23 @@ export function validateSessionLock(basePath: string): boolean {
385
398
  process.stderr.write(
386
399
  `[gsd] Lock recovered after onCompromised — lock file PID matched, re-acquired.\n`,
387
400
  );
388
- return true;
401
+ return { valid: true, recovered: true };
389
402
  }
390
403
  } catch {
391
404
  // Re-acquisition failed — fall through to return false
392
405
  }
393
406
  }
394
- return false;
407
+ return {
408
+ valid: false,
409
+ failureReason: "compromised",
410
+ existingPid: existing?.pid,
411
+ expectedPid: process.pid,
412
+ };
395
413
  }
396
414
 
397
415
  // If we have an OS-level lock, we're still the owner
398
416
  if (_releaseFunction && _lockedPath === basePath) {
399
- return true;
417
+ return { valid: true };
400
418
  }
401
419
 
402
420
  // Fallback: check the lock file PID
@@ -404,10 +422,27 @@ export function validateSessionLock(basePath: string): boolean {
404
422
  const existing = readExistingLockData(lp);
405
423
  if (!existing) {
406
424
  // Lock file was deleted — we lost ownership
407
- return false;
425
+ return {
426
+ valid: false,
427
+ failureReason: "missing-metadata",
428
+ expectedPid: process.pid,
429
+ };
430
+ }
431
+
432
+ if (existing.pid !== process.pid) {
433
+ return {
434
+ valid: false,
435
+ failureReason: "pid-mismatch",
436
+ existingPid: existing.pid,
437
+ expectedPid: process.pid,
438
+ };
408
439
  }
409
440
 
410
- return existing.pid === process.pid;
441
+ return { valid: true };
442
+ }
443
+
444
+ export function validateSessionLock(basePath: string): boolean {
445
+ return getSessionLockStatus(basePath).valid;
411
446
  }
412
447
 
413
448
  /**
@@ -57,6 +57,12 @@ notifications:
57
57
  on_budget:
58
58
  on_milestone:
59
59
  on_attention:
60
+ cmux:
61
+ enabled:
62
+ notifications:
63
+ sidebar:
64
+ splits:
65
+ browser:
60
66
  remote_questions:
61
67
  channel:
62
68
  channel_id:
@@ -14,6 +14,7 @@ import {
14
14
  type AgentEndEvent,
15
15
  type LoopDeps,
16
16
  } from "../auto-loop.js";
17
+ import type { SessionLockStatus } from "../session-lock.js";
17
18
 
18
19
  // ─── Helpers ─────────────────────────────────────────────────────────────────
19
20
 
@@ -317,6 +318,8 @@ function makeMockDeps(
317
318
  },
318
319
  clearUnitTimeout: () => {},
319
320
  updateProgressWidget: () => {},
321
+ syncCmuxSidebar: () => {},
322
+ logCmuxEvent: () => {},
320
323
  invalidateAllCaches: () => {
321
324
  callLog.push("invalidateAllCaches");
322
325
  },
@@ -339,7 +342,7 @@ function makeMockDeps(
339
342
  preDispatchHealthGate: async () => ({ proceed: true, fixesApplied: [] }),
340
343
  syncProjectRootToWorktree: () => {},
341
344
  checkResourcesStale: () => null,
342
- validateSessionLock: () => true,
345
+ validateSessionLock: () => ({ valid: true } as SessionLockStatus),
343
346
  updateSessionLock: () => {
344
347
  callLog.push("updateSessionLock");
345
348
  },
@@ -530,6 +533,41 @@ test("autoLoop exits on terminal complete state", async (t) => {
530
533
  );
531
534
  });
532
535
 
536
+ test("autoLoop passes structured session-lock failure details to the handler", async () => {
537
+ _resetPendingResolve();
538
+
539
+ const ctx = makeMockCtx();
540
+ ctx.ui.setStatus = () => {};
541
+ const pi = makeMockPi();
542
+ const s = makeLoopSession();
543
+ let observedLockStatus: SessionLockStatus | undefined;
544
+
545
+ const deps = makeMockDeps({
546
+ validateSessionLock: () =>
547
+ ({
548
+ valid: false,
549
+ failureReason: "compromised",
550
+ expectedPid: process.pid,
551
+ }) as SessionLockStatus,
552
+ handleLostSessionLock: (_ctx, lockStatus) => {
553
+ observedLockStatus = lockStatus;
554
+ deps.callLog.push("handleLostSessionLock");
555
+ },
556
+ });
557
+
558
+ await autoLoop(ctx, pi, s, deps);
559
+
560
+ assert.deepEqual(observedLockStatus, {
561
+ valid: false,
562
+ failureReason: "compromised",
563
+ expectedPid: process.pid,
564
+ });
565
+ assert.ok(
566
+ !deps.callLog.includes("resolveDispatch"),
567
+ "should stop before dispatch after lock validation fails",
568
+ );
569
+ });
570
+
533
571
  test("autoLoop exits on terminal blocked state", async (t) => {
534
572
  _resetPendingResolve();
535
573
 
@@ -153,6 +153,25 @@ async function main(): Promise<void> {
153
153
  // After teardown, originalBase should be null
154
154
  assertEq(getAutoWorktreeOriginalBase(), null, "no split-brain: originalBase cleared");
155
155
 
156
+ // ─── #1526: getMainBranch returns milestone branch in auto-worktree ──
157
+ console.log("\n=== #1526: getMainBranch() returns milestone/<MID> in auto-worktree ===");
158
+ {
159
+ const { GitServiceImpl } = await import("../git-service.ts");
160
+
161
+ // Create worktree
162
+ const wtPath = createAutoWorktree(tempDir, "M005");
163
+ // Don't set main_branch pref so getMainBranch falls through to worktree detection
164
+ const gitService = new GitServiceImpl(wtPath);
165
+ gitService.setMilestoneId("M005");
166
+
167
+ // Verify getMainBranch returns the milestone branch
168
+ const mainBranch = gitService.getMainBranch();
169
+ assertEq(mainBranch, "milestone/M005", "getMainBranch returns milestone/<MID> in auto-worktree");
170
+
171
+ // Cleanup
172
+ teardownAutoWorktree(tempDir, "M005");
173
+ }
174
+
156
175
  // ─── #778: reconcile plan checkboxes on re-attach ─────────────────
157
176
  console.log("\n=== #778: reconcile plan checkboxes on re-attach ===");
158
177
  {
@@ -0,0 +1,122 @@
1
+ import test, { describe } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import * as fs from "node:fs";
4
+ import * as path from "node:path";
5
+ import { fileURLToPath } from "node:url";
6
+ import {
7
+ buildCmuxProgress,
8
+ buildCmuxStatusLabel,
9
+ detectCmuxEnvironment,
10
+ markCmuxPromptShown,
11
+ resetCmuxPromptState,
12
+ resolveCmuxConfig,
13
+ shouldPromptToEnableCmux,
14
+ } from "../../cmux/index.ts";
15
+ import type { GSDState } from "../types.ts";
16
+
17
+ test("detectCmuxEnvironment requires workspace, surface, and socket", () => {
18
+ const detected = detectCmuxEnvironment(
19
+ {
20
+ CMUX_WORKSPACE_ID: "workspace:1",
21
+ CMUX_SURFACE_ID: "surface:2",
22
+ CMUX_SOCKET_PATH: "/tmp/cmux.sock",
23
+ },
24
+ (path) => path === "/tmp/cmux.sock",
25
+ () => true,
26
+ );
27
+ assert.equal(detected.available, true);
28
+ assert.equal(detected.cliAvailable, true);
29
+ });
30
+
31
+ test("resolveCmuxConfig enables only when preference and environment are both active", () => {
32
+ const config = resolveCmuxConfig(
33
+ { cmux: { enabled: true, notifications: true, sidebar: true, splits: true } },
34
+ {
35
+ CMUX_WORKSPACE_ID: "workspace:1",
36
+ CMUX_SURFACE_ID: "surface:2",
37
+ CMUX_SOCKET_PATH: "/tmp/cmux.sock",
38
+ },
39
+ () => true,
40
+ () => true,
41
+ );
42
+ assert.equal(config.enabled, true);
43
+ assert.equal(config.notifications, true);
44
+ assert.equal(config.sidebar, true);
45
+ assert.equal(config.splits, true);
46
+ });
47
+
48
+ test("shouldPromptToEnableCmux only prompts once per session", () => {
49
+ resetCmuxPromptState();
50
+ assert.equal(shouldPromptToEnableCmux({}, {}, () => false, () => true), false);
51
+
52
+ assert.equal(
53
+ shouldPromptToEnableCmux(
54
+ {},
55
+ {
56
+ CMUX_WORKSPACE_ID: "workspace:1",
57
+ CMUX_SURFACE_ID: "surface:2",
58
+ CMUX_SOCKET_PATH: "/tmp/cmux.sock",
59
+ },
60
+ () => true,
61
+ () => true,
62
+ ),
63
+ true,
64
+ );
65
+ markCmuxPromptShown();
66
+ assert.equal(
67
+ shouldPromptToEnableCmux(
68
+ {},
69
+ {
70
+ CMUX_WORKSPACE_ID: "workspace:1",
71
+ CMUX_SURFACE_ID: "surface:2",
72
+ CMUX_SOCKET_PATH: "/tmp/cmux.sock",
73
+ },
74
+ () => true,
75
+ () => true,
76
+ ),
77
+ false,
78
+ );
79
+ resetCmuxPromptState();
80
+ });
81
+
82
+ test("buildCmuxStatusLabel and progress prefer deepest active unit", () => {
83
+ const state: GSDState = {
84
+ activeMilestone: { id: "M001", title: "Milestone" },
85
+ activeSlice: { id: "S02", title: "Slice" },
86
+ activeTask: { id: "T03", title: "Task" },
87
+ phase: "executing",
88
+ recentDecisions: [],
89
+ blockers: [],
90
+ nextAction: "Keep going",
91
+ registry: [],
92
+ progress: {
93
+ milestones: { done: 0, total: 1 },
94
+ slices: { done: 1, total: 3 },
95
+ tasks: { done: 2, total: 5 },
96
+ },
97
+ };
98
+
99
+ assert.equal(buildCmuxStatusLabel(state), "M001 S02/T03 · executing");
100
+ assert.deepEqual(buildCmuxProgress(state), { value: 0.4, label: "2/5 tasks" });
101
+ });
102
+
103
+ describe("cmux extension discovery opt-out", () => {
104
+ test("cmux directory has package.json with pi manifest to prevent auto-discovery as extension", () => {
105
+ const cmuxDir = path.resolve(
106
+ path.dirname(fileURLToPath(import.meta.url)),
107
+ "../../cmux",
108
+ );
109
+ const pkgPath = path.join(cmuxDir, "package.json");
110
+ assert.ok(fs.existsSync(pkgPath), `${pkgPath} must exist`);
111
+
112
+ const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
113
+ assert.ok(
114
+ pkg.pi !== undefined && typeof pkg.pi === "object",
115
+ 'package.json must have a "pi" field to opt out of extension auto-discovery',
116
+ );
117
+ assert.ok(
118
+ !pkg.pi.extensions?.length,
119
+ "pi.extensions must be empty or absent — cmux is a library, not an extension",
120
+ );
121
+ });
122
+ });
@@ -171,6 +171,29 @@ test("notification fields validate correctly", () => {
171
171
  assert.equal(preferences.notifications?.on_complete, false);
172
172
  });
173
173
 
174
+ test("cmux fields validate correctly", () => {
175
+ const { preferences, errors } = validatePreferences({
176
+ cmux: {
177
+ enabled: true,
178
+ notifications: true,
179
+ sidebar: false,
180
+ splits: true,
181
+ browser: false,
182
+ },
183
+ });
184
+ assert.equal(errors.length, 0);
185
+ assert.equal(preferences.cmux?.enabled, true);
186
+ assert.equal(preferences.cmux?.sidebar, false);
187
+ assert.equal(preferences.cmux?.splits, true);
188
+ });
189
+
190
+ test("cmux unknown keys produce warnings", () => {
191
+ const { warnings } = validatePreferences({
192
+ cmux: { enabled: true, strange_mode: true } as any,
193
+ });
194
+ assert.ok(warnings.some((warning) => warning.includes('unknown cmux key "strange_mode"')));
195
+ });
196
+
174
197
  test("git fields comprehensive validation", () => {
175
198
  const { preferences, errors } = validatePreferences({
176
199
  git: {
@@ -17,6 +17,7 @@ import { tmpdir } from 'node:os';
17
17
 
18
18
  import {
19
19
  acquireSessionLock,
20
+ getSessionLockStatus,
20
21
  validateSessionLock,
21
22
  releaseSessionLock,
22
23
  readSessionLockData,
@@ -201,6 +202,50 @@ async function main(): Promise<void> {
201
202
  }
202
203
  }
203
204
 
205
+ // ─── 7b. getSessionLockStatus with missing metadata → reason surfaced ──
206
+ console.log('\n=== 7b. missing lock metadata → structured reason ===');
207
+ {
208
+ const base = mkdtempSync(join(tmpdir(), 'gsd-session-lock-'));
209
+ mkdirSync(join(base, '.gsd'), { recursive: true });
210
+
211
+ try {
212
+ const status = getSessionLockStatus(base);
213
+ assertEq(status.valid, false, 'missing lock metadata is invalid');
214
+ assertEq(status.failureReason, 'missing-metadata', 'missing metadata reason is surfaced');
215
+ assertEq(status.expectedPid, process.pid, 'expected PID is included');
216
+ } finally {
217
+ rmSync(base, { recursive: true, force: true });
218
+ }
219
+ }
220
+
221
+ // ─── 7c. getSessionLockStatus with foreign PID → reason surfaced ───────
222
+ console.log('\n=== 7c. foreign PID in lock file → structured reason ===');
223
+ {
224
+ const base = mkdtempSync(join(tmpdir(), 'gsd-session-lock-'));
225
+ mkdirSync(join(base, '.gsd'), { recursive: true });
226
+
227
+ try {
228
+ const foreignPid = process.pid + 1000;
229
+ const lockFile = join(gsdRoot(base), 'auto.lock');
230
+ writeFileSync(lockFile, JSON.stringify({
231
+ pid: foreignPid,
232
+ startedAt: new Date().toISOString(),
233
+ unitType: 'execute-task',
234
+ unitId: 'M001/S01/T01',
235
+ unitStartedAt: new Date().toISOString(),
236
+ completedUnits: 0,
237
+ }, null, 2));
238
+
239
+ const status = getSessionLockStatus(base);
240
+ assertEq(status.valid, false, 'foreign PID lock is invalid');
241
+ assertEq(status.failureReason, 'pid-mismatch', 'PID mismatch reason is surfaced');
242
+ assertEq(status.existingPid, foreignPid, 'existing PID is included');
243
+ assertEq(status.expectedPid, process.pid, 'expected PID is included');
244
+ } finally {
245
+ rmSync(base, { recursive: true, force: true });
246
+ }
247
+ }
248
+
204
249
  // ─── 8. Acquire after release is possible ─────────────────────────────
205
250
  console.log('\n=== 8. acquire after release → re-acquirable ===');
206
251
  {
@@ -16,6 +16,16 @@ export const CUSTOM_SEARCH_TOOL_NAMES = ["search-the-web", "search_and_read", "g
16
16
  /** Thinking block types that require signature validation by the API */
17
17
  const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]);
18
18
 
19
+ /**
20
+ * Maximum number of native web searches allowed per session (agent unit).
21
+ * The Anthropic API's `max_uses` is per-request — it resets on each API call.
22
+ * When `pause_turn` triggers a resubmit, the model gets a fresh budget.
23
+ * This session-level cap prevents unbounded search accumulation (#1309).
24
+ *
25
+ * 15 = 3 full turns of 5 searches each — generous for research, but bounded.
26
+ */
27
+ export const MAX_NATIVE_SEARCHES_PER_SESSION = 15;
28
+
19
29
  /** When true, skip native web search injection and keep Brave/custom tools active on Anthropic. */
20
30
  export function preferBraveSearch(): boolean {
21
31
  // preferences.md takes priority over env var
@@ -74,6 +84,11 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
74
84
  let isAnthropicProvider = false;
75
85
  let modelSelectFired = false;
76
86
 
87
+ // Session-level native search counter (#1309).
88
+ // Tracks cumulative web_search_tool_result blocks across all turns in a session.
89
+ // Reset on session_start. Used to compute remaining budget for max_uses.
90
+ let sessionSearchCount = 0;
91
+
77
92
  // Track provider changes via model selection — also handles diagnostics
78
93
  // since model_select fires AFTER session_start and knows the provider.
79
94
  pi.on("model_select", async (event: any, ctx: any) => {
@@ -161,13 +176,41 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
161
176
  );
162
177
  payload.tools = tools;
163
178
 
179
+ // ── Session-level search budget (#1309) ──────────────────────────────
180
+ // Count web_search_tool_result blocks in the conversation history to
181
+ // determine how many native searches have already been used this session.
182
+ // The Anthropic API's max_uses resets per request, so without this guard,
183
+ // pause_turn → resubmit cycles allow unlimited total searches.
184
+ if (Array.isArray(messages)) {
185
+ let historySearchCount = 0;
186
+ for (const msg of messages) {
187
+ const content = msg.content;
188
+ if (!Array.isArray(content)) continue;
189
+ for (const block of content) {
190
+ if ((block as any)?.type === "web_search_tool_result") {
191
+ historySearchCount++;
192
+ }
193
+ }
194
+ }
195
+ // Sync counter from history (handles session restore / context replay)
196
+ sessionSearchCount = historySearchCount;
197
+ }
198
+
199
+ const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount);
200
+
201
+ if (remaining <= 0) {
202
+ // Budget exhausted — don't inject the search tool at all.
203
+ // The model will proceed without web search capability.
204
+ return payload;
205
+ }
206
+
164
207
  tools.push({
165
208
  type: "web_search_20250305",
166
209
  name: "web_search",
167
- // Cap server-side searches per response to prevent the model from
168
- // looping on web_search without synthesizing results (#817).
169
- // 5 searches is generous — most queries need 1-2.
170
- max_uses: 5,
210
+ // Cap per-request searches to the lesser of 5 (per-turn cap) or the
211
+ // remaining session budget (#1309). This prevents the model from
212
+ // consuming unlimited searches via pause_turn resubmit cycles.
213
+ max_uses: Math.min(5, remaining),
171
214
  });
172
215
 
173
216
  return payload;
@@ -175,6 +218,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
175
218
 
176
219
  // Basic startup diagnostics — provider-specific info comes from model_select
177
220
  pi.on("session_start", async (_event: any, ctx: any) => {
221
+ // Reset session-level search budget (#1309)
222
+ sessionSearchCount = 0;
223
+
178
224
  const hasBrave = !!process.env.BRAVE_API_KEY;
179
225
  const hasJina = !!process.env.JINA_API_KEY;
180
226
  const hasAnswers = !!process.env.BRAVE_ANSWERS_KEY;