npm - gsd-pi - Versions diffs - 2.35.0 → 2.36.0-dev.d612764 - Mend

gsd-pi 2.35.0 → 2.36.0-dev.d612764

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (194) hide show

package/dist/resources/extensions/gsd/prompts/complete-milestone.md CHANGED Viewed

@@ -28,6 +28,8 @@ Then:
 **Important:** Do NOT skip the success criteria and definition of done verification (steps 3-4). The milestone summary must reflect actual verified outcomes, not assumed success. If any criterion was not met, document it clearly in the summary and do not mark the milestone as passing verification.
+**File system safety:** When scanning milestone directories for evidence, use `ls` or `find` to list directory contents first — never pass a directory path (e.g. `tasks/`, `slices/`) directly to the `read` tool. The `read` tool only accepts file paths, not directories.
 **You MUST write `{{milestoneSummaryPath}}` AND update PROJECT.md before finishing.**
 When done, say: "Milestone {{milestoneId}} complete."

package/dist/resources/extensions/gsd/prompts/research-milestone.md CHANGED Viewed

@@ -25,9 +25,10 @@ Then research the codebase and relevant technologies. Narrate key findings and s
 2. **Skill Discovery ({{skillDiscoveryMode}}):**{{skillDiscoveryInstructions}}
 3. Explore relevant code. For small/familiar codebases, use `rg`, `find`, and targeted reads. For large or unfamiliar codebases, use `scout` to build a broad map efficiently before diving in.
 4. Use `resolve_library` / `get_library_docs` for unfamiliar libraries — skip this for libraries already used in the codebase
-5. Use the **Research** output template from the inlined context above — include only sections that have real content
-6. If `.gsd/REQUIREMENTS.md` exists, research against it. Identify which Active requirements are table stakes, likely omissions, overbuilt risks, or domain-standard behaviors the user may or may not want.
-7. Write `{{outputPath}}`
+5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — prefer `resolve_library` / `get_library_docs` for library documentation. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
+6. Use the **Research** output template from the inlined context above — include only sections that have real content
+7. If `.gsd/REQUIREMENTS.md` exists, research against it. Identify which Active requirements are table stakes, likely omissions, overbuilt risks, or domain-standard behaviors the user may or may not want.
+8. Write `{{outputPath}}`
 ## Strategic Questions to Answer

package/dist/resources/extensions/gsd/prompts/research-slice.md CHANGED Viewed

@@ -46,8 +46,9 @@ Research what this slice needs. Narrate key findings and surprises as you go —
 2. **Skill Discovery ({{skillDiscoveryMode}}):**{{skillDiscoveryInstructions}}
 3. Explore relevant code for this slice's scope. For targeted exploration, use `rg`, `find`, and reads. For broad or unfamiliar subsystems, use `scout` to map the relevant area first.
 4. Use `resolve_library` / `get_library_docs` for unfamiliar libraries — skip this for libraries already used in the codebase
-5. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
-6. Write `{{outputPath}}`
+5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — prefer `resolve_library` / `get_library_docs` for library documentation. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
+6. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
+7. Write `{{outputPath}}`
 The slice directory already exists at `{{slicePath}}/`. Do NOT mkdir — just write the file.

package/dist/resources/extensions/gsd/prompts/validate-milestone.md CHANGED Viewed

@@ -67,4 +67,6 @@ If verdict is `needs-remediation`:
 **You MUST write `{{validationPath}}` before finishing.**
+**File system safety:** When scanning milestone directories for evidence, use `ls` or `find` to list directory contents first — never pass a directory path (e.g. `tasks/`, `slices/`) directly to the `read` tool. The `read` tool only accepts file paths, not directories.
 When done, say: "Milestone {{milestoneId}} validation complete — verdict: <verdict>."

package/dist/resources/extensions/gsd/roadmap-mutations.js ADDED Viewed

@@ -0,0 +1,55 @@
+/**
+ * Roadmap Mutations — shared utilities for modifying roadmap checkbox state.
+ *
+ * Extracts the duplicated "flip slice checkbox" pattern that existed in
+ * doctor.ts, mechanical-completion.ts, and auto-recovery.ts.
+ */
+import { readFileSync } from "node:fs";
+import { atomicWriteSync } from "./atomic-write.js";
+import { resolveMilestoneFile } from "./paths.js";
+import { clearParseCache } from "./files.js";
+/**
+ * Mark a slice as done ([x]) in the milestone roadmap.
+ * Idempotent — no-op if already checked or if the slice isn't found.
+ *
+ * @returns true if the roadmap was modified, false if no change was needed
+ */
+export function markSliceDoneInRoadmap(basePath, mid, sid) {
+    const roadmapFile = resolveMilestoneFile(basePath, mid, "ROADMAP");
+    if (!roadmapFile)
+        return false;
+    let content;
+    try {
+        content = readFileSync(roadmapFile, "utf-8");
+    }
+    catch {
+        return false;
+    }
+    const updated = content.replace(new RegExp(`^(\\s*-\\s+)\\[ \\]\\s+\\*\\*${sid}:`, "m"), `$1[x] **${sid}:`);
+    if (updated === content)
+        return false;
+    atomicWriteSync(roadmapFile, updated);
+    clearParseCache();
+    return true;
+}
+/**
+ * Mark a task as done ([x]) in the slice plan.
+ * Idempotent — no-op if already checked or if the task isn't found.
+ *
+ * @returns true if the plan was modified, false if no change was needed
+ */
+export function markTaskDoneInPlan(basePath, planPath, tid) {
+    let content;
+    try {
+        content = readFileSync(planPath, "utf-8");
+    }
+    catch {
+        return false;
+    }
+    const updated = content.replace(new RegExp(`^(\\s*-\\s+)\\[ \\]\\s+\\*\\*${tid}:`, "m"), `$1[x] **${tid}:`);
+    if (updated === content)
+        return false;
+    atomicWriteSync(planPath, updated);
+    clearParseCache();
+    return true;
+}

package/dist/resources/extensions/gsd/session-lock.js CHANGED Viewed

@@ -32,8 +32,17 @@ let _lockPid = 0;
 let _lockCompromised = false;
 /** Whether we've already registered a process.on('exit') handler. */
 let _exitHandlerRegistered = false;
+/** Snapshotted lock file path — captured at acquireSessionLock time to avoid
+ *  gsdRoot() resolving differently in worktree vs project root contexts (#1363). */
+let _snapshotLockPath = null;
+/** Timestamp when the session lock was acquired — used to detect false-positive
+ *  onCompromised events from event loop stalls within the stale window (#1362). */
+let _lockAcquiredAt = 0;
 const LOCK_FILE = "auto.lock";
 function lockPath(basePath) {
+    // If we have a snapshotted path from acquisition, use it for consistency
+    if (_snapshotLockPath)
+        return _snapshotLockPath;
     return join(gsdRoot(basePath), LOCK_FILE);
 }
 // ─── Stray Lock Cleanup ─────────────────────────────────────────────────────
@@ -175,8 +184,17 @@ export function acquireSessionLock(basePath) {
             onCompromised: () => {
                 // proper-lockfile detected mtime drift (system sleep, event loop stall, etc.).
                 // Default handler throws inside setTimeout — an uncaught exception that crashes
-                // or corrupts process state. Instead, set a flag so validateSessionLock() can
-                // detect the compromise gracefully on the next dispatch cycle.
+                // or corrupts process state.
+                //
+                // False-positive suppression (#1362): If we're still within the stale window
+                // (30 min since acquisition), the mtime mismatch is from an event loop stall
+                // during a long LLM call — not a real takeover. Log and continue.
+                const elapsed = Date.now() - _lockAcquiredAt;
+                if (elapsed < 1_800_000) {
+                    process.stderr.write(`[gsd] Lock heartbeat mismatch after ${Math.round(elapsed / 1000)}s — event loop stall, continuing.\n`);
+                    return; // Suppress false positive
+                }
+                // Past the stale window — this is a real compromise
                 _lockCompromised = true;
                 _releaseFunction = null;
             },
@@ -185,6 +203,8 @@ export function acquireSessionLock(basePath) {
         _lockedPath = basePath;
         _lockPid = process.pid;
         _lockCompromised = false;
+        _lockAcquiredAt = Date.now();
+        _snapshotLockPath = lp; // Snapshot the resolved path for consistent access (#1363)
         // Safety net: clean up lock dir on process exit if _releaseFunction
         // wasn't called (e.g., normal exit after clean completion) (#1245).
         ensureExitHandler(gsdDir);
@@ -211,6 +231,14 @@ export function acquireSessionLock(basePath) {
                     stale: 1_800_000, // 30 minutes — match primary lock settings
                     update: 10_000,
                     onCompromised: () => {
+                        // Same false-positive suppression as the primary lock (#1512).
+                        // Without this, the retry path fires _lockCompromised unconditionally
+                        // on benign mtime drift (laptop sleep, heavy LLM event loop stalls).
+                        const elapsed = Date.now() - _lockAcquiredAt;
+                        if (elapsed < 1_800_000) {
+                            process.stderr.write(`[gsd] Lock heartbeat mismatch after ${Math.round(elapsed / 1000)}s — event loop stall, continuing.\n`);
+                            return;
+                        }
                         _lockCompromised = true;
                         _releaseFunction = null;
                     },
@@ -219,6 +247,8 @@ export function acquireSessionLock(basePath) {
                 _lockedPath = basePath;
                 _lockPid = process.pid;
                 _lockCompromised = false;
+                _lockAcquiredAt = Date.now();
+                _snapshotLockPath = lp; // Snapshot for retry path too (#1363)
                 // Safety net — uses centralized handler to avoid double-registration
                 ensureExitHandler(gsdDir);
                 atomicWriteSync(lp, JSON.stringify(lockData, null, 2));
@@ -293,6 +323,25 @@ export function updateSessionLock(basePath, unitType, unitId, completedUnits, se
 export function validateSessionLock(basePath) {
     // Lock was compromised by proper-lockfile (mtime drift from sleep, stall, etc.)
     if (_lockCompromised) {
+        // Recovery gate (#1512): Before declaring the lock lost, check if the lock
+        // file still contains our PID. If it does, no other process took over — the
+        // onCompromised fired from benign mtime drift (laptop sleep, event loop stall
+        // beyond the stale window). Attempt re-acquisition instead of giving up.
+        const lp = lockPath(basePath);
+        const existing = readExistingLockData(lp);
+        if (existing && existing.pid === process.pid) {
+            // Lock file still ours — try to re-acquire the OS lock
+            try {
+                const result = acquireSessionLock(basePath);
+                if (result.acquired) {
+                    process.stderr.write(`[gsd] Lock recovered after onCompromised — lock file PID matched, re-acquired.\n`);
+                    return true;
+                }
+            }
+            catch {
+                // Re-acquisition failed — fall through to return false
+            }
+        }
         return false;
     }
     // If we have an OS-level lock, we're still the owner
@@ -348,6 +397,8 @@ export function releaseSessionLock(basePath) {
     _lockedPath = null;
     _lockPid = 0;
     _lockCompromised = false;
+    _lockAcquiredAt = 0;
+    _snapshotLockPath = null;
 }
 /**
  * Check if a session lock exists and return its data (for crash recovery).

package/dist/resources/extensions/gsd/state.js CHANGED Viewed

@@ -33,11 +33,12 @@ export function isValidationTerminal(validationContent) {
     const verdict = match[1].match(/verdict:\s*(\S+)/);
     if (!verdict)
         return false;
+    const v = verdict[1] === 'passed' ? 'pass' : verdict[1];
     // 'pass' and 'needs-attention' are always terminal.
     // 'needs-remediation' is treated as terminal to prevent infinite loops
     // when no remediation slices exist in the roadmap (#832). The validation
     // report is preserved on disk for manual review.
-    return verdict[1] === 'pass' || verdict[1] === 'needs-attention' || verdict[1] === 'needs-remediation';
+    return v === 'pass' || v === 'needs-attention' || v === 'needs-remediation';
 }
 const CACHE_TTL_MS = 100;
 let _stateCache = null;

package/dist/resources/extensions/gsd/templates/plan.md CHANGED Viewed

@@ -113,6 +113,14 @@
   - Tasks execute sequentially in order (T01, T02, T03, ...)
   - est: is informational (e.g. 30m, 1h, 2h) and optional
+  Verify field rules:
+  - MUST be a mechanically executable command: `npm test`, `grep -q "pattern" file`, `test -f path`
+  - For content/document tasks: verify file existence, section count, YAML validity, or word count —
+    NOT exact phrasing, specific formulas, or "zero TBD" aspirational criteria
+  - If no command can verify the output, write: "Manual review — file exists and is non-empty"
+  - BAD: "Sections 3.1 and 3.2 exist with exact formulas. Zero TBD/TODO."
+  - GOOD: `grep -c "^## " doc.md` returns >= 4 (4+ sections), `! grep -q "TBD\|TODO" doc.md`
   Integration closure rule:
   - At least one slice in any multi-boundary milestone should perform real composition/wiring, not just contract hardening
   - For the final assembly slice, verification must exercise the real entrypoint or runtime path

package/dist/resources/extensions/gsd/templates/preferences.md CHANGED Viewed

@@ -57,6 +57,12 @@ notifications:
   on_budget:
   on_milestone:
   on_attention:
+cmux:
+  enabled:
+  notifications:
+  sidebar:
+  splits:
+  browser:
 remote_questions:
   channel:
   channel_id:

package/dist/resources/extensions/gsd/worktree-resolver.js CHANGED Viewed

@@ -12,6 +12,8 @@
  * Key invariant: `createAutoWorktree()` and `enterAutoWorktree()` call
  * `process.chdir()` internally — this class MUST NOT double-chdir.
  */
+import { existsSync, unlinkSync } from "node:fs";
+import { join } from "node:path";
 import { debugLog } from "./debug-logger.js";
 // ─── WorktreeResolver ──────────────────────────────────────────────────────
 export class WorktreeResolver {
@@ -253,6 +255,16 @@ export class WorktreeResolver {
                 fallback: "chdir-to-project-root",
             });
             ctx.notify(`Milestone merge failed: ${msg}`, "warning");
+            // Clean up stale merge state left by failed squash-merge (#1389)
+            try {
+                const gitDir = join(originalBase || this.s.basePath, ".git");
+                for (const f of ["SQUASH_MSG", "MERGE_HEAD", "MERGE_MSG"]) {
+                    const p = join(gitDir, f);
+                    if (existsSync(p))
+                        unlinkSync(p);
+                }
+            }
+            catch { /* best-effort */ }
             // Error recovery: always restore to project root
             if (originalBase) {
                 try {

package/dist/resources/extensions/remote-questions/remote-command.js CHANGED Viewed

@@ -2,12 +2,12 @@
  * Remote Questions — /gsd remote command
  */
 import { AuthStorage } from "@gsd/pi-coding-agent";
-import { CURSOR_MARKER, Editor, Key, matchesKey, truncateToWidth } from "@gsd/pi-tui";
+import { Editor, Key, matchesKey, truncateToWidth } from "@gsd/pi-tui";
 import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
 import { dirname, join } from "node:path";
 import { getGlobalGSDPreferencesPath, loadEffectiveGSDPreferences } from "../gsd/preferences.js";
 import { getRemoteConfigStatus, isValidChannelId, resolveRemoteConfig } from "./config.js";
-import { sanitizeError } from "../shared/sanitize.js";
+import { maskEditorLine, sanitizeError } from "../shared/mod.js";
 import { getLatestPromptSummary } from "./status.js";
 export async function handleRemote(subcommand, ctx, _pi) {
     const trimmed = subcommand.trim();
@@ -339,26 +339,6 @@ function removeRemoteQuestionsConfig() {
     const next = frontmatter ? `---\n${frontmatter}\n---${content.slice(fmMatch[0].length)}` : content.slice(fmMatch[0].length).replace(/^\n+/, "");
     writeFileSync(prefsPath, next, "utf-8");
 }
-function maskEditorLine(line) {
-    let output = "";
-    let i = 0;
-    while (i < line.length) {
-        if (line.startsWith(CURSOR_MARKER, i)) {
-            output += CURSOR_MARKER;
-            i += CURSOR_MARKER.length;
-            continue;
-        }
-        const ansiMatch = /^\x1b\[[0-9;]*m/.exec(line.slice(i));
-        if (ansiMatch) {
-            output += ansiMatch[0];
-            i += ansiMatch[0].length;
-            continue;
-        }
-        output += line[i] === " " ? " " : "*";
-        i += 1;
-    }
-    return output;
-}
 async function promptMaskedInput(ctx, label, hint) {
     if (!ctx.hasUI)
         return null;

package/dist/resources/extensions/search-the-web/native-search.js CHANGED Viewed

@@ -11,6 +11,15 @@ export const BRAVE_TOOL_NAMES = ["search-the-web", "search_and_read"];
 export const CUSTOM_SEARCH_TOOL_NAMES = ["search-the-web", "search_and_read", "google_search"];
 /** Thinking block types that require signature validation by the API */
 const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]);
+/**
+ * Maximum number of native web searches allowed per session (agent unit).
+ * The Anthropic API's `max_uses` is per-request — it resets on each API call.
+ * When `pause_turn` triggers a resubmit, the model gets a fresh budget.
+ * This session-level cap prevents unbounded search accumulation (#1309).
+ *
+ * 15 = 3 full turns of 5 searches each — generous for research, but bounded.
+ */
+export const MAX_NATIVE_SEARCHES_PER_SESSION = 15;
 /** When true, skip native web search injection and keep Brave/custom tools active on Anthropic. */
 export function preferBraveSearch() {
     // preferences.md takes priority over env var
@@ -57,6 +66,10 @@ export function stripThinkingFromHistory(messages) {
 export function registerNativeSearchHooks(pi) {
     let isAnthropicProvider = false;
     let modelSelectFired = false;
+    // Session-level native search counter (#1309).
+    // Tracks cumulative web_search_tool_result blocks across all turns in a session.
+    // Reset on session_start. Used to compute remaining budget for max_uses.
+    let sessionSearchCount = 0;
     // Track provider changes via model selection — also handles diagnostics
     // since model_select fires AFTER session_start and knows the provider.
     pi.on("model_select", async (event, ctx) => {
@@ -135,18 +148,46 @@ export function registerNativeSearchHooks(pi) {
         // the model and causes it to pick custom tools which can fail with network errors.
         tools = tools.filter((t) => !CUSTOM_SEARCH_TOOL_NAMES.includes(t.name));
         payload.tools = tools;
+        // ── Session-level search budget (#1309) ──────────────────────────────
+        // Count web_search_tool_result blocks in the conversation history to
+        // determine how many native searches have already been used this session.
+        // The Anthropic API's max_uses resets per request, so without this guard,
+        // pause_turn → resubmit cycles allow unlimited total searches.
+        if (Array.isArray(messages)) {
+            let historySearchCount = 0;
+            for (const msg of messages) {
+                const content = msg.content;
+                if (!Array.isArray(content))
+                    continue;
+                for (const block of content) {
+                    if (block?.type === "web_search_tool_result") {
+                        historySearchCount++;
+                    }
+                }
+            }
+            // Sync counter from history (handles session restore / context replay)
+            sessionSearchCount = historySearchCount;
+        }
+        const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount);
+        if (remaining <= 0) {
+            // Budget exhausted — don't inject the search tool at all.
+            // The model will proceed without web search capability.
+            return payload;
+        }
         tools.push({
             type: "web_search_20250305",
             name: "web_search",
-            // Cap server-side searches per response to prevent the model from
-            // looping on web_search without synthesizing results (#817).
-            // 5 searches is generous — most queries need 1-2.
-            max_uses: 5,
+            // Cap per-request searches to the lesser of 5 (per-turn cap) or the
+            // remaining session budget (#1309). This prevents the model from
+            // consuming unlimited searches via pause_turn → resubmit cycles.
+            max_uses: Math.min(5, remaining),
         });
         return payload;
     });
     // Basic startup diagnostics — provider-specific info comes from model_select
     pi.on("session_start", async (_event, ctx) => {
+        // Reset session-level search budget (#1309)
+        sessionSearchCount = 0;
         const hasBrave = !!process.env.BRAVE_API_KEY;
         const hasJina = !!process.env.JINA_API_KEY;
         const hasAnswers = !!process.env.BRAVE_ANSWERS_KEY;

package/dist/resources/extensions/shared/mod.js CHANGED Viewed

@@ -6,6 +6,6 @@ export { toPosixPath } from "./path-display.js";
 export { showInterviewRound } from "./interview-ui.js";
 export { showNextAction } from "./next-action-ui.js";
 export { showConfirm } from "./confirm-ui.js";
-export { sanitizeError } from "./sanitize.js";
+export { sanitizeError, maskEditorLine } from "./sanitize.js";
 export { formatDateShort, truncateWithEllipsis } from "./format-utils.js";
 export { splitFrontmatter, parseFrontmatterMap } from "./frontmatter.js";

package/dist/resources/extensions/shared/sanitize.js CHANGED Viewed

@@ -1,6 +1,8 @@
 /**
  * Sanitize error messages by redacting token-like strings before surfacing.
+ * Also provides maskEditorLine for masking sensitive TUI editor input.
  */
+import { CURSOR_MARKER } from "@gsd/pi-tui";
 const TOKEN_PATTERNS = [
     /xoxb-[A-Za-z0-9\-]+/g, // Slack bot tokens
     /xoxp-[A-Za-z0-9\-]+/g, // Slack user tokens
@@ -15,3 +17,31 @@ export function sanitizeError(msg) {
     }
     return sanitized;
 }
+/**
+ * Replace editor visible text with masked characters while preserving
+ * ANSI cursor/sequencer codes. Keeps border/metadata lines readable.
+ */
+export function maskEditorLine(line) {
+    if (line.startsWith("─")) {
+        return line;
+    }
+    let output = "";
+    let i = 0;
+    while (i < line.length) {
+        if (line.startsWith(CURSOR_MARKER, i)) {
+            output += CURSOR_MARKER;
+            i += CURSOR_MARKER.length;
+            continue;
+        }
+        const ansiMatch = /^\x1b\[[0-9;]*m/.exec(line.slice(i));
+        if (ansiMatch) {
+            output += ansiMatch[0];
+            i += ansiMatch[0].length;
+            continue;
+        }
+        const ch = line[i];
+        output += ch === " " ? " " : "*";
+        i += 1;
+    }
+    return output;
+}

package/dist/resources/extensions/shared/terminal.js CHANGED Viewed

@@ -5,9 +5,14 @@
  * Terminals that lack this support silently swallow the key combos.
  */
 const UNSUPPORTED_TERMS = ["apple_terminal", "warpterm"];
+export function isCmuxTerminal(env = process.env) {
+    return Boolean(env.CMUX_WORKSPACE_ID && env.CMUX_SURFACE_ID);
+}
 export function supportsCtrlAltShortcuts() {
     const term = (process.env.TERM_PROGRAM || "").toLowerCase();
     const jetbrains = (process.env.TERMINAL_EMULATOR || "").toLowerCase().includes("jetbrains");
+    if (isCmuxTerminal())
+        return true;
     return !UNSUPPORTED_TERMS.some((t) => term.includes(t)) && !jetbrains;
 }
 /**