npm - gsd-pi - Versions diffs - 2.26.0 → 2.26.1-next.1 - Mend

gsd-pi 2.26.0 → 2.26.1-next.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (25)

package/dist/headless.d.ts CHANGED Viewed

@@ -20,6 +20,7 @@ export interface HeadlessOptions {
     contextText?: string;
     auto?: boolean;
     verbose?: boolean;
+    maxRestarts?: number;
 }
 export declare function parseHeadlessArgs(argv: string[]): HeadlessOptions;
 export declare function runHeadless(options: HeadlessOptions): Promise<void>;

package/dist/headless.js CHANGED Viewed

@@ -58,6 +58,13 @@ export function parseHeadlessArgs(argv) {
             else if (arg === '--verbose') {
                 options.verbose = true;
             }
+            else if (arg === '--max-restarts' && i + 1 < args.length) {
+                options.maxRestarts = parseInt(args[++i], 10);
+                if (Number.isNaN(options.maxRestarts) || options.maxRestarts < 0) {
+                    process.stderr.write('[headless] Error: --max-restarts must be a non-negative integer\n');
+                    process.exit(1);
+                }
+            }
         }
         else if (!positionalStarted) {
             positionalStarted = true;
@@ -220,6 +227,31 @@ function bootstrapGsdProject(basePath) {
     mkdirSync(join(gsdDir, 'runtime'), { recursive: true });
 }
 export async function runHeadless(options) {
+    const maxRestarts = options.maxRestarts ?? 3;
+    let restartCount = 0;
+    while (true) {
+        const result = await runHeadlessOnce(options, restartCount);
+        // Success or blocked — exit normally
+        if (result.exitCode === 0 || result.exitCode === 2) {
+            process.exit(result.exitCode);
+        }
+        // Crash/error — check if we should restart
+        if (restartCount >= maxRestarts) {
+            process.stderr.write(`[headless] Max restarts (${maxRestarts}) reached. Exiting.\n`);
+            process.exit(result.exitCode);
+        }
+        // Don't restart if SIGINT/SIGTERM was received
+        if (result.interrupted) {
+            process.exit(result.exitCode);
+        }
+        restartCount++;
+        const backoffMs = Math.min(5000 * restartCount, 30_000);
+        process.stderr.write(`[headless] Restarting in ${(backoffMs / 1000).toFixed(0)}s (attempt ${restartCount}/${maxRestarts})...\n`);
+        await new Promise(resolve => setTimeout(resolve, backoffMs));
+    }
+}
+async function runHeadlessOnce(options, restartCount) {
+    let interrupted = false;
     const startTime = Date.now();
     const isNewMilestone = options.command === 'new-milestone';
     // For new-milestone, load context and bootstrap .gsd/ before spawning RPC child
@@ -369,6 +401,7 @@ export async function runHeadless(options) {
     // Signal handling
     const signalHandler = () => {
         process.stderr.write('\n[headless] Interrupted, stopping child process...\n');
+        interrupted = true;
         exitCode = 1;
         client.stop().finally(() => {
             clearTimeout(timeoutTimer);
@@ -460,6 +493,9 @@ export async function runHeadless(options) {
     process.stderr.write(`[headless] Status: ${status}\n`);
     process.stderr.write(`[headless] Duration: ${duration}s\n`);
     process.stderr.write(`[headless] Events: ${totalEvents} total, ${toolCallCount} tool calls\n`);
+    if (restartCount > 0) {
+        process.stderr.write(`[headless] Restarts: ${restartCount}\n`);
+    }
     // On failure, print last 5 events for diagnostics
     if (exitCode !== 0) {
         const lastFive = recentEvents.slice(-5);
@@ -470,5 +506,5 @@ export async function runHeadless(options) {
             }
         }
     }
-    process.exit(exitCode);
+    return { exitCode, interrupted };
 }

package/dist/loader.js CHANGED Viewed

@@ -3,7 +3,7 @@
 // Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
 import { fileURLToPath } from 'url';
 import { dirname, resolve, join, delimiter } from 'path';
-import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync } from 'fs';
+import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync, cpSync } from 'fs';
 // Fast-path: handle --version/-v and --help/-h before importing any heavy
 // dependencies. This avoids loading the entire pi-coding-agent barrel import
 // (~1s) just to print a version string.
@@ -137,8 +137,12 @@ if (process.env.HTTP_PROXY || process.env.HTTPS_PROXY || process.env.http_proxy
     const { EnvHttpProxyAgent, setGlobalDispatcher } = await import('undici');
     setGlobalDispatcher(new EnvHttpProxyAgent());
 }
-// Ensure workspace packages are linked before importing cli.js (which imports @gsd/*).
+// Ensure workspace packages are linked (or copied on Windows) before importing
+// cli.js (which imports @gsd/*).
 // npm postinstall handles this normally, but npx --ignore-scripts skips postinstall.
+// On Windows without Developer Mode or admin rights, symlinkSync will throw even for
+// 'junction' type — so we fall back to cpSync (a full directory copy) which works
+// everywhere without elevated permissions.
 const gsdScopeDir = join(gsdNodeModules, '@gsd');
 const packagesDir = join(gsdRoot, 'packages');
 const wsPackages = ['native', 'pi-agent-core', 'pi-ai', 'pi-coding-agent', 'pi-tui'];
@@ -148,14 +152,39 @@ try {
     for (const pkg of wsPackages) {
         const target = join(gsdScopeDir, pkg);
         const source = join(packagesDir, pkg);
-        if (existsSync(source) && !existsSync(target)) {
+        if (!existsSync(source) || existsSync(target))
+            continue;
+        try {
+            symlinkSync(source, target, 'junction');
+        }
+        catch {
+            // Symlink failed (common on Windows without Developer Mode / admin).
+            // Fall back to a directory copy — slower on first run but universally works.
             try {
-                symlinkSync(source, target, 'junction');
+                cpSync(source, target, { recursive: true });
             }
             catch { /* non-fatal */ }
         }
     }
 }
 catch { /* non-fatal */ }
+// Validate critical workspace packages are resolvable. If still missing after the
+// symlink+copy attempts, emit a clear diagnostic instead of a cryptic
+// ERR_MODULE_NOT_FOUND from deep inside cli.js.
+const criticalPackages = ['pi-coding-agent'];
+const missingPackages = criticalPackages.filter(pkg => !existsSync(join(gsdScopeDir, pkg)));
+if (missingPackages.length > 0) {
+    const missing = missingPackages.map(p => `@gsd/${p}`).join(', ');
+    process.stderr.write(`\nError: GSD installation is broken — missing packages: ${missing}\n\n` +
+        `This is usually caused by one of:\n` +
+        `  • An outdated version installed from npm (run: npm install -g gsd-pi@latest)\n` +
+        `  • The packages/ directory was excluded from the installed tarball\n` +
+        `  • A filesystem error prevented linking or copying the workspace packages\n\n` +
+        `Fix it by reinstalling:\n\n` +
+        `  npm install -g gsd-pi@latest\n\n` +
+        `If the issue persists, please open an issue at:\n` +
+        `  https://github.com/gsd-build/gsd-2/issues\n`);
+    process.exit(1);
+}
 // Dynamic import defers ESM evaluation — config.js will see PI_PACKAGE_DIR above
 await import('./cli.js');

package/dist/resources/extensions/gsd/auto.ts CHANGED Viewed

@@ -18,8 +18,10 @@ import type {
 import { deriveState } from "./state.js";
 import type { BudgetEnforcementMode, GSDState } from "./types.js";
-import { loadFile, parseRoadmap, getManifestStatus, resolveAllOverrides, parseSummary } from "./files.js";
+import { loadFile, parseRoadmap, getManifestStatus, resolveAllOverrides, parsePlan, parseSummary } from "./files.js";
 import { loadPrompt } from "./prompt-loader.js";
+import { runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "./verification-gate.js";
+import { writeVerificationJSON } from "./verification-evidence.js";
 export { inlinePriorMilestoneSummary } from "./files.js";
 import { collectSecretsFromManifest } from "../get-secrets-from-user.js";
 import {
@@ -370,6 +372,11 @@ function escapeStaleWorktree(base: string): string {
 /** Crash recovery prompt — set by startAuto, consumed by first dispatchNextUnit */
 let pendingCrashRecovery: string | null = null;
+/** Pending verification retry — set when gate fails with retries remaining, consumed by dispatchNextUnit */
+let pendingVerificationRetry: { unitId: string; failureContext: string; attempt: number } | null = null;
+/** Verification retry count per unitId — separate from unitDispatchCount which tracks artifact-missing retries */
+const verificationRetryCount = new Map<string, number>();
 /** Session file path captured at pause — used to synthesize recovery briefing on resume */
 let pausedSessionFile: string | null = null;
@@ -730,6 +737,8 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI, reason
   clearActivityLogState();
   resetProactiveHealing();
   pendingCrashRecovery = null;
+  pendingVerificationRetry = null;
+  verificationRetryCount.clear();
   pausedSessionFile = null;
   _handlingAgentEnd = false;
   ctx?.ui.setStatus("gsd-auto", undefined);
@@ -767,6 +776,8 @@ export async function pauseAuto(ctx?: ExtensionContext, _pi?: ExtensionAPI): Pro
   active = false;
   paused = true;
+  pendingVerificationRetry = null;
+  verificationRetryCount.clear();
   // Preserve: unitDispatchCount, currentUnit, basePath, verbose, cmdCtx,
   // completedUnits, autoStartTime, currentMilestoneId, originalModelId
   // — all needed for resume and dashboard display
@@ -1574,6 +1585,145 @@ export async function handleAgentEnd(
     }
   }
+  // ── Verification gate: run typecheck/lint/test after execute-task ──
+  if (currentUnit && currentUnit.type === "execute-task") {
+    try {
+      const effectivePrefs = loadEffectiveGSDPreferences();
+      const prefs = effectivePrefs?.preferences;
+      // Read task plan verify field from the current task's slice plan
+      // unitId format is "M001/S01/T03" — extract mid, sid, tid
+      const parts = currentUnit.id.split("/");
+      let taskPlanVerify: string | undefined;
+      if (parts.length >= 3) {
+        const [mid, sid, tid] = parts;
+        const planFile = resolveSliceFile(basePath, mid, sid, "PLAN");
+        if (planFile) {
+          const planContent = await loadFile(planFile);
+          if (planContent) {
+            const slicePlan = parsePlan(planContent);
+            const taskEntry = slicePlan?.tasks?.find(t => t.id === tid);
+            taskPlanVerify = taskEntry?.verify;
+          }
+        }
+      }
+      const result = runVerificationGate({
+        basePath,
+        unitId: currentUnit.id,
+        cwd: basePath,
+        preferenceCommands: prefs?.verification_commands,
+        taskPlanVerify,
+      });
+      // Capture runtime errors from bg-shell and browser console
+      const runtimeErrors = await captureRuntimeErrors();
+      if (runtimeErrors.length > 0) {
+        result.runtimeErrors = runtimeErrors;
+        // Blocking runtime errors override gate pass
+        if (runtimeErrors.some(e => e.blocking)) {
+          result.passed = false;
+        }
+      }
+      // Conditional dependency audit (R008)
+      const auditWarnings = runDependencyAudit(basePath);
+      if (auditWarnings.length > 0) {
+        result.auditWarnings = auditWarnings;
+        process.stderr.write(`verification-gate: ${auditWarnings.length} audit warning(s)\n`);
+        for (const w of auditWarnings) {
+          process.stderr.write(`  [${w.severity}] ${w.name}: ${w.title}\n`);
+        }
+      }
+      // Auto-fix retry preferences (R005 / D005)
+      const autoFixEnabled = prefs?.verification_auto_fix !== false; // default true
+      const maxRetries = typeof prefs?.verification_max_retries === "number" ? prefs.verification_max_retries : 2;
+      const completionKey = `${currentUnit.type}/${currentUnit.id}`;
+      if (result.checks.length > 0) {
+        const passCount = result.checks.filter(c => c.exitCode === 0).length;
+        const total = result.checks.length;
+        if (result.passed) {
+          ctx.ui.notify(`Verification gate: ${passCount}/${total} checks passed`);
+        } else {
+          const failures = result.checks.filter(c => c.exitCode !== 0);
+          const failNames = failures.map(f => f.command).join(", ");
+          ctx.ui.notify(`Verification gate: FAILED — ${failNames}`);
+          process.stderr.write(`verification-gate: ${total - passCount}/${total} checks failed\n`);
+          for (const f of failures) {
+            process.stderr.write(`  ${f.command} exited ${f.exitCode}\n`);
+            if (f.stderr) process.stderr.write(`  stderr: ${f.stderr.slice(0, 500)}\n`);
+          }
+        }
+      }
+      // Log blocking runtime errors to stderr
+      if (result.runtimeErrors?.some(e => e.blocking)) {
+        const blockingErrors = result.runtimeErrors.filter(e => e.blocking);
+        process.stderr.write(`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`);
+        for (const err of blockingErrors) {
+          process.stderr.write(`  [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`);
+        }
+      }
+      // Write verification evidence JSON artifact
+      const attempt = verificationRetryCount.get(currentUnit.id) ?? 0;
+      if (parts.length >= 3) {
+        try {
+          const [mid, sid, tid] = parts;
+          const sDir = resolveSlicePath(basePath, mid, sid);
+          if (sDir) {
+            const tasksDir = join(sDir, "tasks");
+            if (result.passed) {
+              writeVerificationJSON(result, tasksDir, tid, currentUnit.id);
+            } else {
+              const nextAttempt = attempt + 1;
+              writeVerificationJSON(result, tasksDir, tid, currentUnit.id, nextAttempt, maxRetries);
+            }
+          }
+        } catch (evidenceErr) {
+          process.stderr.write(`verification-evidence: write error — ${(evidenceErr as Error).message}\n`);
+        }
+      }
+      // ── Auto-fix retry logic ──
+      if (result.passed) {
+        // Gate passed — clear retry state and continue normal flow
+        verificationRetryCount.delete(currentUnit.id);
+        pendingVerificationRetry = null;
+      } else if (autoFixEnabled && attempt + 1 <= maxRetries) {
+        // Gate failed, retries remaining — set up retry and return early
+        const nextAttempt = attempt + 1;
+        verificationRetryCount.set(currentUnit.id, nextAttempt);
+        pendingVerificationRetry = {
+          unitId: currentUnit.id,
+          failureContext: formatFailureContext(result),
+          attempt: nextAttempt,
+        };
+        ctx.ui.notify(`Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`, "warning");
+        // Remove completion key so dispatchNextUnit re-dispatches this unit
+        completedKeySet.delete(completionKey);
+        removePersistedKey(basePath, completionKey);
+        return; // ← Critical: exit before DB dual-write and post-unit hooks
+      } else {
+        // Gate failed, retries exhausted (or auto-fix disabled) — pause for human review
+        const exhaustedAttempt = attempt + 1;
+        verificationRetryCount.delete(currentUnit.id);
+        pendingVerificationRetry = null;
+        ctx.ui.notify(
+          `Verification gate FAILED after ${exhaustedAttempt > maxRetries ? exhaustedAttempt - 1 : exhaustedAttempt} retries — pausing for human review`,
+          "error",
+        );
+        await pauseAuto(ctx, pi);
+        return;
+      }
+    } catch (err) {
+      // Gate errors are non-fatal — log and continue
+      process.stderr.write(`verification-gate: error — ${(err as Error).message}\n`);
+    }
+  }
   // ── DB dual-write: re-import changed markdown files so next unit's prompts use fresh data ──
   if (isDbAvailable()) {
     try {
@@ -2975,6 +3125,17 @@ async function dispatchNextUnit(
   // Cap injected content to prevent unbounded prompt growth → OOM
   const MAX_RECOVERY_CHARS = 50_000;
   let finalPrompt = prompt;
+  // Verification retry — inject failure context so the agent can auto-fix
+  if (pendingVerificationRetry) {
+    const retryCtx = pendingVerificationRetry;
+    pendingVerificationRetry = null;
+    const capped = retryCtx.failureContext.length > MAX_RECOVERY_CHARS
+      ? retryCtx.failureContext.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...failure context truncated]"
+      : retryCtx.failureContext;
+    finalPrompt = `**VERIFICATION FAILED — AUTO-FIX ATTEMPT ${retryCtx.attempt}**\n\nThe verification gate ran after your previous attempt and found failures. Fix these issues before completing the task.\n\n${capped}\n\n---\n\n${finalPrompt}`;
+  }
   if (pendingCrashRecovery) {
     const capped = pendingCrashRecovery.length > MAX_RECOVERY_CHARS
       ? pendingCrashRecovery.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...recovery briefing truncated to prevent memory exhaustion]"

package/dist/resources/extensions/gsd/observability-validator.ts CHANGED Viewed

@@ -298,6 +298,27 @@ export function validateTaskSummaryContent(file: string, content: string): Valid
     });
   }
+  const evidence = getSection(content, "Verification Evidence", 2);
+  if (!evidence) {
+    issues.push({
+      severity: "warning",
+      scope: "task-summary",
+      file,
+      ruleId: "evidence_block_missing",
+      message: "Task summary is missing `## Verification Evidence`.",
+      suggestion: "Add a verification evidence table showing gate check results (command, exit code, verdict, duration).",
+    });
+  } else if (sectionLooksPlaceholderOnly(evidence)) {
+    issues.push({
+      severity: "warning",
+      scope: "task-summary",
+      file,
+      ruleId: "evidence_block_placeholder",
+      message: "Task summary verification evidence section still looks like placeholder text.",
+      suggestion: "Replace placeholders with actual gate results or note that no verification commands were discovered.",
+    });
+  }
   return issues;
 }

package/dist/resources/extensions/gsd/preferences.ts CHANGED Viewed

@@ -76,6 +76,9 @@ const KNOWN_PREFERENCE_KEYS = new Set<string>([
   "phases",
   "auto_visualize",
   "parallel",
+  "verification_commands",
+  "verification_auto_fix",
+  "verification_max_retries",
 ]);
 export interface GSDSkillRule {
@@ -173,6 +176,9 @@ export interface GSDPreferences {
   phases?: PhaseSkipPreferences;
   auto_visualize?: boolean;
   parallel?: import("./types.js").ParallelConfig;
+  verification_commands?: string[];
+  verification_auto_fix?: boolean;
+  verification_max_retries?: number;
 }
 export interface LoadedGSDPreferences {
@@ -773,6 +779,9 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPr
     parallel: (base.parallel || override.parallel)
       ? { ...(base.parallel ?? {}), ...(override.parallel ?? {}) } as import("./types.js").ParallelConfig
       : undefined,
+    verification_commands: mergeStringLists(base.verification_commands, override.verification_commands),
+    verification_auto_fix: override.verification_auto_fix ?? base.verification_auto_fix,
+    verification_max_retries: override.verification_max_retries ?? base.verification_max_retries,
   };
 }
@@ -1205,6 +1214,39 @@ export function validatePreferences(preferences: GSDPreferences): {
     }
   }
+  // ─── Verification Preferences ───────────────────────────────────────────
+  if (preferences.verification_commands !== undefined) {
+    if (Array.isArray(preferences.verification_commands)) {
+      const allStrings = preferences.verification_commands.every(
+        (item: unknown) => typeof item === "string",
+      );
+      if (allStrings) {
+        validated.verification_commands = preferences.verification_commands;
+      } else {
+        errors.push("verification_commands must be an array of strings");
+      }
+    } else {
+      errors.push("verification_commands must be an array of strings");
+    }
+  }
+  if (preferences.verification_auto_fix !== undefined) {
+    if (typeof preferences.verification_auto_fix === "boolean") {
+      validated.verification_auto_fix = preferences.verification_auto_fix;
+    } else {
+      errors.push("verification_auto_fix must be a boolean");
+    }
+  }
+  if (preferences.verification_max_retries !== undefined) {
+    const raw = preferences.verification_max_retries;
+    if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
+      validated.verification_max_retries = Math.floor(raw);
+    } else {
+      errors.push("verification_max_retries must be a non-negative number");
+    }
+  }
   // ─── Git Preferences ───────────────────────────────────────────────────
   if (preferences.git && typeof preferences.git === "object") {
     const git: Record<string, unknown> = {};

package/dist/resources/extensions/gsd/prompts/execute-task.md CHANGED Viewed

@@ -38,15 +38,16 @@ Then:
    - Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues
 6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors)
 7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary.
-8. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
+8. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section.
+9. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
    - exercise the real flow in the browser
    - prefer `browser_batch` when the next few actions are obvious and sequential
    - prefer `browser_assert` for explicit pass/fail verification of the intended outcome
    - use `browser_diff` when an action's effect is ambiguous
    - use console/network/dialog diagnostics when validating async, stateful, or failure-prone UI
    - record verification in terms of explicit checks passed/failed, not only prose interpretation
-9. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
-10. **If execution is running long or verification fails:**
+10. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
+11. **If execution is running long or verification fails:**
     **Context budget:** You have approximately **{{verificationBudget}}** reserved for verification context. If you've used most of your context and haven't finished all steps, stop implementing and prioritize writing the task summary with clear notes on what's done and what remains. A partial summary that enables clean resumption is more valuable than one more half-finished step with no documentation. Never sacrifice summary quality for one more implementation step.

package/dist/resources/extensions/gsd/templates/task-summary.md CHANGED Viewed

@@ -37,6 +37,15 @@ blocker_discovered: false
 {{whatWasVerifiedAndHow — commands run, tests passed, behavior confirmed}}
+## Verification Evidence
+<!-- Populated from verification gate output. If the gate ran, fill in the table below.
+     If no gate ran (e.g., no verification commands discovered), note that. -->
+| # | Command | Exit Code | Verdict | Duration |
+|---|---------|-----------|---------|----------|
+| {{row}} | {{command}} | {{exitCode}} | {{verdict}} | {{duration}} |
 ## Diagnostics
 {{howToInspectWhatThisTaskBuiltLater — status surfaces, logs, error shapes, failure artifacts, or none}}