gsd-pi 2.26.0 → 2.26.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. package/dist/headless.d.ts +1 -0
  2. package/dist/headless.js +37 -1
  3. package/dist/loader.js +33 -4
  4. package/dist/resources/extensions/gsd/auto.ts +162 -1
  5. package/dist/resources/extensions/gsd/observability-validator.ts +21 -0
  6. package/dist/resources/extensions/gsd/preferences.ts +42 -0
  7. package/dist/resources/extensions/gsd/prompts/execute-task.md +4 -3
  8. package/dist/resources/extensions/gsd/templates/task-summary.md +9 -0
  9. package/dist/resources/extensions/gsd/tests/verification-evidence.test.ts +743 -0
  10. package/dist/resources/extensions/gsd/tests/verification-gate.test.ts +965 -0
  11. package/dist/resources/extensions/gsd/types.ts +38 -0
  12. package/dist/resources/extensions/gsd/verification-evidence.ts +183 -0
  13. package/dist/resources/extensions/gsd/verification-gate.ts +567 -0
  14. package/package.json +1 -1
  15. package/scripts/link-workspace-packages.cjs +22 -6
  16. package/src/resources/extensions/gsd/auto.ts +162 -1
  17. package/src/resources/extensions/gsd/observability-validator.ts +21 -0
  18. package/src/resources/extensions/gsd/preferences.ts +42 -0
  19. package/src/resources/extensions/gsd/prompts/execute-task.md +4 -3
  20. package/src/resources/extensions/gsd/templates/task-summary.md +9 -0
  21. package/src/resources/extensions/gsd/tests/verification-evidence.test.ts +743 -0
  22. package/src/resources/extensions/gsd/tests/verification-gate.test.ts +965 -0
  23. package/src/resources/extensions/gsd/types.ts +38 -0
  24. package/src/resources/extensions/gsd/verification-evidence.ts +183 -0
  25. package/src/resources/extensions/gsd/verification-gate.ts +567 -0
package/dist/headless.d.ts CHANGED
@@ -20,6 +20,7 @@ export interface HeadlessOptions {
20
20
  contextText?: string;
21
21
  auto?: boolean;
22
22
  verbose?: boolean;
23
+ maxRestarts?: number;
23
24
  }
24
25
  export declare function parseHeadlessArgs(argv: string[]): HeadlessOptions;
25
26
  export declare function runHeadless(options: HeadlessOptions): Promise<void>;
package/dist/headless.js CHANGED
@@ -58,6 +58,13 @@ export function parseHeadlessArgs(argv) {
58
58
  else if (arg === '--verbose') {
59
59
  options.verbose = true;
60
60
  }
61
+ else if (arg === '--max-restarts' && i + 1 < args.length) {
62
+ options.maxRestarts = parseInt(args[++i], 10);
63
+ if (Number.isNaN(options.maxRestarts) || options.maxRestarts < 0) {
64
+ process.stderr.write('[headless] Error: --max-restarts must be a non-negative integer\n');
65
+ process.exit(1);
66
+ }
67
+ }
61
68
  }
62
69
  else if (!positionalStarted) {
63
70
  positionalStarted = true;
@@ -220,6 +227,31 @@ function bootstrapGsdProject(basePath) {
220
227
  mkdirSync(join(gsdDir, 'runtime'), { recursive: true });
221
228
  }
222
229
  export async function runHeadless(options) {
230
+ const maxRestarts = options.maxRestarts ?? 3;
231
+ let restartCount = 0;
232
+ while (true) {
233
+ const result = await runHeadlessOnce(options, restartCount);
234
+ // Success or blocked — exit normally
235
+ if (result.exitCode === 0 || result.exitCode === 2) {
236
+ process.exit(result.exitCode);
237
+ }
238
+ // Crash/error — check if we should restart
239
+ if (restartCount >= maxRestarts) {
240
+ process.stderr.write(`[headless] Max restarts (${maxRestarts}) reached. Exiting.\n`);
241
+ process.exit(result.exitCode);
242
+ }
243
+ // Don't restart if SIGINT/SIGTERM was received
244
+ if (result.interrupted) {
245
+ process.exit(result.exitCode);
246
+ }
247
+ restartCount++;
248
+ const backoffMs = Math.min(5000 * restartCount, 30_000);
249
+ process.stderr.write(`[headless] Restarting in ${(backoffMs / 1000).toFixed(0)}s (attempt ${restartCount}/${maxRestarts})...\n`);
250
+ await new Promise(resolve => setTimeout(resolve, backoffMs));
251
+ }
252
+ }
253
+ async function runHeadlessOnce(options, restartCount) {
254
+ let interrupted = false;
223
255
  const startTime = Date.now();
224
256
  const isNewMilestone = options.command === 'new-milestone';
225
257
  // For new-milestone, load context and bootstrap .gsd/ before spawning RPC child
@@ -369,6 +401,7 @@ export async function runHeadless(options) {
369
401
  // Signal handling
370
402
  const signalHandler = () => {
371
403
  process.stderr.write('\n[headless] Interrupted, stopping child process...\n');
404
+ interrupted = true;
372
405
  exitCode = 1;
373
406
  client.stop().finally(() => {
374
407
  clearTimeout(timeoutTimer);
@@ -460,6 +493,9 @@ export async function runHeadless(options) {
460
493
  process.stderr.write(`[headless] Status: ${status}\n`);
461
494
  process.stderr.write(`[headless] Duration: ${duration}s\n`);
462
495
  process.stderr.write(`[headless] Events: ${totalEvents} total, ${toolCallCount} tool calls\n`);
496
+ if (restartCount > 0) {
497
+ process.stderr.write(`[headless] Restarts: ${restartCount}\n`);
498
+ }
463
499
  // On failure, print last 5 events for diagnostics
464
500
  if (exitCode !== 0) {
465
501
  const lastFive = recentEvents.slice(-5);
@@ -470,5 +506,5 @@ export async function runHeadless(options) {
470
506
  }
471
507
  }
472
508
  }
473
- process.exit(exitCode);
509
+ return { exitCode, interrupted };
474
510
  }
package/dist/loader.js CHANGED
@@ -3,7 +3,7 @@
3
3
  // Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
4
4
  import { fileURLToPath } from 'url';
5
5
  import { dirname, resolve, join, delimiter } from 'path';
6
- import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync } from 'fs';
6
+ import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync, cpSync } from 'fs';
7
7
  // Fast-path: handle --version/-v and --help/-h before importing any heavy
8
8
  // dependencies. This avoids loading the entire pi-coding-agent barrel import
9
9
  // (~1s) just to print a version string.
@@ -137,8 +137,12 @@ if (process.env.HTTP_PROXY || process.env.HTTPS_PROXY || process.env.http_proxy
137
137
  const { EnvHttpProxyAgent, setGlobalDispatcher } = await import('undici');
138
138
  setGlobalDispatcher(new EnvHttpProxyAgent());
139
139
  }
140
- // Ensure workspace packages are linked before importing cli.js (which imports @gsd/*).
140
+ // Ensure workspace packages are linked (or copied on Windows) before importing
141
+ // cli.js (which imports @gsd/*).
141
142
  // npm postinstall handles this normally, but npx --ignore-scripts skips postinstall.
143
+ // On Windows without Developer Mode or admin rights, symlinkSync will throw even for
144
+ // 'junction' type — so we fall back to cpSync (a full directory copy) which works
145
+ // everywhere without elevated permissions.
142
146
  const gsdScopeDir = join(gsdNodeModules, '@gsd');
143
147
  const packagesDir = join(gsdRoot, 'packages');
144
148
  const wsPackages = ['native', 'pi-agent-core', 'pi-ai', 'pi-coding-agent', 'pi-tui'];
@@ -148,14 +152,39 @@ try {
148
152
  for (const pkg of wsPackages) {
149
153
  const target = join(gsdScopeDir, pkg);
150
154
  const source = join(packagesDir, pkg);
151
- if (existsSync(source) && !existsSync(target)) {
155
+ if (!existsSync(source) || existsSync(target))
156
+ continue;
157
+ try {
158
+ symlinkSync(source, target, 'junction');
159
+ }
160
+ catch {
161
+ // Symlink failed (common on Windows without Developer Mode / admin).
162
+ // Fall back to a directory copy — slower on first run but universally works.
152
163
  try {
153
- symlinkSync(source, target, 'junction');
164
+ cpSync(source, target, { recursive: true });
154
165
  }
155
166
  catch { /* non-fatal */ }
156
167
  }
157
168
  }
158
169
  }
159
170
  catch { /* non-fatal */ }
171
+ // Validate critical workspace packages are resolvable. If still missing after the
172
+ // symlink+copy attempts, emit a clear diagnostic instead of a cryptic
173
+ // ERR_MODULE_NOT_FOUND from deep inside cli.js.
174
+ const criticalPackages = ['pi-coding-agent'];
175
+ const missingPackages = criticalPackages.filter(pkg => !existsSync(join(gsdScopeDir, pkg)));
176
+ if (missingPackages.length > 0) {
177
+ const missing = missingPackages.map(p => `@gsd/${p}`).join(', ');
178
+ process.stderr.write(`\nError: GSD installation is broken — missing packages: ${missing}\n\n` +
179
+ `This is usually caused by one of:\n` +
180
+ ` • An outdated version installed from npm (run: npm install -g gsd-pi@latest)\n` +
181
+ ` • The packages/ directory was excluded from the installed tarball\n` +
182
+ ` • A filesystem error prevented linking or copying the workspace packages\n\n` +
183
+ `Fix it by reinstalling:\n\n` +
184
+ ` npm install -g gsd-pi@latest\n\n` +
185
+ `If the issue persists, please open an issue at:\n` +
186
+ ` https://github.com/gsd-build/gsd-2/issues\n`);
187
+ process.exit(1);
188
+ }
160
189
  // Dynamic import defers ESM evaluation — config.js will see PI_PACKAGE_DIR above
161
190
  await import('./cli.js');
package/dist/resources/extensions/gsd/auto.ts CHANGED
@@ -18,8 +18,10 @@ import type {
18
18
 
19
19
  import { deriveState } from "./state.js";
20
20
  import type { BudgetEnforcementMode, GSDState } from "./types.js";
21
- import { loadFile, parseRoadmap, getManifestStatus, resolveAllOverrides, parseSummary } from "./files.js";
21
+ import { loadFile, parseRoadmap, getManifestStatus, resolveAllOverrides, parsePlan, parseSummary } from "./files.js";
22
22
  import { loadPrompt } from "./prompt-loader.js";
23
+ import { runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "./verification-gate.js";
24
+ import { writeVerificationJSON } from "./verification-evidence.js";
23
25
  export { inlinePriorMilestoneSummary } from "./files.js";
24
26
  import { collectSecretsFromManifest } from "../get-secrets-from-user.js";
25
27
  import {
@@ -370,6 +372,11 @@ function escapeStaleWorktree(base: string): string {
370
372
  /** Crash recovery prompt — set by startAuto, consumed by first dispatchNextUnit */
371
373
  let pendingCrashRecovery: string | null = null;
372
374
 
375
+ /** Pending verification retry — set when gate fails with retries remaining, consumed by dispatchNextUnit */
376
+ let pendingVerificationRetry: { unitId: string; failureContext: string; attempt: number } | null = null;
377
+ /** Verification retry count per unitId — separate from unitDispatchCount which tracks artifact-missing retries */
378
+ const verificationRetryCount = new Map<string, number>();
379
+
373
380
  /** Session file path captured at pause — used to synthesize recovery briefing on resume */
374
381
  let pausedSessionFile: string | null = null;
375
382
 
@@ -730,6 +737,8 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI, reason
730
737
  clearActivityLogState();
731
738
  resetProactiveHealing();
732
739
  pendingCrashRecovery = null;
740
+ pendingVerificationRetry = null;
741
+ verificationRetryCount.clear();
733
742
  pausedSessionFile = null;
734
743
  _handlingAgentEnd = false;
735
744
  ctx?.ui.setStatus("gsd-auto", undefined);
@@ -767,6 +776,8 @@ export async function pauseAuto(ctx?: ExtensionContext, _pi?: ExtensionAPI): Pro
767
776
 
768
777
  active = false;
769
778
  paused = true;
779
+ pendingVerificationRetry = null;
780
+ verificationRetryCount.clear();
770
781
  // Preserve: unitDispatchCount, currentUnit, basePath, verbose, cmdCtx,
771
782
  // completedUnits, autoStartTime, currentMilestoneId, originalModelId
772
783
  // — all needed for resume and dashboard display
@@ -1574,6 +1585,145 @@ export async function handleAgentEnd(
1574
1585
  }
1575
1586
  }
1576
1587
 
1588
+ // ── Verification gate: run typecheck/lint/test after execute-task ──
1589
+ if (currentUnit && currentUnit.type === "execute-task") {
1590
+ try {
1591
+ const effectivePrefs = loadEffectiveGSDPreferences();
1592
+ const prefs = effectivePrefs?.preferences;
1593
+
1594
+ // Read task plan verify field from the current task's slice plan
1595
+ // unitId format is "M001/S01/T03" — extract mid, sid, tid
1596
+ const parts = currentUnit.id.split("/");
1597
+ let taskPlanVerify: string | undefined;
1598
+ if (parts.length >= 3) {
1599
+ const [mid, sid, tid] = parts;
1600
+ const planFile = resolveSliceFile(basePath, mid, sid, "PLAN");
1601
+ if (planFile) {
1602
+ const planContent = await loadFile(planFile);
1603
+ if (planContent) {
1604
+ const slicePlan = parsePlan(planContent);
1605
+ const taskEntry = slicePlan?.tasks?.find(t => t.id === tid);
1606
+ taskPlanVerify = taskEntry?.verify;
1607
+ }
1608
+ }
1609
+ }
1610
+
1611
+ const result = runVerificationGate({
1612
+ basePath,
1613
+ unitId: currentUnit.id,
1614
+ cwd: basePath,
1615
+ preferenceCommands: prefs?.verification_commands,
1616
+ taskPlanVerify,
1617
+ });
1618
+
1619
+ // Capture runtime errors from bg-shell and browser console
1620
+ const runtimeErrors = await captureRuntimeErrors();
1621
+ if (runtimeErrors.length > 0) {
1622
+ result.runtimeErrors = runtimeErrors;
1623
+ // Blocking runtime errors override gate pass
1624
+ if (runtimeErrors.some(e => e.blocking)) {
1625
+ result.passed = false;
1626
+ }
1627
+ }
1628
+
1629
+ // Conditional dependency audit (R008)
1630
+ const auditWarnings = runDependencyAudit(basePath);
1631
+ if (auditWarnings.length > 0) {
1632
+ result.auditWarnings = auditWarnings;
1633
+ process.stderr.write(`verification-gate: ${auditWarnings.length} audit warning(s)\n`);
1634
+ for (const w of auditWarnings) {
1635
+ process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`);
1636
+ }
1637
+ }
1638
+
1639
+ // Auto-fix retry preferences (R005 / D005)
1640
+ const autoFixEnabled = prefs?.verification_auto_fix !== false; // default true
1641
+ const maxRetries = typeof prefs?.verification_max_retries === "number" ? prefs.verification_max_retries : 2;
1642
+ const completionKey = `${currentUnit.type}/${currentUnit.id}`;
1643
+
1644
+ if (result.checks.length > 0) {
1645
+ const passCount = result.checks.filter(c => c.exitCode === 0).length;
1646
+ const total = result.checks.length;
1647
+ if (result.passed) {
1648
+ ctx.ui.notify(`Verification gate: ${passCount}/${total} checks passed`);
1649
+ } else {
1650
+ const failures = result.checks.filter(c => c.exitCode !== 0);
1651
+ const failNames = failures.map(f => f.command).join(", ");
1652
+ ctx.ui.notify(`Verification gate: FAILED — ${failNames}`);
1653
+ process.stderr.write(`verification-gate: ${total - passCount}/${total} checks failed\n`);
1654
+ for (const f of failures) {
1655
+ process.stderr.write(` ${f.command} exited ${f.exitCode}\n`);
1656
+ if (f.stderr) process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`);
1657
+ }
1658
+ }
1659
+ }
1660
+
1661
+ // Log blocking runtime errors to stderr
1662
+ if (result.runtimeErrors?.some(e => e.blocking)) {
1663
+ const blockingErrors = result.runtimeErrors.filter(e => e.blocking);
1664
+ process.stderr.write(`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`);
1665
+ for (const err of blockingErrors) {
1666
+ process.stderr.write(` [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`);
1667
+ }
1668
+ }
1669
+
1670
+ // Write verification evidence JSON artifact
1671
+ const attempt = verificationRetryCount.get(currentUnit.id) ?? 0;
1672
+ if (parts.length >= 3) {
1673
+ try {
1674
+ const [mid, sid, tid] = parts;
1675
+ const sDir = resolveSlicePath(basePath, mid, sid);
1676
+ if (sDir) {
1677
+ const tasksDir = join(sDir, "tasks");
1678
+ if (result.passed) {
1679
+ writeVerificationJSON(result, tasksDir, tid, currentUnit.id);
1680
+ } else {
1681
+ const nextAttempt = attempt + 1;
1682
+ writeVerificationJSON(result, tasksDir, tid, currentUnit.id, nextAttempt, maxRetries);
1683
+ }
1684
+ }
1685
+ } catch (evidenceErr) {
1686
+ process.stderr.write(`verification-evidence: write error — ${(evidenceErr as Error).message}\n`);
1687
+ }
1688
+ }
1689
+
1690
+ // ── Auto-fix retry logic ──
1691
+ if (result.passed) {
1692
+ // Gate passed — clear retry state and continue normal flow
1693
+ verificationRetryCount.delete(currentUnit.id);
1694
+ pendingVerificationRetry = null;
1695
+ } else if (autoFixEnabled && attempt + 1 <= maxRetries) {
1696
+ // Gate failed, retries remaining — set up retry and return early
1697
+ const nextAttempt = attempt + 1;
1698
+ verificationRetryCount.set(currentUnit.id, nextAttempt);
1699
+ pendingVerificationRetry = {
1700
+ unitId: currentUnit.id,
1701
+ failureContext: formatFailureContext(result),
1702
+ attempt: nextAttempt,
1703
+ };
1704
+ ctx.ui.notify(`Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`, "warning");
1705
+ // Remove completion key so dispatchNextUnit re-dispatches this unit
1706
+ completedKeySet.delete(completionKey);
1707
+ removePersistedKey(basePath, completionKey);
1708
+ return; // ← Critical: exit before DB dual-write and post-unit hooks
1709
+ } else {
1710
+ // Gate failed, retries exhausted (or auto-fix disabled) — pause for human review
1711
+ const exhaustedAttempt = attempt + 1;
1712
+ verificationRetryCount.delete(currentUnit.id);
1713
+ pendingVerificationRetry = null;
1714
+ ctx.ui.notify(
1715
+ `Verification gate FAILED after ${exhaustedAttempt > maxRetries ? exhaustedAttempt - 1 : exhaustedAttempt} retries — pausing for human review`,
1716
+ "error",
1717
+ );
1718
+ await pauseAuto(ctx, pi);
1719
+ return;
1720
+ }
1721
+ } catch (err) {
1722
+ // Gate errors are non-fatal — log and continue
1723
+ process.stderr.write(`verification-gate: error — ${(err as Error).message}\n`);
1724
+ }
1725
+ }
1726
+
1577
1727
  // ── DB dual-write: re-import changed markdown files so next unit's prompts use fresh data ──
1578
1728
  if (isDbAvailable()) {
1579
1729
  try {
@@ -2975,6 +3125,17 @@ async function dispatchNextUnit(
2975
3125
  // Cap injected content to prevent unbounded prompt growth → OOM
2976
3126
  const MAX_RECOVERY_CHARS = 50_000;
2977
3127
  let finalPrompt = prompt;
3128
+
3129
+ // Verification retry — inject failure context so the agent can auto-fix
3130
+ if (pendingVerificationRetry) {
3131
+ const retryCtx = pendingVerificationRetry;
3132
+ pendingVerificationRetry = null;
3133
+ const capped = retryCtx.failureContext.length > MAX_RECOVERY_CHARS
3134
+ ? retryCtx.failureContext.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...failure context truncated]"
3135
+ : retryCtx.failureContext;
3136
+ finalPrompt = `**VERIFICATION FAILED — AUTO-FIX ATTEMPT ${retryCtx.attempt}**\n\nThe verification gate ran after your previous attempt and found failures. Fix these issues before completing the task.\n\n${capped}\n\n---\n\n${finalPrompt}`;
3137
+ }
3138
+
2978
3139
  if (pendingCrashRecovery) {
2979
3140
  const capped = pendingCrashRecovery.length > MAX_RECOVERY_CHARS
2980
3141
  ? pendingCrashRecovery.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...recovery briefing truncated to prevent memory exhaustion]"
package/dist/resources/extensions/gsd/observability-validator.ts CHANGED
@@ -298,6 +298,27 @@ export function validateTaskSummaryContent(file: string, content: string): Valid
298
298
  });
299
299
  }
300
300
 
301
+ const evidence = getSection(content, "Verification Evidence", 2);
302
+ if (!evidence) {
303
+ issues.push({
304
+ severity: "warning",
305
+ scope: "task-summary",
306
+ file,
307
+ ruleId: "evidence_block_missing",
308
+ message: "Task summary is missing `## Verification Evidence`.",
309
+ suggestion: "Add a verification evidence table showing gate check results (command, exit code, verdict, duration).",
310
+ });
311
+ } else if (sectionLooksPlaceholderOnly(evidence)) {
312
+ issues.push({
313
+ severity: "warning",
314
+ scope: "task-summary",
315
+ file,
316
+ ruleId: "evidence_block_placeholder",
317
+ message: "Task summary verification evidence section still looks like placeholder text.",
318
+ suggestion: "Replace placeholders with actual gate results or note that no verification commands were discovered.",
319
+ });
320
+ }
321
+
301
322
  return issues;
302
323
  }
303
324
 
package/dist/resources/extensions/gsd/preferences.ts CHANGED
@@ -76,6 +76,9 @@ const KNOWN_PREFERENCE_KEYS = new Set<string>([
76
76
  "phases",
77
77
  "auto_visualize",
78
78
  "parallel",
79
+ "verification_commands",
80
+ "verification_auto_fix",
81
+ "verification_max_retries",
79
82
  ]);
80
83
 
81
84
  export interface GSDSkillRule {
@@ -173,6 +176,9 @@ export interface GSDPreferences {
173
176
  phases?: PhaseSkipPreferences;
174
177
  auto_visualize?: boolean;
175
178
  parallel?: import("./types.js").ParallelConfig;
179
+ verification_commands?: string[];
180
+ verification_auto_fix?: boolean;
181
+ verification_max_retries?: number;
176
182
  }
177
183
 
178
184
  export interface LoadedGSDPreferences {
@@ -773,6 +779,9 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPr
773
779
  parallel: (base.parallel || override.parallel)
774
780
  ? { ...(base.parallel ?? {}), ...(override.parallel ?? {}) } as import("./types.js").ParallelConfig
775
781
  : undefined,
782
+ verification_commands: mergeStringLists(base.verification_commands, override.verification_commands),
783
+ verification_auto_fix: override.verification_auto_fix ?? base.verification_auto_fix,
784
+ verification_max_retries: override.verification_max_retries ?? base.verification_max_retries,
776
785
  };
777
786
  }
778
787
 
@@ -1205,6 +1214,39 @@ export function validatePreferences(preferences: GSDPreferences): {
1205
1214
  }
1206
1215
  }
1207
1216
 
1217
+ // ─── Verification Preferences ───────────────────────────────────────────
1218
+ if (preferences.verification_commands !== undefined) {
1219
+ if (Array.isArray(preferences.verification_commands)) {
1220
+ const allStrings = preferences.verification_commands.every(
1221
+ (item: unknown) => typeof item === "string",
1222
+ );
1223
+ if (allStrings) {
1224
+ validated.verification_commands = preferences.verification_commands;
1225
+ } else {
1226
+ errors.push("verification_commands must be an array of strings");
1227
+ }
1228
+ } else {
1229
+ errors.push("verification_commands must be an array of strings");
1230
+ }
1231
+ }
1232
+
1233
+ if (preferences.verification_auto_fix !== undefined) {
1234
+ if (typeof preferences.verification_auto_fix === "boolean") {
1235
+ validated.verification_auto_fix = preferences.verification_auto_fix;
1236
+ } else {
1237
+ errors.push("verification_auto_fix must be a boolean");
1238
+ }
1239
+ }
1240
+
1241
+ if (preferences.verification_max_retries !== undefined) {
1242
+ const raw = preferences.verification_max_retries;
1243
+ if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
1244
+ validated.verification_max_retries = Math.floor(raw);
1245
+ } else {
1246
+ errors.push("verification_max_retries must be a non-negative number");
1247
+ }
1248
+ }
1249
+
1208
1250
  // ─── Git Preferences ───────────────────────────────────────────────────
1209
1251
  if (preferences.git && typeof preferences.git === "object") {
1210
1252
  const git: Record<string, unknown> = {};
package/dist/resources/extensions/gsd/prompts/execute-task.md CHANGED
@@ -38,15 +38,16 @@ Then:
38
38
  - Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues
39
39
  6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors)
40
40
  7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary.
41
- 8. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
41
+ 8. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section.
42
+ 9. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
42
43
  - exercise the real flow in the browser
43
44
  - prefer `browser_batch` when the next few actions are obvious and sequential
44
45
  - prefer `browser_assert` for explicit pass/fail verification of the intended outcome
45
46
  - use `browser_diff` when an action's effect is ambiguous
46
47
  - use console/network/dialog diagnostics when validating async, stateful, or failure-prone UI
47
48
  - record verification in terms of explicit checks passed/failed, not only prose interpretation
48
- 9. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
49
- 10. **If execution is running long or verification fails:**
49
+ 10. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
50
+ 11. **If execution is running long or verification fails:**
50
51
 
51
52
  **Context budget:** You have approximately **{{verificationBudget}}** reserved for verification context. If you've used most of your context and haven't finished all steps, stop implementing and prioritize writing the task summary with clear notes on what's done and what remains. A partial summary that enables clean resumption is more valuable than one more half-finished step with no documentation. Never sacrifice summary quality for one more implementation step.
52
53
 
package/dist/resources/extensions/gsd/templates/task-summary.md CHANGED
@@ -37,6 +37,15 @@ blocker_discovered: false
37
37
 
38
38
  {{whatWasVerifiedAndHow — commands run, tests passed, behavior confirmed}}
39
39
 
40
+ ## Verification Evidence
41
+
42
+ <!-- Populated from verification gate output. If the gate ran, fill in the table below.
43
+ If no gate ran (e.g., no verification commands discovered), note that. -->
44
+
45
+ | # | Command | Exit Code | Verdict | Duration |
46
+ |---|---------|-----------|---------|----------|
47
+ | {{row}} | {{command}} | {{exitCode}} | {{verdict}} | {{duration}} |
48
+
40
49
  ## Diagnostics
41
50
 
42
51
  {{howToInspectWhatThisTaskBuiltLater — status surfaces, logs, error shapes, failure artifacts, or none}}