gsd-pi 2.26.0 → 2.26.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/headless.d.ts +1 -0
- package/dist/headless.js +37 -1
- package/dist/loader.js +33 -4
- package/dist/resources/extensions/gsd/auto.ts +162 -1
- package/dist/resources/extensions/gsd/observability-validator.ts +21 -0
- package/dist/resources/extensions/gsd/preferences.ts +42 -0
- package/dist/resources/extensions/gsd/prompts/execute-task.md +4 -3
- package/dist/resources/extensions/gsd/templates/task-summary.md +9 -0
- package/dist/resources/extensions/gsd/tests/verification-evidence.test.ts +743 -0
- package/dist/resources/extensions/gsd/tests/verification-gate.test.ts +965 -0
- package/dist/resources/extensions/gsd/types.ts +38 -0
- package/dist/resources/extensions/gsd/verification-evidence.ts +183 -0
- package/dist/resources/extensions/gsd/verification-gate.ts +567 -0
- package/package.json +1 -1
- package/scripts/link-workspace-packages.cjs +22 -6
- package/src/resources/extensions/gsd/auto.ts +162 -1
- package/src/resources/extensions/gsd/observability-validator.ts +21 -0
- package/src/resources/extensions/gsd/preferences.ts +42 -0
- package/src/resources/extensions/gsd/prompts/execute-task.md +4 -3
- package/src/resources/extensions/gsd/templates/task-summary.md +9 -0
- package/src/resources/extensions/gsd/tests/verification-evidence.test.ts +743 -0
- package/src/resources/extensions/gsd/tests/verification-gate.test.ts +965 -0
- package/src/resources/extensions/gsd/types.ts +38 -0
- package/src/resources/extensions/gsd/verification-evidence.ts +183 -0
- package/src/resources/extensions/gsd/verification-gate.ts +567 -0
package/dist/headless.d.ts
CHANGED
|
@@ -20,6 +20,7 @@ export interface HeadlessOptions {
|
|
|
20
20
|
contextText?: string;
|
|
21
21
|
auto?: boolean;
|
|
22
22
|
verbose?: boolean;
|
|
23
|
+
maxRestarts?: number;
|
|
23
24
|
}
|
|
24
25
|
export declare function parseHeadlessArgs(argv: string[]): HeadlessOptions;
|
|
25
26
|
export declare function runHeadless(options: HeadlessOptions): Promise<void>;
|
package/dist/headless.js
CHANGED
|
@@ -58,6 +58,13 @@ export function parseHeadlessArgs(argv) {
|
|
|
58
58
|
else if (arg === '--verbose') {
|
|
59
59
|
options.verbose = true;
|
|
60
60
|
}
|
|
61
|
+
else if (arg === '--max-restarts' && i + 1 < args.length) {
|
|
62
|
+
options.maxRestarts = parseInt(args[++i], 10);
|
|
63
|
+
if (Number.isNaN(options.maxRestarts) || options.maxRestarts < 0) {
|
|
64
|
+
process.stderr.write('[headless] Error: --max-restarts must be a non-negative integer\n');
|
|
65
|
+
process.exit(1);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
61
68
|
}
|
|
62
69
|
else if (!positionalStarted) {
|
|
63
70
|
positionalStarted = true;
|
|
@@ -220,6 +227,31 @@ function bootstrapGsdProject(basePath) {
|
|
|
220
227
|
mkdirSync(join(gsdDir, 'runtime'), { recursive: true });
|
|
221
228
|
}
|
|
222
229
|
/**
 * Run a headless session and restart it after a crash, up to a bounded
 * number of times.
 *
 * Each attempt is delegated to `runHeadlessOnce`, which resolves to
 * `{ exitCode, interrupted }`. This function does not return normally:
 * every path out of the loop ends in `process.exit`.
 *
 * @param options Headless options. `options.maxRestarts` caps the number of
 *   restarts; when it is missing, negative, or not a finite number the
 *   default of 3 is used.
 * @returns A promise that never resolves with a value (the process exits).
 */
export async function runHeadless(options) {
    const DEFAULT_MAX_RESTARTS = 3;
    const BACKOFF_STEP_MS = 5000;
    const MAX_BACKOFF_MS = 30_000;
    // Guard against NaN/negative values from programmatic callers. With NaN,
    // `restartCount >= maxRestarts` is always false and the loop would
    // restart forever.
    const requested = options.maxRestarts;
    const maxRestarts = typeof requested === 'number' && Number.isFinite(requested) && requested >= 0
        ? requested
        : DEFAULT_MAX_RESTARTS;
    let restartCount = 0;
    while (true) {
        const result = await runHeadlessOnce(options, restartCount);
        // Success or blocked — exit normally
        if (result.exitCode === 0 || result.exitCode === 2) {
            process.exit(result.exitCode);
        }
        // Don't restart if SIGINT/SIGTERM was received. Checked before the
        // restart budget so an interrupted run is never reported as
        // "Max restarts reached".
        if (result.interrupted) {
            process.exit(result.exitCode);
        }
        // Crash/error — check if we should restart
        if (restartCount >= maxRestarts) {
            process.stderr.write(`[headless] Max restarts (${maxRestarts}) reached. Exiting.\n`);
            process.exit(result.exitCode);
        }
        restartCount++;
        // Linear backoff: 5s per restart so far, capped at 30s.
        const backoffMs = Math.min(BACKOFF_STEP_MS * restartCount, MAX_BACKOFF_MS);
        process.stderr.write(`[headless] Restarting in ${(backoffMs / 1000).toFixed(0)}s (attempt ${restartCount}/${maxRestarts})...\n`);
        await new Promise(resolve => setTimeout(resolve, backoffMs));
    }
}
|
|
253
|
+
async function runHeadlessOnce(options, restartCount) {
|
|
254
|
+
let interrupted = false;
|
|
223
255
|
const startTime = Date.now();
|
|
224
256
|
const isNewMilestone = options.command === 'new-milestone';
|
|
225
257
|
// For new-milestone, load context and bootstrap .gsd/ before spawning RPC child
|
|
@@ -369,6 +401,7 @@ export async function runHeadless(options) {
|
|
|
369
401
|
// Signal handling
|
|
370
402
|
const signalHandler = () => {
|
|
371
403
|
process.stderr.write('\n[headless] Interrupted, stopping child process...\n');
|
|
404
|
+
interrupted = true;
|
|
372
405
|
exitCode = 1;
|
|
373
406
|
client.stop().finally(() => {
|
|
374
407
|
clearTimeout(timeoutTimer);
|
|
@@ -460,6 +493,9 @@ export async function runHeadless(options) {
|
|
|
460
493
|
process.stderr.write(`[headless] Status: ${status}\n`);
|
|
461
494
|
process.stderr.write(`[headless] Duration: ${duration}s\n`);
|
|
462
495
|
process.stderr.write(`[headless] Events: ${totalEvents} total, ${toolCallCount} tool calls\n`);
|
|
496
|
+
if (restartCount > 0) {
|
|
497
|
+
process.stderr.write(`[headless] Restarts: ${restartCount}\n`);
|
|
498
|
+
}
|
|
463
499
|
// On failure, print last 5 events for diagnostics
|
|
464
500
|
if (exitCode !== 0) {
|
|
465
501
|
const lastFive = recentEvents.slice(-5);
|
|
@@ -470,5 +506,5 @@ export async function runHeadless(options) {
|
|
|
470
506
|
}
|
|
471
507
|
}
|
|
472
508
|
}
|
|
473
|
-
|
|
509
|
+
return { exitCode, interrupted };
|
|
474
510
|
}
|
package/dist/loader.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
|
4
4
|
import { fileURLToPath } from 'url';
|
|
5
5
|
import { dirname, resolve, join, delimiter } from 'path';
|
|
6
|
-
import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync } from 'fs';
|
|
6
|
+
import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync, cpSync } from 'fs';
|
|
7
7
|
// Fast-path: handle --version/-v and --help/-h before importing any heavy
|
|
8
8
|
// dependencies. This avoids loading the entire pi-coding-agent barrel import
|
|
9
9
|
// (~1s) just to print a version string.
|
|
@@ -137,8 +137,12 @@ if (process.env.HTTP_PROXY || process.env.HTTPS_PROXY || process.env.http_proxy
|
|
|
137
137
|
const { EnvHttpProxyAgent, setGlobalDispatcher } = await import('undici');
|
|
138
138
|
setGlobalDispatcher(new EnvHttpProxyAgent());
|
|
139
139
|
}
|
|
140
|
-
// Ensure workspace packages are linked
|
|
140
|
+
// Ensure workspace packages are linked (or copied on Windows) before importing
|
|
141
|
+
// cli.js (which imports @gsd/*).
|
|
141
142
|
// npm postinstall handles this normally, but npx --ignore-scripts skips postinstall.
|
|
143
|
+
// On Windows without Developer Mode or admin rights, symlinkSync will throw even for
|
|
144
|
+
// 'junction' type — so we fall back to cpSync (a full directory copy) which works
|
|
145
|
+
// everywhere without elevated permissions.
|
|
142
146
|
const gsdScopeDir = join(gsdNodeModules, '@gsd');
|
|
143
147
|
const packagesDir = join(gsdRoot, 'packages');
|
|
144
148
|
const wsPackages = ['native', 'pi-agent-core', 'pi-ai', 'pi-coding-agent', 'pi-tui'];
|
|
@@ -148,14 +152,39 @@ try {
|
|
|
148
152
|
for (const pkg of wsPackages) {
|
|
149
153
|
const target = join(gsdScopeDir, pkg);
|
|
150
154
|
const source = join(packagesDir, pkg);
|
|
151
|
-
if (existsSync(source)
|
|
155
|
+
if (!existsSync(source) || existsSync(target))
|
|
156
|
+
continue;
|
|
157
|
+
try {
|
|
158
|
+
symlinkSync(source, target, 'junction');
|
|
159
|
+
}
|
|
160
|
+
catch {
|
|
161
|
+
// Symlink failed (common on Windows without Developer Mode / admin).
|
|
162
|
+
// Fall back to a directory copy — slower on first run but universally works.
|
|
152
163
|
try {
|
|
153
|
-
|
|
164
|
+
cpSync(source, target, { recursive: true });
|
|
154
165
|
}
|
|
155
166
|
catch { /* non-fatal */ }
|
|
156
167
|
}
|
|
157
168
|
}
|
|
158
169
|
}
|
|
159
170
|
catch { /* non-fatal */ }
|
|
171
|
+
// Validate critical workspace packages are resolvable. If still missing after the
|
|
172
|
+
// symlink+copy attempts, emit a clear diagnostic instead of a cryptic
|
|
173
|
+
// ERR_MODULE_NOT_FOUND from deep inside cli.js.
|
|
174
|
+
const criticalPackages = ['pi-coding-agent'];
|
|
175
|
+
const missingPackages = criticalPackages.filter(pkg => !existsSync(join(gsdScopeDir, pkg)));
|
|
176
|
+
if (missingPackages.length > 0) {
|
|
177
|
+
const missing = missingPackages.map(p => `@gsd/${p}`).join(', ');
|
|
178
|
+
process.stderr.write(`\nError: GSD installation is broken — missing packages: ${missing}\n\n` +
|
|
179
|
+
`This is usually caused by one of:\n` +
|
|
180
|
+
` • An outdated version installed from npm (run: npm install -g gsd-pi@latest)\n` +
|
|
181
|
+
` • The packages/ directory was excluded from the installed tarball\n` +
|
|
182
|
+
` • A filesystem error prevented linking or copying the workspace packages\n\n` +
|
|
183
|
+
`Fix it by reinstalling:\n\n` +
|
|
184
|
+
` npm install -g gsd-pi@latest\n\n` +
|
|
185
|
+
`If the issue persists, please open an issue at:\n` +
|
|
186
|
+
` https://github.com/gsd-build/gsd-2/issues\n`);
|
|
187
|
+
process.exit(1);
|
|
188
|
+
}
|
|
160
189
|
// Dynamic import defers ESM evaluation — config.js will see PI_PACKAGE_DIR above
|
|
161
190
|
await import('./cli.js');
|
|
@@ -18,8 +18,10 @@ import type {
|
|
|
18
18
|
|
|
19
19
|
import { deriveState } from "./state.js";
|
|
20
20
|
import type { BudgetEnforcementMode, GSDState } from "./types.js";
|
|
21
|
-
import { loadFile, parseRoadmap, getManifestStatus, resolveAllOverrides, parseSummary } from "./files.js";
|
|
21
|
+
import { loadFile, parseRoadmap, getManifestStatus, resolveAllOverrides, parsePlan, parseSummary } from "./files.js";
|
|
22
22
|
import { loadPrompt } from "./prompt-loader.js";
|
|
23
|
+
import { runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "./verification-gate.js";
|
|
24
|
+
import { writeVerificationJSON } from "./verification-evidence.js";
|
|
23
25
|
export { inlinePriorMilestoneSummary } from "./files.js";
|
|
24
26
|
import { collectSecretsFromManifest } from "../get-secrets-from-user.js";
|
|
25
27
|
import {
|
|
@@ -370,6 +372,11 @@ function escapeStaleWorktree(base: string): string {
|
|
|
370
372
|
/** Crash recovery prompt — set by startAuto, consumed by first dispatchNextUnit */
|
|
371
373
|
let pendingCrashRecovery: string | null = null;
|
|
372
374
|
|
|
375
|
+
/** Pending verification retry — set when gate fails with retries remaining, consumed by dispatchNextUnit */
|
|
376
|
+
let pendingVerificationRetry: { unitId: string; failureContext: string; attempt: number } | null = null;
|
|
377
|
+
/** Verification retry count per unitId — separate from unitDispatchCount which tracks artifact-missing retries */
|
|
378
|
+
const verificationRetryCount = new Map<string, number>();
|
|
379
|
+
|
|
373
380
|
/** Session file path captured at pause — used to synthesize recovery briefing on resume */
|
|
374
381
|
let pausedSessionFile: string | null = null;
|
|
375
382
|
|
|
@@ -730,6 +737,8 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI, reason
|
|
|
730
737
|
clearActivityLogState();
|
|
731
738
|
resetProactiveHealing();
|
|
732
739
|
pendingCrashRecovery = null;
|
|
740
|
+
pendingVerificationRetry = null;
|
|
741
|
+
verificationRetryCount.clear();
|
|
733
742
|
pausedSessionFile = null;
|
|
734
743
|
_handlingAgentEnd = false;
|
|
735
744
|
ctx?.ui.setStatus("gsd-auto", undefined);
|
|
@@ -767,6 +776,8 @@ export async function pauseAuto(ctx?: ExtensionContext, _pi?: ExtensionAPI): Pro
|
|
|
767
776
|
|
|
768
777
|
active = false;
|
|
769
778
|
paused = true;
|
|
779
|
+
pendingVerificationRetry = null;
|
|
780
|
+
verificationRetryCount.clear();
|
|
770
781
|
// Preserve: unitDispatchCount, currentUnit, basePath, verbose, cmdCtx,
|
|
771
782
|
// completedUnits, autoStartTime, currentMilestoneId, originalModelId
|
|
772
783
|
// — all needed for resume and dashboard display
|
|
@@ -1574,6 +1585,145 @@ export async function handleAgentEnd(
|
|
|
1574
1585
|
}
|
|
1575
1586
|
}
|
|
1576
1587
|
|
|
1588
|
+
// ── Verification gate: run typecheck/lint/test after execute-task ──
|
|
1589
|
+
if (currentUnit && currentUnit.type === "execute-task") {
|
|
1590
|
+
try {
|
|
1591
|
+
const effectivePrefs = loadEffectiveGSDPreferences();
|
|
1592
|
+
const prefs = effectivePrefs?.preferences;
|
|
1593
|
+
|
|
1594
|
+
// Read task plan verify field from the current task's slice plan
|
|
1595
|
+
// unitId format is "M001/S01/T03" — extract mid, sid, tid
|
|
1596
|
+
const parts = currentUnit.id.split("/");
|
|
1597
|
+
let taskPlanVerify: string | undefined;
|
|
1598
|
+
if (parts.length >= 3) {
|
|
1599
|
+
const [mid, sid, tid] = parts;
|
|
1600
|
+
const planFile = resolveSliceFile(basePath, mid, sid, "PLAN");
|
|
1601
|
+
if (planFile) {
|
|
1602
|
+
const planContent = await loadFile(planFile);
|
|
1603
|
+
if (planContent) {
|
|
1604
|
+
const slicePlan = parsePlan(planContent);
|
|
1605
|
+
const taskEntry = slicePlan?.tasks?.find(t => t.id === tid);
|
|
1606
|
+
taskPlanVerify = taskEntry?.verify;
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
}
|
|
1610
|
+
|
|
1611
|
+
const result = runVerificationGate({
|
|
1612
|
+
basePath,
|
|
1613
|
+
unitId: currentUnit.id,
|
|
1614
|
+
cwd: basePath,
|
|
1615
|
+
preferenceCommands: prefs?.verification_commands,
|
|
1616
|
+
taskPlanVerify,
|
|
1617
|
+
});
|
|
1618
|
+
|
|
1619
|
+
// Capture runtime errors from bg-shell and browser console
|
|
1620
|
+
const runtimeErrors = await captureRuntimeErrors();
|
|
1621
|
+
if (runtimeErrors.length > 0) {
|
|
1622
|
+
result.runtimeErrors = runtimeErrors;
|
|
1623
|
+
// Blocking runtime errors override gate pass
|
|
1624
|
+
if (runtimeErrors.some(e => e.blocking)) {
|
|
1625
|
+
result.passed = false;
|
|
1626
|
+
}
|
|
1627
|
+
}
|
|
1628
|
+
|
|
1629
|
+
// Conditional dependency audit (R008)
|
|
1630
|
+
const auditWarnings = runDependencyAudit(basePath);
|
|
1631
|
+
if (auditWarnings.length > 0) {
|
|
1632
|
+
result.auditWarnings = auditWarnings;
|
|
1633
|
+
process.stderr.write(`verification-gate: ${auditWarnings.length} audit warning(s)\n`);
|
|
1634
|
+
for (const w of auditWarnings) {
|
|
1635
|
+
process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`);
|
|
1636
|
+
}
|
|
1637
|
+
}
|
|
1638
|
+
|
|
1639
|
+
// Auto-fix retry preferences (R005 / D005)
|
|
1640
|
+
const autoFixEnabled = prefs?.verification_auto_fix !== false; // default true
|
|
1641
|
+
const maxRetries = typeof prefs?.verification_max_retries === "number" ? prefs.verification_max_retries : 2;
|
|
1642
|
+
const completionKey = `${currentUnit.type}/${currentUnit.id}`;
|
|
1643
|
+
|
|
1644
|
+
if (result.checks.length > 0) {
|
|
1645
|
+
const passCount = result.checks.filter(c => c.exitCode === 0).length;
|
|
1646
|
+
const total = result.checks.length;
|
|
1647
|
+
if (result.passed) {
|
|
1648
|
+
ctx.ui.notify(`Verification gate: ${passCount}/${total} checks passed`);
|
|
1649
|
+
} else {
|
|
1650
|
+
const failures = result.checks.filter(c => c.exitCode !== 0);
|
|
1651
|
+
const failNames = failures.map(f => f.command).join(", ");
|
|
1652
|
+
ctx.ui.notify(`Verification gate: FAILED — ${failNames}`);
|
|
1653
|
+
process.stderr.write(`verification-gate: ${total - passCount}/${total} checks failed\n`);
|
|
1654
|
+
for (const f of failures) {
|
|
1655
|
+
process.stderr.write(` ${f.command} exited ${f.exitCode}\n`);
|
|
1656
|
+
if (f.stderr) process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`);
|
|
1657
|
+
}
|
|
1658
|
+
}
|
|
1659
|
+
}
|
|
1660
|
+
|
|
1661
|
+
// Log blocking runtime errors to stderr
|
|
1662
|
+
if (result.runtimeErrors?.some(e => e.blocking)) {
|
|
1663
|
+
const blockingErrors = result.runtimeErrors.filter(e => e.blocking);
|
|
1664
|
+
process.stderr.write(`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`);
|
|
1665
|
+
for (const err of blockingErrors) {
|
|
1666
|
+
process.stderr.write(` [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`);
|
|
1667
|
+
}
|
|
1668
|
+
}
|
|
1669
|
+
|
|
1670
|
+
// Write verification evidence JSON artifact
|
|
1671
|
+
const attempt = verificationRetryCount.get(currentUnit.id) ?? 0;
|
|
1672
|
+
if (parts.length >= 3) {
|
|
1673
|
+
try {
|
|
1674
|
+
const [mid, sid, tid] = parts;
|
|
1675
|
+
const sDir = resolveSlicePath(basePath, mid, sid);
|
|
1676
|
+
if (sDir) {
|
|
1677
|
+
const tasksDir = join(sDir, "tasks");
|
|
1678
|
+
if (result.passed) {
|
|
1679
|
+
writeVerificationJSON(result, tasksDir, tid, currentUnit.id);
|
|
1680
|
+
} else {
|
|
1681
|
+
const nextAttempt = attempt + 1;
|
|
1682
|
+
writeVerificationJSON(result, tasksDir, tid, currentUnit.id, nextAttempt, maxRetries);
|
|
1683
|
+
}
|
|
1684
|
+
}
|
|
1685
|
+
} catch (evidenceErr) {
|
|
1686
|
+
process.stderr.write(`verification-evidence: write error — ${(evidenceErr as Error).message}\n`);
|
|
1687
|
+
}
|
|
1688
|
+
}
|
|
1689
|
+
|
|
1690
|
+
// ── Auto-fix retry logic ──
|
|
1691
|
+
if (result.passed) {
|
|
1692
|
+
// Gate passed — clear retry state and continue normal flow
|
|
1693
|
+
verificationRetryCount.delete(currentUnit.id);
|
|
1694
|
+
pendingVerificationRetry = null;
|
|
1695
|
+
} else if (autoFixEnabled && attempt + 1 <= maxRetries) {
|
|
1696
|
+
// Gate failed, retries remaining — set up retry and return early
|
|
1697
|
+
const nextAttempt = attempt + 1;
|
|
1698
|
+
verificationRetryCount.set(currentUnit.id, nextAttempt);
|
|
1699
|
+
pendingVerificationRetry = {
|
|
1700
|
+
unitId: currentUnit.id,
|
|
1701
|
+
failureContext: formatFailureContext(result),
|
|
1702
|
+
attempt: nextAttempt,
|
|
1703
|
+
};
|
|
1704
|
+
ctx.ui.notify(`Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`, "warning");
|
|
1705
|
+
// Remove completion key so dispatchNextUnit re-dispatches this unit
|
|
1706
|
+
completedKeySet.delete(completionKey);
|
|
1707
|
+
removePersistedKey(basePath, completionKey);
|
|
1708
|
+
return; // ← Critical: exit before DB dual-write and post-unit hooks
|
|
1709
|
+
} else {
|
|
1710
|
+
// Gate failed, retries exhausted (or auto-fix disabled) — pause for human review
|
|
1711
|
+
const exhaustedAttempt = attempt + 1;
|
|
1712
|
+
verificationRetryCount.delete(currentUnit.id);
|
|
1713
|
+
pendingVerificationRetry = null;
|
|
1714
|
+
ctx.ui.notify(
|
|
1715
|
+
`Verification gate FAILED after ${exhaustedAttempt > maxRetries ? exhaustedAttempt - 1 : exhaustedAttempt} retries — pausing for human review`,
|
|
1716
|
+
"error",
|
|
1717
|
+
);
|
|
1718
|
+
await pauseAuto(ctx, pi);
|
|
1719
|
+
return;
|
|
1720
|
+
}
|
|
1721
|
+
} catch (err) {
|
|
1722
|
+
// Gate errors are non-fatal — log and continue
|
|
1723
|
+
process.stderr.write(`verification-gate: error — ${(err as Error).message}\n`);
|
|
1724
|
+
}
|
|
1725
|
+
}
|
|
1726
|
+
|
|
1577
1727
|
// ── DB dual-write: re-import changed markdown files so next unit's prompts use fresh data ──
|
|
1578
1728
|
if (isDbAvailable()) {
|
|
1579
1729
|
try {
|
|
@@ -2975,6 +3125,17 @@ async function dispatchNextUnit(
|
|
|
2975
3125
|
// Cap injected content to prevent unbounded prompt growth → OOM
|
|
2976
3126
|
const MAX_RECOVERY_CHARS = 50_000;
|
|
2977
3127
|
let finalPrompt = prompt;
|
|
3128
|
+
|
|
3129
|
+
// Verification retry — inject failure context so the agent can auto-fix
|
|
3130
|
+
if (pendingVerificationRetry) {
|
|
3131
|
+
const retryCtx = pendingVerificationRetry;
|
|
3132
|
+
pendingVerificationRetry = null;
|
|
3133
|
+
const capped = retryCtx.failureContext.length > MAX_RECOVERY_CHARS
|
|
3134
|
+
? retryCtx.failureContext.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...failure context truncated]"
|
|
3135
|
+
: retryCtx.failureContext;
|
|
3136
|
+
finalPrompt = `**VERIFICATION FAILED — AUTO-FIX ATTEMPT ${retryCtx.attempt}**\n\nThe verification gate ran after your previous attempt and found failures. Fix these issues before completing the task.\n\n${capped}\n\n---\n\n${finalPrompt}`;
|
|
3137
|
+
}
|
|
3138
|
+
|
|
2978
3139
|
if (pendingCrashRecovery) {
|
|
2979
3140
|
const capped = pendingCrashRecovery.length > MAX_RECOVERY_CHARS
|
|
2980
3141
|
? pendingCrashRecovery.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...recovery briefing truncated to prevent memory exhaustion]"
|
|
@@ -298,6 +298,27 @@ export function validateTaskSummaryContent(file: string, content: string): Valid
|
|
|
298
298
|
});
|
|
299
299
|
}
|
|
300
300
|
|
|
301
|
+
const evidence = getSection(content, "Verification Evidence", 2);
|
|
302
|
+
if (!evidence) {
|
|
303
|
+
issues.push({
|
|
304
|
+
severity: "warning",
|
|
305
|
+
scope: "task-summary",
|
|
306
|
+
file,
|
|
307
|
+
ruleId: "evidence_block_missing",
|
|
308
|
+
message: "Task summary is missing `## Verification Evidence`.",
|
|
309
|
+
suggestion: "Add a verification evidence table showing gate check results (command, exit code, verdict, duration).",
|
|
310
|
+
});
|
|
311
|
+
} else if (sectionLooksPlaceholderOnly(evidence)) {
|
|
312
|
+
issues.push({
|
|
313
|
+
severity: "warning",
|
|
314
|
+
scope: "task-summary",
|
|
315
|
+
file,
|
|
316
|
+
ruleId: "evidence_block_placeholder",
|
|
317
|
+
message: "Task summary verification evidence section still looks like placeholder text.",
|
|
318
|
+
suggestion: "Replace placeholders with actual gate results or note that no verification commands were discovered.",
|
|
319
|
+
});
|
|
320
|
+
}
|
|
321
|
+
|
|
301
322
|
return issues;
|
|
302
323
|
}
|
|
303
324
|
|
|
@@ -76,6 +76,9 @@ const KNOWN_PREFERENCE_KEYS = new Set<string>([
|
|
|
76
76
|
"phases",
|
|
77
77
|
"auto_visualize",
|
|
78
78
|
"parallel",
|
|
79
|
+
"verification_commands",
|
|
80
|
+
"verification_auto_fix",
|
|
81
|
+
"verification_max_retries",
|
|
79
82
|
]);
|
|
80
83
|
|
|
81
84
|
export interface GSDSkillRule {
|
|
@@ -173,6 +176,9 @@ export interface GSDPreferences {
|
|
|
173
176
|
phases?: PhaseSkipPreferences;
|
|
174
177
|
auto_visualize?: boolean;
|
|
175
178
|
parallel?: import("./types.js").ParallelConfig;
|
|
179
|
+
verification_commands?: string[];
|
|
180
|
+
verification_auto_fix?: boolean;
|
|
181
|
+
verification_max_retries?: number;
|
|
176
182
|
}
|
|
177
183
|
|
|
178
184
|
export interface LoadedGSDPreferences {
|
|
@@ -773,6 +779,9 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPr
|
|
|
773
779
|
parallel: (base.parallel || override.parallel)
|
|
774
780
|
? { ...(base.parallel ?? {}), ...(override.parallel ?? {}) } as import("./types.js").ParallelConfig
|
|
775
781
|
: undefined,
|
|
782
|
+
verification_commands: mergeStringLists(base.verification_commands, override.verification_commands),
|
|
783
|
+
verification_auto_fix: override.verification_auto_fix ?? base.verification_auto_fix,
|
|
784
|
+
verification_max_retries: override.verification_max_retries ?? base.verification_max_retries,
|
|
776
785
|
};
|
|
777
786
|
}
|
|
778
787
|
|
|
@@ -1205,6 +1214,39 @@ export function validatePreferences(preferences: GSDPreferences): {
|
|
|
1205
1214
|
}
|
|
1206
1215
|
}
|
|
1207
1216
|
|
|
1217
|
+
// ─── Verification Preferences ───────────────────────────────────────────
|
|
1218
|
+
if (preferences.verification_commands !== undefined) {
|
|
1219
|
+
if (Array.isArray(preferences.verification_commands)) {
|
|
1220
|
+
const allStrings = preferences.verification_commands.every(
|
|
1221
|
+
(item: unknown) => typeof item === "string",
|
|
1222
|
+
);
|
|
1223
|
+
if (allStrings) {
|
|
1224
|
+
validated.verification_commands = preferences.verification_commands;
|
|
1225
|
+
} else {
|
|
1226
|
+
errors.push("verification_commands must be an array of strings");
|
|
1227
|
+
}
|
|
1228
|
+
} else {
|
|
1229
|
+
errors.push("verification_commands must be an array of strings");
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1233
|
+
if (preferences.verification_auto_fix !== undefined) {
|
|
1234
|
+
if (typeof preferences.verification_auto_fix === "boolean") {
|
|
1235
|
+
validated.verification_auto_fix = preferences.verification_auto_fix;
|
|
1236
|
+
} else {
|
|
1237
|
+
errors.push("verification_auto_fix must be a boolean");
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1240
|
+
|
|
1241
|
+
if (preferences.verification_max_retries !== undefined) {
|
|
1242
|
+
const raw = preferences.verification_max_retries;
|
|
1243
|
+
if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
|
|
1244
|
+
validated.verification_max_retries = Math.floor(raw);
|
|
1245
|
+
} else {
|
|
1246
|
+
errors.push("verification_max_retries must be a non-negative number");
|
|
1247
|
+
}
|
|
1248
|
+
}
|
|
1249
|
+
|
|
1208
1250
|
// ─── Git Preferences ───────────────────────────────────────────────────
|
|
1209
1251
|
if (preferences.git && typeof preferences.git === "object") {
|
|
1210
1252
|
const git: Record<string, unknown> = {};
|
|
@@ -38,15 +38,16 @@ Then:
|
|
|
38
38
|
- Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues
|
|
39
39
|
6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors)
|
|
40
40
|
7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary.
|
|
41
|
-
8.
|
|
41
|
+
8. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section.
|
|
42
|
+
9. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
|
|
42
43
|
- exercise the real flow in the browser
|
|
43
44
|
- prefer `browser_batch` when the next few actions are obvious and sequential
|
|
44
45
|
- prefer `browser_assert` for explicit pass/fail verification of the intended outcome
|
|
45
46
|
- use `browser_diff` when an action's effect is ambiguous
|
|
46
47
|
- use console/network/dialog diagnostics when validating async, stateful, or failure-prone UI
|
|
47
48
|
- record verification in terms of explicit checks passed/failed, not only prose interpretation
|
|
48
|
-
|
|
49
|
-
|
|
49
|
+
10. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
|
|
50
|
+
11. **If execution is running long or verification fails:**
|
|
50
51
|
|
|
51
52
|
**Context budget:** You have approximately **{{verificationBudget}}** reserved for verification context. If you've used most of your context and haven't finished all steps, stop implementing and prioritize writing the task summary with clear notes on what's done and what remains. A partial summary that enables clean resumption is more valuable than one more half-finished step with no documentation. Never sacrifice summary quality for one more implementation step.
|
|
52
53
|
|
|
@@ -37,6 +37,15 @@ blocker_discovered: false
|
|
|
37
37
|
|
|
38
38
|
{{whatWasVerifiedAndHow — commands run, tests passed, behavior confirmed}}
|
|
39
39
|
|
|
40
|
+
## Verification Evidence
|
|
41
|
+
|
|
42
|
+
<!-- Populated from verification gate output. If the gate ran, fill in the table below.
|
|
43
|
+
If no gate ran (e.g., no verification commands discovered), note that. -->
|
|
44
|
+
|
|
45
|
+
| # | Command | Exit Code | Verdict | Duration |
|
|
46
|
+
|---|---------|-----------|---------|----------|
|
|
47
|
+
| {{row}} | {{command}} | {{exitCode}} | {{verdict}} | {{duration}} |
|
|
48
|
+
|
|
40
49
|
## Diagnostics
|
|
41
50
|
|
|
42
51
|
{{howToInspectWhatThisTaskBuiltLater — status surfaces, logs, error shapes, failure artifacts, or none}}
|