gsd-pi 2.63.0-dev.351157b → 2.63.0-dev.d04bbc5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +4 -0
- package/dist/headless-query.js +11 -1
- package/dist/resources/extensions/gsd/auto/detect-stuck.js +27 -0
- package/dist/resources/extensions/gsd/auto/phases.js +34 -0
- package/dist/resources/extensions/gsd/auto/session.js +4 -0
- package/dist/resources/extensions/gsd/auto-model-selection.js +32 -0
- package/dist/resources/extensions/gsd/auto-post-unit.js +79 -0
- package/dist/resources/extensions/gsd/auto-timers.js +2 -1
- package/dist/resources/extensions/gsd/bootstrap/db-tools.js +87 -28
- package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +23 -0
- package/dist/resources/extensions/gsd/bootstrap/system-context.js +30 -2
- package/dist/resources/extensions/gsd/preferences-types.js +1 -0
- package/dist/resources/extensions/gsd/prompt-loader.js +7 -0
- package/dist/resources/extensions/gsd/prompts/system.md +3 -7
- package/dist/resources/extensions/gsd/safety/content-validator.js +73 -0
- package/dist/resources/extensions/gsd/safety/destructive-guard.js +34 -0
- package/dist/resources/extensions/gsd/safety/evidence-collector.js +109 -0
- package/dist/resources/extensions/gsd/safety/evidence-cross-ref.js +83 -0
- package/dist/resources/extensions/gsd/safety/file-change-validator.js +71 -0
- package/dist/resources/extensions/gsd/safety/git-checkpoint.js +91 -0
- package/dist/resources/extensions/gsd/safety/safety-harness.js +64 -0
- package/dist/resources/extensions/ollama/index.js +22 -10
- package/dist/resources/extensions/ollama/ollama-chat-provider.js +1 -1
- package/dist/update-cmd.js +4 -2
- package/dist/web/standalone/.next/BUILD_ID +1 -1
- package/dist/web/standalone/.next/app-path-routes-manifest.json +18 -18
- package/dist/web/standalone/.next/build-manifest.json +2 -2
- package/dist/web/standalone/.next/prerender-manifest.json +3 -3
- package/dist/web/standalone/.next/server/app/_global-error.html +2 -2
- package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.html +1 -1
- package/dist/web/standalone/.next/server/app/index.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app-paths-manifest.json +18 -18
- package/dist/web/standalone/.next/server/pages/404.html +1 -1
- package/dist/web/standalone/.next/server/pages/500.html +2 -2
- package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
- package/dist/welcome-screen.js +1 -1
- package/package.json +1 -1
- package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.d.ts +2 -0
- package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.d.ts.map +1 -0
- package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.js +46 -0
- package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.js.map +1 -0
- package/packages/pi-coding-agent/dist/core/model-registry.d.ts.map +1 -1
- package/packages/pi-coding-agent/dist/core/model-registry.js +11 -0
- package/packages/pi-coding-agent/dist/core/model-registry.js.map +1 -1
- package/packages/pi-coding-agent/dist/core/sdk.d.ts.map +1 -1
- package/packages/pi-coding-agent/dist/core/sdk.js +2 -3
- package/packages/pi-coding-agent/dist/core/sdk.js.map +1 -1
- package/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts +81 -0
- package/packages/pi-coding-agent/src/core/model-registry.ts +12 -0
- package/packages/pi-coding-agent/src/core/sdk.ts +2 -3
- package/src/resources/extensions/gsd/auto/detect-stuck.ts +27 -0
- package/src/resources/extensions/gsd/auto/phases.ts +39 -0
- package/src/resources/extensions/gsd/auto/session.ts +5 -0
- package/src/resources/extensions/gsd/auto-model-selection.ts +36 -0
- package/src/resources/extensions/gsd/auto-post-unit.ts +88 -0
- package/src/resources/extensions/gsd/auto-timers.ts +2 -1
- package/src/resources/extensions/gsd/bootstrap/db-tools.ts +86 -28
- package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +27 -0
- package/src/resources/extensions/gsd/bootstrap/system-context.ts +31 -2
- package/src/resources/extensions/gsd/preferences-types.ts +13 -0
- package/src/resources/extensions/gsd/prompt-loader.ts +8 -0
- package/src/resources/extensions/gsd/prompts/system.md +3 -7
- package/src/resources/extensions/gsd/safety/content-validator.ts +98 -0
- package/src/resources/extensions/gsd/safety/destructive-guard.ts +49 -0
- package/src/resources/extensions/gsd/safety/evidence-collector.ts +151 -0
- package/src/resources/extensions/gsd/safety/evidence-cross-ref.ts +120 -0
- package/src/resources/extensions/gsd/safety/file-change-validator.ts +108 -0
- package/src/resources/extensions/gsd/safety/git-checkpoint.ts +106 -0
- package/src/resources/extensions/gsd/safety/safety-harness.ts +105 -0
- package/src/resources/extensions/gsd/tests/complete-slice-string-coercion.test.ts +211 -0
- package/src/resources/extensions/gsd/tests/flat-rate-routing-guard.test.ts +50 -0
- package/src/resources/extensions/gsd/tests/git-checkpoint.test.ts +94 -0
- package/src/resources/extensions/gsd/tests/stuck-detection-coverage.test.ts +42 -0
- package/src/resources/extensions/gsd/workflow-logger.ts +2 -1
- package/src/resources/extensions/ollama/index.ts +20 -11
- package/src/resources/extensions/ollama/ollama-auth-mode.test.ts +20 -0
- package/src/resources/extensions/ollama/ollama-chat-provider.ts +1 -1
- package/src/resources/extensions/ollama/tests/ollama-chat-provider-stream.test.ts +82 -0
- /package/dist/web/standalone/.next/static/{QmuF-eAbuU_2MQ03t38qr → vIq9fmvRUaFOpguoX5j4W}/_buildManifest.js +0 -0
- /package/dist/web/standalone/.next/static/{QmuF-eAbuU_2MQ03t38qr → vIq9fmvRUaFOpguoX5j4W}/_ssgManifest.js +0 -0
|
@@ -6,6 +6,13 @@
|
|
|
6
6
|
|
|
7
7
|
import type { WindowEntry } from "./types.js";
|
|
8
8
|
|
|
9
|
+
/**
|
|
10
|
+
* Pattern matching ENOENT errors with a file path.
|
|
11
|
+
* Matches: "ENOENT: no such file or directory, access '/path/to/file'"
|
|
12
|
+
* and similar Node.js filesystem error messages.
|
|
13
|
+
*/
|
|
14
|
+
const ENOENT_PATH_RE = /ENOENT[^']*'([^']+)'/;
|
|
15
|
+
|
|
9
16
|
/**
|
|
10
17
|
* Analyze a sliding window of recent unit dispatches for stuck patterns.
|
|
11
18
|
* Returns a signal with reason if stuck, null otherwise.
|
|
@@ -13,6 +20,8 @@ import type { WindowEntry } from "./types.js";
|
|
|
13
20
|
* Rule 1: Same error string twice in a row → stuck immediately.
|
|
14
21
|
* Rule 2: Same unit key 3+ consecutive times → stuck (preserves prior behavior).
|
|
15
22
|
* Rule 3: Oscillation A→B→A→B in last 4 entries → stuck.
|
|
23
|
+
* Rule 4: Same ENOENT path in any 2 entries within the window → stuck (#3575).
|
|
24
|
+
* Missing files don't self-heal between retries — retrying wastes budget.
|
|
16
25
|
*/
|
|
17
26
|
export function detectStuck(
|
|
18
27
|
window: readonly WindowEntry[],
|
|
@@ -56,5 +65,23 @@ export function detectStuck(
|
|
|
56
65
|
}
|
|
57
66
|
}
|
|
58
67
|
|
|
68
|
+
// Rule 4: Same ENOENT path seen twice in window (#3575)
|
|
69
|
+
// Missing files don't appear between retries — stop immediately.
|
|
70
|
+
const enoentPaths = new Map<string, number>();
|
|
71
|
+
for (const entry of window) {
|
|
72
|
+
if (!entry.error) continue;
|
|
73
|
+
const match = ENOENT_PATH_RE.exec(entry.error);
|
|
74
|
+
if (!match) continue;
|
|
75
|
+
const filePath = match[1];
|
|
76
|
+
const count = (enoentPaths.get(filePath) ?? 0) + 1;
|
|
77
|
+
if (count >= 2) {
|
|
78
|
+
return {
|
|
79
|
+
stuck: true,
|
|
80
|
+
reason: `Missing file referenced twice: ${filePath} (ENOENT)`,
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
enoentPaths.set(filePath, count);
|
|
84
|
+
}
|
|
85
|
+
|
|
59
86
|
return null;
|
|
60
87
|
}
|
|
@@ -37,6 +37,9 @@ import { withTimeout, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
|
|
|
37
37
|
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
|
|
38
38
|
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
|
|
39
39
|
import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
|
|
40
|
+
import { resetEvidence } from "../safety/evidence-collector.js";
|
|
41
|
+
import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
|
|
42
|
+
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
|
|
40
43
|
|
|
41
44
|
// ─── generateMilestoneReport ──────────────────────────────────────────────────
|
|
42
45
|
|
|
@@ -1079,6 +1082,21 @@ export async function runUnitPhase(
|
|
|
1079
1082
|
if (mid)
|
|
1080
1083
|
deps.updateSliceProgressCache(s.basePath, mid, state.activeSlice?.id);
|
|
1081
1084
|
|
|
1085
|
+
// ── Safety harness: reset evidence + create checkpoint ──
|
|
1086
|
+
const safetyConfig = resolveSafetyHarnessConfig(
|
|
1087
|
+
prefs?.safety_harness as Record<string, unknown> | undefined,
|
|
1088
|
+
);
|
|
1089
|
+
if (safetyConfig.enabled && safetyConfig.evidence_collection) {
|
|
1090
|
+
resetEvidence();
|
|
1091
|
+
}
|
|
1092
|
+
// Only checkpoint code-executing units (not lifecycle/planning units)
|
|
1093
|
+
if (safetyConfig.enabled && safetyConfig.checkpoints && unitType === "execute-task") {
|
|
1094
|
+
s.checkpointSha = createCheckpoint(s.basePath, unitId);
|
|
1095
|
+
if (s.checkpointSha) {
|
|
1096
|
+
debugLog("runUnitPhase", { phase: "checkpoint-created", unitId, sha: s.checkpointSha.slice(0, 8) });
|
|
1097
|
+
}
|
|
1098
|
+
}
|
|
1099
|
+
|
|
1082
1100
|
// Prompt injection
|
|
1083
1101
|
let finalPrompt = prompt;
|
|
1084
1102
|
|
|
@@ -1376,6 +1394,27 @@ export async function runUnitPhase(
|
|
|
1376
1394
|
|
|
1377
1395
|
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: ic.nextSeq(), eventType: "unit-end", data: { unitType, unitId, status: unitResult.status, artifactVerified, ...(unitResult.errorContext ? { errorContext: unitResult.errorContext } : {}) }, causedBy: { flowId: ic.flowId, seq: unitStartSeq } });
|
|
1378
1396
|
|
|
1397
|
+
// ── Safety harness: checkpoint cleanup or rollback ──
|
|
1398
|
+
if (s.checkpointSha) {
|
|
1399
|
+
if (unitResult.status === "error" && safetyConfig.auto_rollback) {
|
|
1400
|
+
const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
|
|
1401
|
+
if (rolled) {
|
|
1402
|
+
ctx.ui.notify(`Rolled back to pre-unit checkpoint for ${unitId}`, "info");
|
|
1403
|
+
debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
|
|
1404
|
+
}
|
|
1405
|
+
} else if (unitResult.status === "error") {
|
|
1406
|
+
ctx.ui.notify(
|
|
1407
|
+
`Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`,
|
|
1408
|
+
"warning",
|
|
1409
|
+
);
|
|
1410
|
+
} else {
|
|
1411
|
+
// Success — clean up checkpoint ref
|
|
1412
|
+
cleanupCheckpoint(s.basePath, unitId);
|
|
1413
|
+
debugLog("runUnitPhase", { phase: "checkpoint-cleaned", unitId });
|
|
1414
|
+
}
|
|
1415
|
+
s.checkpointSha = null;
|
|
1416
|
+
}
|
|
1417
|
+
|
|
1379
1418
|
return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
|
|
1380
1419
|
}
|
|
1381
1420
|
|
|
@@ -145,6 +145,10 @@ export class AutoSession {
|
|
|
145
145
|
lastBaselineCharCount: number | undefined;
|
|
146
146
|
pendingQuickTasks: CaptureEntry[] = [];
|
|
147
147
|
|
|
148
|
+
// ── Safety harness ───────────────────────────────────────────────────────
|
|
149
|
+
/** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
|
|
150
|
+
checkpointSha: string | null = null;
|
|
151
|
+
|
|
148
152
|
// ── Signal handler ───────────────────────────────────────────────────────
|
|
149
153
|
sigtermHandler: (() => void) | null = null;
|
|
150
154
|
|
|
@@ -223,6 +227,7 @@ export class AutoSession {
|
|
|
223
227
|
this.lastToolInvocationError = null;
|
|
224
228
|
this.isolationDegraded = false;
|
|
225
229
|
this.milestoneMergedInPhases = false;
|
|
230
|
+
this.checkpointSha = null;
|
|
226
231
|
|
|
227
232
|
// Signal handler
|
|
228
233
|
this.sigtermHandler = null;
|
|
@@ -31,6 +31,9 @@ export function resolvePreferredModelConfig(
|
|
|
31
31
|
const routingConfig = resolveDynamicRoutingConfig();
|
|
32
32
|
if (!routingConfig.enabled || !routingConfig.tier_models) return undefined;
|
|
33
33
|
|
|
34
|
+
// Don't synthesize a routing config for flat-rate providers (#3453).
|
|
35
|
+
if (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider)) return undefined;
|
|
36
|
+
|
|
34
37
|
const ceilingModel = routingConfig.tier_models.heavy
|
|
35
38
|
?? (autoModeStartModel ? `${autoModeStartModel.provider}/${autoModeStartModel.id}` : undefined);
|
|
36
39
|
if (!ceilingModel) return undefined;
|
|
@@ -71,6 +74,27 @@ export async function selectAndApplyModel(
|
|
|
71
74
|
let effectiveModelConfig = modelConfig;
|
|
72
75
|
let routingTierLabel = "";
|
|
73
76
|
|
|
77
|
+
// Disable routing for flat-rate providers like GitHub Copilot (#3453).
|
|
78
|
+
// All models cost the same per request, so downgrading to a cheaper
|
|
79
|
+
// model provides no cost benefit — it only degrades quality.
|
|
80
|
+
// Fail-closed: if primary model can't be resolved, fall back to
|
|
81
|
+
// provider-level signals rather than allowing unwanted downgrades.
|
|
82
|
+
if (routingConfig.enabled) {
|
|
83
|
+
const primaryModel = resolveModelId(modelConfig.primary, availableModels, ctx.model?.provider);
|
|
84
|
+
if (primaryModel) {
|
|
85
|
+
if (isFlatRateProvider(primaryModel.provider)) {
|
|
86
|
+
routingConfig.enabled = false;
|
|
87
|
+
}
|
|
88
|
+
} else if (
|
|
89
|
+
(autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider))
|
|
90
|
+
|| (ctx.model?.provider && isFlatRateProvider(ctx.model.provider))
|
|
91
|
+
) {
|
|
92
|
+
// Primary model unresolvable but provider signals indicate flat-rate —
|
|
93
|
+
// disable routing to prevent quality degradation.
|
|
94
|
+
routingConfig.enabled = false;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
74
98
|
if (routingConfig.enabled) {
|
|
75
99
|
let budgetPct: number | undefined;
|
|
76
100
|
if (routingConfig.budget_pressure !== false) {
|
|
@@ -320,3 +344,15 @@ export function resolveModelId<T extends { id: string; provider: string }>(
|
|
|
320
344
|
// Fall back to first non-extension candidate, or any candidate
|
|
321
345
|
return candidates.find(m => !EXTENSION_PROVIDERS.has(m.provider)) ?? candidates[0];
|
|
322
346
|
}
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* Flat-rate providers charge the same per request regardless of model.
|
|
350
|
+
* Dynamic routing provides no cost benefit — it only degrades quality (#3453).
|
|
351
|
+
* Uses case-insensitive matching with alias support to prevent fail-open on
|
|
352
|
+
* provider naming variations (e.g. "copilot" vs "github-copilot").
|
|
353
|
+
*/
|
|
354
|
+
const FLAT_RATE_PROVIDERS = new Set(["github-copilot", "copilot"]);
|
|
355
|
+
|
|
356
|
+
export function isFlatRateProvider(provider: string): boolean {
|
|
357
|
+
return FLAT_RATE_PROVIDERS.has(provider.toLowerCase());
|
|
358
|
+
}
|
|
@@ -52,6 +52,13 @@ import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures
|
|
|
52
52
|
import { debugLog } from "./debug-logger.js";
|
|
53
53
|
import { runSafely } from "./auto-utils.js";
|
|
54
54
|
import type { AutoSession, SidecarItem } from "./auto/session.js";
|
|
55
|
+
import { getEvidence } from "./safety/evidence-collector.js";
|
|
56
|
+
import { validateFileChanges } from "./safety/file-change-validator.js";
|
|
57
|
+
// crossReferenceEvidence available for future use when verification_evidence is stored in DB
|
|
58
|
+
// import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
|
|
59
|
+
import { validateContent } from "./safety/content-validator.js";
|
|
60
|
+
import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js";
|
|
61
|
+
import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js";
|
|
55
62
|
|
|
56
63
|
/** Maximum verification retry attempts before escalating to blocker placeholder (#2653). */
|
|
57
64
|
const MAX_VERIFICATION_RETRIES = 3;
|
|
@@ -437,6 +444,87 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
|
|
|
437
444
|
debugLog("postUnit", { phase: "rogue-detection", error: String(e) });
|
|
438
445
|
}
|
|
439
446
|
|
|
447
|
+
// ── Safety harness: post-unit validation ──
|
|
448
|
+
try {
|
|
449
|
+
const { loadEffectiveGSDPreferences } = await import("./preferences.js");
|
|
450
|
+
const prefs = loadEffectiveGSDPreferences()?.preferences;
|
|
451
|
+
const safetyConfig = resolveSafetyHarnessConfig(
|
|
452
|
+
prefs?.safety_harness as Record<string, unknown> | undefined,
|
|
453
|
+
);
|
|
454
|
+
|
|
455
|
+
if (safetyConfig.enabled) {
|
|
456
|
+
const { milestone: sMid, slice: sSid, task: sTid } = parseUnitId(s.currentUnit.id);
|
|
457
|
+
|
|
458
|
+
// File change validation (execute-task only, after auto-commit)
|
|
459
|
+
if (safetyConfig.file_change_validation && s.currentUnit.type === "execute-task" && sMid && sSid && sTid && isDbAvailable()) {
|
|
460
|
+
try {
|
|
461
|
+
const taskRow = getTask(sMid, sSid, sTid);
|
|
462
|
+
if (taskRow) {
|
|
463
|
+
const expectedOutput = taskRow.expected_output ?? [];
|
|
464
|
+
const plannedFiles = taskRow.files ?? [];
|
|
465
|
+
const audit = validateFileChanges(s.basePath, expectedOutput, plannedFiles);
|
|
466
|
+
if (audit && audit.violations.length > 0) {
|
|
467
|
+
const warnings = audit.violations.filter(v => v.severity === "warning");
|
|
468
|
+
for (const v of warnings) {
|
|
469
|
+
logWarning("safety", `file-change: ${v.file} — ${v.reason}`);
|
|
470
|
+
}
|
|
471
|
+
if (warnings.length > 0) {
|
|
472
|
+
ctx.ui.notify(
|
|
473
|
+
`Safety: ${warnings.length} unexpected file change(s) outside task plan`,
|
|
474
|
+
"warning",
|
|
475
|
+
);
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
} catch (e) {
|
|
480
|
+
debugLog("postUnit", { phase: "safety-file-change", error: String(e) });
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
// Evidence cross-reference (execute-task only)
|
|
485
|
+
// Verification evidence is passed via the complete-task tool call and
|
|
486
|
+
// stored in the SUMMARY.md on disk — not available as structured data
|
|
487
|
+
// in the DB. The evidence collector tracks actual bash tool calls, so
|
|
488
|
+
// we can still detect units that claimed success but ran no commands.
|
|
489
|
+
if (safetyConfig.evidence_cross_reference && s.currentUnit.type === "execute-task") {
|
|
490
|
+
try {
|
|
491
|
+
const actual = getEvidence();
|
|
492
|
+
const bashCalls = actual.filter(e => e.kind === "bash");
|
|
493
|
+
// If the task is marked complete but zero bash commands were run,
|
|
494
|
+
// it's suspicious — the LLM may have fabricated results.
|
|
495
|
+
if (sMid && sSid && sTid && isDbAvailable()) {
|
|
496
|
+
const taskRow = getTask(sMid, sSid, sTid);
|
|
497
|
+
if (taskRow?.status === "complete" && taskRow.verify && bashCalls.length === 0) {
|
|
498
|
+
logWarning("safety", "task marked complete with verification commands but no bash calls were executed");
|
|
499
|
+
ctx.ui.notify(
|
|
500
|
+
`Safety: task ${sTid} has verification commands but no bash calls were recorded`,
|
|
501
|
+
"warning",
|
|
502
|
+
);
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
} catch (e) {
|
|
506
|
+
debugLog("postUnit", { phase: "safety-evidence-xref", error: String(e) });
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
// Content validation (plan-slice, plan-milestone)
|
|
511
|
+
if (safetyConfig.content_validation) {
|
|
512
|
+
try {
|
|
513
|
+
const artifactPath = resolveArtifactForContent(s.currentUnit.type, s.currentUnit.id, s.basePath);
|
|
514
|
+
const contentViolations = validateContent(s.currentUnit.type, artifactPath);
|
|
515
|
+
for (const v of contentViolations) {
|
|
516
|
+
logWarning("safety", `content: ${v.reason}`);
|
|
517
|
+
ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
|
|
518
|
+
}
|
|
519
|
+
} catch (e) {
|
|
520
|
+
debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
} catch (e) {
|
|
525
|
+
debugLog("postUnit", { phase: "safety-harness", error: String(e) });
|
|
526
|
+
}
|
|
527
|
+
|
|
440
528
|
// Artifact verification
|
|
441
529
|
let triggerArtifactVerified = false;
|
|
442
530
|
if (!s.currentUnit.type.startsWith("hook/")) {
|
|
@@ -106,8 +106,9 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
|
|
|
106
106
|
}
|
|
107
107
|
}
|
|
108
108
|
const estimateMinutes = taskEstimate ? parseEstimateMinutes(taskEstimate) : null;
|
|
109
|
+
const MAX_TIMEOUT_SCALE = 6; // Cap at 6x (60min task). Prevents 2h+ tasks from creating 120min+ timeout windows.
|
|
109
110
|
const timeoutScale = estimateMinutes && estimateMinutes > 0
|
|
110
|
-
? Math.max(1, estimateMinutes / 10)
|
|
111
|
+
? Math.min(MAX_TIMEOUT_SCALE, Math.max(1, estimateMinutes / 10))
|
|
111
112
|
: 1;
|
|
112
113
|
|
|
113
114
|
const softTimeoutMs = (supervisor.soft_timeout_minutes ?? 0) * 60 * 1000 * timeoutScale;
|
|
@@ -704,8 +704,14 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|
|
704
704
|
};
|
|
705
705
|
}
|
|
706
706
|
try {
|
|
707
|
+
// Coerce string items to objects for verificationEvidence (#3541).
|
|
708
|
+
const coerced = { ...params };
|
|
709
|
+
coerced.verificationEvidence = (params.verificationEvidence ?? []).map((v: any) =>
|
|
710
|
+
typeof v === "string" ? { command: v, exitCode: -1, verdict: "unknown (coerced from string)", durationMs: 0 } : v,
|
|
711
|
+
);
|
|
712
|
+
|
|
707
713
|
const { handleCompleteTask } = await import("../tools/complete-task.js");
|
|
708
|
-
const result = await handleCompleteTask(
|
|
714
|
+
const result = await handleCompleteTask(coerced, process.cwd());
|
|
709
715
|
if ("error" in result) {
|
|
710
716
|
return {
|
|
711
717
|
content: [{ type: "text" as const, text: `Error completing task: ${result.error}` }],
|
|
@@ -761,12 +767,15 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|
|
761
767
|
keyDecisions: Type.Optional(Type.Array(Type.String(), { description: "List of key decisions made during this task" })),
|
|
762
768
|
blockerDiscovered: Type.Optional(Type.Boolean({ description: "Whether a plan-invalidating blocker was discovered" })),
|
|
763
769
|
verificationEvidence: Type.Optional(Type.Array(
|
|
764
|
-
Type.
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
+
Type.Union([
|
|
771
|
+
Type.Object({
|
|
772
|
+
command: Type.String({ description: "Verification command that was run" }),
|
|
773
|
+
exitCode: Type.Number({ description: "Exit code of the command" }),
|
|
774
|
+
verdict: Type.String({ description: "Pass/fail verdict (e.g. '✅ pass', '❌ fail')" }),
|
|
775
|
+
durationMs: Type.Number({ description: "Duration of the command in milliseconds" }),
|
|
776
|
+
}),
|
|
777
|
+
Type.String({ description: "Fallback: verification summary string" }),
|
|
778
|
+
]),
|
|
770
779
|
{ description: "Array of verification evidence entries" },
|
|
771
780
|
)),
|
|
772
781
|
}),
|
|
@@ -787,8 +796,42 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|
|
787
796
|
};
|
|
788
797
|
}
|
|
789
798
|
try {
|
|
799
|
+
// Coerce string items to objects for fields where LLMs sometimes pass
|
|
800
|
+
// plain strings instead of the expected { key, value } shape (#3541).
|
|
801
|
+
// Parses "key — value" or "key - value" format when possible.
|
|
802
|
+
const splitPair = (s: string): [string, string] => {
|
|
803
|
+
const m = s.match(/^(.+?)\s*(?:—|-)\s+(.+)$/);
|
|
804
|
+
return m ? [m[1].trim(), m[2].trim()] : [s.trim(), ""];
|
|
805
|
+
};
|
|
806
|
+
const coerced = { ...params };
|
|
807
|
+
coerced.filesModified = (params.filesModified ?? []).map((f: any) => {
|
|
808
|
+
if (typeof f !== "string") return f;
|
|
809
|
+
const [path, description] = splitPair(f);
|
|
810
|
+
return { path, description };
|
|
811
|
+
});
|
|
812
|
+
coerced.requires = (params.requires ?? []).map((r: any) => {
|
|
813
|
+
if (typeof r !== "string") return r;
|
|
814
|
+
const [slice, provides] = splitPair(r);
|
|
815
|
+
return { slice, provides };
|
|
816
|
+
});
|
|
817
|
+
coerced.requirementsAdvanced = (params.requirementsAdvanced ?? []).map((r: any) => {
|
|
818
|
+
if (typeof r !== "string") return r;
|
|
819
|
+
const [id, how] = splitPair(r);
|
|
820
|
+
return { id, how };
|
|
821
|
+
});
|
|
822
|
+
coerced.requirementsValidated = (params.requirementsValidated ?? []).map((r: any) => {
|
|
823
|
+
if (typeof r !== "string") return r;
|
|
824
|
+
const [id, proof] = splitPair(r);
|
|
825
|
+
return { id, proof };
|
|
826
|
+
});
|
|
827
|
+
coerced.requirementsInvalidated = (params.requirementsInvalidated ?? []).map((r: any) => {
|
|
828
|
+
if (typeof r !== "string") return r;
|
|
829
|
+
const [id, what] = splitPair(r);
|
|
830
|
+
return { id, what };
|
|
831
|
+
});
|
|
832
|
+
|
|
790
833
|
const { handleCompleteSlice } = await import("../tools/complete-slice.js");
|
|
791
|
-
const result = await handleCompleteSlice(
|
|
834
|
+
const result = await handleCompleteSlice(coerced, process.cwd());
|
|
792
835
|
if ("error" in result) {
|
|
793
836
|
return {
|
|
794
837
|
content: [{ type: "text" as const, text: `Error completing slice: ${result.error}` }],
|
|
@@ -850,38 +893,53 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|
|
850
893
|
drillDownPaths: Type.Optional(Type.Array(Type.String(), { description: "Paths to task summaries for drill-down" })),
|
|
851
894
|
affects: Type.Optional(Type.Array(Type.String(), { description: "Downstream slices affected" })),
|
|
852
895
|
requirementsAdvanced: Type.Optional(Type.Array(
|
|
853
|
-
Type.
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
896
|
+
Type.Union([
|
|
897
|
+
Type.Object({
|
|
898
|
+
id: Type.String({ description: "Requirement ID" }),
|
|
899
|
+
how: Type.String({ description: "How it was advanced" }),
|
|
900
|
+
}),
|
|
901
|
+
Type.String({ description: "Fallback: 'ID — how' string" }),
|
|
902
|
+
]),
|
|
857
903
|
{ description: "Requirements advanced by this slice" },
|
|
858
904
|
)),
|
|
859
905
|
requirementsValidated: Type.Optional(Type.Array(
|
|
860
|
-
Type.
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
906
|
+
Type.Union([
|
|
907
|
+
Type.Object({
|
|
908
|
+
id: Type.String({ description: "Requirement ID" }),
|
|
909
|
+
proof: Type.String({ description: "What proof validates it" }),
|
|
910
|
+
}),
|
|
911
|
+
Type.String({ description: "Fallback: 'ID — proof' string" }),
|
|
912
|
+
]),
|
|
864
913
|
{ description: "Requirements validated by this slice" },
|
|
865
914
|
)),
|
|
866
915
|
requirementsInvalidated: Type.Optional(Type.Array(
|
|
867
|
-
Type.
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
916
|
+
Type.Union([
|
|
917
|
+
Type.Object({
|
|
918
|
+
id: Type.String({ description: "Requirement ID" }),
|
|
919
|
+
what: Type.String({ description: "What changed" }),
|
|
920
|
+
}),
|
|
921
|
+
Type.String({ description: "Fallback: 'ID — what' string" }),
|
|
922
|
+
]),
|
|
871
923
|
{ description: "Requirements invalidated or re-scoped" },
|
|
872
924
|
)),
|
|
873
925
|
filesModified: Type.Optional(Type.Array(
|
|
874
|
-
Type.
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
926
|
+
Type.Union([
|
|
927
|
+
Type.Object({
|
|
928
|
+
path: Type.String({ description: "File path" }),
|
|
929
|
+
description: Type.String({ description: "What changed" }),
|
|
930
|
+
}),
|
|
931
|
+
Type.String({ description: "Fallback: file path string" }),
|
|
932
|
+
]),
|
|
878
933
|
{ description: "Files modified with descriptions" },
|
|
879
934
|
)),
|
|
880
935
|
requires: Type.Optional(Type.Array(
|
|
881
|
-
Type.
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
936
|
+
Type.Union([
|
|
937
|
+
Type.Object({
|
|
938
|
+
slice: Type.String({ description: "Dependency slice ID" }),
|
|
939
|
+
provides: Type.String({ description: "What was consumed from it" }),
|
|
940
|
+
}),
|
|
941
|
+
Type.String({ description: "Fallback: slice ID string" }),
|
|
942
|
+
]),
|
|
885
943
|
{ description: "Upstream slice dependencies consumed" },
|
|
886
944
|
)),
|
|
887
945
|
}),
|
|
@@ -18,6 +18,9 @@ import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"
|
|
|
18
18
|
import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
|
|
19
19
|
import { saveActivityLog } from "../activity-log.js";
|
|
20
20
|
import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
|
|
21
|
+
import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult } from "../safety/evidence-collector.js";
|
|
22
|
+
import { classifyCommand } from "../safety/destructive-guard.js";
|
|
23
|
+
import { logWarning as safetyLogWarning } from "../workflow-logger.js";
|
|
21
24
|
|
|
22
25
|
// Skip the welcome screen on the very first session_start — cli.ts already
|
|
23
26
|
// printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
|
|
@@ -203,6 +206,26 @@ export function registerHooks(pi: ExtensionAPI): void {
|
|
|
203
206
|
if (result.block) return result;
|
|
204
207
|
});
|
|
205
208
|
|
|
209
|
+
// ── Safety harness: evidence collection + destructive command warnings ──
|
|
210
|
+
pi.on("tool_call", async (event, ctx) => {
|
|
211
|
+
if (!isAutoActive()) return;
|
|
212
|
+
safetyRecordToolCall(event.toolName, event.input as Record<string, unknown>);
|
|
213
|
+
|
|
214
|
+
// Destructive command classification (warn only, never block)
|
|
215
|
+
if (isToolCallEventType("bash", event)) {
|
|
216
|
+
const classification = classifyCommand(event.input.command);
|
|
217
|
+
if (classification.destructive) {
|
|
218
|
+
safetyLogWarning("safety", `destructive command: ${classification.labels.join(", ")}`, {
|
|
219
|
+
command: String(event.input.command).slice(0, 200),
|
|
220
|
+
});
|
|
221
|
+
ctx.ui.notify(
|
|
222
|
+
`Destructive command detected: ${classification.labels.join(", ")}`,
|
|
223
|
+
"warning",
|
|
224
|
+
);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
});
|
|
228
|
+
|
|
206
229
|
pi.on("tool_result", async (event) => {
|
|
207
230
|
if (event.toolName !== "ask_user_questions") return;
|
|
208
231
|
const milestoneId = getDiscussionMilestoneId();
|
|
@@ -268,6 +291,10 @@ export function registerHooks(pi: ExtensionAPI): void {
|
|
|
268
291
|
: (typeof event.result?.content?.[0]?.text === "string" ? event.result.content[0].text : String(event.result));
|
|
269
292
|
recordToolInvocationError(event.toolName, errorText);
|
|
270
293
|
}
|
|
294
|
+
// Safety harness: record tool execution results for evidence cross-referencing
|
|
295
|
+
if (isAutoActive()) {
|
|
296
|
+
safetyRecordToolResult(event.toolCallId, event.toolName, event.result, event.isError);
|
|
297
|
+
}
|
|
271
298
|
});
|
|
272
299
|
|
|
273
300
|
pi.on("model_select", async (_event, ctx) => {
|
|
@@ -6,9 +6,10 @@ import type { ExtensionContext } from "@gsd/pi-coding-agent";
|
|
|
6
6
|
|
|
7
7
|
import { logWarning } from "../workflow-logger.js";
|
|
8
8
|
import { debugTime } from "../debug-logger.js";
|
|
9
|
-
import { loadPrompt } from "../prompt-loader.js";
|
|
9
|
+
import { loadPrompt, getTemplatesDir } from "../prompt-loader.js";
|
|
10
10
|
import { readForensicsMarker } from "../forensics.js";
|
|
11
11
|
import { resolveAllSkillReferences, renderPreferencesForSystemPrompt, loadEffectiveGSDPreferences } from "../preferences.js";
|
|
12
|
+
import { resolveSkillReference } from "../preferences-skills.js";
|
|
12
13
|
import { resolveGsdRootFile, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTaskFiles, resolveTasksDir, relSliceFile, relSlicePath, relTaskFile } from "../paths.js";
|
|
13
14
|
import { hasSkillSnapshot, detectNewSkills, formatSkillsXml } from "../skill-discovery.js";
|
|
14
15
|
import { getActiveAutoWorktreeContext } from "../auto-worktree.js";
|
|
@@ -20,6 +21,31 @@ import { markCmuxPromptShown, shouldPromptToEnableCmux } from "../../cmux/index.
|
|
|
20
21
|
|
|
21
22
|
const gsdHome = process.env.GSD_HOME || join(homedir(), ".gsd");
|
|
22
23
|
|
|
24
|
+
/**
|
|
25
|
+
* Bundled skill triggers — resolved dynamically at runtime instead of
|
|
26
|
+
* hardcoding absolute paths in the system prompt template. Only skills
|
|
27
|
+
* that actually exist on disk are included in the table. (#3575)
|
|
28
|
+
*/
|
|
29
|
+
const BUNDLED_SKILL_TRIGGERS: Array<{ trigger: string; skill: string }> = [
|
|
30
|
+
{ trigger: "Frontend UI - web components, pages, landing pages, dashboards, React/HTML/CSS, styling", skill: "frontend-design" },
|
|
31
|
+
{ trigger: "macOS or iOS apps - SwiftUI, Xcode, App Store", skill: "swiftui" },
|
|
32
|
+
{ trigger: "Debugging - complex bugs, failing tests, root-cause investigation after standard approaches fail", skill: "debug-like-expert" },
|
|
33
|
+
];
|
|
34
|
+
|
|
35
|
+
function buildBundledSkillsTable(): string {
|
|
36
|
+
const cwd = process.cwd();
|
|
37
|
+
const rows: string[] = [];
|
|
38
|
+
for (const { trigger, skill } of BUNDLED_SKILL_TRIGGERS) {
|
|
39
|
+
const resolution = resolveSkillReference(skill, cwd);
|
|
40
|
+
if (resolution.method === "unresolved") continue; // skill not installed — omit from prompt
|
|
41
|
+
rows.push(`| ${trigger} | \`${resolution.resolvedPath}\` |`);
|
|
42
|
+
}
|
|
43
|
+
if (rows.length === 0) {
|
|
44
|
+
return "*No bundled skills found. Install skills to `~/.agents/skills/` or `~/.claude/skills/`.*";
|
|
45
|
+
}
|
|
46
|
+
return `| Trigger | Skill to load |\n|---|---|\n${rows.join("\n")}`;
|
|
47
|
+
}
|
|
48
|
+
|
|
23
49
|
function warnDeprecatedAgentInstructions(): void {
|
|
24
50
|
const paths = [
|
|
25
51
|
join(gsdHome, "agent-instructions.md"),
|
|
@@ -43,7 +69,10 @@ export async function buildBeforeAgentStartResult(
|
|
|
43
69
|
if (!existsSync(join(process.cwd(), ".gsd"))) return undefined;
|
|
44
70
|
|
|
45
71
|
const stopContextTimer = debugTime("context-inject");
|
|
46
|
-
const systemContent = loadPrompt("system"
|
|
72
|
+
const systemContent = loadPrompt("system", {
|
|
73
|
+
bundledSkillsTable: buildBundledSkillsTable(),
|
|
74
|
+
templatesDir: getTemplatesDir(),
|
|
75
|
+
});
|
|
47
76
|
const loadedPreferences = loadEffectiveGSDPreferences();
|
|
48
77
|
if (shouldPromptToEnableCmux(loadedPreferences?.preferences)) {
|
|
49
78
|
markCmuxPromptShown();
|
|
@@ -105,6 +105,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
|
|
|
105
105
|
"experimental",
|
|
106
106
|
"codebase",
|
|
107
107
|
"slice_parallel",
|
|
108
|
+
"safety_harness",
|
|
108
109
|
]);
|
|
109
110
|
|
|
110
111
|
/** Canonical list of all dispatch unit types. */
|
|
@@ -291,6 +292,18 @@ export interface GSDPreferences {
|
|
|
291
292
|
codebase?: CodebaseMapPreferences;
|
|
292
293
|
/** Slice-level parallelism within a milestone. Disabled by default. */
|
|
293
294
|
slice_parallel?: { enabled?: boolean; max_workers?: number };
|
|
295
|
+
/** LLM safety harness configuration. Monitors, validates, and constrains LLM behavior during auto-mode. Enabled by default with warn-and-continue policy. */
|
|
296
|
+
safety_harness?: {
|
|
297
|
+
enabled?: boolean;
|
|
298
|
+
evidence_collection?: boolean;
|
|
299
|
+
file_change_validation?: boolean;
|
|
300
|
+
evidence_cross_reference?: boolean;
|
|
301
|
+
destructive_command_warnings?: boolean;
|
|
302
|
+
content_validation?: boolean;
|
|
303
|
+
checkpoints?: boolean;
|
|
304
|
+
auto_rollback?: boolean;
|
|
305
|
+
timeout_scale_cap?: number;
|
|
306
|
+
};
|
|
294
307
|
}
|
|
295
308
|
|
|
296
309
|
export interface LoadedGSDPreferences {
|
|
@@ -51,6 +51,14 @@ const __extensionDir = resolveExtensionDir();
|
|
|
51
51
|
const promptsDir = join(__extensionDir, "prompts");
|
|
52
52
|
const templatesDir = join(__extensionDir, "templates");
|
|
53
53
|
|
|
54
|
+
/**
|
|
55
|
+
* Return the resolved templates directory path for use in prompts.
|
|
56
|
+
* Avoids hardcoding `~/.gsd/agent/extensions/gsd/templates/` in templates. (#3575)
|
|
57
|
+
*/
|
|
58
|
+
export function getTemplatesDir(): string {
|
|
59
|
+
return templatesDir;
|
|
60
|
+
}
|
|
61
|
+
|
|
54
62
|
// Cache all templates eagerly at module load — a running session uses the
|
|
55
63
|
// template versions that were on disk at startup, immune to later overwrites.
|
|
56
64
|
const templateCache = new Map<string, string>();
|
|
@@ -24,13 +24,9 @@ Leave the project in a state where the next agent can immediately understand wha
|
|
|
24
24
|
|
|
25
25
|
## Skills
|
|
26
26
|
|
|
27
|
-
GSD ships with bundled skills. Load the relevant skill file with the `read` tool before starting work when the task matches.
|
|
27
|
+
GSD ships with bundled skills. Load the relevant skill file with the `read` tool before starting work when the task matches. Use bare skill names — GSD resolves them to the correct path automatically.
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
|---|---|
|
|
31
|
-
| Frontend UI - web components, pages, landing pages, dashboards, React/HTML/CSS, styling | `~/.gsd/agent/skills/frontend-design/SKILL.md` |
|
|
32
|
-
| macOS or iOS apps - SwiftUI, Xcode, App Store | `~/.gsd/agent/skills/swiftui/SKILL.md` |
|
|
33
|
-
| Debugging - complex bugs, failing tests, root-cause investigation after standard approaches fail | `~/.gsd/agent/skills/debug-like-expert/SKILL.md` |
|
|
29
|
+
{{bundledSkillsTable}}
|
|
34
30
|
|
|
35
31
|
## Hard Rules
|
|
36
32
|
|
|
@@ -119,7 +115,7 @@ In all modes, slices commit sequentially on the active branch; there are no per-
|
|
|
119
115
|
### Artifact Templates
|
|
120
116
|
|
|
121
117
|
Templates showing the expected format for each artifact type are in:
|
|
122
|
-
|
|
118
|
+
`{{templatesDir}}`
|
|
123
119
|
|
|
124
120
|
**Always read the relevant template before writing an artifact** to match the expected structure exactly. The parsers that read these files depend on specific formatting:
|
|
125
121
|
|