gsd-pi 2.63.0-dev.351157b → 2.63.0-dev.786f0ff

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/dist/cli.js +4 -0
  2. package/dist/headless-query.js +11 -1
  3. package/dist/resources/extensions/gsd/auto/detect-stuck.js +27 -0
  4. package/dist/resources/extensions/gsd/auto/phases.js +34 -0
  5. package/dist/resources/extensions/gsd/auto/session.js +4 -0
  6. package/dist/resources/extensions/gsd/auto-model-selection.js +32 -0
  7. package/dist/resources/extensions/gsd/auto-post-unit.js +79 -0
  8. package/dist/resources/extensions/gsd/auto-timers.js +2 -1
  9. package/dist/resources/extensions/gsd/bootstrap/db-tools.js +87 -28
  10. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +23 -0
  11. package/dist/resources/extensions/gsd/bootstrap/system-context.js +30 -2
  12. package/dist/resources/extensions/gsd/preferences-types.js +1 -0
  13. package/dist/resources/extensions/gsd/prompt-loader.js +7 -0
  14. package/dist/resources/extensions/gsd/prompts/system.md +3 -7
  15. package/dist/resources/extensions/gsd/safety/content-validator.js +73 -0
  16. package/dist/resources/extensions/gsd/safety/destructive-guard.js +34 -0
  17. package/dist/resources/extensions/gsd/safety/evidence-collector.js +109 -0
  18. package/dist/resources/extensions/gsd/safety/evidence-cross-ref.js +83 -0
  19. package/dist/resources/extensions/gsd/safety/file-change-validator.js +71 -0
  20. package/dist/resources/extensions/gsd/safety/git-checkpoint.js +91 -0
  21. package/dist/resources/extensions/gsd/safety/safety-harness.js +64 -0
  22. package/dist/resources/extensions/ollama/index.js +22 -10
  23. package/dist/resources/extensions/ollama/ollama-chat-provider.js +1 -1
  24. package/dist/update-cmd.js +4 -2
  25. package/dist/web/standalone/.next/BUILD_ID +1 -1
  26. package/dist/web/standalone/.next/app-path-routes-manifest.json +18 -18
  27. package/dist/web/standalone/.next/build-manifest.json +2 -2
  28. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  29. package/dist/web/standalone/.next/server/app/_global-error.html +2 -2
  30. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  31. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  32. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  33. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  34. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  35. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  36. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  37. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  38. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  39. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  40. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  42. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  43. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  44. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  45. package/dist/web/standalone/.next/server/app/index.html +1 -1
  46. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  47. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  48. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  49. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  50. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  51. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  52. package/dist/web/standalone/.next/server/app-paths-manifest.json +18 -18
  53. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  54. package/dist/web/standalone/.next/server/pages/500.html +2 -2
  55. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  56. package/package.json +1 -1
  57. package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.d.ts +2 -0
  58. package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.d.ts.map +1 -0
  59. package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.js +46 -0
  60. package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.js.map +1 -0
  61. package/packages/pi-coding-agent/dist/core/model-registry.d.ts.map +1 -1
  62. package/packages/pi-coding-agent/dist/core/model-registry.js +11 -0
  63. package/packages/pi-coding-agent/dist/core/model-registry.js.map +1 -1
  64. package/packages/pi-coding-agent/dist/core/sdk.d.ts.map +1 -1
  65. package/packages/pi-coding-agent/dist/core/sdk.js +2 -3
  66. package/packages/pi-coding-agent/dist/core/sdk.js.map +1 -1
  67. package/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts +81 -0
  68. package/packages/pi-coding-agent/src/core/model-registry.ts +12 -0
  69. package/packages/pi-coding-agent/src/core/sdk.ts +2 -3
  70. package/src/resources/extensions/gsd/auto/detect-stuck.ts +27 -0
  71. package/src/resources/extensions/gsd/auto/phases.ts +39 -0
  72. package/src/resources/extensions/gsd/auto/session.ts +5 -0
  73. package/src/resources/extensions/gsd/auto-model-selection.ts +36 -0
  74. package/src/resources/extensions/gsd/auto-post-unit.ts +88 -0
  75. package/src/resources/extensions/gsd/auto-timers.ts +2 -1
  76. package/src/resources/extensions/gsd/bootstrap/db-tools.ts +86 -28
  77. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +27 -0
  78. package/src/resources/extensions/gsd/bootstrap/system-context.ts +31 -2
  79. package/src/resources/extensions/gsd/preferences-types.ts +13 -0
  80. package/src/resources/extensions/gsd/prompt-loader.ts +8 -0
  81. package/src/resources/extensions/gsd/prompts/system.md +3 -7
  82. package/src/resources/extensions/gsd/safety/content-validator.ts +98 -0
  83. package/src/resources/extensions/gsd/safety/destructive-guard.ts +49 -0
  84. package/src/resources/extensions/gsd/safety/evidence-collector.ts +151 -0
  85. package/src/resources/extensions/gsd/safety/evidence-cross-ref.ts +120 -0
  86. package/src/resources/extensions/gsd/safety/file-change-validator.ts +108 -0
  87. package/src/resources/extensions/gsd/safety/git-checkpoint.ts +106 -0
  88. package/src/resources/extensions/gsd/safety/safety-harness.ts +105 -0
  89. package/src/resources/extensions/gsd/tests/complete-slice-string-coercion.test.ts +211 -0
  90. package/src/resources/extensions/gsd/tests/flat-rate-routing-guard.test.ts +50 -0
  91. package/src/resources/extensions/gsd/tests/git-checkpoint.test.ts +94 -0
  92. package/src/resources/extensions/gsd/tests/stuck-detection-coverage.test.ts +42 -0
  93. package/src/resources/extensions/gsd/workflow-logger.ts +2 -1
  94. package/src/resources/extensions/ollama/index.ts +20 -11
  95. package/src/resources/extensions/ollama/ollama-auth-mode.test.ts +20 -0
  96. package/src/resources/extensions/ollama/ollama-chat-provider.ts +1 -1
  97. package/src/resources/extensions/ollama/tests/ollama-chat-provider-stream.test.ts +82 -0
  98. /package/dist/web/standalone/.next/static/{QmuF-eAbuU_2MQ03t38qr → SDB1T-4NqkMjYirjjqQhr}/_buildManifest.js +0 -0
  99. /package/dist/web/standalone/.next/static/{QmuF-eAbuU_2MQ03t38qr → SDB1T-4NqkMjYirjjqQhr}/_ssgManifest.js +0 -0
@@ -6,6 +6,13 @@
6
6
 
7
7
  import type { WindowEntry } from "./types.js";
8
8
 
9
+ /**
10
+ * Pattern matching ENOENT errors with a file path.
11
+ * Matches: "ENOENT: no such file or directory, access '/path/to/file'"
12
+ * and similar Node.js filesystem error messages.
13
+ */
14
+ const ENOENT_PATH_RE = /ENOENT[^']*'([^']+)'/;
15
+
9
16
  /**
10
17
  * Analyze a sliding window of recent unit dispatches for stuck patterns.
11
18
  * Returns a signal with reason if stuck, null otherwise.
@@ -13,6 +20,8 @@ import type { WindowEntry } from "./types.js";
13
20
  * Rule 1: Same error string twice in a row → stuck immediately.
14
21
  * Rule 2: Same unit key 3+ consecutive times → stuck (preserves prior behavior).
15
22
  * Rule 3: Oscillation A→B→A→B in last 4 entries → stuck.
23
+ * Rule 4: Same ENOENT path in any 2 entries within the window → stuck (#3575).
24
+ * Missing files don't self-heal between retries — retrying wastes budget.
16
25
  */
17
26
  export function detectStuck(
18
27
  window: readonly WindowEntry[],
@@ -56,5 +65,23 @@ export function detectStuck(
56
65
  }
57
66
  }
58
67
 
68
+ // Rule 4: Same ENOENT path seen twice in window (#3575)
69
+ // Missing files don't appear between retries — stop immediately.
70
+ const enoentPaths = new Map<string, number>();
71
+ for (const entry of window) {
72
+ if (!entry.error) continue;
73
+ const match = ENOENT_PATH_RE.exec(entry.error);
74
+ if (!match) continue;
75
+ const filePath = match[1];
76
+ const count = (enoentPaths.get(filePath) ?? 0) + 1;
77
+ if (count >= 2) {
78
+ return {
79
+ stuck: true,
80
+ reason: `Missing file referenced twice: ${filePath} (ENOENT)`,
81
+ };
82
+ }
83
+ enoentPaths.set(filePath, count);
84
+ }
85
+
59
86
  return null;
60
87
  }
@@ -37,6 +37,9 @@ import { withTimeout, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
37
37
  import { getEligibleSlices } from "../slice-parallel-eligibility.js";
38
38
  import { startSliceParallel } from "../slice-parallel-orchestrator.js";
39
39
  import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
40
+ import { resetEvidence } from "../safety/evidence-collector.js";
41
+ import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
42
+ import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
40
43
 
41
44
  // ─── generateMilestoneReport ──────────────────────────────────────────────────
42
45
 
@@ -1079,6 +1082,21 @@ export async function runUnitPhase(
1079
1082
  if (mid)
1080
1083
  deps.updateSliceProgressCache(s.basePath, mid, state.activeSlice?.id);
1081
1084
 
1085
+ // ── Safety harness: reset evidence + create checkpoint ──
1086
+ const safetyConfig = resolveSafetyHarnessConfig(
1087
+ prefs?.safety_harness as Record<string, unknown> | undefined,
1088
+ );
1089
+ if (safetyConfig.enabled && safetyConfig.evidence_collection) {
1090
+ resetEvidence();
1091
+ }
1092
+ // Only checkpoint code-executing units (not lifecycle/planning units)
1093
+ if (safetyConfig.enabled && safetyConfig.checkpoints && unitType === "execute-task") {
1094
+ s.checkpointSha = createCheckpoint(s.basePath, unitId);
1095
+ if (s.checkpointSha) {
1096
+ debugLog("runUnitPhase", { phase: "checkpoint-created", unitId, sha: s.checkpointSha.slice(0, 8) });
1097
+ }
1098
+ }
1099
+
1082
1100
  // Prompt injection
1083
1101
  let finalPrompt = prompt;
1084
1102
 
@@ -1376,6 +1394,27 @@ export async function runUnitPhase(
1376
1394
 
1377
1395
  deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: ic.nextSeq(), eventType: "unit-end", data: { unitType, unitId, status: unitResult.status, artifactVerified, ...(unitResult.errorContext ? { errorContext: unitResult.errorContext } : {}) }, causedBy: { flowId: ic.flowId, seq: unitStartSeq } });
1378
1396
 
1397
+ // ── Safety harness: checkpoint cleanup or rollback ──
1398
+ if (s.checkpointSha) {
1399
+ if (unitResult.status === "error" && safetyConfig.auto_rollback) {
1400
+ const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
1401
+ if (rolled) {
1402
+ ctx.ui.notify(`Rolled back to pre-unit checkpoint for ${unitId}`, "info");
1403
+ debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
1404
+ }
1405
+ } else if (unitResult.status === "error") {
1406
+ ctx.ui.notify(
1407
+ `Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`,
1408
+ "warning",
1409
+ );
1410
+ } else {
1411
+ // Success — clean up checkpoint ref
1412
+ cleanupCheckpoint(s.basePath, unitId);
1413
+ debugLog("runUnitPhase", { phase: "checkpoint-cleaned", unitId });
1414
+ }
1415
+ s.checkpointSha = null;
1416
+ }
1417
+
1379
1418
  return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
1380
1419
  }
1381
1420
 
@@ -145,6 +145,10 @@ export class AutoSession {
145
145
  lastBaselineCharCount: number | undefined;
146
146
  pendingQuickTasks: CaptureEntry[] = [];
147
147
 
148
+ // ── Safety harness ───────────────────────────────────────────────────────
149
+ /** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
150
+ checkpointSha: string | null = null;
151
+
148
152
  // ── Signal handler ───────────────────────────────────────────────────────
149
153
  sigtermHandler: (() => void) | null = null;
150
154
 
@@ -223,6 +227,7 @@ export class AutoSession {
223
227
  this.lastToolInvocationError = null;
224
228
  this.isolationDegraded = false;
225
229
  this.milestoneMergedInPhases = false;
230
+ this.checkpointSha = null;
226
231
 
227
232
  // Signal handler
228
233
  this.sigtermHandler = null;
@@ -31,6 +31,9 @@ export function resolvePreferredModelConfig(
31
31
  const routingConfig = resolveDynamicRoutingConfig();
32
32
  if (!routingConfig.enabled || !routingConfig.tier_models) return undefined;
33
33
 
34
+ // Don't synthesize a routing config for flat-rate providers (#3453).
35
+ if (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider)) return undefined;
36
+
34
37
  const ceilingModel = routingConfig.tier_models.heavy
35
38
  ?? (autoModeStartModel ? `${autoModeStartModel.provider}/${autoModeStartModel.id}` : undefined);
36
39
  if (!ceilingModel) return undefined;
@@ -71,6 +74,27 @@ export async function selectAndApplyModel(
71
74
  let effectiveModelConfig = modelConfig;
72
75
  let routingTierLabel = "";
73
76
 
77
+ // Disable routing for flat-rate providers like GitHub Copilot (#3453).
78
+ // All models cost the same per request, so downgrading to a cheaper
79
+ // model provides no cost benefit — it only degrades quality.
80
+ // Fail-closed: if primary model can't be resolved, fall back to
81
+ // provider-level signals rather than allowing unwanted downgrades.
82
+ if (routingConfig.enabled) {
83
+ const primaryModel = resolveModelId(modelConfig.primary, availableModels, ctx.model?.provider);
84
+ if (primaryModel) {
85
+ if (isFlatRateProvider(primaryModel.provider)) {
86
+ routingConfig.enabled = false;
87
+ }
88
+ } else if (
89
+ (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider))
90
+ || (ctx.model?.provider && isFlatRateProvider(ctx.model.provider))
91
+ ) {
92
+ // Primary model unresolvable but provider signals indicate flat-rate —
93
+ // disable routing to prevent quality degradation.
94
+ routingConfig.enabled = false;
95
+ }
96
+ }
97
+
74
98
  if (routingConfig.enabled) {
75
99
  let budgetPct: number | undefined;
76
100
  if (routingConfig.budget_pressure !== false) {
@@ -320,3 +344,15 @@ export function resolveModelId<T extends { id: string; provider: string }>(
320
344
  // Fall back to first non-extension candidate, or any candidate
321
345
  return candidates.find(m => !EXTENSION_PROVIDERS.has(m.provider)) ?? candidates[0];
322
346
  }
347
+
348
+ /**
349
+ * Flat-rate providers charge the same per request regardless of model.
350
+ * Dynamic routing provides no cost benefit — it only degrades quality (#3453).
351
+ * Uses case-insensitive matching with alias support to prevent fail-open on
352
+ * provider naming variations (e.g. "copilot" vs "github-copilot").
353
+ */
354
+ const FLAT_RATE_PROVIDERS = new Set(["github-copilot", "copilot"]);
355
+
356
+ export function isFlatRateProvider(provider: string): boolean {
357
+ return FLAT_RATE_PROVIDERS.has(provider.toLowerCase());
358
+ }
@@ -52,6 +52,13 @@ import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures
52
52
  import { debugLog } from "./debug-logger.js";
53
53
  import { runSafely } from "./auto-utils.js";
54
54
  import type { AutoSession, SidecarItem } from "./auto/session.js";
55
+ import { getEvidence } from "./safety/evidence-collector.js";
56
+ import { validateFileChanges } from "./safety/file-change-validator.js";
57
+ // crossReferenceEvidence available for future use when verification_evidence is stored in DB
58
+ // import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
59
+ import { validateContent } from "./safety/content-validator.js";
60
+ import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js";
61
+ import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js";
55
62
 
56
63
  /** Maximum verification retry attempts before escalating to blocker placeholder (#2653). */
57
64
  const MAX_VERIFICATION_RETRIES = 3;
@@ -437,6 +444,87 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
437
444
  debugLog("postUnit", { phase: "rogue-detection", error: String(e) });
438
445
  }
439
446
 
447
+ // ── Safety harness: post-unit validation ──
448
+ try {
449
+ const { loadEffectiveGSDPreferences } = await import("./preferences.js");
450
+ const prefs = loadEffectiveGSDPreferences()?.preferences;
451
+ const safetyConfig = resolveSafetyHarnessConfig(
452
+ prefs?.safety_harness as Record<string, unknown> | undefined,
453
+ );
454
+
455
+ if (safetyConfig.enabled) {
456
+ const { milestone: sMid, slice: sSid, task: sTid } = parseUnitId(s.currentUnit.id);
457
+
458
+ // File change validation (execute-task only, after auto-commit)
459
+ if (safetyConfig.file_change_validation && s.currentUnit.type === "execute-task" && sMid && sSid && sTid && isDbAvailable()) {
460
+ try {
461
+ const taskRow = getTask(sMid, sSid, sTid);
462
+ if (taskRow) {
463
+ const expectedOutput = taskRow.expected_output ?? [];
464
+ const plannedFiles = taskRow.files ?? [];
465
+ const audit = validateFileChanges(s.basePath, expectedOutput, plannedFiles);
466
+ if (audit && audit.violations.length > 0) {
467
+ const warnings = audit.violations.filter(v => v.severity === "warning");
468
+ for (const v of warnings) {
469
+ logWarning("safety", `file-change: ${v.file} — ${v.reason}`);
470
+ }
471
+ if (warnings.length > 0) {
472
+ ctx.ui.notify(
473
+ `Safety: ${warnings.length} unexpected file change(s) outside task plan`,
474
+ "warning",
475
+ );
476
+ }
477
+ }
478
+ }
479
+ } catch (e) {
480
+ debugLog("postUnit", { phase: "safety-file-change", error: String(e) });
481
+ }
482
+ }
483
+
484
+ // Evidence cross-reference (execute-task only)
485
+ // Verification evidence is passed via the complete-task tool call and
486
+ // stored in the SUMMARY.md on disk — not available as structured data
487
+ // in the DB. The evidence collector tracks actual bash tool calls, so
488
+ // we can still detect units that claimed success but ran no commands.
489
+ if (safetyConfig.evidence_cross_reference && s.currentUnit.type === "execute-task") {
490
+ try {
491
+ const actual = getEvidence();
492
+ const bashCalls = actual.filter(e => e.kind === "bash");
493
+ // If the task is marked complete but zero bash commands were run,
494
+ // it's suspicious — the LLM may have fabricated results.
495
+ if (sMid && sSid && sTid && isDbAvailable()) {
496
+ const taskRow = getTask(sMid, sSid, sTid);
497
+ if (taskRow?.status === "complete" && taskRow.verify && bashCalls.length === 0) {
498
+ logWarning("safety", "task marked complete with verification commands but no bash calls were executed");
499
+ ctx.ui.notify(
500
+ `Safety: task ${sTid} has verification commands but no bash calls were recorded`,
501
+ "warning",
502
+ );
503
+ }
504
+ }
505
+ } catch (e) {
506
+ debugLog("postUnit", { phase: "safety-evidence-xref", error: String(e) });
507
+ }
508
+ }
509
+
510
+ // Content validation (plan-slice, plan-milestone)
511
+ if (safetyConfig.content_validation) {
512
+ try {
513
+ const artifactPath = resolveArtifactForContent(s.currentUnit.type, s.currentUnit.id, s.basePath);
514
+ const contentViolations = validateContent(s.currentUnit.type, artifactPath);
515
+ for (const v of contentViolations) {
516
+ logWarning("safety", `content: ${v.reason}`);
517
+ ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
518
+ }
519
+ } catch (e) {
520
+ debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
521
+ }
522
+ }
523
+ }
524
+ } catch (e) {
525
+ debugLog("postUnit", { phase: "safety-harness", error: String(e) });
526
+ }
527
+
440
528
  // Artifact verification
441
529
  let triggerArtifactVerified = false;
442
530
  if (!s.currentUnit.type.startsWith("hook/")) {
@@ -106,8 +106,9 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
106
106
  }
107
107
  }
108
108
  const estimateMinutes = taskEstimate ? parseEstimateMinutes(taskEstimate) : null;
109
+ const MAX_TIMEOUT_SCALE = 6; // Cap at 6x (60min task). Prevents 2h+ tasks from creating 120min+ timeout windows.
109
110
  const timeoutScale = estimateMinutes && estimateMinutes > 0
110
- ? Math.max(1, estimateMinutes / 10) // 10min task = 1x, 30min = 3x, 2h = 12x
111
+ ? Math.min(MAX_TIMEOUT_SCALE, Math.max(1, estimateMinutes / 10))
111
112
  : 1;
112
113
 
113
114
  const softTimeoutMs = (supervisor.soft_timeout_minutes ?? 0) * 60 * 1000 * timeoutScale;
@@ -704,8 +704,14 @@ export function registerDbTools(pi: ExtensionAPI): void {
704
704
  };
705
705
  }
706
706
  try {
707
+ // Coerce string items to objects for verificationEvidence (#3541).
708
+ const coerced = { ...params };
709
+ coerced.verificationEvidence = (params.verificationEvidence ?? []).map((v: any) =>
710
+ typeof v === "string" ? { command: v, exitCode: -1, verdict: "unknown (coerced from string)", durationMs: 0 } : v,
711
+ );
712
+
707
713
  const { handleCompleteTask } = await import("../tools/complete-task.js");
708
- const result = await handleCompleteTask(params, process.cwd());
714
+ const result = await handleCompleteTask(coerced, process.cwd());
709
715
  if ("error" in result) {
710
716
  return {
711
717
  content: [{ type: "text" as const, text: `Error completing task: ${result.error}` }],
@@ -761,12 +767,15 @@ export function registerDbTools(pi: ExtensionAPI): void {
761
767
  keyDecisions: Type.Optional(Type.Array(Type.String(), { description: "List of key decisions made during this task" })),
762
768
  blockerDiscovered: Type.Optional(Type.Boolean({ description: "Whether a plan-invalidating blocker was discovered" })),
763
769
  verificationEvidence: Type.Optional(Type.Array(
764
- Type.Object({
765
- command: Type.String({ description: "Verification command that was run" }),
766
- exitCode: Type.Number({ description: "Exit code of the command" }),
767
- verdict: Type.String({ description: "Pass/fail verdict (e.g. '✅ pass', '❌ fail')" }),
768
- durationMs: Type.Number({ description: "Duration of the command in milliseconds" }),
769
- }),
770
+ Type.Union([
771
+ Type.Object({
772
+ command: Type.String({ description: "Verification command that was run" }),
773
+ exitCode: Type.Number({ description: "Exit code of the command" }),
774
+ verdict: Type.String({ description: "Pass/fail verdict (e.g. '✅ pass', '❌ fail')" }),
775
+ durationMs: Type.Number({ description: "Duration of the command in milliseconds" }),
776
+ }),
777
+ Type.String({ description: "Fallback: verification summary string" }),
778
+ ]),
770
779
  { description: "Array of verification evidence entries" },
771
780
  )),
772
781
  }),
@@ -787,8 +796,42 @@ export function registerDbTools(pi: ExtensionAPI): void {
787
796
  };
788
797
  }
789
798
  try {
799
+ // Coerce string items to objects for fields where LLMs sometimes pass
800
+ // plain strings instead of the expected { key, value } shape (#3541).
801
+ // Parses "key — value" or "key - value" format when possible.
802
+ const splitPair = (s: string): [string, string] => {
803
+ const m = s.match(/^(.+?)\s*(?:—|-)\s+(.+)$/);
804
+ return m ? [m[1].trim(), m[2].trim()] : [s.trim(), ""];
805
+ };
806
+ const coerced = { ...params };
807
+ coerced.filesModified = (params.filesModified ?? []).map((f: any) => {
808
+ if (typeof f !== "string") return f;
809
+ const [path, description] = splitPair(f);
810
+ return { path, description };
811
+ });
812
+ coerced.requires = (params.requires ?? []).map((r: any) => {
813
+ if (typeof r !== "string") return r;
814
+ const [slice, provides] = splitPair(r);
815
+ return { slice, provides };
816
+ });
817
+ coerced.requirementsAdvanced = (params.requirementsAdvanced ?? []).map((r: any) => {
818
+ if (typeof r !== "string") return r;
819
+ const [id, how] = splitPair(r);
820
+ return { id, how };
821
+ });
822
+ coerced.requirementsValidated = (params.requirementsValidated ?? []).map((r: any) => {
823
+ if (typeof r !== "string") return r;
824
+ const [id, proof] = splitPair(r);
825
+ return { id, proof };
826
+ });
827
+ coerced.requirementsInvalidated = (params.requirementsInvalidated ?? []).map((r: any) => {
828
+ if (typeof r !== "string") return r;
829
+ const [id, what] = splitPair(r);
830
+ return { id, what };
831
+ });
832
+
790
833
  const { handleCompleteSlice } = await import("../tools/complete-slice.js");
791
- const result = await handleCompleteSlice(params, process.cwd());
834
+ const result = await handleCompleteSlice(coerced, process.cwd());
792
835
  if ("error" in result) {
793
836
  return {
794
837
  content: [{ type: "text" as const, text: `Error completing slice: ${result.error}` }],
@@ -850,38 +893,53 @@ export function registerDbTools(pi: ExtensionAPI): void {
850
893
  drillDownPaths: Type.Optional(Type.Array(Type.String(), { description: "Paths to task summaries for drill-down" })),
851
894
  affects: Type.Optional(Type.Array(Type.String(), { description: "Downstream slices affected" })),
852
895
  requirementsAdvanced: Type.Optional(Type.Array(
853
- Type.Object({
854
- id: Type.String({ description: "Requirement ID" }),
855
- how: Type.String({ description: "How it was advanced" }),
856
- }),
896
+ Type.Union([
897
+ Type.Object({
898
+ id: Type.String({ description: "Requirement ID" }),
899
+ how: Type.String({ description: "How it was advanced" }),
900
+ }),
901
+ Type.String({ description: "Fallback: 'ID — how' string" }),
902
+ ]),
857
903
  { description: "Requirements advanced by this slice" },
858
904
  )),
859
905
  requirementsValidated: Type.Optional(Type.Array(
860
- Type.Object({
861
- id: Type.String({ description: "Requirement ID" }),
862
- proof: Type.String({ description: "What proof validates it" }),
863
- }),
906
+ Type.Union([
907
+ Type.Object({
908
+ id: Type.String({ description: "Requirement ID" }),
909
+ proof: Type.String({ description: "What proof validates it" }),
910
+ }),
911
+ Type.String({ description: "Fallback: 'ID — proof' string" }),
912
+ ]),
864
913
  { description: "Requirements validated by this slice" },
865
914
  )),
866
915
  requirementsInvalidated: Type.Optional(Type.Array(
867
- Type.Object({
868
- id: Type.String({ description: "Requirement ID" }),
869
- what: Type.String({ description: "What changed" }),
870
- }),
916
+ Type.Union([
917
+ Type.Object({
918
+ id: Type.String({ description: "Requirement ID" }),
919
+ what: Type.String({ description: "What changed" }),
920
+ }),
921
+ Type.String({ description: "Fallback: 'ID — what' string" }),
922
+ ]),
871
923
  { description: "Requirements invalidated or re-scoped" },
872
924
  )),
873
925
  filesModified: Type.Optional(Type.Array(
874
- Type.Object({
875
- path: Type.String({ description: "File path" }),
876
- description: Type.String({ description: "What changed" }),
877
- }),
926
+ Type.Union([
927
+ Type.Object({
928
+ path: Type.String({ description: "File path" }),
929
+ description: Type.String({ description: "What changed" }),
930
+ }),
931
+ Type.String({ description: "Fallback: file path string" }),
932
+ ]),
878
933
  { description: "Files modified with descriptions" },
879
934
  )),
880
935
  requires: Type.Optional(Type.Array(
881
- Type.Object({
882
- slice: Type.String({ description: "Dependency slice ID" }),
883
- provides: Type.String({ description: "What was consumed from it" }),
884
- }),
936
+ Type.Union([
937
+ Type.Object({
938
+ slice: Type.String({ description: "Dependency slice ID" }),
939
+ provides: Type.String({ description: "What was consumed from it" }),
940
+ }),
941
+ Type.String({ description: "Fallback: slice ID string" }),
942
+ ]),
885
943
  { description: "Upstream slice dependencies consumed" },
886
944
  )),
887
945
  }),
@@ -18,6 +18,9 @@ import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"
18
18
  import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
19
19
  import { saveActivityLog } from "../activity-log.js";
20
20
  import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
21
+ import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult } from "../safety/evidence-collector.js";
22
+ import { classifyCommand } from "../safety/destructive-guard.js";
23
+ import { logWarning as safetyLogWarning } from "../workflow-logger.js";
21
24
 
22
25
  // Skip the welcome screen on the very first session_start — cli.ts already
23
26
  // printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
@@ -203,6 +206,26 @@ export function registerHooks(pi: ExtensionAPI): void {
203
206
  if (result.block) return result;
204
207
  });
205
208
 
209
+ // ── Safety harness: evidence collection + destructive command warnings ──
210
+ pi.on("tool_call", async (event, ctx) => {
211
+ if (!isAutoActive()) return;
212
+ safetyRecordToolCall(event.toolName, event.input as Record<string, unknown>);
213
+
214
+ // Destructive command classification (warn only, never block)
215
+ if (isToolCallEventType("bash", event)) {
216
+ const classification = classifyCommand(event.input.command);
217
+ if (classification.destructive) {
218
+ safetyLogWarning("safety", `destructive command: ${classification.labels.join(", ")}`, {
219
+ command: String(event.input.command).slice(0, 200),
220
+ });
221
+ ctx.ui.notify(
222
+ `Destructive command detected: ${classification.labels.join(", ")}`,
223
+ "warning",
224
+ );
225
+ }
226
+ }
227
+ });
228
+
206
229
  pi.on("tool_result", async (event) => {
207
230
  if (event.toolName !== "ask_user_questions") return;
208
231
  const milestoneId = getDiscussionMilestoneId();
@@ -268,6 +291,10 @@ export function registerHooks(pi: ExtensionAPI): void {
268
291
  : (typeof event.result?.content?.[0]?.text === "string" ? event.result.content[0].text : String(event.result));
269
292
  recordToolInvocationError(event.toolName, errorText);
270
293
  }
294
+ // Safety harness: record tool execution results for evidence cross-referencing
295
+ if (isAutoActive()) {
296
+ safetyRecordToolResult(event.toolCallId, event.toolName, event.result, event.isError);
297
+ }
271
298
  });
272
299
 
273
300
  pi.on("model_select", async (_event, ctx) => {
@@ -6,9 +6,10 @@ import type { ExtensionContext } from "@gsd/pi-coding-agent";
6
6
 
7
7
  import { logWarning } from "../workflow-logger.js";
8
8
  import { debugTime } from "../debug-logger.js";
9
- import { loadPrompt } from "../prompt-loader.js";
9
+ import { loadPrompt, getTemplatesDir } from "../prompt-loader.js";
10
10
  import { readForensicsMarker } from "../forensics.js";
11
11
  import { resolveAllSkillReferences, renderPreferencesForSystemPrompt, loadEffectiveGSDPreferences } from "../preferences.js";
12
+ import { resolveSkillReference } from "../preferences-skills.js";
12
13
  import { resolveGsdRootFile, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTaskFiles, resolveTasksDir, relSliceFile, relSlicePath, relTaskFile } from "../paths.js";
13
14
  import { hasSkillSnapshot, detectNewSkills, formatSkillsXml } from "../skill-discovery.js";
14
15
  import { getActiveAutoWorktreeContext } from "../auto-worktree.js";
@@ -20,6 +21,31 @@ import { markCmuxPromptShown, shouldPromptToEnableCmux } from "../../cmux/index.
20
21
 
21
22
  const gsdHome = process.env.GSD_HOME || join(homedir(), ".gsd");
22
23
 
24
/**
 * Trigger → bundled-skill pairs used to build the "Skills" table of the
 * system prompt.
 *
 * Each entry carries a bare skill name instead of an absolute path. The name
 * is resolved dynamically at runtime, and only skills that actually exist on
 * disk end up in the rendered table. (#3575)
 */
const BUNDLED_SKILL_TRIGGERS: Array<{ trigger: string; skill: string }> = [
  {
    trigger: "Frontend UI - web components, pages, landing pages, dashboards, React/HTML/CSS, styling",
    skill: "frontend-design",
  },
  {
    trigger: "macOS or iOS apps - SwiftUI, Xcode, App Store",
    skill: "swiftui",
  },
  {
    trigger: "Debugging - complex bugs, failing tests, root-cause investigation after standard approaches fail",
    skill: "debug-like-expert",
  },
];
34
+
35
/**
 * Render the bundled-skills Markdown table that is injected into the system
 * prompt.
 *
 * Each entry of `BUNDLED_SKILL_TRIGGERS` is resolved against the current
 * working directory with `resolveSkillReference`; entries whose resolution
 * method is "unresolved" are left out of the table.
 *
 * @returns A two-column Markdown table (trigger, resolved skill path), or an
 *   italic hint naming the skill install locations when no entry resolved.
 */
function buildBundledSkillsTable(): string {
  const workingDir = process.cwd();

  const tableRows = BUNDLED_SKILL_TRIGGERS.flatMap(({ trigger, skill }) => {
    const resolved = resolveSkillReference(skill, workingDir);
    // Unresolved means the skill is not installed — it contributes no row.
    return resolved.method === "unresolved"
      ? []
      : [`| ${trigger} | \`${resolved.resolvedPath}\` |`];
  });

  if (tableRows.length > 0) {
    return `| Trigger | Skill to load |\n|---|---|\n${tableRows.join("\n")}`;
  }
  return "*No bundled skills found. Install skills to `~/.agents/skills/` or `~/.claude/skills/`.*";
}
48
+
23
49
  function warnDeprecatedAgentInstructions(): void {
24
50
  const paths = [
25
51
  join(gsdHome, "agent-instructions.md"),
@@ -43,7 +69,10 @@ export async function buildBeforeAgentStartResult(
43
69
  if (!existsSync(join(process.cwd(), ".gsd"))) return undefined;
44
70
 
45
71
  const stopContextTimer = debugTime("context-inject");
46
- const systemContent = loadPrompt("system");
72
+ const systemContent = loadPrompt("system", {
73
+ bundledSkillsTable: buildBundledSkillsTable(),
74
+ templatesDir: getTemplatesDir(),
75
+ });
47
76
  const loadedPreferences = loadEffectiveGSDPreferences();
48
77
  if (shouldPromptToEnableCmux(loadedPreferences?.preferences)) {
49
78
  markCmuxPromptShown();
@@ -105,6 +105,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
105
105
  "experimental",
106
106
  "codebase",
107
107
  "slice_parallel",
108
+ "safety_harness",
108
109
  ]);
109
110
 
110
111
  /** Canonical list of all dispatch unit types. */
@@ -291,6 +292,18 @@ export interface GSDPreferences {
291
292
  codebase?: CodebaseMapPreferences;
292
293
  /** Slice-level parallelism within a milestone. Disabled by default. */
293
294
  slice_parallel?: { enabled?: boolean; max_workers?: number };
295
+ /** LLM safety harness configuration. Monitors, validates, and constrains LLM behavior during auto-mode. Enabled by default with warn-and-continue policy. */
296
+ safety_harness?: {
297
+ enabled?: boolean;
298
+ evidence_collection?: boolean;
299
+ file_change_validation?: boolean;
300
+ evidence_cross_reference?: boolean;
301
+ destructive_command_warnings?: boolean;
302
+ content_validation?: boolean;
303
+ checkpoints?: boolean;
304
+ auto_rollback?: boolean;
305
+ timeout_scale_cap?: number;
306
+ };
294
307
  }
295
308
 
296
309
  export interface LoadedGSDPreferences {
@@ -51,6 +51,14 @@ const __extensionDir = resolveExtensionDir();
51
51
  const promptsDir = join(__extensionDir, "prompts");
52
52
  const templatesDir = join(__extensionDir, "templates");
53
53
 
54
/**
 * Expose the templates directory computed by this module, so prompts can
 * point at the real location rather than a hardcoded
 * `~/.gsd/agent/extensions/gsd/templates/` path. (#3575)
 *
 * @returns The `templates` directory inside the resolved extension directory.
 */
export function getTemplatesDir(): string {
  return templatesDir;
}
61
+
54
62
  // Cache all templates eagerly at module load — a running session uses the
55
63
  // template versions that were on disk at startup, immune to later overwrites.
56
64
  const templateCache = new Map<string, string>();
@@ -24,13 +24,9 @@ Leave the project in a state where the next agent can immediately understand wha
24
24
 
25
25
  ## Skills
26
26
 
27
- GSD ships with bundled skills. Load the relevant skill file with the `read` tool before starting work when the task matches.
27
+ GSD ships with bundled skills. Load the relevant skill file with the `read` tool before starting work when the task matches. Use bare skill names — GSD resolves them to the correct path automatically.
28
28
 
29
- | Trigger | Skill to load |
30
- |---|---|
31
- | Frontend UI - web components, pages, landing pages, dashboards, React/HTML/CSS, styling | `~/.gsd/agent/skills/frontend-design/SKILL.md` |
32
- | macOS or iOS apps - SwiftUI, Xcode, App Store | `~/.gsd/agent/skills/swiftui/SKILL.md` |
33
- | Debugging - complex bugs, failing tests, root-cause investigation after standard approaches fail | `~/.gsd/agent/skills/debug-like-expert/SKILL.md` |
29
+ {{bundledSkillsTable}}
34
30
 
35
31
  ## Hard Rules
36
32
 
@@ -119,7 +115,7 @@ In all modes, slices commit sequentially on the active branch; there are no per-
119
115
  ### Artifact Templates
120
116
 
121
117
  Templates showing the expected format for each artifact type are in:
122
- `~/.gsd/agent/extensions/gsd/templates/`
118
+ `{{templatesDir}}`
123
119
 
124
120
  **Always read the relevant template before writing an artifact** to match the expected structure exactly. The parsers that read these files depend on specific formatting:
125
121