gsd-pi 2.80.0-dev.c5c38454b → 2.80.0-dev.f55d16d13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/dist/resources/.managed-resources-content-hash +1 -1
  2. package/dist/resources/GSD-WORKFLOW.md +2 -2
  3. package/dist/resources/extensions/gsd/auto/phases.js +37 -30
  4. package/dist/resources/extensions/gsd/auto-post-unit.js +10 -10
  5. package/dist/resources/extensions/gsd/auto-prompts.js +111 -1
  6. package/dist/resources/extensions/gsd/auto.js +9 -1
  7. package/dist/resources/extensions/gsd/clean-root-preflight.js +42 -4
  8. package/dist/resources/extensions/gsd/detection.js +106 -0
  9. package/dist/resources/extensions/gsd/prompts/complete-milestone.md +7 -8
  10. package/dist/resources/extensions/gsd/prompts/plan-milestone.md +3 -1
  11. package/dist/resources/extensions/gsd/safety/evidence-collector.js +10 -2
  12. package/dist/resources/extensions/gsd/worktree-manager.js +16 -14
  13. package/dist/tsconfig.extensions.tsbuildinfo +1 -1
  14. package/dist/web/standalone/.next/BUILD_ID +1 -1
  15. package/dist/web/standalone/.next/app-path-routes-manifest.json +14 -14
  16. package/dist/web/standalone/.next/build-manifest.json +2 -2
  17. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  18. package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
  19. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  20. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  21. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  22. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  23. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  24. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  25. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  26. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  27. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  28. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  29. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  30. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  31. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  32. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  33. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  34. package/dist/web/standalone/.next/server/app/index.html +1 -1
  35. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  36. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  37. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  38. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  39. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  40. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app-paths-manifest.json +14 -14
  42. package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
  43. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  44. package/dist/web/standalone/.next/server/pages/500.html +1 -1
  45. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  46. package/package.json +1 -1
  47. package/packages/pi-coding-agent/dist/core/chat-controller-ordering.test.js +30 -0
  48. package/packages/pi-coding-agent/dist/core/chat-controller-ordering.test.js.map +1 -1
  49. package/packages/pi-coding-agent/dist/modes/interactive/controllers/chat-controller.d.ts.map +1 -1
  50. package/packages/pi-coding-agent/dist/modes/interactive/controllers/chat-controller.js +2 -0
  51. package/packages/pi-coding-agent/dist/modes/interactive/controllers/chat-controller.js.map +1 -1
  52. package/packages/pi-coding-agent/src/core/chat-controller-ordering.test.ts +36 -0
  53. package/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts +2 -0
  54. package/packages/pi-coding-agent/tsconfig.tsbuildinfo +1 -1
  55. package/src/resources/GSD-WORKFLOW.md +2 -2
  56. package/src/resources/extensions/gsd/auto/loop-deps.ts +1 -0
  57. package/src/resources/extensions/gsd/auto/phases.ts +42 -28
  58. package/src/resources/extensions/gsd/auto-post-unit.ts +10 -10
  59. package/src/resources/extensions/gsd/auto-prompts.ts +116 -1
  60. package/src/resources/extensions/gsd/auto.ts +12 -1
  61. package/src/resources/extensions/gsd/clean-root-preflight.ts +41 -3
  62. package/src/resources/extensions/gsd/detection.ts +128 -0
  63. package/src/resources/extensions/gsd/prompts/complete-milestone.md +7 -8
  64. package/src/resources/extensions/gsd/prompts/plan-milestone.md +3 -1
  65. package/src/resources/extensions/gsd/safety/evidence-collector.ts +11 -2
  66. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +1 -1
  67. package/src/resources/extensions/gsd/tests/clean-root-preflight.test.ts +88 -2
  68. package/src/resources/extensions/gsd/tests/detection.test.ts +140 -0
  69. package/src/resources/extensions/gsd/tests/right-sized-workflow-prompts.test.ts +192 -0
  70. package/src/resources/extensions/gsd/tests/safety-harness-false-positives.test.ts +29 -0
  71. package/src/resources/extensions/gsd/tests/start-auto-detached.test.ts +46 -2
  72. package/src/resources/extensions/gsd/tests/worktree-health-dispatch.test.ts +37 -6
  73. package/src/resources/extensions/gsd/tests/worktree-manager.test.ts +7 -0
  74. package/src/resources/extensions/gsd/tests/worktree-nested-git-safety.test.ts +9 -2
  75. package/src/resources/extensions/gsd/worktree-manager.ts +15 -4
  76. /package/dist/web/standalone/.next/static/{TCSim36ZpcPu2WgeoC45g → mPZbi5BH9dwokaPZlrYuQ}/_buildManifest.js +0 -0
  77. /package/dist/web/standalone/.next/static/{TCSim36ZpcPu2WgeoC45g → mPZbi5BH9dwokaPZlrYuQ}/_ssgManifest.js +0 -0
@@ -1 +1 @@
1
- 7088672cce649c64
1
+ 8b735b96d5d09cb8
@@ -28,7 +28,7 @@ Then do the thing `STATE.md` says to do next.
28
28
  ## The Hierarchy
29
29
 
30
30
  ```
31
- Milestone → a shippable version (4-10 slices)
31
+ Milestone → a shippable version (1-10 slices, sized to the work)
32
32
  Slice → one demoable vertical capability (1-7 tasks)
33
33
  Task → one context-window-sized unit of work (fits in one session)
34
34
  ```
@@ -331,7 +331,7 @@ The **Don't Hand-Roll** and **Common Pitfalls** sections prevent the most expens
331
331
 
332
332
  **For a milestone (roadmap):**
333
333
  1. Read `M###-CONTEXT.md`, `M###-RESEARCH.md`, and `.gsd/DECISIONS.md` if they exist.
334
- 2. Decompose the vision into 4-10 demoable vertical slices.
334
+ 2. Decompose the vision into 1-10 demoable vertical slices. Prefer one slice for tiny, single-file, or static work unless the request clearly spans independent capabilities.
335
335
  3. Order by risk (high-risk first to validate feasibility early).
336
336
  4. Write `M###-ROADMAP.md` with checkboxes, risk levels, dependencies, demo sentences.
337
337
  5. **Write the boundary map** — for each slice, specify what it produces (functions, types, interfaces, endpoints) and what it consumes from upstream slices. This forces interface thinking before implementation and enables deterministic verification that slices actually connect.
@@ -13,13 +13,13 @@ import { detectStuck } from "./detect-stuck.js";
13
13
  import { runUnit } from "./run-unit.js";
14
14
  import { debugLog } from "../debug-logger.js";
15
15
  import { resolveWorktreeProjectRoot, normalizeWorktreePathForCompare } from "../worktree-root.js";
16
- import { PROJECT_FILES, hasProjectFileInAncestor } from "../detection.js";
16
+ import { classifyProject } from "../detection.js";
17
17
  import { MergeConflictError } from "../git-service.js";
18
18
  import { setCurrentPhase, clearCurrentPhase } from "../../shared/gsd-phase-state.js";
19
19
  import { pauseAutoForProviderError } from "../provider-error-pause.js";
20
20
  import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
21
21
  import { join, basename } from "node:path";
22
- import { existsSync, cpSync, readdirSync } from "node:fs";
22
+ import { existsSync, cpSync } from "node:fs";
23
23
  import { logWarning, logError, _resetLogs, drainLogs, drainAndSummarize, formatForNotification, hasAnyIssues, } from "../workflow-logger.js";
24
24
  import { gsdRoot } from "../paths.js";
25
25
  import { atomicWriteSync } from "../atomic-write.js";
@@ -498,7 +498,7 @@ export async function runPreDispatch(ic, loopState) {
498
498
  }
499
499
  // #2909: postflight — restore stashed changes after successful merge
500
500
  if (preflightTransition.stashPushed) {
501
- deps.postflightPopStash(s.originalBasePath || s.basePath, s.currentMilestoneId, ctx.ui.notify.bind(ctx.ui));
501
+ deps.postflightPopStash(s.originalBasePath || s.basePath, s.currentMilestoneId, preflightTransition.stashMarker, ctx.ui.notify.bind(ctx.ui));
502
502
  }
503
503
  // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
504
504
  deps.invalidateAllCaches();
@@ -574,7 +574,7 @@ export async function runPreDispatch(ic, loopState) {
574
574
  }
575
575
  // #2909: postflight — restore stashed changes after successful merge
576
576
  if (preflightAllComplete.stashPushed) {
577
- deps.postflightPopStash(s.originalBasePath || s.basePath, s.currentMilestoneId, ctx.ui.notify.bind(ctx.ui));
577
+ deps.postflightPopStash(s.originalBasePath || s.basePath, s.currentMilestoneId, preflightAllComplete.stashMarker, ctx.ui.notify.bind(ctx.ui));
578
578
  }
579
579
  // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
580
580
  }
@@ -660,7 +660,7 @@ export async function runPreDispatch(ic, loopState) {
660
660
  }
661
661
  // #2909: postflight — restore stashed changes after successful merge
662
662
  if (preflightComplete.stashPushed) {
663
- deps.postflightPopStash(s.originalBasePath || s.basePath, s.currentMilestoneId, ctx.ui.notify.bind(ctx.ui));
663
+ deps.postflightPopStash(s.originalBasePath || s.basePath, s.currentMilestoneId, preflightComplete.stashMarker, ctx.ui.notify.bind(ctx.ui));
664
664
  }
665
665
  // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
666
666
  }
@@ -1084,8 +1084,9 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
1084
1084
  // Verify the working directory is a valid git checkout with project
1085
1085
  // files before dispatching work. A broken worktree causes agents to
1086
1086
  // hallucinate summaries since they cannot read or write any files.
1087
- // Uses the shared PROJECT_FILES list from detection.ts to support all
1088
- // ecosystems (Rust, Go, Python, Java, etc.), not just JS.
1087
+ // Uses project classification so project presence is not conflated with
1088
+ // ecosystem marker detection. Static/minimal repos become untyped-existing.
1089
+ let projectClassification = null;
1089
1090
  if (s.basePath && unitType === "execute-task") {
1090
1091
  const gitMarker = join(s.basePath, ".git");
1091
1092
  const hasGit = deps.existsSync(gitMarker);
@@ -1096,30 +1097,26 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
1096
1097
  await deps.stopAuto(ctx, pi, msg);
1097
1098
  return { action: "break", reason: "worktree-invalid" };
1098
1099
  }
1099
- const hasProjectFile = PROJECT_FILES.some((f) => deps.existsSync(join(s.basePath, f)));
1100
- const hasSrcDir = deps.existsSync(join(s.basePath, "src"));
1101
- // Xcode bundles have project-specific names (*.xcodeproj, *.xcworkspace)
1102
- // that cannot be matched by exact filename — scan the directory by suffix.
1103
- let hasXcodeBundle = false;
1104
- try {
1105
- const entries = deps.existsSync(s.basePath) ? readdirSync(s.basePath) : [];
1106
- hasXcodeBundle = entries.some((e) => e.endsWith(".xcodeproj") || e.endsWith(".xcworkspace"));
1100
+ projectClassification = classifyProject(s.basePath);
1101
+ if (projectClassification.kind === "invalid-repo") {
1102
+ const msg = `Worktree health check failed: ${s.basePath} classified as invalid-repo (${projectClassification.reason}) — refusing to dispatch ${unitType} ${unitId}`;
1103
+ debugLog("runUnitPhase", { phase: "worktree-health-invalid-repo", basePath: s.basePath, classification: projectClassification });
1104
+ if (projectClassification.reason === "missing .git" && hasGit) {
1105
+ ctx.ui.notify(`Warning: ${s.basePath} project classification could not confirm .git; assuming it has no project content yet — proceeding as greenfield project because worktree health reported .git present`, "warning");
1106
+ }
1107
+ else {
1108
+ ctx.ui.notify(msg, "error");
1109
+ await deps.stopAuto(ctx, pi, msg);
1110
+ return { action: "break", reason: "worktree-invalid" };
1111
+ }
1107
1112
  }
1108
- catch (err) {
1109
- debugLog("runUnitPhase", { phase: "xcode-bundle-scan-failed", basePath: s.basePath, error: String(err) });
1110
- }
1111
- // Monorepo support (#2347): if no project files in the worktree directory,
1112
- // walk parent directories up to the filesystem root. In monorepos,
1113
- // package.json / Cargo.toml etc. live in a parent directory.
1114
- const hasProjectFileInParent = !hasProjectFile && !hasSrcDir && !hasXcodeBundle
1115
- ? hasProjectFileInAncestor(s.basePath, deps.existsSync)
1116
- : false;
1117
- if (!hasProjectFile && !hasSrcDir && !hasXcodeBundle && !hasProjectFileInParent) {
1118
- // Greenfield projects won't have project files yet — the first task creates them.
1119
- // Log a warning but allow execution to proceed. The .git check above is sufficient
1120
- // to ensure we're in a valid working directory.
1121
- debugLog("runUnitPhase", { phase: "worktree-health-warn-greenfield", basePath: s.basePath, hasProjectFile, hasSrcDir, hasXcodeBundle });
1122
- ctx.ui.notify(`Warning: ${s.basePath} has no recognized project files — proceeding as greenfield project`, "warning");
1113
+ else if (projectClassification.kind === "greenfield") {
1114
+ debugLog("runUnitPhase", { phase: "worktree-health-greenfield", basePath: s.basePath, classification: projectClassification });
1115
+ ctx.ui.notify(`Warning: ${s.basePath} has no project content yet — proceeding as greenfield project`, "warning");
1116
+ }
1117
+ else if (projectClassification.kind === "untyped-existing") {
1118
+ debugLog("runUnitPhase", { phase: "worktree-health-untyped-existing", basePath: s.basePath, classification: projectClassification });
1119
+ ctx.ui.notify(`Notice: ${s.basePath} has existing project content but no recognized tooling markers — using generic file-level workflow guidance`, "info");
1123
1120
  }
1124
1121
  }
1125
1122
  // Detect retry and capture previous tier for escalation
@@ -1182,6 +1179,16 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
1182
1179
  }
1183
1180
  // Prompt injection
1184
1181
  let finalPrompt = prompt;
1182
+ if (unitType === "execute-task") {
1183
+ projectClassification ??= classifyProject(s.basePath);
1184
+ if (projectClassification.kind === "untyped-existing") {
1185
+ const samples = projectClassification.contentFiles.slice(0, 8).join(", ") || "project files";
1186
+ finalPrompt +=
1187
+ "\n\n**Project classification:** Existing untyped project. No recognized build/tooling markers were detected, " +
1188
+ "so use generic file-level workflow guidance. Task plans and completion summaries must list every concrete " +
1189
+ `project file changed in \`files\` or \`expected_output\`. Detected content sample: ${samples}.`;
1190
+ }
1191
+ }
1185
1192
  if (s.pendingVerificationRetry) {
1186
1193
  const retryCtx = s.pendingVerificationRetry;
1187
1194
  s.pendingVerificationRetry = null;
@@ -25,7 +25,7 @@ import { verifyExpectedArtifact, resolveExpectedArtifactPath, writeBlockerPlaceh
25
25
  import { regenerateIfMissing } from "./workflow-projections.js";
26
26
  import { syncStateToProjectRoot } from "./auto-worktree.js";
27
27
  import { normalizeWorktreePathForCompare } from "./worktree-root.js";
28
- import { isDbAvailable, getTask, getSlice, getMilestone, updateTaskStatus, _getAdapter } from "./gsd-db.js";
28
+ import { isDbAvailable, getTask, getSlice, getMilestone, updateTaskStatus, _getAdapter, getVerificationEvidence } from "./gsd-db.js";
29
29
  import { renderPlanCheckboxes } from "./markdown-renderer.js";
30
30
  import { consumeSignal } from "./session-status-io.js";
31
31
  import { checkPostUnitHooks, isRetryPending, consumeRetryTrigger, persistHookState, resolveHookArtifactPath, } from "./post-unit-hooks.js";
@@ -719,21 +719,21 @@ export async function postUnitPreVerification(pctx, opts) {
719
719
  }
720
720
  }
721
721
  // Evidence cross-reference (execute-task only)
722
- // Verification evidence is passed via the complete-task tool call and
723
- // stored in the SUMMARY.md on disk not available as structured data
724
- // in the DB. The evidence collector tracks actual bash tool calls, so
725
- // we can still detect units that claimed success but ran no commands.
722
+ // Only compare against concrete command evidence persisted by the task
723
+ // completion tool. A prose Verify field can be satisfied later by the
724
+ // host verification gate, so it is not enough to accuse the unit.
726
725
  if (safetyConfig.evidence_cross_reference && s.currentUnit.type === "execute-task") {
727
726
  try {
728
727
  const actual = getEvidence();
729
728
  const bashCalls = actual.filter(e => e.kind === "bash");
730
- // If the task is marked complete but zero bash commands were run,
731
- // it's suspicious — the LLM may have fabricated results.
732
729
  if (sMid && sSid && sTid && isDbAvailable()) {
733
730
  const taskRow = getTask(sMid, sSid, sTid);
734
- if (taskRow?.status === "complete" && taskRow.verify && bashCalls.length === 0) {
735
- logWarning("safety", "task marked complete with verification commands but no bash calls were executed");
736
- ctx.ui.notify(`Safety: task ${sTid} has verification commands but no bash calls were recorded`, "warning");
731
+ const claimedCommands = getVerificationEvidence(sMid, sSid, sTid)
732
+ .map((row) => row.command)
733
+ .filter((command) => typeof command === "string" && command.trim().length > 0);
734
+ if (taskRow?.status === "complete" && claimedCommands.length > 0 && bashCalls.length === 0) {
735
+ logWarning("safety", "task claimed verification command evidence but no execution tool calls were recorded");
736
+ ctx.ui.notify(`Safety: task ${sTid} claimed command evidence but no execution tool calls were recorded`, "warning");
737
737
  }
738
738
  }
739
739
  }
@@ -6,7 +6,7 @@
6
6
  * utility.
7
7
  */
8
8
  import { loadFile, parseContinue, parseSummary, loadActiveOverrides, formatOverridesSection, parseTaskPlanFile } from "./files.js";
9
- import { hasVerdict, getUatType } from "./verdict-parser.js";
9
+ import { hasVerdict, getUatType, extractVerdict } from "./verdict-parser.js";
10
10
  import { loadPrompt, inlineTemplate } from "./prompt-loader.js";
11
11
  import { resolveMilestoneFile, resolveSliceFile, resolveSlicePath, resolveTasksDir, resolveTaskFiles, resolveTaskFile, relMilestoneFile, relSliceFile, relSlicePath, relMilestonePath, resolveGsdRootFile, relGsdRootFile, resolveRuntimeFile, } from "./paths.js";
12
12
  import { resolveSkillDiscoveryMode, resolveInlineLevel, loadEffectiveGSDPreferences, resolveAllSkillReferences } from "./preferences.js";
@@ -25,6 +25,7 @@ import { logWarning } from "./workflow-logger.js";
25
25
  import { inlineGraphSubgraph } from "./graph-context.js";
26
26
  import { buildExtractionStepsBlock } from "./commands-extract-learnings.js";
27
27
  import { resolveSkillManifest, warnIfManifestHasMissingSkills } from "./skill-manifest.js";
28
+ import { classifyProject } from "./detection.js";
28
29
  // ─── Preamble Cap ─────────────────────────────────────────────────────────────
29
30
  /**
30
31
  * Historical static ceiling for the preamble cap. Kept as an upper bound even
@@ -62,6 +63,104 @@ function resolvePromptBudgets() {
62
63
  function resolveSummaryBudgetChars() {
63
64
  return resolvePromptBudgets().summaryBudgetChars;
64
65
  }
66
+ function formatProjectClassificationForPlanning(classification) {
67
+ const sampleFiles = classification.contentFiles.slice(0, 8);
68
+ const sample = sampleFiles.length > 0 ? sampleFiles.map((file) => `\`${file}\``).join(", ") : "(none)";
69
+ const lines = [
70
+ "### Project Classification",
71
+ "",
72
+ `- **Kind:** ${classification.kind}`,
73
+ `- **Content files:** ${classification.contentFiles.length}`,
74
+ `- **Sample files:** ${sample}`,
75
+ `- **Reason:** ${classification.reason}`,
76
+ "",
77
+ ];
78
+ if (classification.kind === "untyped-existing") {
79
+ if (classification.contentFiles.length <= 2) {
80
+ lines.push("**Workflow sizing:** This is a tiny existing untyped project. Prefer exactly one slice unless the milestone request clearly spans multiple independent user-visible capabilities.");
81
+ }
82
+ else if (classification.contentFiles.length <= 5) {
83
+ lines.push("**Workflow sizing:** This is a small existing untyped project. Prefer 1-2 slices unless the milestone request clearly spans multiple independent user-visible capabilities.");
84
+ }
85
+ else {
86
+ lines.push("**Workflow sizing:** Existing untyped project. Use generic file-level workflow guidance and size slices by real capability boundaries, not by missing tooling markers.");
87
+ }
88
+ }
89
+ else if (classification.kind === "greenfield") {
90
+ lines.push("**Workflow sizing:** No project content exists yet. Use normal greenfield sizing for the requested scope.");
91
+ }
92
+ else if (classification.kind === "typed-existing") {
93
+ lines.push("**Workflow sizing:** Known project markers exist. Use normal ecosystem-aware planning guidance.");
94
+ }
95
+ else {
96
+ lines.push("**Workflow sizing:** Invalid repository state. Planning should surface this as a blocker rather than inventing project structure.");
97
+ }
98
+ return lines.join("\n");
99
+ }
100
+ function normalizeArtifactRef(value) {
101
+ return value.trim().replace(/^[-\s]+/, "").replace(/^["'`]+|["'`]+$/g, "").replaceAll("\\", "/").replace(/^\.\//, "");
102
+ }
103
+ function parseCoveredArtifacts(validationContent) {
104
+ const covered = new Set();
105
+ const lines = validationContent.split(/\r?\n/);
106
+ let inCoveredArtifacts = false;
107
+ for (const line of lines) {
108
+ if (/^\s*covered[-_]?artifacts\s*:/i.test(line)) {
109
+ inCoveredArtifacts = true;
110
+ const inline = line.split(/covered[-_]?artifacts\s*:/i)[1]?.trim();
111
+ if (inline && inline !== "[]") {
112
+ inline.replace(/^\[|\]$/g, "").split(",").map(normalizeArtifactRef).filter(Boolean).forEach((item) => covered.add(item));
113
+ }
114
+ continue;
115
+ }
116
+ if (!inCoveredArtifacts)
117
+ continue;
118
+ if (/^\S/.test(line) && !/^\s*-/.test(line))
119
+ break;
120
+ const item = line.match(/^\s*-\s*(.+)$/)?.[1];
121
+ if (item)
122
+ covered.add(normalizeArtifactRef(item));
123
+ }
124
+ return covered;
125
+ }
126
+ function isValidationFreshOrApplicable(validationContent, currentArtifacts) {
127
+ if (!validationContent)
128
+ return false;
129
+ if (!/validation_metadata:/i.test(validationContent))
130
+ return false;
131
+ const coveredArtifacts = parseCoveredArtifacts(validationContent);
132
+ if (coveredArtifacts.size === 0)
133
+ return false;
134
+ return currentArtifacts
135
+ .map(normalizeArtifactRef)
136
+ .filter(Boolean)
137
+ .every((artifact) => coveredArtifacts.has(artifact));
138
+ }
139
+ function formatCloseoutReviewInstructions(validationContent, validationRel, currentArtifacts) {
140
+ const verdict = validationContent ? extractVerdict(validationContent) : null;
141
+ const validationFresh = isValidationFreshOrApplicable(validationContent, currentArtifacts);
142
+ if (verdict === "pass" && validationFresh) {
143
+ return [
144
+ "### Passing Validation Artifact",
145
+ "",
146
+ `A passing validation artifact is present at \`${validationRel}\`. Treat it as authoritative for success criteria, requirement coverage, verification classes, and cross-slice integration.`,
147
+ "",
148
+ "Do not delegate fresh reviewer/security/tester audits and do not redo the validation evidence review unless the artifact is internally inconsistent with the inlined summaries. Focus this unit on final milestone narrative, learnings, PROJECT/requirements updates, and `gsd_complete_milestone`.",
149
+ ].join("\n");
150
+ }
151
+ if (verdict) {
152
+ return [
153
+ "### Validation Requires Attention",
154
+ "",
155
+ `A validation artifact is present at \`${validationRel}\` with verdict \`${verdict}\`, but it is missing freshness metadata or does not cover current milestone artifacts. Do not treat the milestone as complete unless the issues are resolved and evidence supports completion.`,
156
+ ].join("\n");
157
+ }
158
+ return [
159
+ "### No Passing Validation Artifact",
160
+ "",
161
+ `No passing validation artifact was found at \`${validationRel}\`. Use the full closeout review path before completion.`,
162
+ ].join("\n");
163
+ }
65
164
  function capPreamble(preamble) {
66
165
  // Cap inlined context at min(historical 30K ceiling, scaled inline budget).
67
166
  // The ceiling preserves pre-fix behavior for large-window users; the scaled
@@ -1465,6 +1564,7 @@ export async function buildPlanMilestonePrompt(mid, midTitle, base, level) {
1465
1564
  const researchAnchor = readPhaseAnchor(base, mid, "research-milestone");
1466
1565
  if (researchAnchor)
1467
1566
  inlined.push(formatAnchorForPrompt(researchAnchor));
1567
+ inlined.push(formatProjectClassificationForPlanning(classifyProject(base)));
1468
1568
  inlined.push(await inlineFile(contextPath, contextRel, "Milestone Context"));
1469
1569
  const researchInline = await inlineFileOptional(researchPath, researchRel, "Milestone Research");
1470
1570
  if (researchInline)
@@ -2017,6 +2117,9 @@ export async function buildCompleteMilestonePrompt(mid, midTitle, base, level) {
2017
2117
  const inlineLevel = level ?? resolveInlineLevel();
2018
2118
  const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
2019
2119
  const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
2120
+ const validationPath = resolveMilestoneFile(base, mid, "VALIDATION");
2121
+ const validationRel = relMilestoneFile(base, mid, "VALIDATION");
2122
+ const validationContent = validationPath ? await loadFile(validationPath) : null;
2020
2123
  const inlined = [];
2021
2124
  inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
2022
2125
  // Inline all slice summaries (deduplicated by slice ID)
@@ -2056,6 +2159,13 @@ export async function buildCompleteMilestonePrompt(mid, midTitle, base, level) {
2056
2159
  const pathList = summaryRelPaths.map(p => `- \`${p}\``).join("\n");
2057
2160
  inlined.push(`### On-demand Slice Summaries\n\nExcerpted above. Read the full file for any slice when the excerpt's section heads don't carry enough narrative for the milestone summary you're drafting:\n\n${pathList}`);
2058
2161
  }
2162
+ const validationContext = [
2163
+ formatCloseoutReviewInstructions(validationContent, validationRel, [validationRel, roadmapRel, ...summaryRelPaths]),
2164
+ ];
2165
+ if (validationContent) {
2166
+ validationContext.push(`### Milestone Validation\nSource: \`${validationRel}\`\n\n${validationContent.trim()}`);
2167
+ }
2168
+ inlined.unshift(...validationContext);
2059
2169
  // Inline root GSD files (skip for minimal — completion can read these if needed)
2060
2170
  if (inlineLevel !== "minimal") {
2061
2171
  const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel);
@@ -225,8 +225,16 @@ function synthesizePausedSessionRecovery(basePath, unitType, unitId, sessionFile
225
225
  export function _synthesizePausedSessionRecoveryForTest(basePath, unitType, unitId, sessionFile) {
226
226
  return synthesizePausedSessionRecovery(basePath, unitType, unitId, sessionFile);
227
227
  }
228
+ const DETACHED_AUTO_KEEPALIVE_INTERVAL_MS = 30_000;
229
+ function withDetachedAutoKeepalive(run) {
230
+ const keepAlive = setInterval(() => { }, DETACHED_AUTO_KEEPALIVE_INTERVAL_MS);
231
+ return run.finally(() => {
232
+ clearInterval(keepAlive);
233
+ });
234
+ }
235
+ export const _withDetachedAutoKeepaliveForTest = withDetachedAutoKeepalive;
228
236
  export function startAutoDetached(ctx, pi, base, verboseMode, options) {
229
- void startAuto(ctx, pi, base, verboseMode, options).catch((err) => {
237
+ void withDetachedAutoKeepalive(startAuto(ctx, pi, base, verboseMode, options)).catch((err) => {
230
238
  const message = getErrorMessage(err);
231
239
  ctx.ui.notify(`Auto-start failed: ${message}`, "error");
232
240
  logWarning("engine", `auto start error: ${message}`, { file: "auto.ts" });
@@ -16,6 +16,31 @@ import { execFileSync } from "node:child_process";
16
16
  import { GIT_NO_PROMPT_ENV } from "./git-constants.js";
17
17
  import { logWarning } from "./workflow-logger.js";
18
18
  import { nativeHasChanges } from "./native-git-bridge.js";
19
+ function findPreflightStashRef(basePath, milestoneId, stashMarker) {
20
+ const markerPrefix = `gsd-preflight-stash:${milestoneId}:`;
21
+ let fallbackRef = null;
22
+ try {
23
+ const list = execFileSync("git", ["stash", "list", "--format=%gd%x00%s"], {
24
+ cwd: basePath,
25
+ stdio: ["ignore", "pipe", "pipe"],
26
+ encoding: "utf-8",
27
+ env: GIT_NO_PROMPT_ENV,
28
+ });
29
+ for (const line of list.split("\n")) {
30
+ const [ref, subject] = line.split("\x00");
31
+ if (!ref || !subject)
32
+ continue;
33
+ if (stashMarker && subject.includes(stashMarker))
34
+ return ref;
35
+ if (!fallbackRef && subject.includes(markerPrefix))
36
+ fallbackRef = ref;
37
+ }
38
+ }
39
+ catch (err) {
40
+ logWarning("preflight", `stash list failed before restore: ${err instanceof Error ? err.message : String(err)}`);
41
+ }
42
+ return fallbackRef;
43
+ }
19
44
  /**
20
45
  * Check the working tree for dirty files before a milestone merge.
21
46
  *
@@ -47,7 +72,8 @@ export function preflightCleanRoot(basePath, milestoneId, notify) {
47
72
  notify(warnMsg, "warning");
48
73
  // Push the stash
49
74
  try {
50
- execFileSync("git", ["stash", "push", "--include-untracked", "-m", "gsd-preflight-stash"], {
75
+ const stashMarker = `gsd-preflight-stash:${milestoneId}:${process.pid}:${Date.now()}:${process.hrtime.bigint().toString(36)}`;
76
+ execFileSync("git", ["stash", "push", "--include-untracked", "-m", `gsd-preflight-stash [${stashMarker}]`], {
51
77
  cwd: basePath,
52
78
  stdio: ["ignore", "pipe", "pipe"],
53
79
  encoding: "utf-8",
@@ -55,6 +81,7 @@ export function preflightCleanRoot(basePath, milestoneId, notify) {
55
81
  });
56
82
  return {
57
83
  stashPushed: true,
84
+ stashMarker,
58
85
  summary: `Stashed uncommitted changes before merge (milestone ${milestoneId}).`,
59
86
  };
60
87
  }
@@ -73,9 +100,17 @@ export function preflightCleanRoot(basePath, milestoneId, notify) {
73
100
  * Any pop error (e.g. conflict) is logged and notified but does NOT throw —
74
101
  * the merge already completed successfully.
75
102
  */
76
- export function postflightPopStash(basePath, milestoneId, notify) {
103
+ export function postflightPopStash(basePath, milestoneId, stashMarker, notify) {
104
+ let stashRef = null;
77
105
  try {
78
- execFileSync("git", ["stash", "pop"], {
106
+ stashRef = findPreflightStashRef(basePath, milestoneId, stashMarker);
107
+ if (!stashRef) {
108
+ const msg = `No matching GSD preflight stash found for milestone ${milestoneId}; leaving stash list untouched.`;
109
+ logWarning("preflight", msg);
110
+ notify(msg, "warning");
111
+ return;
112
+ }
113
+ execFileSync("git", ["stash", "pop", stashRef], {
79
114
  cwd: basePath,
80
115
  stdio: ["ignore", "pipe", "pipe"],
81
116
  encoding: "utf-8",
@@ -86,7 +121,10 @@ export function postflightPopStash(basePath, milestoneId, notify) {
86
121
  catch (err) {
87
122
  // Pop conflicts mean the merged code collides with the stashed changes.
88
123
  // Log a warning — the user needs to resolve manually, but the merge succeeded.
89
- const msg = `git stash pop failed after merge of milestone ${milestoneId}: ${err instanceof Error ? err.message : String(err)}. Run "git stash pop" manually to restore your changes.`;
124
+ const restoreHint = stashRef
125
+ ? `Run "git stash pop ${stashRef}" or "git stash apply ${stashRef}" manually to restore the correct stash.`
126
+ : `Run "git stash list" to find the matching GSD preflight stash before restoring manually.`;
127
+ const msg = `git stash pop ${stashRef ?? ""}`.trim() + ` failed after merge of milestone ${milestoneId}: ${err instanceof Error ? err.message : String(err)}. ${restoreHint}`;
90
128
  logWarning("preflight", msg);
91
129
  notify(msg, "warning");
92
130
  }
@@ -5,6 +5,7 @@
5
5
  * Used by init-wizard.ts and guided-flow.ts to determine what onboarding
6
6
  * flow to show when entering a project directory.
7
7
  */
8
+ import { execFileSync } from "node:child_process";
8
9
  import { existsSync, openSync, readSync, closeSync, readdirSync, readFileSync, statSync } from "node:fs";
9
10
  import { dirname, join, parse as parsePath } from "node:path";
10
11
  import { homedir } from "node:os";
@@ -171,6 +172,7 @@ const TEST_MARKERS = [
171
172
  const RECURSIVE_SCAN_IGNORED_DIRS = new Set([
172
173
  ".git",
173
174
  ".gsd",
175
+ ".bg-shell",
174
176
  ".planning",
175
177
  ".plans",
176
178
  ".claude",
@@ -194,6 +196,7 @@ const RECURSIVE_SCAN_IGNORED_DIRS = new Set([
194
196
  "DerivedData",
195
197
  "out",
196
198
  ]);
199
+ const PROJECT_CONTENT_EXCLUDE_DIRS = RECURSIVE_SCAN_IGNORED_DIRS;
197
200
  /** Project file markers safe to detect recursively via suffix matching. */
198
201
  const ROOT_ONLY_PROJECT_FILES = new Set([
199
202
  ".github/workflows",
@@ -429,6 +432,109 @@ export function detectProjectSignals(basePath) {
429
432
  verificationCommands,
430
433
  };
431
434
  }
435
/**
 * Normalize a path string to forward slashes with no leading "./".
 *
 * Backslashes are converted first, so a leading ".\" is stripped as well.
 * Only a single leading "./" is removed.
 *
 * @param {string} file - Path to normalize.
 * @returns {string} The normalized path.
 */
function normalizeGitPath(file) {
    const forwardSlashed = file.split("\\").join("/");
    return forwardSlashed.startsWith("./") ? forwardSlashed.slice(2) : forwardSlashed;
}
438
/**
 * Decide whether a path counts as real project content.
 *
 * Rejected: empty paths, paths ending in "/", the root `.gitignore` and
 * `.gitattributes`, anything ending in `.DS_Store`, and any file that lives
 * underneath a directory listed in PROJECT_CONTENT_EXCLUDE_DIRS.
 *
 * @param {string} file - Path to evaluate (normalized internally).
 * @returns {boolean} True when the path should be treated as project content.
 */
function isProjectContentFile(file) {
    const normalized = normalizeGitPath(file);
    if (!normalized || normalized.endsWith("/"))
        return false;
    if (normalized === ".gitignore" || normalized === ".gitattributes")
        return false;
    if (normalized.endsWith(".DS_Store"))
        return false;
    // Compare only the directory segments against the excluded-directory set.
    // The last segment is the file's own name; testing it too would discard a
    // legitimate file that merely shares a name with an ignored directory
    // (for example a script called "build" or "out").
    const directorySegments = normalized.split("/").slice(0, -1);
    return !directorySegments.some((segment) => PROJECT_CONTENT_EXCLUDE_DIRS.has(segment));
}
451
/**
 * Run a git command in `basePath` and return its stdout as trimmed,
 * non-empty lines.
 *
 * Best-effort by design: any failure (git unavailable, non-zero exit, invalid
 * working directory, output exceeding the buffer limit) produces an empty
 * array instead of throwing.
 *
 * @param {string} basePath - Working directory for the git process.
 * @param {string[]} args - Git sub-command and its arguments.
 * @returns {string[]} Output lines, or an empty array on failure or no output.
 */
function runGitLines(basePath, args) {
    // Node limits captured child output to 1 MiB unless told otherwise. A file
    // listing of a large repository exceeds that, which would throw and be
    // reported as "no files" by the catch below.
    const maxOutputBytes = 64 * 1024 * 1024;
    try {
        // core.quotepath=off keeps non-ASCII paths as literal UTF-8 instead of
        // git's default quoted, octal-escaped form.
        const output = execFileSync("git", ["-c", "core.quotepath=off", ...args], {
            cwd: basePath,
            stdio: ["ignore", "pipe", "ignore"],
            encoding: "utf-8",
            maxBuffer: maxOutputBytes,
        }).trim();
        return output ? output.split("\n").map((line) => line.trim()).filter(Boolean) : [];
    }
    catch {
        return [];
    }
}
464
/**
 * List the paths reported by `git ls-files` in `basePath` that qualify as
 * project content.
 *
 * @param {string} basePath - Repository directory to inspect.
 * @returns {string[]} Normalized paths that pass `isProjectContentFile`.
 */
function listTrackedProjectFiles(basePath) {
    const contentPaths = [];
    for (const entry of runGitLines(basePath, ["ls-files"])) {
        const path = normalizeGitPath(entry);
        if (isProjectContentFile(path)) {
            contentPaths.push(path);
        }
    }
    return contentPaths;
}
469
/**
 * List the paths reported by `git ls-files --others --exclude-standard` in
 * `basePath` that qualify as project content.
 *
 * @param {string} basePath - Repository directory to inspect.
 * @returns {string[]} Normalized paths that pass `isProjectContentFile`.
 */
function listUntrackedProjectFiles(basePath) {
    const contentPaths = [];
    for (const entry of runGitLines(basePath, ["ls-files", "--others", "--exclude-standard"])) {
        const path = normalizeGitPath(entry);
        if (isProjectContentFile(path)) {
            contentPaths.push(path);
        }
    }
    return contentPaths;
}
474
/**
 * Report whether the detected signals include any recognized project marker.
 *
 * @param {string} basePath - Not consulted here; kept in the signature for callers.
 * @param {{ detectedFiles: unknown[], xcodePlatforms: unknown[] }} signals -
 *   Detection result to inspect.
 * @returns {boolean} True when at least one detected file or Xcode platform exists.
 */
function hasKnownProjectMarkers(basePath, signals) {
    return signals.detectedFiles.length > 0 || signals.xcodePlatforms.length > 0;
}
481
/**
 * Classify a directory by repo presence, independently of ecosystem/tooling
 * markers.
 *
 * Known project files tell us which tooling is in use; git-tracked or
 * non-ignored content tells us whether a project exists at all. Keeping the
 * two apart stops small static or documentation repos from being labelled
 * greenfield.
 *
 * Result kinds, checked in this order:
 * - "invalid-repo": the signals report no git repository (file lists are empty).
 * - "typed-existing": recognized markers were found.
 * - "untyped-existing": content files exist but no markers were recognized.
 * - "greenfield": neither markers nor content files.
 *
 * @param {string} basePath - Directory to classify.
 * @returns {{ kind: string, signals: object, trackedFiles: string[],
 *   untrackedFiles: string[], contentFiles: string[], markers: string[],
 *   reason: string }} Classification with the evidence behind it.
 */
export function classifyProject(basePath) {
    const signals = detectProjectSignals(basePath);
    const markers = [...signals.detectedFiles];
    // Single builder so every result shares the same shape and key order.
    const buildResult = (kind, reason, files) => ({ kind, signals, ...files, markers, reason });
    if (!signals.isGitRepo) {
        // Git is not queried at all when there is no repository.
        return buildResult("invalid-repo", "missing .git", {
            trackedFiles: [],
            untrackedFiles: [],
            contentFiles: [],
        });
    }
    const trackedFiles = listTrackedProjectFiles(basePath);
    const untrackedFiles = listUntrackedProjectFiles(basePath);
    // De-duplicate while keeping tracked entries ahead of untracked ones.
    const contentFiles = [...new Set([...trackedFiles, ...untrackedFiles])];
    const files = { trackedFiles, untrackedFiles, contentFiles };
    if (hasKnownProjectMarkers(basePath, signals)) {
        // Markers can be empty here when only Xcode platforms were detected.
        const reason = markers.length > 0
            ? `detected markers: ${markers.join(", ")}`
            : "detected project structure";
        return buildResult("typed-existing", reason, files);
    }
    if (contentFiles.length > 0) {
        return buildResult("untyped-existing", "project content exists but no recognized tooling markers were found", files);
    }
    return buildResult("greenfield", "no tracked or non-ignored project content", files);
}
432
538
  // ─── Xcode Platform Detection ───────────────────────────────────────────────────
433
539
  /** Known SDKROOT values → canonical platform names. */
434
540
  const SDKROOT_MAP = {
@@ -16,15 +16,14 @@ Start with what the excerpts give you. Read full files when the section heads si
16
16
 
17
17
  **On-demand Read ordering:** Complete all slice SUMMARY Reads you need for cross-slice synthesis, the Decision Re-evaluation table, and LEARNINGS **before** calling `gsd_complete_milestone` (step 12). Once that tool runs, the milestone is marked complete in the DB, so it must be the final persistent milestone-closeout write.
18
18
 
19
- ### Delegate Review Work
19
+ ### Closeout Review Mode
20
20
 
21
- Use `subagent` for review work needing fresh context, before drafting LEARNINGS:
21
+ The inlined context includes a validation status block.
22
22
 
23
- - Cross-slice integrations or new public APIs -> **reviewer** with milestone diff and roadmap.
24
- - Auth, network, parsing, file IO, shell exec, or crypto -> **security** audit.
25
- - Significant tests added or changed -> **tester** coverage check against success criteria.
23
+ - If it says a passing validation artifact is present, treat that artifact as authoritative for success criteria, requirement coverage, verification classes, and cross-slice integration. Do not delegate fresh reviewer/security/tester audits unless the validation artifact is internally inconsistent with the inlined summaries.
24
+ - If validation is missing, stale, non-pass, or internally inconsistent, use `subagent` for review work needing fresh context before drafting LEARNINGS: cross-slice integrations or new public APIs -> **reviewer**; auth, network, parsing, file IO, shell exec, or crypto -> **security**; significant tests added or changed -> **tester**.
26
25
 
27
- Subagents report only; they do not write user source. Fold findings into Decision Re-evaluation and LEARNINGS before completion.
26
+ Subagents report only; they do not write user source. Fold any findings into Decision Re-evaluation and LEARNINGS before completion.
28
27
 
29
28
  {{inlinedContext}}
30
29
 
@@ -33,8 +32,8 @@ Subagents report only; they do not write user source. Fold findings into Decisio
33
32
  1. Use the **Milestone Summary** output template from the inlined context above
34
33
  2. {{skillActivation}}
35
34
  3. **Verify code changes exist.** Compare milestone work against the integration branch (`main`, `master`, or recorded branch), using merge-base as older revision and `HEAD` as newer. If the diff lists non-`.gsd/` files, pass. If `HEAD` equals the integration branch/merge-base, treat it as a self-diff retry: inspect milestone-scoped commit evidence (`GSD-Unit: {{milestoneId}}` or production `GSD-Task: Sxx/Tyy` trailers touching `.gsd/milestones/{{milestoneId}}/`) and verify those commits touched non-`.gsd/` files. Record **verification failure** only when neither source shows implementation files.
36
- 4. Verify every **success criterion** from `{{roadmapPath}}` with evidence from summaries, tests, or observable behavior. Record unmet criteria as **verification failure**.
37
- 5. Verify **definition of done**: all slices `[x]`, summaries exist, and integrations work. Record unmet items as **verification failure**.
35
+ 4. Verify every **success criterion** from `{{roadmapPath}}`. If passing validation is present, summarize the validation evidence instead of re-auditing it; otherwise verify with evidence from summaries, tests, or observable behavior. Record unmet criteria as **verification failure**.
36
+ 5. Verify **definition of done**: all slices `[x]`, summaries exist, and integrations work. If passing validation is present, trust its integration/verification verdict unless inconsistent with current artifacts. Record unmet items as **verification failure**.
38
37
  6. If the roadmap includes a **Horizontal Checklist**, verify each item and note unchecked items in the summary.
39
38
  7. Fill the **Decision Re-evaluation** table: compare each key `.gsd/DECISIONS.md` decision from this milestone with what shipped, and flag decisions to revisit.
40
39
  8. Validate **requirement status transitions**. For each changed requirement, confirm evidence supports the new status. Requirements may move between Active, Validated, Deferred, Blocked, or Out of Scope only with proof.
@@ -48,7 +48,7 @@ Narrate decomposition reasoning in complete sentences: grouping, risk order, ver
48
48
  Then:
49
49
  1. Use the **Roadmap** output template from the inlined context above
50
50
  2. {{skillActivation}}
51
- 3. Create only as many demoable vertical slices as the work genuinely needs.
51
+ 3. Create only as many demoable vertical slices as the work genuinely needs. Use 1-10 slices, sized to the work; tiny/single-file/static work should usually be one slice.
52
52
  4. Order by risk, high-risk first.
53
53
  5. Call `gsd_plan_milestone` to persist milestone fields, slice rows, and **Horizontal Checklist** through the DB-backed path. Fill checklist concerns considered during planning: requirements, decisions, shutdown, revenue, auth, shared resources, reconnection. Omit for trivial milestones. Do **not** write `{{outputPath}}`, `ROADMAP.md`, or other planning artifacts manually; the tool owns rendering and persistence.
54
54
  6. If planning produced structural decisions (slice ordering, technology choices, scope exclusions), call `gsd_decision_save` for each; the tool assigns IDs and regenerates `.gsd/DECISIONS.md`.
@@ -78,6 +78,8 @@ Apply these when decomposing and ordering slices:
78
78
  - Ship features, not proofs; use clearly marked realistic stubs only when necessary.
79
79
  - **Dependency format is comma-separated, never range syntax.** Write `depends:[S01,S02,S03]`, not `depends:[S01-S03]`.
80
80
  - Roadmap ambition must match the milestone; right-size decomposition.
81
+ - Missing ecosystem markers are not a reason to over-plan. If Project Classification says `untyped-existing`, treat the listed content files as the project surface and use generic file-level workflow guidance.
82
+ - For `untyped-existing` projects with 1-2 content files, prefer exactly one slice unless the request clearly spans multiple independent user-visible capabilities. For 3-5 content files, prefer 1-2 slices.
81
83
 
82
84
  ## Progressive Planning (ADR-011)
83
85