npm - gsd-pi - Versions diffs - 2.47.0 → 2.48.0-dev.ced2eca - Mend

gsd-pi 2.47.0 → 2.48.0-dev.ced2eca

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (200)

package/dist/resources/extensions/gsd/guided-flow.js CHANGED Viewed

@@ -29,7 +29,7 @@ import { showProjectInit, offerMigration } from "./init-wizard.js";
 import { validateDirectory } from "./validate-directory.js";
 import { showConfirm } from "../shared/tui.js";
 import { debugLog } from "./debug-logger.js";
-import { findMilestoneIds, nextMilestoneId, reserveMilestoneId, getReservedMilestoneIds } from "./milestone-ids.js";
+import { findMilestoneIds, nextMilestoneId, reserveMilestoneId, getReservedMilestoneIds, clearReservedMilestoneIds } from "./milestone-ids.js";
 import { parkMilestone, discardMilestone } from "./milestone-actions.js";
 import { resolveModelWithFallbacksForUnit } from "./preferences-models.js";
 // ─── Re-exports (preserve public API for existing importers) ────────────────
@@ -296,6 +296,8 @@ function bootstrapGsdProject(basePath) {
  * and dispatches the headless discuss prompt (no Q&A rounds).
  */
 export async function showHeadlessMilestoneCreation(ctx, pi, basePath, seedContext) {
+    // Clear stale reservations from previous cancelled sessions (#2488)
+    clearReservedMilestoneIds();
     // Ensure .gsd/ is bootstrapped
     bootstrapGsdProject(basePath);
     // Generate next milestone ID
@@ -403,9 +405,14 @@ export async function showDiscuss(ctx, pi, basePath) {
     // Invalidate caches to pick up artifacts written by a just-completed discuss/plan
     invalidateAllCaches();
     const state = await deriveState(basePath);
-    // Guard: no active milestone
+    // No active milestone — check for pending milestones to discuss instead
     if (!state.activeMilestone) {
-        ctx.ui.notify("No active milestone. Run /gsd to create one first.", "warning");
+        const pendingMilestones = state.registry.filter(m => m.status === "pending");
+        if (pendingMilestones.length === 0) {
+            ctx.ui.notify("No active milestone. Run /gsd to create one first.", "warning");
+            return;
+        }
+        await showDiscussQueuedMilestone(ctx, pi, basePath, pendingMilestones);
         return;
     }
     const mid = state.activeMilestone.id;
@@ -526,6 +533,16 @@ export async function showDiscuss(ctx, pi, basePath) {
                 recommended: s.id === firstUndiscussedId,
             };
         });
+        // Offer access to queued milestones when any exist
+        const pendingMilestones = state.registry.filter(m => m.status === "pending");
+        if (pendingMilestones.length > 0) {
+            actions.push({
+                id: "discuss_queued_milestone",
+                label: "Discuss a queued milestone",
+                description: `Refine context for ${pendingMilestones.length} queued milestone(s). Does not affect current execution.`,
+                recommended: false,
+            });
+        }
         const choice = await showNextAction(ctx, {
             title: "GSD — Discuss a slice",
             summary: [
@@ -537,6 +554,10 @@ export async function showDiscuss(ctx, pi, basePath) {
         });
         if (choice === "not_yet")
             return;
+        if (choice === "discuss_queued_milestone") {
+            await showDiscussQueuedMilestone(ctx, pi, basePath, pendingMilestones);
+            return;
+        }
         const chosen = pendingSlices.find(s => s.id === choice);
         if (!chosen)
             return;
@@ -564,6 +585,63 @@ export async function showDiscuss(ctx, pi, basePath) {
         invalidateAllCaches();
     }
 }
+// ─── Queued Milestone Discussion ─────────────────────────────────────────────
+/**
+ * Show a picker of queued (pending) milestones and dispatch a discuss flow for
+ * the chosen one. Discussing a queued milestone does NOT activate it — it only
+ * refines the CONTEXT.md artifact so it is better prepared when auto-mode
+ * eventually reaches it. Resolves without dispatching when the picker yields "not_yet" or an id that matches no milestone.
+ */
+async function showDiscussQueuedMilestone(ctx, pi, basePath, pendingMilestones) { // pendingMilestones: entries read for `id` and `title`; callers in this file pass registry items whose status is "pending"
+    const actions = pendingMilestones.map((m, i) => { // one picker action per queued milestone
+        const hasContext = !!resolveMilestoneFile(basePath, m.id, "CONTEXT");
+        const hasDraft = !hasContext && !!resolveMilestoneFile(basePath, m.id, "CONTEXT-DRAFT"); // draft lookup is skipped once a CONTEXT file has resolved
+        const contextStatus = hasContext ? "context ✓" : hasDraft ? "draft context" : "no context yet";
+        return {
+            id: m.id, // milestone id doubles as the action id so the choice can be matched back below
+            label: `${m.id}: ${m.title}`,
+            description: `[queued] · ${contextStatus}`,
+            recommended: i === 0, // only the first listed milestone is flagged as recommended
+        };
+    });
+    const choice = await showNextAction(ctx, {
+        title: "GSD — Discuss a queued milestone",
+        summary: [
+            "Select a queued milestone to discuss.",
+            "Discussing will update its context file. It will not be activated.",
+        ],
+        actions,
+        notYetMessage: "Run /gsd discuss when ready.",
+    });
+    if (choice === "not_yet") // user deferred — leave everything untouched
+        return;
+    const chosen = pendingMilestones.find(m => m.id === choice); // map the returned action id back to its milestone entry
+    if (!chosen) // any other return value that matches no milestone id — nothing to dispatch
+        return;
+    await dispatchDiscussForMilestone(ctx, pi, basePath, chosen.id, chosen.title);
+}
+/**
+ * Dispatch the guided-discuss-milestone prompt for a milestone without
+ * setting pendingAutoStart — so discussing a queued milestone does not
+ * implicitly activate it when the session ends.
+ */
+async function dispatchDiscussForMilestone(ctx, pi, basePath, mid, milestoneTitle) {
+    const draftFile = resolveMilestoneFile(basePath, mid, "CONTEXT-DRAFT"); // a falsy result is treated as "no draft" below
+    const draftContent = draftFile ? await loadFile(draftFile) : null; // only read the draft when one was resolved
+    const discussMilestoneTemplates = inlineTemplate("context", "Context");
+    const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; // handed to the prompt as the string "true"/"false", not a boolean
+    const basePrompt = loadPrompt("guided-discuss-milestone", {
+        milestoneId: mid,
+        milestoneTitle,
+        inlinedTemplates: discussMilestoneTemplates,
+        structuredQuestionsAvailable,
+        commitInstruction: buildDocsCommitInstruction(`docs(${mid}): milestone context from discuss`),
+    });
+    const prompt = draftContent // truthy draft content is appended under its own heading as seed material; otherwise the base prompt is used as-is
+        ? `${basePrompt}\n\n## Prior Discussion (Draft Seed)\n\n${draftContent}`
+        : basePrompt;
+    await dispatchWorkflow(pi, prompt, "gsd-discuss", ctx, "plan-milestone");
+}
 // ─── Smart Entry Point ────────────────────────────────────────────────────────
 /**
  * The one wizard. Reads state, shows contextual options, dispatches into the workflow doc.
@@ -693,6 +771,10 @@ async function handleMilestoneActions(ctx, pi, basePath, milestoneId, milestoneT
 }
 export async function showSmartEntry(ctx, pi, basePath, options) {
     const stepMode = options?.step;
+    // ── Clear stale milestone ID reservations from previous cancelled sessions ──
+    // Reservations only need to survive within a single /gsd interaction.
+    // Without this, each cancelled session permanently bumps the next ID. (#2488)
+    clearReservedMilestoneIds();
     // ── Directory safety check — refuse to operate in system/home dirs ───
     const dirCheck = validateDirectory(basePath);
     if (dirCheck.severity === "blocked") {

package/dist/resources/extensions/gsd/prompts/discuss-headless.md CHANGED Viewed

@@ -1,86 +1,253 @@
 # Headless Milestone Creation
-You are creating a GSD milestone from a provided specification document. This is a **headless** (non-interactive) flow — do NOT ask the user any questions. Work entirely from the provided specification.
+You are creating a GSD milestone from a provided specification document. This is a **headless** (non-interactive) flow — do NOT ask the user any questions. Wherever the interactive flow would ask the user, make your best-judgment call and document it as an assumption.
 ## Provided Specification
 {{seedContext}}
-## Your Task
+## Reflection Step
-### Step 1: Reflect
+Summarize your understanding of the specification concretely — not abstractly:
-Summarize your understanding of the specification concretely:
-- What is being built
-- Major capabilities/features
-- Scope estimate (how many milestones × slices)
-- Any ambiguities or gaps you notice
+1. Summarize what is being built in your own words.
+2. Give an honest size read: roughly how many milestones, roughly how many slices in the first one. Base this on the actual work involved, not a classification label.
+3. Include scope honesty — a bullet list of the major capabilities: "Here's what I'm reading from the spec: [bullet list of major capabilities]."
+4. Note any ambiguities, gaps, or areas where the spec is vague.
-### Step 2: Investigate (brief)
+Print this reflection in chat. Do not skip this step.
-Quickly scout the codebase to understand what already exists — spend no more than 5-6 tool calls here:
-- `ls` the project root and key directories
-- Search for relevant existing code, patterns, dependencies
-- Check library docs if needed (`resolve_library` / `get_library_docs`)
+## Vision Mapping
-Then move on to writing artifacts. Do not explore exhaustively — the research phase will do deeper investigation later.
+Decide the approach based on the actual scope:
-### Step 3: Make Decisions
+**If the work spans multiple milestones:** Map the full landscape:
+1. Propose a milestone sequence — names, one-line intents, rough dependencies
+2. Print this in chat as the working milestone sequence
-For any ambiguities or gaps in the specification:
-- Make your best-guess decision based on the spec's intent, codebase patterns, and domain conventions
-- Document each assumption clearly in the Context file
+**If the work fits in a single milestone:** Proceed directly to investigation.
-### Step 4: Assess Scope
+**Anti-reduction rule:** If the spec describes a big vision, plan the big vision. Do not reduce scope. Phase complex/risky work into later milestones — do not cut it. The spec's ambition is the target, and your job is to sequence it intelligently, not shrink it.
-Based on reflection + investigation:
-- Is this a single milestone or multiple milestones?
-- If multi-milestone: plan the full sequence with dependencies
+## Mandatory Investigation
-### Step 5: Write Artifacts
+Do a mandatory investigation pass before making any decisions. This is not optional.
-**Milestone ID**: {{milestoneId}}
+1. **Scout the codebase** — `ls`, `find`, `rg`, or `scout` for broad unfamiliar areas. Understand what already exists, what patterns are established, what constraints current code imposes.
+2. **Check library docs** — `resolve_library` / `get_library_docs` for any tech mentioned in the spec. Get current facts about capabilities, constraints, API shapes, version-specific behavior.
+3. **Web search** — `search-the-web` if the domain is unfamiliar, if you need current best practices, or if the spec references external services/APIs you need facts about. Use `fetch_page` for full content when snippets aren't enough.
-Use these templates exactly:
+**Web search budget:** Budget carefully across investigation + focused research:
+- Prefer `resolve_library` / `get_library_docs` over `web_search` for library documentation.
+- Prefer `search_and_read` for one-shot topic research.
+- Target 2-3 web searches in this investigation pass. Save remaining budget for focused research.
+- Do NOT repeat the same or similar queries.
-{{inlinedTemplates}}
+The goal: your decisions should reflect what's actually true in the codebase and ecosystem, not what you assume.
+## Autonomous Decision-Making
+For every area where the spec is ambiguous, vague, or silent:
+- Apply the depth checklist (below) to identify what needs resolution
+- Make your best-judgment call based on: the spec's intent, codebase patterns, domain conventions, and investigation findings
+- **Document every assumption** in the Context file under an "Assumptions" section
+- For each assumption, note: what the spec said (or didn't say), what you decided, and why
+### Depth Checklist
+Ensure ALL of these are resolved before writing artifacts — from the spec + investigation, not by asking:
+- [ ] **What is being built** — concrete enough that you could explain it to a stranger
+- [ ] **Why it needs to exist** — the problem it solves or the desire it fulfills
+- [ ] **Who it's for** — even if just the spec author
+- [ ] **What "done" looks like** — observable outcomes, not abstract goals
+- [ ] **The biggest technical unknowns / risks** — what could fail, what hasn't been proven
+- [ ] **What external systems/services this touches** — APIs, databases, third-party services, hardware
+If the spec leaves any of these unresolved, make your best-judgment call and document it.
+## Depth Verification
+Print a structured depth summary in chat covering:
+- What you understood the spec to describe
+- Key technical findings from investigation
+- Assumptions you made and why
+- Areas where you're least confident
+This is your audit trail. Print it — do not skip it.
+## Focused Research
+Do a focused research pass before roadmap creation.
+Research is advisory, not auto-binding. Use the spec + investigation to identify:
+- table stakes the product space usually expects
+- domain-standard behaviors that may be implied but not stated
+- likely omissions that would make the product feel incomplete
+- plausible anti-features or scope traps
+- differentiators worth preserving
+For multi-milestone visions, research should cover the full landscape, not just the first milestone. Research findings may affect milestone sequencing, not just slice ordering within M001.
+**Key difference from interactive flow:** Where the interactive flow would present research-surfaced candidate requirements for the user to confirm/defer/reject, you instead apply your best judgment. If a research finding clearly aligns with the spec's intent, include it. If it's tangential or would expand scope beyond what the spec describes, defer it or mark it out of scope. Document the reasoning.
+## Capability Contract
+Before writing a roadmap, produce `.gsd/REQUIREMENTS.md`.
+Use it as the project's explicit capability contract.
+Requirements must be organized into:
+- Active
+- Validated
+- Deferred
+- Out of Scope
+- Traceability
+Each requirement should include:
+- stable ID (`R###`)
+- title
+- class
+- status
+- description
+- why it matters
+- source (`spec`, `inferred`, `research`, or `execution`)
+- primary owning slice
+- supporting slices
+- validation status
+- notes
+Rules:
+- Keep requirements capability-oriented, not a giant feature inventory
+- Every Active requirement must either be mapped to a roadmap owner, explicitly deferred, blocked with reason, or moved out of scope
+- Product-facing work should capture launchability, primary user loop, continuity, and failure visibility when relevant
+- Later milestones may have provisional ownership, but the first planned milestone should map requirements to concrete slices wherever possible
+For multi-milestone projects, requirements should span the full vision. Requirements owned by later milestones get provisional ownership. The full requirement set captures the spec's complete vision — milestones are the sequencing strategy, not the scope boundary.
+**Print the requirements in chat before writing the roadmap.** Print a markdown table with columns: ID, Title, Status, Owner, Source. Group by status (Active, Deferred, Out of Scope).
+## Scope Assessment
+Confirm the size estimate from your reflection still holds. Investigation and research often reveal hidden complexity or simplify things. If the scope grew or shrank significantly, adjust the milestone and slice counts accordingly.
+## Output Phase
+### Roadmap Preview
+Before writing any files, **print the planned roadmap in chat**. Print a markdown table with columns: Slice, Title, Risk, Depends, Demo. One row per slice. Below the table, print the milestone definition of done as a bullet list.
+This is the user's audit trail in the TUI scrollback — do not skip it.
+### Naming Convention
-**For single milestone**, write in this order:
+Directories use bare IDs. Files use ID-SUFFIX format. Titles live inside file content, not in names.
+- Milestone dir: `.gsd/milestones/{{milestoneId}}/`
+- Milestone files: `{{milestoneId}}-CONTEXT.md`, `{{milestoneId}}-ROADMAP.md`
+- Slice dirs: `S01/`, `S02/`, etc.
+### Single Milestone
+In a single pass:
 1. `mkdir -p .gsd/milestones/{{milestoneId}}/slices`
-2. Write `.gsd/PROJECT.md` (using Project template)
-3. Write `.gsd/REQUIREMENTS.md` (using Requirements template)
-4. Write `{{contextPath}}` (using Context template) — preserve the specification's exact terminology, emphasis, and specific framing. Do not paraphrase domain-specific language into generics. Document assumptions under an "Assumptions" section.
-5. Write `{{roadmapPath}}` (using Roadmap template) — decompose into demoable vertical slices with checkboxes, risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice.
-6. Seed `.gsd/DECISIONS.md` (using Decisions template)
+2. Write or update `.gsd/PROJECT.md` — use the **Project** output template below. Describe what the project is, its current state, and list the milestone sequence.
+3. Write or update `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Confirm requirement states, ownership, and traceability before roadmap creation.
+**Depth-Preservation Guidance for context.md:**
+Preserve the specification's exact terminology, emphasis, and specific framing. Do not paraphrase domain-specific language into generics. If the spec said "craft feel," write "craft feel" — not "high-quality user experience." The context file is downstream agents' only window into this conversation — flattening specifics into generics loses the signal that shaped every decision.
+4. Write `{{contextPath}}` — use the **Context** output template below. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during research. Include an "Assumptions" section documenting every judgment call.
+5. Call `gsd_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters.
+6. For each architectural or pattern decision, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically.
 7. {{commitInstruction}}
-9. Say exactly: "Milestone {{milestoneId}} ready."
-**For multi-milestone**, write in this order:
+After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Auto-mode will start automatically.
+### Multi-Milestone
+#### Phase 1: Shared artifacts
 1. For each milestone, call `gsd_milestone_generate_id` to get its ID — never invent milestone IDs manually. Then `mkdir -p .gsd/milestones/<ID>/slices` for each.
-2. Write `.gsd/PROJECT.md` — full vision across ALL milestones (using Project template)
-3. Write `.gsd/REQUIREMENTS.md` — full capability contract (using Requirements template)
-4. Seed `.gsd/DECISIONS.md` (using Decisions template)
-5. Write PRIMARY `{{contextPath}}` — full context with all assumptions documented
-6. Write PRIMARY `{{roadmapPath}}` — detailed slices for the first milestone only
-7. For each remaining milestone, write full CONTEXT.md with `depends_on` frontmatter:
-   ```yaml
-   ---
-   depends_on: [M001, M002]
-   ---
-   # M003: Title
-   ```
-   Each context file should be rich enough that a future agent — with no memory of this conversation — can understand the intent, constraints, dependencies, what the milestone unlocks, and what "done" looks like.
-8. {{multiMilestoneCommitInstruction}}
-10. Say exactly: "Milestone {{milestoneId}} ready."
+2. Write `.gsd/PROJECT.md` — use the **Project** output template below.
+3. Write `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Capture Active, Deferred, Out of Scope, and any already Validated requirements. Later milestones may have provisional ownership where slice plans do not exist yet.
+4. For any architectural or pattern decisions, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically.
+#### Phase 2: Primary milestone
+5. Write a full `CONTEXT.md` for the primary milestone (the first in sequence). Include an "Assumptions" section.
+6. Call `gsd_plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done.
+#### MANDATORY: depends_on Frontmatter in CONTEXT.md
+Every CONTEXT.md for a milestone that depends on other milestones MUST have YAML frontmatter with `depends_on`. The auto-mode state machine reads this field to determine execution order — without it, milestones may execute out of order or in parallel when they shouldn't.
+```yaml
+---
+depends_on: [M001, M002]
+---
+# M003: Title
+```
+If a milestone has no dependencies, omit the frontmatter. Do NOT rely on QUEUE.md or PROJECT.md for dependency tracking — the state machine only reads CONTEXT.md frontmatter.
+#### Phase 3: Remaining milestones
+For each remaining milestone, in dependency order, autonomously decide the best readiness mode:
+- **Write full context** — if the spec provides enough detail for this milestone and investigation confirms feasibility. Write a full `CONTEXT.md` with technical assumptions verified against the actual codebase.
+- **Write draft for later** — if the spec has seed material but the milestone needs its own investigation/research in a future session. Write a `CONTEXT-DRAFT.md` capturing seed material, key ideas, provisional scope, and open questions. **Downstream:** Auto-mode pauses at this milestone and prompts the user to discuss.
+- **Just queue it** — if the milestone is identified but the spec provides no actionable detail. No context file written. **Downstream:** Auto-mode pauses and starts a full discussion from scratch.
+**Default to writing full context** when the spec is detailed enough. Default to draft when the spec mentions the milestone but is vague. Default to queue when the milestone is implied by the vision but not described.
+**Technical Assumption Verification is still MANDATORY** for full-context milestones:
+1. Read the actual code for every file or module you reference. Confirm APIs exist, check what functions actually do.
+2. Check for stale assumptions — verify referenced modules still work as described.
+3. Print findings in chat before writing each milestone's CONTEXT.md.
+Each context file (full or draft) should be rich enough that a future agent encountering it fresh — with no memory of this conversation — can understand the intent, constraints, dependencies, what this milestone unlocks, and what "done" looks like.
+#### Milestone Gate Tracking (MANDATORY for multi-milestone)
+After deciding each milestone's readiness, immediately write or update `.gsd/DISCUSSION-MANIFEST.json`:
+```json
+{
+  "primary": "M001",
+  "milestones": {
+    "M001": { "gate": "discussed", "context": "full" },
+    "M002": { "gate": "discussed", "context": "full" },
+    "M003": { "gate": "queued",    "context": "none" }
+  },
+  "total": 3,
+  "gates_completed": 3
+}
+```
+Write this file AFTER each gate decision, not just at the end. Update `gates_completed` incrementally. The system reads this file and BLOCKS auto-start if `gates_completed < total`.
+For single-milestone projects, do NOT write this file.
+#### Phase 4: Finalize
+7. {{multiMilestoneCommitInstruction}}
+After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Auto-mode will start automatically.
 ## Critical Rules
-- **DO NOT ask the user any questions** — this is headless mode
+- **DO NOT ask the user any questions** — this is headless mode. Make judgment calls and document them.
 - **Preserve the specification's terminology** — don't paraphrase domain-specific language
-- **Document assumptions** — when you make a judgment call, note it in CONTEXT.md under "Assumptions"
-- **Investigate before writing** — always scout the codebase first
-- **Use depends_on frontmatter** for multi-milestone sequences (the state machine reads this field to determine execution order)
-- **Anti-reduction rule** — if the spec describes a big vision, plan the big vision. Do not ask "what's the minimum viable version?" or reduce scope. Phase complex/risky work into later milestones — do not cut it.
-- **Naming convention** — always use `gsd_milestone_generate_id` to get milestone IDs. Directories use bare IDs (e.g. `M001/` or `M001-r5jzab/`), files use ID-SUFFIX format (e.g. `M001-CONTEXT.md` or `M001-r5jzab-CONTEXT.md`). Never invent milestone IDs manually.
+- **Document assumptions** — every judgment call gets noted in CONTEXT.md under "Assumptions" with reasoning
+- **Investigate thoroughly** — scout codebase, check library docs, web search. Same rigor as interactive mode.
+- **Do focused research** — identify table stakes, domain standards, omissions, scope traps. Same rigor as interactive mode.
+- **Use proper tools** — `gsd_plan_milestone` for roadmaps, `gsd_decision_save` for decisions, `gsd_milestone_generate_id` for IDs
+- **Print artifacts in chat** — requirements table, roadmap preview, depth summary. The TUI scrollback is the user's audit trail.
+- **Use depends_on frontmatter** for multi-milestone sequences
+- **Anti-reduction rule** — if the spec describes a big vision, plan the big vision. Phase complexity — don't cut it.
+- **Naming convention** — always use `gsd_milestone_generate_id` for IDs. Directories use bare IDs, files use ID-SUFFIX format.
 - **End with "Milestone {{milestoneId}} ready."** — this triggers auto-start detection
+{{inlinedTemplates}}

package/dist/resources/extensions/gsd/prompts/forensics.md CHANGED Viewed

@@ -36,6 +36,8 @@ GSD extension source code is at: `{{gsdSourceDir}}`
 ├── doctor-history.jsonl         — doctor check history
 ├── activity/                    — session activity logs (JSONL per unit)
 │   └── {seq}-{unitType}-{unitId}.jsonl
+├── journal/                     — structured event journal (JSONL per day)
+│   └── YYYY-MM-DD.jsonl
 ├── runtime/
 │   ├── paused-session.json      — serialized session when auto pauses
 │   └── headless-context.md      — headless resume context
@@ -60,6 +62,32 @@ GSD extension source code is at: `{{gsdSourceDir}}`
 - `usage` field on assistant messages: `input`, `output`, `cacheRead`, `cacheWrite`, `totalTokens`, `cost`
 - **To trace a failure**: find the last activity log, search for `isError: true` tool results, then read the agent's reasoning text preceding that error
+### Journal Format (`.gsd/journal/`)
+The journal is a structured event log for auto-mode iterations. Each daily file contains JSONL entries:
+```
+{ ts: "ISO-8601", flowId: "UUID", seq: 0, eventType: "iteration-start", rule?: "rule-name", causedBy?: { flowId, seq }, data?: { unitId, status, ... } }
+```
+**Key event types:**
+- `iteration-start` / `iteration-end` — marks loop iteration boundaries
+- `dispatch-match` / `dispatch-stop` — what the auto-mode decided to do (or not do)
+- `unit-start` / `unit-end` — lifecycle of individual work units
+- `terminal` — auto-mode reached a terminal state (all done, budget exceeded, etc.)
+- `guard-block` — dispatch was blocked by a guard condition (e.g. needs user input)
+- `stuck-detected` — the loop detected it was stuck (same unit repeatedly dispatched)
+- `milestone-transition` — a milestone was promoted or completed
+- `worktree-enter` / `worktree-create-failed` / `worktree-merge-start` / `worktree-merge-failed` — worktree operations
+**Key concepts:**
+- **flowId**: UUID grouping all events in one iteration. Use to reconstruct what happened in a single loop pass.
+- **causedBy**: Cross-reference to a prior event (same or different flow). Enables causal chain tracing.
+- **seq**: Monotonically increasing within a flow. Reconstruct event order within an iteration.
+**To trace a stuck loop**: filter for `stuck-detected` events, then follow `flowId` to see the surrounding dispatch and unit events.
+**To trace a guard block**: filter for `guard-block` events, check `data.reason` for why dispatch was blocked.
 ### Crash Lock Format (`auto.lock`)
 JSON with fields: `pid`, `startedAt`, `unitType`, `unitId`, `unitStartedAt`, `completedUnits`, `sessionFile`
@@ -78,20 +106,24 @@ A unit dispatched more than once (`type/id` appears multiple times) indicates a
 1. **Start with the pre-parsed forensic report** above. The anomaly section contains automated findings — treat these as leads, not conclusions.
-2. **Form hypotheses** about which module and code path is responsible. Use the source map to identify candidate files.
+2. **Check the journal timeline** if present. The journal events show the auto-mode's decision sequence (dispatches, guards, stuck detection, worktree operations). Use flow IDs to group related events and trace causal chains.
+3. **Cross-reference activity logs and journal**. Activity logs show *what the LLM did* (tool calls, reasoning, errors). Journal events show *what auto-mode decided* (dispatch rules, iteration boundaries, state transitions). Together they reveal the full picture.
+4. **Form hypotheses** about which module and code path is responsible. Use the source map to identify candidate files.
-3. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it.
+5. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it.
-4. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files.
+6. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files.
-5. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is:
+7. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is:
    - Missing edge case / unhandled condition
    - Wrong boolean logic or comparison
    - Race condition or ordering issue
    - State corruption (e.g. completed-units.json out of sync with artifacts)
    - Timeout / recovery logic not triggering correctly
-6. **Clarify if needed.** Use ask_user_questions (max 2 questions) only if the report is genuinely insufficient. Do not ask questions you can answer from the data or source code.
+8. **Clarify if needed.** Use ask_user_questions (max 2 questions) only if the report is genuinely insufficient. Do not ask questions you can answer from the data or source code.
 ## Output

package/dist/resources/extensions/gsd/prompts/run-uat.md CHANGED Viewed

@@ -29,7 +29,7 @@ You are the UAT runner. Execute every check defined in `{{uatPath}}` as deeply a
 - `runtime-executable` — execute the specified command or script. Capture stdout/stderr as evidence. Record pass/fail based on exit code and output.
 - `live-runtime` — exercise the real runtime path. Start or connect to the app/service if needed, use browser/runtime/network checks, and verify observable behavior.
 - `mixed` — run all automatable artifact-driven and live-runtime checks. Separate any remaining human-only checks explicitly.
-- `human-experience` — automate setup, preconditions, screenshots, logs, and objective checks, but do **not** invent subjective PASS results. Mark taste-based, experiential, or purely human-judgment checks as `NEEDS-HUMAN` and use an overall verdict of `PARTIAL` unless every required check was objective and passed.
+- `human-experience` — automate setup, preconditions, screenshots, logs, and objective checks, but do **not** invent subjective PASS results. Mark taste-based, experiential, or purely human-judgment checks as `NEEDS-HUMAN`. Use an overall verdict of `PASS` when all automatable checks succeed (even if human-only checks remain as `NEEDS-HUMAN`). Use `PARTIAL` only when automatable checks themselves were skipped or inconclusive.
 ### Evidence tools
@@ -51,9 +51,9 @@ For each check, record:
 - `PASS`, `FAIL`, or `NEEDS-HUMAN`
 After running all checks, compute the **overall verdict**:
-- `PASS` — all required checks passed and no human-only checks remain
-- `FAIL` — one or more checks failed
-- `PARTIAL` — some checks passed, but one or more checks were skipped, inconclusive, or still require human judgment
+- `PASS` — all automatable checks passed. Any remaining checks that honestly require human judgment are marked `NEEDS-HUMAN` with clear instructions for the human reviewer. (This is the correct verdict for mixed/human-experience/live-runtime modes when all automatable checks succeed.)
+- `FAIL` — one or more automatable checks failed
+- `PARTIAL` — one or more automatable checks were skipped or returned inconclusive results (not the same as `NEEDS-HUMAN` — use `PARTIAL` only when the agent itself could not determine pass/fail for a check it was supposed to automate)
 Call `gsd_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "ASSESSMENT"`, and the full UAT result markdown as `content` — the tool computes the file path and persists to both DB and disk. The content should follow this format:

package/dist/resources/extensions/gsd/session-forensics.js CHANGED Viewed

@@ -126,7 +126,16 @@ export function extractTrace(entries) {
                 }
             }
             if (isError && resultText) {
-                errors.push(resultText.slice(0, 300));
+                // Filter out benign "errors" that are normal during code exploration:
+                // - grep/rg/find returning exit code 1 (no matches) is expected POSIX behavior
+                // - User interrupts (Escape/skip) are intentional, not failures
+                const trimmed = resultText.trim();
+                const isBenignNoMatch = pending?.name === "bash" &&
+                    /^\(no output\)\s*\n\s*Command exited with code 1$/m.test(trimmed);
+                const isUserSkip = /^Skipped due to queued user message/i.test(trimmed);
+                if (!isBenignNoMatch && !isUserSkip) {
+                    errors.push(resultText.slice(0, 300));
+                }
             }
         }
     }

package/dist/resources/extensions/gsd/worktree-command.js CHANGED Viewed

@@ -548,7 +548,7 @@ async function handleMerge(basePath, name, ctx, pi, targetBranch) {
         // --- Deterministic merge path (preferred) ---
         // Try a direct squash-merge first. Only fall back to LLM on conflict.
         const commitType = inferCommitType(name);
-        const commitMessage = `${commitType}(${name}): merge worktree ${name}`;
+        const commitMessage = `${commitType}: merge worktree ${name}\n\nGSD-Worktree: ${name}`;
         // Reconcile worktree DB into main DB before squash merge
         const wtDbPath = join(worktreePath(basePath, name), ".gsd", "gsd.db");
         const mainDbPath = join(basePath, ".gsd", "gsd.db");

package/dist/web/standalone/.next/BUILD_ID CHANGED Viewed

	@@ -1 +1 @@
1	- ~~VPcLnRF4BL8VoJEilBwlB~~
1	+ PTL5V00OW8q4-092tUQKx