gsd-pi 2.15.0 → 2.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/resource-loader.d.ts +1 -0
  2. package/dist/resource-loader.js +10 -1
  3. package/dist/resources/GSD-WORKFLOW.md +70 -68
  4. package/dist/resources/extensions/gsd/auto-dashboard.ts +11 -0
  5. package/dist/resources/extensions/gsd/auto-prompts.ts +6 -0
  6. package/dist/resources/extensions/gsd/auto-recovery.ts +21 -4
  7. package/dist/resources/extensions/gsd/auto.ts +42 -1
  8. package/dist/resources/extensions/gsd/prompts/complete-milestone.md +4 -0
  9. package/dist/resources/extensions/gsd/prompts/plan-milestone.md +4 -0
  10. package/dist/resources/extensions/gsd/prompts/reassess-roadmap.md +4 -0
  11. package/dist/resources/extensions/gsd/prompts/replan-slice.md +4 -0
  12. package/dist/resources/extensions/gsd/prompts/research-milestone.md +4 -0
  13. package/dist/resources/extensions/gsd/prompts/run-uat.md +4 -0
  14. package/dist/resources/extensions/gsd/tests/auto-dashboard.test.ts +153 -0
  15. package/dist/resources/extensions/gsd/tests/auto-recovery.test.ts +272 -0
  16. package/dist/resources/extensions/gsd/tests/complete-milestone.test.ts +3 -0
  17. package/dist/resources/extensions/gsd/tests/crash-recovery.test.ts +134 -0
  18. package/dist/resources/extensions/gsd/tests/reassess-prompt.test.ts +3 -0
  19. package/dist/resources/extensions/gsd/tests/replan-slice.test.ts +3 -0
  20. package/dist/resources/extensions/gsd/tests/run-uat.test.ts +1 -0
  21. package/dist/resources/extensions/gsd/undo.ts +3 -2
  22. package/package.json +1 -1
  23. package/packages/pi-ai/dist/providers/google-shared.d.ts +3 -0
  24. package/packages/pi-ai/dist/providers/google-shared.d.ts.map +1 -1
  25. package/packages/pi-ai/dist/providers/google-shared.js +63 -5
  26. package/packages/pi-ai/dist/providers/google-shared.js.map +1 -1
  27. package/packages/pi-ai/src/providers/google-shared.ts +66 -6
  28. package/src/resources/GSD-WORKFLOW.md +70 -68
  29. package/src/resources/extensions/gsd/auto-dashboard.ts +11 -0
  30. package/src/resources/extensions/gsd/auto-prompts.ts +6 -0
  31. package/src/resources/extensions/gsd/auto-recovery.ts +21 -4
  32. package/src/resources/extensions/gsd/auto.ts +42 -1
  33. package/src/resources/extensions/gsd/prompts/complete-milestone.md +4 -0
  34. package/src/resources/extensions/gsd/prompts/plan-milestone.md +4 -0
  35. package/src/resources/extensions/gsd/prompts/reassess-roadmap.md +4 -0
  36. package/src/resources/extensions/gsd/prompts/replan-slice.md +4 -0
  37. package/src/resources/extensions/gsd/prompts/research-milestone.md +4 -0
  38. package/src/resources/extensions/gsd/prompts/run-uat.md +4 -0
  39. package/src/resources/extensions/gsd/tests/auto-dashboard.test.ts +153 -0
  40. package/src/resources/extensions/gsd/tests/auto-recovery.test.ts +272 -0
  41. package/src/resources/extensions/gsd/tests/complete-milestone.test.ts +3 -0
  42. package/src/resources/extensions/gsd/tests/crash-recovery.test.ts +134 -0
  43. package/src/resources/extensions/gsd/tests/reassess-prompt.test.ts +3 -0
  44. package/src/resources/extensions/gsd/tests/replan-slice.test.ts +3 -0
  45. package/src/resources/extensions/gsd/tests/run-uat.test.ts +1 -0
  46. package/src/resources/extensions/gsd/undo.ts +3 -2
@@ -1,6 +1,7 @@
1
1
  import { DefaultResourceLoader } from '@gsd/pi-coding-agent';
2
2
  export declare function discoverExtensionEntryPaths(extensionsDir: string): string[];
3
3
  export declare function readManagedResourceVersion(agentDir: string): string | null;
4
+ export declare function readManagedResourceSyncedAt(agentDir: string): number | null;
4
5
  export declare function getNewerManagedResourceVersion(agentDir: string, currentVersion: string): string | null;
5
6
  /**
6
7
  * Syncs all bundled resources to agentDir (~/.gsd/agent/) on every launch.
@@ -85,7 +85,7 @@ function getBundledGsdVersion() {
85
85
  }
86
86
  }
87
87
  function writeManagedResourceManifest(agentDir) {
88
- const manifest = { gsdVersion: getBundledGsdVersion() };
88
+ const manifest = { gsdVersion: getBundledGsdVersion(), syncedAt: Date.now() };
89
89
  writeFileSync(getManagedResourceManifestPath(agentDir), JSON.stringify(manifest));
90
90
  }
91
91
  export function readManagedResourceVersion(agentDir) {
@@ -97,6 +97,15 @@ export function readManagedResourceVersion(agentDir) {
97
97
  return null;
98
98
  }
99
99
  }
100
+ export function readManagedResourceSyncedAt(agentDir) {
101
+ try {
102
+ const manifest = JSON.parse(readFileSync(getManagedResourceManifestPath(agentDir), 'utf-8'));
103
+ return typeof manifest?.syncedAt === 'number' ? manifest.syncedAt : null;
104
+ }
105
+ catch {
106
+ return null;
107
+ }
108
+ }
100
109
  export function getNewerManagedResourceVersion(agentDir, currentVersion) {
101
110
  const managedVersion = readManagedResourceVersion(agentDir);
102
111
  if (!managedVersion) {
@@ -4,8 +4,8 @@
4
4
  >
5
5
  > **When to read this:** At the start of any session working on GSD-managed work, or when loaded by `/gsd`.
6
6
  >
7
- > **After reading this, always read `.gsd/state.md` to find out what's next.**
8
- > If the milestone has a `context.md`, read that too — it contains project-specific decisions, reference paths, and implementation guidance that this generic methodology doc does not.
7
+ > **After reading this, always read `.gsd/STATE.md` to find out what's next.**
8
+ > If the milestone has a `M###-CONTEXT.md`, read that too. If the active slice has an `S##-CONTEXT.md`, read that as well — these files contain project-specific decisions, reference paths, and implementation guidance that this generic methodology doc does not.
9
9
 
10
10
  ---
11
11
 
@@ -13,13 +13,14 @@
13
13
 
14
14
  Read these files in order and act on what they say:
15
15
 
16
- 1. **`.gsd/state.md`** — Where are we? What's the next action?
17
- 2. **`.gsd/milestones/<active>/roadmap.md`** — What's the plan? Which slices are done? (state.md tells you which milestone is active)
18
- 3. **`.gsd/milestones/<active>/context.md`** — Project-specific decisions, reference paths, constraints. Read this before doing implementation work.
19
- 4. If a slice is active, read its **`plan.md`** — Which tasks exist? Which are done?
20
- 5. If a task was interrupted, check for **`continue.md`** in the active slice directory — Resume from there.
16
+ 1. **`.gsd/STATE.md`** — Where are we? What's the next action?
17
+ 2. **`.gsd/milestones/<active>/M###-ROADMAP.md`** — What's the plan? Which slices are done? (`STATE.md` tells you which milestone is active)
18
+ 3. **`.gsd/milestones/<active>/M###-CONTEXT.md`** — Milestone-level project decisions, reference paths, constraints. Read this before doing implementation work.
19
+ 4. If a slice is active and has one, read **`S##-CONTEXT.md`** — Slice-specific decisions and constraints.
20
+ 5. If a slice is active, read its **`S##-PLAN.md`** — Which tasks exist? Which are done?
21
+ 6. If a task was interrupted, check for **`continue.md`** in the active slice directory — Resume from there.
21
22
 
22
- Then do the thing `state.md` says to do next.
23
+ Then do the thing `STATE.md` says to do next.
23
24
 
24
25
  ---
25
26
 
@@ -41,32 +42,32 @@ All artifacts live in `.gsd/` at the project root:
41
42
 
42
43
  ```
43
44
  .gsd/
44
- state.md # Dashboard — always read first
45
- decisions.md # Append-only decisions register
45
+ STATE.md # Dashboard — always read first (derived cache; runtime, gitignored)
46
+ DECISIONS.md # Append-only decisions register
46
47
  milestones/
47
48
  M001/
48
- roadmap.md # Milestone plan (checkboxes = state)
49
- context.md # Optional: user decisions from discuss phase
50
- research.md # Optional: codebase/tech research
51
- summary.md # Milestone rollup (updated as slices complete)
49
+ M001-ROADMAP.md # Milestone plan (checkboxes = state)
50
+ M001-CONTEXT.md # Optional: user decisions from discuss phase
51
+ M001-RESEARCH.md # Optional: codebase/tech research
52
+ M001-SUMMARY.md # Milestone rollup (updated as slices complete)
52
53
  slices/
53
54
  S01/
54
- plan.md # Task decomposition for this slice
55
- context.md # Optional: slice-level user decisions
56
- research.md # Optional: slice-level research
57
- summary.md # Slice summary (written on completion)
58
- uat.md # Non-blocking human test script (written on completion)
55
+ S01-PLAN.md # Task decomposition for this slice
56
+ S01-CONTEXT.md # Optional: slice-level user decisions
57
+ S01-RESEARCH.md # Optional: slice-level research
58
+ S01-SUMMARY.md # Slice summary (written on completion)
59
+ S01-UAT.md # Non-blocking human test script (written on completion)
59
60
  continue.md # Ephemeral: resume point if interrupted
60
61
  tasks/
61
- T01-plan.md # Individual task plan
62
- T01-summary.md # Task summary with frontmatter
62
+ T01-PLAN.md # Individual task plan
63
+ T01-SUMMARY.md # Task summary with frontmatter
63
64
  ```
64
65
 
65
66
  ---
66
67
 
67
68
  ## File Format Reference
68
69
 
69
- ### `roadmap.md`
70
+ ### `M###-ROADMAP.md`
70
71
 
71
72
  ```markdown
72
73
  # M001: Title of the Milestone
@@ -93,7 +94,7 @@ All artifacts live in `.gsd/` at the project root:
93
94
 
94
95
  **Parsing rules:** `- [x]` = done, `- [ ]` = not done. The `risk:` and `depends:[]` tags are inline metadata parsed from the line. `depends:[]` lists slice IDs this slice requires to be complete first.
95
96
 
96
- **Boundary Map** (required section in roadmap.md):
97
+ **Boundary Map** (required section in M###-ROADMAP.md):
97
98
 
98
99
  After the slices section, include a `## Boundary Map` that shows what each slice produces and consumes:
99
100
 
@@ -123,7 +124,7 @@ The boundary map is a **planning artifact** — not runnable code. It:
123
124
  - Enables deterministic verification that slices actually connect
124
125
  - Gets updated during slice planning if new interfaces emerge
125
126
 
126
- ### `plan.md` (slice-level)
127
+ ### `S##-PLAN.md` (slice-level)
127
128
 
128
129
  ```markdown
129
130
  # S01: Slice Title
@@ -148,7 +149,7 @@ The boundary map is a **planning artifact** — not runnable code. It:
148
149
  - path/to/another.ts
149
150
  ```
150
151
 
151
- ### `TNN-plan.md` (task-level)
152
+ ### `T##-PLAN.md` (task-level)
152
153
 
153
154
  ```markdown
154
155
  # T01: Task Title
@@ -188,7 +189,7 @@ Critical wiring between artifacts:
188
189
 
189
190
  **Must-haves are what make verification mechanically checkable.** Truths are checked by running commands or reading output. Artifacts are checked by confirming files exist with real content. Key links are checked by confirming imports/references actually connect the pieces.
190
191
 
191
- ### `state.md`
192
+ ### `STATE.md`
192
193
 
193
194
  ```markdown
194
195
  # GSD State
@@ -209,10 +210,10 @@ Critical wiring between artifacts:
209
210
  Exact next thing to do.
210
211
  ```
211
212
 
212
- ### `context.md` (from discuss phase)
213
+ ### `M###-CONTEXT.md` / `S##-CONTEXT.md` (from discuss phase)
213
214
 
214
215
  ```markdown
215
- # S01: Slice Title — Context
216
+ # M001: Milestone or Slice Title — Context
216
217
 
217
218
  **Gathered:** 2026-03-07
218
219
  **Status:** Ready for planning
@@ -228,7 +229,7 @@ Exact next thing to do.
228
229
  - Ideas that came up but belong in other slices
229
230
  ```
230
231
 
231
- ### `decisions.md` (append-only register)
232
+ ### `DECISIONS.md` (append-only register)
232
233
 
233
234
  ```markdown
234
235
  # Decisions Register
@@ -265,7 +266,7 @@ Work flows through these phases. Each phase produces a file.
265
266
  ### Phase 1: Discuss (Optional)
266
267
 
267
268
  **Purpose:** Capture user decisions on gray areas before planning.
268
- **Produces:** `context.md` at milestone or slice level.
269
+ **Produces:** `M###-CONTEXT.md` for milestone-level discussion or `S##-CONTEXT.md` for slice-level discussion.
269
270
  **When to use:** When the scope has ambiguities the user should weigh in on.
270
271
  **When to skip:** When the user already knows exactly what they want, or told you to just go.
271
272
 
@@ -273,18 +274,18 @@ Work flows through these phases. Each phase produces a file.
273
274
  1. Read the roadmap to understand the scope.
274
275
  2. Identify 3-5 gray areas — implementation decisions the user cares about.
275
276
  3. Use `ask_user_questions` to discuss each area.
276
- 4. Write decisions to `context.md`.
277
+ 4. Write decisions to the appropriate context file (`M###-CONTEXT.md` or `S##-CONTEXT.md`).
277
278
  5. Do NOT discuss how to implement — only what the user wants.
278
279
 
279
280
  ### Phase 2: Research (Optional)
280
281
 
281
282
  **Purpose:** Scout the codebase and relevant docs before planning.
282
- **Produces:** `research.md` at milestone or slice level.
283
+ **Produces:** `M###-RESEARCH.md` at milestone level or `S##-RESEARCH.md` at slice level.
283
284
  **When to use:** When working in unfamiliar code, with unfamiliar libraries, or on complex integrations.
284
285
  **When to skip:** When the codebase is familiar and the work is straightforward.
285
286
 
286
287
  **How to do it manually:**
287
- 1. Read `context.md` if it exists — know what decisions are locked.
288
+ 1. Read `M###-CONTEXT.md` and/or `S##-CONTEXT.md` if they exist — know what decisions are locked.
288
289
  2. Scout relevant code: `rg`, `find`, read key files.
289
290
  3. Use `resolve_library` / `get_library_docs` if needed.
290
291
  4. Write findings to `research.md` with these sections:
@@ -324,24 +325,24 @@ The **Don't Hand-Roll** and **Common Pitfalls** sections prevent the most expens
324
325
  ### Phase 3: Plan
325
326
 
326
327
  **Purpose:** Decompose work into context-window-sized tasks with must-haves.
327
- **Produces:** `plan.md` + individual `T01-plan.md` files.
328
+ **Produces:** `S##-PLAN.md` + individual `T01-PLAN.md` files.
328
329
 
329
330
  **For a milestone (roadmap):**
330
- 1. Read `context.md`, `research.md`, and `.gsd/decisions.md` if they exist.
331
+ 1. Read `M###-CONTEXT.md`, `M###-RESEARCH.md`, and `.gsd/DECISIONS.md` if they exist.
331
332
  2. Decompose the vision into 4-10 demoable vertical slices.
332
333
  3. Order by risk (high-risk first to validate feasibility early).
333
- 4. Write `roadmap.md` with checkboxes, risk levels, dependencies, demo sentences.
334
+ 4. Write `M###-ROADMAP.md` with checkboxes, risk levels, dependencies, demo sentences.
334
335
  5. **Write the boundary map** — for each slice, specify what it produces (functions, types, interfaces, endpoints) and what it consumes from upstream slices. This forces interface thinking before implementation and enables deterministic verification that slices actually connect.
335
336
 
336
337
  **For a slice (task decomposition):**
337
- 1. Read the slice's entry in `roadmap.md` **and its boundary map section** — know what interfaces this slice must produce and consume.
338
- 2. Read `context.md`, `research.md`, and `.gsd/decisions.md` if they exist for this slice.
338
+ 1. Read the slice's entry in `M###-ROADMAP.md` **and its boundary map section** — know what interfaces this slice must produce and consume.
339
+ 2. Read `M###-CONTEXT.md`, `S##-CONTEXT.md`, `M###-RESEARCH.md`, `S##-RESEARCH.md`, and `.gsd/DECISIONS.md` if they exist for this slice.
339
340
  3. Read summaries from dependency slices (check `depends:[]` in roadmap).
340
341
  4. Verify that upstream slices' actual outputs match what the boundary map says this slice consumes. If they diverge, update the boundary map.
341
342
  5. Decompose into 1-7 tasks, each fitting one context window.
342
343
  6. Each task needs: title, description, steps (3-10), must-haves (observable verification criteria).
343
344
  7. Must-haves should reference boundary map contracts — e.g. "exports `generateToken()` as specified in boundary map S01→S02".
344
- 8. Write `plan.md` and individual `TNN-plan.md` files.
345
+ 8. Write `S##-PLAN.md` and individual `T##-PLAN.md` files.
345
346
 
346
347
  ### Phase 4: Execute
347
348
 
@@ -349,10 +350,10 @@ The **Don't Hand-Roll** and **Common Pitfalls** sections prevent the most expens
349
350
  **Produces:** Code changes + `[DONE:n]` markers.
350
351
 
351
352
  **How to do it manually:**
352
- 1. Read the task's `TNN-plan.md`.
353
+ 1. Read the task's `T##-PLAN.md`.
353
354
  2. Read relevant summaries from prior tasks (for context on what's already built).
354
355
  3. Execute each step. Mark progress with `[DONE:n]` in responses.
355
- 4. If you made an architectural, pattern, or library decision, append it to `.gsd/decisions.md`.
356
+ 4. If you made an architectural, pattern, or library decision, append it to `.gsd/DECISIONS.md`.
356
357
  5. If interrupted or context is getting full, write `continue.md` (see below).
357
358
 
358
359
  ### Phase 5: Verify
@@ -400,7 +401,7 @@ When verification finds gaps, include a **Gaps** section with what's missing, im
400
401
  ### Phase 6: Summarize
401
402
 
402
403
  **Purpose:** Record what happened for downstream tasks.
403
- **Produces:** `TNN-summary.md`, and when slice completes, `summary.md`.
404
+ **Produces:** `T##-SUMMARY.md`, and when slice completes, `S##-SUMMARY.md`.
404
405
 
405
406
  **Task summary format:**
406
407
  ```markdown
@@ -421,7 +422,7 @@ key_decisions:
421
422
  patterns_established:
422
423
  - "Pattern name and where it lives"
423
424
  drill_down_paths:
424
- - .gsd/milestones/M001/slices/S01/tasks/T01-plan.md
425
+ - .gsd/milestones/M001/slices/S01/tasks/T01-PLAN.md
425
426
  duration: 15min
426
427
  verification_result: pass
427
428
  completed_at: 2026-03-07T16:00:00Z
@@ -445,7 +446,7 @@ What differed from the plan and why (or "None").
445
446
 
446
447
  The one-liner must be substantive: "JWT auth with refresh rotation using jose" not "Authentication implemented."
447
448
 
448
- **Slice summary:** Written when all tasks in a slice complete. Compresses all task summaries. Includes `drill_down_paths` to each task summary. During slice completion, review task summaries for `key_decisions` and ensure any significant ones are captured in `.gsd/decisions.md`.
449
+ **Slice summary:** Written when all tasks in a slice complete. Compresses all task summaries. Includes `drill_down_paths` to each task summary. During slice completion, review task summaries for `key_decisions` and ensure any significant ones are captured in `.gsd/DECISIONS.md`.
449
450
 
450
451
  **Milestone summary:** Updated each time a slice completes. Compresses all slice summaries. This is what gets injected into later slice planning instead of loading many individual summaries.
451
452
 
@@ -454,16 +455,16 @@ The one-liner must be substantive: "JWT auth with refresh rotation using jose" n
454
455
  **Purpose:** Mark work done and move to the next thing.
455
456
 
456
457
  **After a task completes:**
457
- 1. Mark the task done in `plan.md` (checkbox).
458
+ 1. Mark the task done in `S##-PLAN.md` (checkbox).
458
459
  2. Check if there's a next task in the slice → execute it.
459
- 3. If slice is complete → write slice summary, mark slice done in `roadmap.md`.
460
+ 3. If slice is complete → write slice summary, mark slice done in `M###-ROADMAP.md`.
460
461
 
461
462
  **After a slice completes:**
462
- 1. Write slice `summary.md` (compresses all task summaries).
463
- 2. Write slice `uat.md` — a non-blocking human test script derived from the slice's must-haves and demo sentence. The agent does NOT wait for UAT results.
464
- 3. Mark the slice checkbox in `roadmap.md` as `[x]`.
465
- 4. Update `state.md` with new position.
466
- 5. Update milestone `summary.md` with the completed slice's contributions.
463
+ 1. Write slice `S##-SUMMARY.md` (compresses all task summaries).
464
+ 2. Write slice `S##-UAT.md` — a non-blocking human test script derived from the slice's must-haves and demo sentence. The agent does NOT wait for UAT results.
465
+ 3. Mark the slice checkbox in `M###-ROADMAP.md` as `[x]`.
466
+ 4. Update `STATE.md` with new position.
467
+ 5. Update milestone `M###-SUMMARY.md` with the completed slice's contributions.
467
468
  6. Continue to next slice immediately. The user tests the UAT whenever convenient.
468
469
  7. If the user reports UAT failures later, create fix tasks in the current or a new slice.
469
470
  8. If all slices done → milestone complete.
@@ -513,17 +514,17 @@ The EXACT first thing to do when resuming. Not vague. Specific.
513
514
 
514
515
  ## State Management
515
516
 
516
- ### `state.md` is a derived cache
517
+ ### `STATE.md` is a derived cache
517
518
 
518
519
  It is NOT the source of truth. It's a convenience dashboard.
519
520
 
520
521
  **Sources of truth:**
521
- - `roadmap.md` → which slices exist and which are done
522
- - `plan.md` → which tasks exist within a slice
523
- - `TNN-summary.md` → what happened during a task
524
- - `summary.md` (slice/milestone) → compressed outcomes
522
+ - `M###-ROADMAP.md` → which slices exist and which are done
523
+ - `S##-PLAN.md` → which tasks exist within a slice
524
+ - `T##-SUMMARY.md` → what happened during a task
525
+ - `S##-SUMMARY.md` and `M###-SUMMARY.md` → compressed slice and milestone outcomes
525
526
 
526
- **Update `state.md`** after every significant action:
527
+ **Update `STATE.md`** after every significant action:
527
528
  - Active milestone/slice/task
528
529
  - Recent decisions (last 3-5)
529
530
  - Blockers
@@ -611,9 +612,9 @@ Tasks completed:
611
612
 
612
613
  When planning or executing a task, load relevant prior context:
613
614
 
614
- 1. Check the current slice's `depends:[]` in `roadmap.md`.
615
+ 1. Check the current slice's `depends:[]` in `M###-ROADMAP.md`.
615
616
  2. Load summaries from those dependency slices.
616
- 3. Start with the **highest available level** — milestone `summary.md` first.
617
+ 3. Start with the **highest available level** — milestone `M###-SUMMARY.md` first.
617
618
  4. Only drill down to slice/task summaries if you need specific detail.
618
619
  5. Stay within **~2500 tokens** of total injected summary context.
619
620
  6. If the dependency chain is too large, drop the oldest/least-relevant summaries first.
@@ -630,32 +631,33 @@ These are soft caps — exceed them when genuinely needed, but don't let summari
630
631
 
631
632
  ## Project-Specific Context
632
633
 
633
- This methodology doc is generic. Project-specific guidance belongs in the milestone's `context.md`:
634
+ This methodology doc is generic. Project-specific guidance belongs in the milestone and slice context files:
634
635
 
635
- - **`.gsd/milestones/<active>/context.md`** — Architecture decisions, reference file paths, per-slice doc reading guides, implementation constraints, and any project-specific protocols (worktrees, testing, etc.)
636
+ - **`.gsd/milestones/<active>/M###-CONTEXT.md`** — milestone-level architecture decisions, reference file paths, and implementation constraints
637
+ - **`.gsd/milestones/<active>/slices/S##/S##-CONTEXT.md`** — slice-level decisions, edge cases, and narrow implementation guidance when present
636
638
 
637
- **Always read the active milestone's `context.md` before starting implementation work.** It tells you what decisions are locked, what files to reference, and how to verify your work in this specific project.
639
+ **Always read the active milestone's `M###-CONTEXT.md` before starting implementation work.** If the active slice also has `S##-CONTEXT.md`, read that too. These files tell you what decisions are locked, what files to reference, and how to verify your work in this specific project.
638
640
 
639
641
  ---
640
642
 
641
643
  ## Checklist for a Fresh Session
642
644
 
643
- 1. Read `.gsd/state.md` — what's the next action?
645
+ 1. Read `.gsd/STATE.md` — what's the next action?
644
646
  2. Check for `continue.md` in the active slice — is there interrupted work?
645
647
  3. If resuming: read `continue.md`, delete it, pick up from "Next Action".
646
- 4. If starting fresh: read the active slice's `plan.md`, find the next incomplete task.
647
- 5. If in a planning or research phase, read `.gsd/decisions.md` — respect existing decisions.
648
+ 4. If starting fresh: read the active slice's `S##-PLAN.md`, find the next incomplete task.
649
+ 5. If in a planning or research phase, read `.gsd/DECISIONS.md` — respect existing decisions.
648
650
  6. Read relevant summaries from prior tasks/slices for context.
649
651
  7. Do the work.
650
652
  8. Verify the must-haves.
651
653
  9. Write the summary.
652
- 10. Mark done, update `state.md`, advance.
653
- 11. If context is getting full or you're done for now: write `continue.md` if mid-task, or update `state.md` with next action if between tasks.
654
+ 10. Mark done, update `STATE.md`, advance.
655
+ 11. If context is getting full or you're done for now: write `continue.md` if mid-task, or update `STATE.md` with next action if between tasks.
654
656
 
655
657
  ## When Context Gets Large
656
658
 
657
659
  If you sense context pressure (many files read, long execution, lots of tool output):
658
660
 
659
661
  1. **If mid-task:** Write `continue.md` with exact resume state. Tell the user: "Context is getting full. I've saved progress to continue.md. Start a new session and run `/gsd` to pick up where you left off, or `/gsd auto` to resume in auto-execution mode."
660
- 2. **If between tasks:** Just update `state.md` with the next action. No continue file needed — the next session will read state.md and pick up the next task cleanly.
662
+ 2. **If between tasks:** Just update `STATE.md` with the next action. No continue file needed — the next session will read STATE.md and pick up the next task cleanly.
661
663
  3. **Don't fight it.** The whole system is designed for this. A fresh session with the right files loaded is better than a stale session with degraded reasoning.
@@ -265,6 +265,16 @@ export function updateProgressWidget(
265
265
  tui.requestRender();
266
266
  }, 800);
267
267
 
268
+ // Refresh progress cache from disk every 5s so the widget reflects
269
+ // task/slice completion mid-unit. Without this, the progress bar only
270
+ // updates at dispatch time, appearing frozen during long-running units.
271
+ const progressRefreshTimer = mid ? setInterval(() => {
272
+ try {
273
+ updateSliceProgressCache(accessors.getBasePath(), mid.id, slice?.id);
274
+ cachedLines = undefined;
275
+ } catch { /* non-fatal */ }
276
+ }, 5_000) : null;
277
+
268
278
  return {
269
279
  render(width: number): string[] {
270
280
  if (cachedLines && cachedWidth === width) return cachedLines;
@@ -416,6 +426,7 @@ export function updateProgressWidget(
416
426
  },
417
427
  dispose() {
418
428
  clearInterval(pulseTimer);
429
+ if (progressRefreshTimer) clearInterval(progressRefreshTimer);
419
430
  },
420
431
  };
421
432
  });
@@ -383,6 +383,7 @@ export async function buildResearchMilestonePrompt(mid: string, midTitle: string
383
383
 
384
384
  const outputRelPath = relMilestoneFile(base, mid, "RESEARCH");
385
385
  return loadPrompt("research-milestone", {
386
+ workingDirectory: base,
386
387
  milestoneId: mid, milestoneTitle: midTitle,
387
388
  milestonePath: relMilestonePath(base, mid),
388
389
  contextPath: contextRel,
@@ -422,6 +423,7 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba
422
423
  const outputRelPath = relMilestoneFile(base, mid, "ROADMAP");
423
424
  const secretsOutputPath = relMilestoneFile(base, mid, "SECRETS");
424
425
  return loadPrompt("plan-milestone", {
426
+ workingDirectory: base,
425
427
  milestoneId: mid, milestoneTitle: midTitle,
426
428
  milestonePath: relMilestonePath(base, mid),
427
429
  contextPath: contextRel,
@@ -667,6 +669,7 @@ export async function buildCompleteMilestonePrompt(
667
669
  const milestoneSummaryPath = `${relMilestonePath(base, mid)}/${mid}-SUMMARY.md`;
668
670
 
669
671
  return loadPrompt("complete-milestone", {
672
+ workingDirectory: base,
670
673
  milestoneId: mid,
671
674
  milestoneTitle: midTitle,
672
675
  roadmapPath: roadmapRel,
@@ -715,6 +718,7 @@ export async function buildReplanSlicePrompt(
715
718
  const replanPath = `${relSlicePath(base, mid, sid)}/${sid}-REPLAN.md`;
716
719
 
717
720
  return loadPrompt("replan-slice", {
721
+ workingDirectory: base,
718
722
  milestoneId: mid,
719
723
  sliceId: sid,
720
724
  sliceTitle: sTitle,
@@ -748,6 +752,7 @@ export async function buildRunUatPrompt(
748
752
  const uatType = extractUatType(uatContent) ?? "human-experience";
749
753
 
750
754
  return loadPrompt("run-uat", {
755
+ workingDirectory: base,
751
756
  milestoneId: mid,
752
757
  sliceId,
753
758
  uatPath,
@@ -780,6 +785,7 @@ export async function buildReassessRoadmapPrompt(
780
785
  const assessmentPath = relSliceFile(base, mid, completedSliceId, "ASSESSMENT");
781
786
 
782
787
  return loadPrompt("reassess-roadmap", {
788
+ workingDirectory: base,
783
789
  milestoneId: mid,
784
790
  milestoneTitle: midTitle,
785
791
  completedSliceId,
@@ -149,7 +149,12 @@ export function verifyExpectedArtifact(unitType: string, unitId: string, base: s
149
149
  const roadmap = parseRoadmap(roadmapContent);
150
150
  const slice = roadmap.slices.find(s => s.id === sid);
151
151
  if (slice && !slice.done) return false;
152
- } catch (e) { /* corrupt roadmap — be lenient and treat as verified */ void e; }
152
+ } catch {
153
+ // Corrupt/unparseable roadmap — fail verification so the unit
154
+ // re-runs and has a chance to fix the roadmap. Silently passing
155
+ // here could advance past an incomplete slice.
156
+ return false;
157
+ }
153
158
  }
154
159
  }
155
160
  }
@@ -251,6 +256,11 @@ export function skipExecuteTask(
251
256
  const re = new RegExp(`^(- \\[) \\] (\\*\\*${escapedTid}:)`, "m");
252
257
  if (re.test(planContent)) {
253
258
  writeFileSync(planAbs, planContent.replace(re, "$1x] $2"), "utf-8");
259
+ } else {
260
+ // Regex didn't match — checkbox format differs from expected pattern.
261
+ // Return false so callers know the plan was NOT updated and can
262
+ // fall through to other recovery strategies instead of assuming success.
263
+ return false;
254
264
  }
255
265
  }
256
266
  }
@@ -290,7 +300,10 @@ export function removePersistedKey(base: string, key: string): void {
290
300
  if (existsSync(file)) {
291
301
  let keys: string[] = JSON.parse(readFileSync(file, "utf-8"));
292
302
  keys = keys.filter(k => k !== key);
293
- writeFileSync(file, JSON.stringify(keys), "utf-8");
303
+ // Atomic write: tmp file + rename prevents partial writes on crash
304
+ const tmpFile = file + ".tmp";
305
+ writeFileSync(tmpFile, JSON.stringify(keys), "utf-8");
306
+ renameSync(tmpFile, file);
294
307
  }
295
308
  } catch (e) { /* non-fatal: removePersistedKey failure */ void e; }
296
309
  }
@@ -412,8 +425,12 @@ export async function selfHealRuntimeRecords(
412
425
  const { unitType, unitId } = record;
413
426
  const artifactPath = resolveExpectedArtifactPath(unitType, unitId, base);
414
427
 
415
- // Case 1: Artifact exists — unit completed but closeout didn't finish
416
- if (artifactPath && existsSync(artifactPath)) {
428
+ // Case 1: Artifact exists — unit completed but closeout didn't finish.
429
+ // Use verifyExpectedArtifact (not just existsSync) so that execute-task
430
+ // also checks the plan checkbox is marked [x]. Without this, a task
431
+ // whose summary exists but checkbox is unchecked would be incorrectly
432
+ // marked as completed, causing deriveState to re-dispatch it endlessly.
433
+ if (artifactPath && existsSync(artifactPath) && verifyExpectedArtifact(unitType, unitId, base)) {
417
434
  clearUnitRuntimeRecord(base, unitType, unitId);
418
435
  // Also persist completion key if missing
419
436
  const key = `${unitType}/${unitId}`;
@@ -39,7 +39,7 @@ import {
39
39
  readUnitRuntimeRecord,
40
40
  writeUnitRuntimeRecord,
41
41
  } from "./unit-runtime.js";
42
- import { resolveAutoSupervisorConfig, resolveModelWithFallbacksForUnit, loadEffectiveGSDPreferences } from "./preferences.js";
42
+ import { resolveAutoSupervisorConfig, resolveModelWithFallbacksForUnit, loadEffectiveGSDPreferences, resolveSkillDiscoveryMode } from "./preferences.js";
43
43
  import { sendDesktopNotification } from "./notifications.js";
44
44
  import type { GSDPreferences } from "./preferences.js";
45
45
  import {
@@ -68,6 +68,7 @@ import {
68
68
  } from "./metrics.js";
69
69
  import { join } from "node:path";
70
70
  import { sep as pathSep } from "node:path";
71
+ import { homedir } from "node:os";
71
72
  import { readdirSync, readFileSync, existsSync, mkdirSync, writeFileSync, unlinkSync, statSync } from "node:fs";
72
73
  import { execSync, execFileSync } from "node:child_process";
73
74
  import {
@@ -156,6 +157,33 @@ const unitRecoveryCount = new Map<string, number>();
156
157
  /** Persisted completed-unit keys — survives restarts. Loaded from .gsd/completed-units.json. */
157
158
  const completedKeySet = new Set<string>();
158
159
 
160
+ /** Resource sync timestamp captured at auto-mode start. If the managed-resources
161
+ * manifest changes mid-session (e.g. /gsd:update or dev edit + copy-resources),
162
+ * templates on disk may expect variables the in-memory code doesn't provide.
163
+ * Detect this and stop gracefully instead of crashing. */
164
+ let resourceSyncedAtOnStart: number | null = null;
165
+
166
/**
 * Read the `syncedAt` value from the `managed-resources.json` manifest.
 *
 * The manifest is looked up in the directory named by the
 * `GSD_CODING_AGENT_DIR` environment variable when it is set to a non-empty
 * value, and in `~/.gsd/agent` otherwise.
 *
 * @returns The manifest's `syncedAt` field when it is a number; `null` when the
 *   file cannot be read or parsed, or when the field is missing or not a number.
 */
function readResourceSyncedAt(): number | null {
  let root = process.env.GSD_CODING_AGENT_DIR;
  if (!root) {
    root = join(homedir(), ".gsd", "agent");
  }

  try {
    const raw = readFileSync(join(root, "managed-resources.json"), "utf-8");
    const stamp = JSON.parse(raw)?.syncedAt;
    if (typeof stamp === "number") {
      return stamp;
    }
  } catch {
    // Missing or malformed manifest: treated the same as "no timestamp".
  }
  return null;
}
176
+
177
/**
 * Compare the manifest timestamp captured in `resourceSyncedAtOnStart` with the
 * one currently on disk.
 *
 * @returns A restart message when both timestamps are available and differ;
 *   `null` when no baseline was captured, when the current timestamp cannot be
 *   read, or when the two values are equal.
 */
function checkResourcesStale(): string | null {
  const baseline = resourceSyncedAtOnStart;
  // Without a baseline there is nothing to compare against, so skip the read.
  if (baseline === null) {
    return null;
  }

  const latest = readResourceSyncedAt();
  const changed = latest !== null && latest !== baseline;
  return changed
    ? "GSD resources were updated since this session started. Restart gsd to load the new code."
    : null;
}
186
+
159
187
  /**
160
188
  * Resolve whether auto-mode should use worktree isolation.
161
189
  * Returns true for worktree mode (default), false for branch mode.
@@ -618,6 +646,7 @@ export async function startAuto(
618
646
  resetHookState();
619
647
  restoreHookState(base);
620
648
  autoStartTime = Date.now();
649
+ resourceSyncedAtOnStart = readResourceSyncedAt();
621
650
  completedUnits = [];
622
651
  currentUnit = null;
623
652
  currentMilestoneId = state.activeMilestone?.id ?? null;
@@ -1141,6 +1170,18 @@ async function dispatchNextUnit(
1141
1170
  await new Promise(r => setTimeout(r, 200));
1142
1171
  }
1143
1172
 
1173
+ // Resource version guard: detect mid-session resource updates.
1174
+ // Templates are read from disk on each dispatch but extension code is loaded
1175
+ // once at startup. If resources were re-synced (e.g. /gsd:update, npm update,
1176
+ // or dev copy-resources), templates may expect variables the in-memory code
1177
+ // doesn't provide. Stop gracefully instead of crashing.
1178
+ const staleMsg = checkResourcesStale();
1179
+ if (staleMsg) {
1180
+ await stopAuto(ctx, pi);
1181
+ ctx.ui.notify(staleMsg, "error");
1182
+ return;
1183
+ }
1184
+
1144
1185
  // Clear all caches so deriveState sees fresh disk state (#431).
1145
1186
  // Parse cache is also cleared — doctor may have re-populated it with
1146
1187
  // stale data between handleAgentEnd and this dispatch call (Path B fix).
@@ -2,6 +2,10 @@ You are executing GSD auto-mode.
2
2
 
3
3
  ## UNIT: Complete Milestone {{milestoneId}} ("{{milestoneTitle}}")
4
4
 
5
+ ## Working Directory
6
+
7
+ Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
8
+
5
9
  ## Your Role in the Pipeline
6
10
 
7
11
  All slices are done. You are closing out the milestone — verifying that the assembled work actually delivers the promised outcome, writing the milestone summary, and updating project state. The milestone summary is the final record. After you finish, the system merges the worktree back to the integration branch. If there are queued milestones, the next one starts its own research → plan → execute cycle from a clean slate — the milestone summary is how it learns what was already built.
@@ -2,6 +2,10 @@ You are executing GSD auto-mode.
2
2
 
3
3
  ## UNIT: Plan Milestone {{milestoneId}} ("{{milestoneTitle}}")
4
4
 
5
+ ## Working Directory
6
+
7
+ Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
8
+
5
9
  All relevant context has been preloaded below — start working immediately without re-reading these files.
6
10
 
7
11
  {{inlinedContext}}
@@ -2,6 +2,10 @@ You are executing GSD auto-mode.
2
2
 
3
3
  ## UNIT: Reassess Roadmap — Milestone {{milestoneId}} after {{completedSliceId}}
4
4
 
5
+ ## Working Directory
6
+
7
+ Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
8
+
5
9
  ## Your Role in the Pipeline
6
10
 
7
11
  A slice just completed. The **complete-slice agent** verified the work and wrote a slice summary. You decide whether the remaining roadmap still makes sense given what was actually built. If you change the roadmap, the next slice's **researcher** and **planner** agents work from your updated version. If you confirm it's fine, the pipeline moves to the next slice immediately.
@@ -2,6 +2,10 @@ You are executing GSD auto-mode.
2
2
 
3
3
  ## UNIT: Replan Slice {{sliceId}} ("{{sliceTitle}}") — Milestone {{milestoneId}}
4
4
 
5
+ ## Working Directory
6
+
7
+ Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
8
+
5
9
  A completed task reported `blocker_discovered: true`, meaning the current slice plan cannot be executed as-is. Your job is to rewrite the remaining tasks in the slice plan to address the blocker while preserving all completed work.
6
10
 
7
11
  All relevant context has been preloaded below — the roadmap, current slice plan, the blocker task summary, and decisions are inlined. Start working immediately without re-reading these files.
@@ -2,6 +2,10 @@ You are executing GSD auto-mode.
2
2
 
3
3
  ## UNIT: Research Milestone {{milestoneId}} ("{{milestoneTitle}}")
4
4
 
5
+ ## Working Directory
6
+
7
+ Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
8
+
5
9
  All relevant context has been preloaded below — start working immediately without re-reading these files.
6
10
 
7
11
  {{inlinedContext}}
@@ -2,6 +2,10 @@ You are executing GSD auto-mode.
2
2
 
3
3
  ## UNIT: Run UAT — {{milestoneId}}/{{sliceId}}
4
4
 
5
+ ## Working Directory
6
+
7
+ Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
8
+
5
9
  All relevant context has been preloaded below. Start working immediately without re-reading these files.
6
10
 
7
11
  {{inlinedContext}}