@undeemed/get-shit-done-codex 1.23.2 → 1.24.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +51 -5
  2. package/agents/gsd-debugger.md +8 -56
  3. package/agents/gsd-planner.md +2 -118
  4. package/agents/gsd-project-researcher.md +0 -3
  5. package/agents/gsd-research-synthesizer.md +0 -3
  6. package/bin/install.js +267 -5
  7. package/commands/gsd/add-phase.md +2 -6
  8. package/commands/gsd/add-todo.md +1 -6
  9. package/commands/gsd/check-todos.md +2 -6
  10. package/commands/gsd/debug.md +1 -6
  11. package/commands/gsd/discuss-phase.md +16 -9
  12. package/commands/gsd/execute-phase.md +2 -1
  13. package/commands/gsd/new-milestone.md +8 -1
  14. package/commands/gsd/pause-work.md +1 -4
  15. package/commands/gsd/plan-phase.md +1 -2
  16. package/commands/gsd/research-phase.md +15 -17
  17. package/commands/gsd/verify-work.md +2 -1
  18. package/get-shit-done/bin/gsd-tools.cjs +4951 -121
  19. package/get-shit-done/bin/lib/commands.cjs +4 -9
  20. package/get-shit-done/bin/lib/core.cjs +102 -23
  21. package/get-shit-done/bin/lib/init.cjs +11 -11
  22. package/get-shit-done/bin/lib/milestone.cjs +54 -3
  23. package/get-shit-done/bin/lib/phase.cjs +40 -10
  24. package/get-shit-done/bin/lib/state.cjs +86 -33
  25. package/get-shit-done/references/checkpoints.md +0 -1
  26. package/get-shit-done/references/model-profile-resolution.md +13 -6
  27. package/get-shit-done/references/model-profiles.md +60 -51
  28. package/get-shit-done/templates/context.md +14 -0
  29. package/get-shit-done/templates/phase-prompt.md +0 -2
  30. package/get-shit-done/workflows/audit-milestone.md +8 -63
  31. package/get-shit-done/workflows/diagnose-issues.md +1 -1
  32. package/get-shit-done/workflows/execute-phase.md +9 -54
  33. package/get-shit-done/workflows/execute-plan.md +13 -17
  34. package/get-shit-done/workflows/help.md +3 -3
  35. package/get-shit-done/workflows/map-codebase.md +44 -32
  36. package/get-shit-done/workflows/new-milestone.md +7 -16
  37. package/get-shit-done/workflows/new-project.md +80 -49
  38. package/get-shit-done/workflows/progress.md +26 -14
  39. package/get-shit-done/workflows/quick.md +15 -24
  40. package/get-shit-done/workflows/set-profile.md +12 -8
  41. package/get-shit-done/workflows/settings.md +14 -21
  42. package/get-shit-done/workflows/transition.md +0 -5
  43. package/get-shit-done/workflows/verify-work.md +12 -11
  44. package/hooks/dist/gsd-context-monitor.js +1 -1
  45. package/package.json +3 -2
  46. package/scripts/run-tests.cjs +43 -0
@@ -4,7 +4,7 @@
4
4
 
5
5
  const fs = require('fs');
6
6
  const path = require('path');
7
- const { loadConfig, getMilestoneInfo, output, error } = require('./core.cjs');
7
+ const { loadConfig, getMilestoneInfo, getMilestonePhaseFilter, output, error } = require('./core.cjs');
8
8
  const { extractFrontmatter, reconstructFrontmatter } = require('./frontmatter.cjs');
9
9
 
10
10
  function cmdStateLoad(cwd, raw) {
@@ -65,11 +65,19 @@ function cmdStateGet(cwd, section, raw) {
65
65
  // Try to find markdown section or field
66
66
  const fieldEscaped = section.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
67
67
 
68
- // Check for **field:** value
69
- const fieldPattern = new RegExp(`\\*\\*${fieldEscaped}:\\*\\*\\s*(.*)`, 'i');
70
- const fieldMatch = content.match(fieldPattern);
71
- if (fieldMatch) {
72
- output({ [section]: fieldMatch[1].trim() }, raw, fieldMatch[1].trim());
68
+ // Check for **field:** value (bold format)
69
+ const boldPattern = new RegExp(`\\*\\*${fieldEscaped}:\\*\\*\\s*(.*)`, 'i');
70
+ const boldMatch = content.match(boldPattern);
71
+ if (boldMatch) {
72
+ output({ [section]: boldMatch[1].trim() }, raw, boldMatch[1].trim());
73
+ return;
74
+ }
75
+
76
+ // Check for field: value (plain format)
77
+ const plainPattern = new RegExp(`^${fieldEscaped}:\\s*(.*)`, 'im');
78
+ const plainMatch = content.match(plainPattern);
79
+ if (plainMatch) {
80
+ output({ [section]: plainMatch[1].trim() }, raw, plainMatch[1].trim());
73
81
  return;
74
82
  }
75
83
 
@@ -106,10 +114,15 @@ function cmdStatePatch(cwd, patches, raw) {
106
114
 
107
115
  for (const [field, value] of Object.entries(patches)) {
108
116
  const fieldEscaped = field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
109
- const pattern = new RegExp(`(\\*\\*${fieldEscaped}:\\*\\*\\s*)(.*)`, 'i');
117
+ // Try **Field:** bold format first, then plain Field: format
118
+ const boldPattern = new RegExp(`(\\*\\*${fieldEscaped}:\\*\\*\\s*)(.*)`, 'i');
119
+ const plainPattern = new RegExp(`(^${fieldEscaped}:\\s*)(.*)`, 'im');
110
120
 
111
- if (pattern.test(content)) {
112
- content = content.replace(pattern, (_match, prefix) => `${prefix}${value}`);
121
+ if (boldPattern.test(content)) {
122
+ content = content.replace(boldPattern, (_match, prefix) => `${prefix}${value}`);
123
+ results.updated.push(field);
124
+ } else if (plainPattern.test(content)) {
125
+ content = content.replace(plainPattern, (_match, prefix) => `${prefix}${value}`);
113
126
  results.updated.push(field);
114
127
  } else {
115
128
  results.failed.push(field);
@@ -135,9 +148,15 @@ function cmdStateUpdate(cwd, field, value) {
135
148
  try {
136
149
  let content = fs.readFileSync(statePath, 'utf-8');
137
150
  const fieldEscaped = field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
138
- const pattern = new RegExp(`(\\*\\*${fieldEscaped}:\\*\\*\\s*)(.*)`, 'i');
139
- if (pattern.test(content)) {
140
- content = content.replace(pattern, (_match, prefix) => `${prefix}${value}`);
151
+ // Try **Field:** bold format first, then plain Field: format
152
+ const boldPattern = new RegExp(`(\\*\\*${fieldEscaped}:\\*\\*\\s*)(.*)`, 'i');
153
+ const plainPattern = new RegExp(`(^${fieldEscaped}:\\s*)(.*)`, 'im');
154
+ if (boldPattern.test(content)) {
155
+ content = content.replace(boldPattern, (_match, prefix) => `${prefix}${value}`);
156
+ writeStateMd(statePath, content, cwd);
157
+ output({ updated: true });
158
+ } else if (plainPattern.test(content)) {
159
+ content = content.replace(plainPattern, (_match, prefix) => `${prefix}${value}`);
141
160
  writeStateMd(statePath, content, cwd);
142
161
  output({ updated: true });
143
162
  } else {
@@ -151,16 +170,27 @@ function cmdStateUpdate(cwd, field, value) {
151
170
  // ─── State Progression Engine ────────────────────────────────────────────────
152
171
 
153
172
  function stateExtractField(content, fieldName) {
154
- const pattern = new RegExp(`\\*\\*${fieldName}:\\*\\*\\s*(.+)`, 'i');
155
- const match = content.match(pattern);
156
- return match ? match[1].trim() : null;
173
+ const escaped = fieldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
174
+ // Try **Field:** bold format first
175
+ const boldPattern = new RegExp(`\\*\\*${escaped}:\\*\\*\\s*(.+)`, 'i');
176
+ const boldMatch = content.match(boldPattern);
177
+ if (boldMatch) return boldMatch[1].trim();
178
+ // Fall back to plain Field: format
179
+ const plainPattern = new RegExp(`^${escaped}:\\s*(.+)`, 'im');
180
+ const plainMatch = content.match(plainPattern);
181
+ return plainMatch ? plainMatch[1].trim() : null;
157
182
  }
158
183
 
159
184
  function stateReplaceField(content, fieldName, newValue) {
160
185
  const escaped = fieldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
161
- const pattern = new RegExp(`(\\*\\*${escaped}:\\*\\*\\s*)(.*)`, 'i');
162
- if (pattern.test(content)) {
163
- return content.replace(pattern, (_match, prefix) => `${prefix}${newValue}`);
186
+ // Try **Field:** bold format first, then plain Field: format
187
+ const boldPattern = new RegExp(`(\\*\\*${escaped}:\\*\\*\\s*)(.*)`, 'i');
188
+ if (boldPattern.test(content)) {
189
+ return content.replace(boldPattern, (_match, prefix) => `${prefix}${newValue}`);
190
+ }
191
+ const plainPattern = new RegExp(`(^${escaped}:\\s*)(.*)`, 'im');
192
+ if (plainPattern.test(content)) {
193
+ return content.replace(plainPattern, (_match, prefix) => `${prefix}${newValue}`);
164
194
  }
165
195
  return null;
166
196
  }
@@ -255,9 +285,15 @@ function cmdStateUpdateProgress(cwd, raw) {
255
285
  const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(barWidth - filled);
256
286
  const progressStr = `[${bar}] ${percent}%`;
257
287
 
258
- const progressPattern = /(\*\*Progress:\*\*\s*).*/i;
259
- if (progressPattern.test(content)) {
260
- content = content.replace(progressPattern, (_match, prefix) => `${prefix}${progressStr}`);
288
+ // Try **Progress:** bold format first, then plain Progress: format
289
+ const boldProgressPattern = /(\*\*Progress:\*\*\s*).*/i;
290
+ const plainProgressPattern = /^(Progress:\s*).*/im;
291
+ if (boldProgressPattern.test(content)) {
292
+ content = content.replace(boldProgressPattern, (_match, prefix) => `${prefix}${progressStr}`);
293
+ writeStateMd(statePath, content, cwd);
294
+ output({ updated: true, percent, completed: totalSummaries, total: totalPlans, bar: progressStr }, raw, progressStr);
295
+ } else if (plainProgressPattern.test(content)) {
296
+ content = content.replace(plainProgressPattern, (_match, prefix) => `${prefix}${progressStr}`);
261
297
  writeStateMd(statePath, content, cwd);
262
298
  output({ updated: true, percent, completed: totalSummaries, total: totalPlans, bar: progressStr }, raw, progressStr);
263
299
  } else {
@@ -413,11 +449,17 @@ function cmdStateSnapshot(cwd, raw) {
413
449
 
414
450
  const content = fs.readFileSync(statePath, 'utf-8');
415
451
 
416
- // Helper to extract **Field:** value patterns
452
+ // Helper to extract field values — supports both **Field:** bold format
453
+ // and plain Field: format (STATE.md may use either depending on version)
417
454
  const extractField = (fieldName) => {
418
- const pattern = new RegExp(`\\*\\*${fieldName}:\\*\\*\\s*(.+)`, 'i');
419
- const match = content.match(pattern);
420
- return match ? match[1].trim() : null;
455
+ // Try **Field:** format first (bold markdown)
456
+ const boldPattern = new RegExp(`\\*\\*${fieldName}:\\*\\*\\s*(.+)`, 'i');
457
+ const boldMatch = content.match(boldPattern);
458
+ if (boldMatch) return boldMatch[1].trim();
459
+ // Fall back to plain Field: format
460
+ const plainPattern = new RegExp(`^${fieldName}:\\s*(.+)`, 'im');
461
+ const plainMatch = content.match(plainPattern);
462
+ return plainMatch ? plainMatch[1].trim() : null;
421
463
  };
422
464
 
423
465
  // Extract basic fields
@@ -476,9 +518,12 @@ function cmdStateSnapshot(cwd, raw) {
476
518
  const sessionMatch = content.match(/##\s*Session\s*\n([\s\S]*?)(?=\n##|$)/i);
477
519
  if (sessionMatch) {
478
520
  const sessionSection = sessionMatch[1];
479
- const lastDateMatch = sessionSection.match(/\*\*Last Date:\*\*\s*(.+)/i);
480
- const stoppedAtMatch = sessionSection.match(/\*\*Stopped At:\*\*\s*(.+)/i);
481
- const resumeFileMatch = sessionSection.match(/\*\*Resume File:\*\*\s*(.+)/i);
521
+ const lastDateMatch = sessionSection.match(/\*\*Last Date:\*\*\s*(.+)/i)
522
+ || sessionSection.match(/^Last Date:\s*(.+)/im);
523
+ const stoppedAtMatch = sessionSection.match(/\*\*Stopped At:\*\*\s*(.+)/i)
524
+ || sessionSection.match(/^Stopped At:\s*(.+)/im);
525
+ const resumeFileMatch = sessionSection.match(/\*\*Resume File:\*\*\s*(.+)/i)
526
+ || sessionSection.match(/^Resume File:\s*(.+)/im);
482
527
 
483
528
  if (lastDateMatch) session.last_date = lastDateMatch[1].trim();
484
529
  if (stoppedAtMatch) session.stopped_at = stoppedAtMatch[1].trim();
@@ -512,10 +557,14 @@ function cmdStateSnapshot(cwd, raw) {
512
557
  * reliably via `state json` instead of fragile regex parsing.
513
558
  */
514
559
  function buildStateFrontmatter(bodyContent, cwd) {
560
+ // Supports both **Field:** bold and plain Field: format (see state-snapshot)
515
561
  const extractField = (fieldName) => {
516
- const pattern = new RegExp(`\\*\\*${fieldName}:\\*\\*\\s*(.+)`, 'i');
517
- const match = bodyContent.match(pattern);
518
- return match ? match[1].trim() : null;
562
+ const boldPattern = new RegExp(`\\*\\*${fieldName}:\\*\\*\\s*(.+)`, 'i');
563
+ const boldMatch = bodyContent.match(boldPattern);
564
+ if (boldMatch) return boldMatch[1].trim();
565
+ const plainPattern = new RegExp(`^${fieldName}:\\s*(.+)`, 'im');
566
+ const plainMatch = bodyContent.match(plainPattern);
567
+ return plainMatch ? plainMatch[1].trim() : null;
519
568
  };
520
569
 
521
570
  const currentPhase = extractField('Current Phase');
@@ -548,8 +597,10 @@ function buildStateFrontmatter(bodyContent, cwd) {
548
597
  try {
549
598
  const phasesDir = path.join(cwd, '.planning', 'phases');
550
599
  if (fs.existsSync(phasesDir)) {
600
+ const isDirInMilestone = getMilestonePhaseFilter(cwd);
551
601
  const phaseDirs = fs.readdirSync(phasesDir, { withFileTypes: true })
552
- .filter(e => e.isDirectory()).map(e => e.name);
602
+ .filter(e => e.isDirectory()).map(e => e.name)
603
+ .filter(isDirInMilestone);
553
604
  let diskTotalPlans = 0;
554
605
  let diskTotalSummaries = 0;
555
606
  let diskCompletedPhases = 0;
@@ -562,7 +613,9 @@ function buildStateFrontmatter(bodyContent, cwd) {
562
613
  diskTotalSummaries += summaries;
563
614
  if (plans > 0 && summaries >= plans) diskCompletedPhases++;
564
615
  }
565
- if (totalPhases === null) totalPhases = phaseDirs.length;
616
+ totalPhases = isDirInMilestone.phaseCount > 0
617
+ ? Math.max(phaseDirs.length, isDirInMilestone.phaseCount)
618
+ : phaseDirs.length;
566
619
  completedPhases = diskCompletedPhases;
567
620
  totalPlans = diskTotalPlans;
568
621
  completedPlans = diskTotalSummaries;
@@ -8,7 +8,6 @@ Plans execute autonomously. Checkpoints formalize interaction points where human
8
8
  2. **Codex sets up the verification environment** - Start dev servers, seed databases, configure env vars
9
9
  3. **User only does what requires human judgment** - Visual checks, UX evaluation, "does this feel right?"
10
10
  4. **Secrets come from user, automation comes from Codex** - Ask for API keys, then Codex uses them via CLI
11
- 5. **Auto-mode bypasses verification/decision checkpoints** — When `workflow.auto_advance` is true in config: human-verify auto-approves, decision auto-selects first option, human-action still stops (auth gates cannot be automated)
12
11
  </overview>
13
12
 
14
13
  <checkpoint_types>
@@ -14,21 +14,28 @@ Default: `balanced` if not set or config missing.
14
14
 
15
15
  @~/.codex/get-shit-done/references/model-profiles.md
16
16
 
17
- Look up the agent in the table for the resolved profile. Pass the model parameter to Task calls:
17
+ Look up the agent in the table for the resolved profile. Each entry returns:
18
+
19
+ ```json
20
+ { "model": "inherit", "thinking": "xhigh" }
21
+ ```
22
+
23
+ All agents use `gpt-5.3-codex` (via `"inherit"`). The `thinking` field controls reasoning effort.
24
+
25
+ Pass both parameters to Task calls:
18
26
 
19
27
  ```
20
28
  Task(
21
29
  prompt="...",
22
30
  subagent_type="gsd-planner",
23
- model="{resolved_model}" # "inherit", "sonnet", or "haiku"
31
+ model="inherit",
32
+ thinking="{resolved_thinking}" # "xhigh", "high", "medium", or "low"
24
33
  )
25
34
  ```
26
35
 
27
- **Note:** Opus-tier agents resolve to `"inherit"` (not `"opus"`). This causes the agent to use the parent session's model, avoiding conflicts with organization policies that may block specific opus versions.
28
-
29
36
  ## Usage
30
37
 
31
38
  1. Resolve once at orchestration start
32
39
  2. Store the profile value
33
- 3. Look up each agent's model from the table when spawning
34
- 4. Pass model parameter to each Task call (values: `"inherit"`, `"sonnet"`, `"haiku"`)
40
+ 3. Look up each agent's `{ model, thinking }` from the table
41
+ 4. Pass both model and thinking parameters to each Task call
@@ -1,92 +1,101 @@
1
1
  # Model Profiles
2
2
 
3
- Model profiles control which Codex model each GSD agent uses. This allows balancing quality vs token spend.
3
+ Model profiles control the reasoning effort level for each GSD agent. All agents use `gpt-5.3-codex` (Codex-optimized for agentic coding); what varies is the **thinking level** — how much reasoning budget each agent gets based on its role.
4
4
 
5
5
  ## Profile Definitions
6
6
 
7
- | Agent | `quality` | `balanced` | `budget` |
8
- |-------|-----------|------------|----------|
9
- | gsd-planner | opus | opus | sonnet |
10
- | gsd-roadmapper | opus | sonnet | sonnet |
11
- | gsd-executor | opus | sonnet | sonnet |
12
- | gsd-phase-researcher | opus | sonnet | haiku |
13
- | gsd-project-researcher | opus | sonnet | haiku |
14
- | gsd-research-synthesizer | sonnet | sonnet | haiku |
15
- | gsd-debugger | opus | sonnet | sonnet |
16
- | gsd-codebase-mapper | sonnet | haiku | haiku |
17
- | gsd-verifier | sonnet | sonnet | haiku |
18
- | gsd-plan-checker | sonnet | sonnet | haiku |
19
- | gsd-integration-checker | sonnet | sonnet | haiku |
7
+ | Agent | `quality` | `balanced` | `budget` |
8
+ | ------------------------ | --------- | ---------- | --------- |
9
+ | gsd-planner | 🔴 xhigh | 🔴 xhigh | 🟢 high |
10
+ | gsd-roadmapper | 🔴 xhigh | 🟢 high | 🟡 medium |
11
+ | gsd-executor | 🔴 xhigh | 🟢 high | 🟡 medium |
12
+ | gsd-phase-researcher | 🟢 high | 🟡 medium | 🟡 medium |
13
+ | gsd-project-researcher | 🟢 high | 🟡 medium | 🟡 medium |
14
+ | gsd-research-synthesizer | 🟢 high | 🟡 medium | 🟡 medium |
15
+ | gsd-debugger | 🔴 xhigh | 🔴 xhigh | 🟢 high |
16
+ | gsd-codebase-mapper | 🟡 medium | 🟡 medium | 🟡 medium |
17
+ | gsd-verifier | 🟢 high | 🟢 high | 🟡 medium |
18
+ | gsd-plan-checker | 🟢 high | 🟡 medium | 🟡 medium |
19
+ | gsd-integration-checker | 🟢 high | 🟡 medium | 🟡 medium |
20
+
21
+ All entries resolve to `model: "inherit"` (uses the session's gpt-5.3-codex). The `thinking` field controls reasoning effort.
20
22
 
21
23
  ## Profile Philosophy
22
24
 
23
- **quality** - Maximum reasoning power
24
- - Opus for all decision-making agents
25
- - Sonnet for read-only verification
26
- - Use when: quota available, critical architecture work
25
+ **quality** - Maximum reasoning for every role
27
26
 
28
- **balanced** (default) - Smart allocation
29
- - Opus only for planning (where architecture decisions happen)
30
- - Sonnet for execution and research (follows explicit instructions)
31
- - Sonnet for verification (needs reasoning, not just pattern matching)
32
- - Use when: normal development, good balance of quality and cost
27
+ - 🔴 **xhigh** for decision-makers: planner, roadmapper, executor, debugger
28
+ - 🟢 **high** for analysis: researchers, verifiers, checkers
29
+ - 🟡 **medium** for read-only mapping
30
+ - Use when: critical architecture work, complex debugging
33
31
 
34
- **budget** - Minimal Opus usage
35
- - Sonnet for anything that writes code
36
- - Haiku for research and verification
37
- - Use when: conserving quota, high-volume work, less critical phases
32
+ **balanced** (default) - Smart thinking allocation
33
+
34
+ - 🔴 **xhigh** only for planner and debugger (highest-impact decisions)
35
+ - 🟢 **high** for executor and verifier (needs reasoning but follows plans)
36
+ - 🟡 **medium** for everything else (structured output, scanning)
37
+ - Use when: normal development
38
+
39
+ **budget** - Minimal reasoning budget
40
+
41
+ - 🟢 **high** for planner and debugger (always need some reasoning)
42
+ - 🟡 **medium** for everything else
43
+ - Use when: high-volume work, less critical phases
44
+
45
+ ## Role-Based Thinking Rationale
46
+
47
+ **Why xhigh thinking for gsd-planner?**
48
+ Planning involves architecture decisions, goal decomposition, and task design. These decisions cascade through the entire phase — worth the extra reasoning budget.
49
+
50
+ **Why xhigh thinking for gsd-debugger even in balanced?**
51
+ Root cause analysis requires deep reasoning. A debugger that misdiagnoses wastes more tokens in re-runs than the reasoning cost.
52
+
53
+ **Why medium thinking for gsd-codebase-mapper?**
54
+ Read-only file scanning and pattern extraction. No decisions to make — just structured output from file contents.
55
+
56
+ **Why high thinking for gsd-verifier in balanced?**
57
+ Verification requires goal-backward reasoning — checking if code _delivers_ what the phase promised. Medium thinking may miss subtle gaps.
58
+
59
+ **Why medium thinking for researchers in balanced?**
60
+ Research agents scan and collect information. The synthesis happens elsewhere. They don't need deep reasoning for reading files.
38
61
 
39
62
  ## Resolution Logic
40
63
 
41
- Orchestrators resolve model before spawning:
64
+ Orchestrators resolve model and thinking before spawning:
42
65
 
43
66
  ```
44
67
  1. Read .planning/config.json
45
68
  2. Check model_overrides for agent-specific override
46
69
  3. If no override, look up agent in profile table
47
- 4. Pass model parameter to Task call
70
+ 4. Pass model + thinking to Task call
48
71
  ```
49
72
 
73
+ Returns: `{ model: "inherit", thinking: "xhigh"|"high"|"medium"|"low" }`
74
+
50
75
  ## Per-Agent Overrides
51
76
 
52
- Override specific agents without changing the entire profile:
77
+ Override thinking level for specific agents:
53
78
 
54
79
  ```json
55
80
  {
56
81
  "model_profile": "balanced",
57
82
  "model_overrides": {
58
- "gsd-executor": "opus",
59
- "gsd-planner": "haiku"
83
+ "gsd-executor": "xhigh",
84
+ "gsd-codebase-mapper": "high"
60
85
  }
61
86
  }
62
87
  ```
63
88
 
64
- Overrides take precedence over the profile. Valid values: `opus`, `sonnet`, `haiku`.
89
+ Valid override values: `"xhigh"`, `"high"`, `"medium"`, `"low"`.
65
90
 
66
91
  ## Switching Profiles
67
92
 
68
93
  Runtime: `$gsd-set-profile <profile>`
69
94
 
70
- Per-project default: Set in `.planning/config.json`:
95
+ Per-project default in `.planning/config.json`:
96
+
71
97
  ```json
72
98
  {
73
99
  "model_profile": "balanced"
74
100
  }
75
101
  ```
76
-
77
- ## Design Rationale
78
-
79
- **Why Opus for gsd-planner?**
80
- Planning involves architecture decisions, goal decomposition, and task design. This is where model quality has the highest impact.
81
-
82
- **Why Sonnet for gsd-executor?**
83
- Executors follow explicit PLAN.md instructions. The plan already contains the reasoning; execution is implementation.
84
-
85
- **Why Sonnet (not Haiku) for verifiers in balanced?**
86
- Verification requires goal-backward reasoning - checking if code *delivers* what the phase promised, not just pattern matching. Sonnet handles this well; Haiku may miss subtle gaps.
87
-
88
- **Why Haiku for gsd-codebase-mapper?**
89
- Read-only exploration and pattern extraction. No reasoning required, just structured output from file contents.
90
-
91
- **Why `inherit` instead of passing `opus` directly?**
92
- Codex CLI's `"opus"` alias maps to a specific model version. Organizations may block older opus versions while allowing newer ones. GSD returns `"inherit"` for opus-tier agents, causing them to use whatever opus version the user has configured in their session. This avoids version conflicts and silent fallbacks to Sonnet.
@@ -54,6 +54,20 @@ Template for `.planning/phases/XX-name/{phase_num}-CONTEXT.md` - captures implem
54
54
 
55
55
  </specifics>
56
56
 
57
+ <code_context>
58
+ ## Existing Code Insights
59
+
60
+ ### Reusable Assets
61
+ - [Component/hook/utility]: [How it could be used in this phase]
62
+
63
+ ### Established Patterns
64
+ - [Pattern]: [How it constrains/enables this phase]
65
+
66
+ ### Integration Points
67
+ - [Where new code connects to existing system]
68
+
69
+ </code_context>
70
+
57
71
  <deferred>
58
72
  ## Deferred Ideas
59
73
 
@@ -20,7 +20,6 @@ wave: N # Execution wave (1, 2, 3...). Pre-computed at plan
20
20
  depends_on: [] # Plan IDs this plan requires (e.g., ["01-01"]).
21
21
  files_modified: [] # Files this plan modifies.
22
22
  autonomous: true # false if plan has checkpoints requiring user interaction
23
- requirements: [] # REQUIRED — Requirement IDs from ROADMAP this plan addresses. MUST NOT be empty.
24
23
  user_setup: [] # Human-required setup Codex cannot automate (see below)
25
24
 
26
25
  # Goal-backward verification (derived during planning, verified after execution)
@@ -130,7 +129,6 @@ After completion, create `.planning/phases/XX-name/{phase}-{plan}-SUMMARY.md`
130
129
  | `depends_on` | Yes | Array of plan IDs this plan requires. |
131
130
  | `files_modified` | Yes | Files this plan touches. |
132
131
  | `autonomous` | Yes | `true` if no checkpoints, `false` if has checkpoints |
133
- | `requirements` | Yes | **MUST** list requirement IDs from ROADMAP. Every roadmap requirement MUST appear in at least one plan. |
134
132
  | `user_setup` | No | Array of human-required setup items (external services) |
135
133
  | `must_haves` | Yes | Goal-backward verification criteria (see below) |
136
134
 
@@ -57,8 +57,6 @@ If a phase is missing VERIFICATION.md, flag it as "unverified phase" — this is
57
57
 
58
58
  With phase context collected:
59
59
 
60
- Extract `MILESTONE_REQ_IDS` from REQUIREMENTS.md traceability table — all REQ-IDs assigned to phases in this milestone.
61
-
62
60
  ```
63
61
  Task(
64
62
  prompt="Check cross-phase integration and E2E flows.
@@ -67,11 +65,6 @@ Phases: {phase_dirs}
67
65
  Phase exports: {from SUMMARYs}
68
66
  API routes: {routes created}
69
67
 
70
- Milestone Requirements:
71
- {MILESTONE_REQ_IDS — list each REQ-ID with description and assigned phase}
72
-
73
- MUST map each integration finding to affected requirement IDs where applicable.
74
-
75
68
  Verify cross-phase wiring and E2E user flows.",
76
69
  subagent_type="gsd-integration-checker",
77
70
  model="{integration_checker_model}"
@@ -84,48 +77,12 @@ Combine:
84
77
  - Phase-level gaps and tech debt (from step 2)
85
78
  - Integration checker's report (wiring gaps, broken flows)
86
79
 
87
- ## 5. Check Requirements Coverage (3-Source Cross-Reference)
88
-
89
- MUST cross-reference three independent sources for each requirement:
90
-
91
- ### 5a. Parse REQUIREMENTS.md Traceability Table
92
-
93
- Extract all REQ-IDs mapped to milestone phases from the traceability table:
94
- - Requirement ID, description, assigned phase, current status, checked-off state (`[x]` vs `[ ]`)
95
-
96
- ### 5b. Parse Phase VERIFICATION.md Requirements Tables
97
-
98
- For each phase's VERIFICATION.md, extract the expanded requirements table:
99
- - Requirement | Source Plan | Description | Status | Evidence
100
- - Map each entry back to its REQ-ID
101
-
102
- ### 5c. Extract SUMMARY.md Frontmatter Cross-Check
103
-
104
- For each phase's SUMMARY.md, extract `requirements-completed` from YAML frontmatter:
105
- ```bash
106
- for summary in .planning/phases/*-*/*-SUMMARY.md; do
107
- node ~/.codex/get-shit-done/bin/gsd-tools.cjs summary-extract "$summary" --fields requirements_completed | jq -r '.requirements_completed'
108
- done
109
- ```
110
-
111
- ### 5d. Status Determination Matrix
112
-
113
- For each REQ-ID, determine status using all three sources:
114
-
115
- | VERIFICATION.md Status | SUMMARY Frontmatter | REQUIREMENTS.md | → Final Status |
116
- |------------------------|---------------------|-----------------|----------------|
117
- | passed | listed | `[x]` | **satisfied** |
118
- | passed | listed | `[ ]` | **satisfied** (update checkbox) |
119
- | passed | missing | any | **partial** (verify manually) |
120
- | gaps_found | any | any | **unsatisfied** |
121
- | missing | listed | any | **partial** (verification gap) |
122
- | missing | missing | any | **unsatisfied** |
123
-
124
- ### 5e. FAIL Gate and Orphan Detection
125
-
126
- **REQUIRED:** Any `unsatisfied` requirement MUST force `gaps_found` status on the milestone audit.
80
+ ## 5. Check Requirements Coverage
127
81
 
128
- **Orphan detection:** Requirements present in REQUIREMENTS.md traceability table but absent from ALL phase VERIFICATION.md files MUST be flagged as orphaned. Orphaned requirements are treated as `unsatisfied` — they were assigned but never verified by any phase.
82
+ For each requirement in REQUIREMENTS.md mapped to this milestone:
83
+ - Find owning phase
84
+ - Check phase verification status
85
+ - Determine: satisfied | partial | unsatisfied
129
86
 
130
87
  ## 6. Aggregate into v{version}-MILESTONE-AUDIT.md
131
88
 
@@ -142,14 +99,7 @@ scores:
142
99
  integration: N/M
143
100
  flows: N/M
144
101
  gaps: # Critical blockers
145
- requirements:
146
- - id: "{REQ-ID}"
147
- status: "unsatisfied | partial | orphaned"
148
- phase: "{assigned phase}"
149
- claimed_by_plans: ["{plan files that reference this requirement}"]
150
- completed_by_plans: ["{plan files whose SUMMARY marks it complete}"]
151
- verification_status: "passed | gaps_found | missing | orphaned"
152
- evidence: "{specific evidence or lack thereof}"
102
+ requirements: [...]
153
103
  integration: [...]
154
104
  flows: [...]
155
105
  tech_debt: # Non-critical, deferred
@@ -285,13 +235,8 @@ $gsd-plan-milestone-gaps
285
235
  <success_criteria>
286
236
  - [ ] Milestone scope identified
287
237
  - [ ] All phase VERIFICATION.md files read
288
- - [ ] SUMMARY.md `requirements-completed` frontmatter extracted for each phase
289
- - [ ] REQUIREMENTS.md traceability table parsed for all milestone REQ-IDs
290
- - [ ] 3-source cross-reference completed (VERIFICATION + SUMMARY + traceability)
291
- - [ ] Orphaned requirements detected (in traceability but absent from all VERIFICATIONs)
292
238
  - [ ] Tech debt and deferred gaps aggregated
293
- - [ ] Integration checker spawned with milestone requirement IDs
294
- - [ ] v{version}-MILESTONE-AUDIT.md created with structured requirement gap objects
295
- - [ ] FAIL gate enforced — any unsatisfied requirement forces gaps_found status
239
+ - [ ] Integration checker spawned for cross-phase wiring
240
+ - [ ] v{version}-MILESTONE-AUDIT.md created
296
241
  - [ ] Results presented with actionable next steps
297
242
  </success_criteria>
@@ -79,7 +79,7 @@ For each gap, fill the debug-subagent-prompt template and spawn:
79
79
 
80
80
  ```
81
81
  Task(
82
- prompt=filled_debug_subagent_prompt + "\n\n<files_to_read>\n- {phase_dir}/{phase_num}-UAT.md\n- .planning/STATE.md\n</files_to_read>",
82
+ prompt=filled_debug_subagent_prompt,
83
83
  subagent_type="general-purpose",
84
84
  description="Debug: {truth_short}"
85
85
  )