@undeemed/get-shit-done-codex 1.23.2 → 1.24.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -5
- package/agents/gsd-debugger.md +8 -56
- package/agents/gsd-planner.md +2 -118
- package/agents/gsd-project-researcher.md +0 -3
- package/agents/gsd-research-synthesizer.md +0 -3
- package/bin/install.js +267 -5
- package/commands/gsd/add-phase.md +2 -6
- package/commands/gsd/add-todo.md +1 -6
- package/commands/gsd/check-todos.md +2 -6
- package/commands/gsd/debug.md +1 -6
- package/commands/gsd/discuss-phase.md +16 -9
- package/commands/gsd/execute-phase.md +2 -1
- package/commands/gsd/new-milestone.md +8 -1
- package/commands/gsd/pause-work.md +1 -4
- package/commands/gsd/plan-phase.md +1 -2
- package/commands/gsd/research-phase.md +15 -17
- package/commands/gsd/verify-work.md +2 -1
- package/get-shit-done/bin/gsd-tools.cjs +4951 -121
- package/get-shit-done/bin/lib/commands.cjs +4 -9
- package/get-shit-done/bin/lib/core.cjs +102 -23
- package/get-shit-done/bin/lib/init.cjs +11 -11
- package/get-shit-done/bin/lib/milestone.cjs +54 -3
- package/get-shit-done/bin/lib/phase.cjs +40 -10
- package/get-shit-done/bin/lib/state.cjs +86 -33
- package/get-shit-done/references/checkpoints.md +0 -1
- package/get-shit-done/references/model-profile-resolution.md +13 -6
- package/get-shit-done/references/model-profiles.md +60 -51
- package/get-shit-done/templates/context.md +14 -0
- package/get-shit-done/templates/phase-prompt.md +0 -2
- package/get-shit-done/workflows/audit-milestone.md +8 -63
- package/get-shit-done/workflows/diagnose-issues.md +1 -1
- package/get-shit-done/workflows/execute-phase.md +9 -54
- package/get-shit-done/workflows/execute-plan.md +13 -17
- package/get-shit-done/workflows/help.md +3 -3
- package/get-shit-done/workflows/map-codebase.md +44 -32
- package/get-shit-done/workflows/new-milestone.md +7 -16
- package/get-shit-done/workflows/new-project.md +80 -49
- package/get-shit-done/workflows/progress.md +26 -14
- package/get-shit-done/workflows/quick.md +15 -24
- package/get-shit-done/workflows/set-profile.md +12 -8
- package/get-shit-done/workflows/settings.md +14 -21
- package/get-shit-done/workflows/transition.md +0 -5
- package/get-shit-done/workflows/verify-work.md +12 -11
- package/hooks/dist/gsd-context-monitor.js +1 -1
- package/package.json +3 -2
- package/scripts/run-tests.cjs +43 -0
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
const fs = require('fs');
|
|
6
6
|
const path = require('path');
|
|
7
|
-
const { loadConfig, getMilestoneInfo, output, error } = require('./core.cjs');
|
|
7
|
+
const { loadConfig, getMilestoneInfo, getMilestonePhaseFilter, output, error } = require('./core.cjs');
|
|
8
8
|
const { extractFrontmatter, reconstructFrontmatter } = require('./frontmatter.cjs');
|
|
9
9
|
|
|
10
10
|
function cmdStateLoad(cwd, raw) {
|
|
@@ -65,11 +65,19 @@ function cmdStateGet(cwd, section, raw) {
|
|
|
65
65
|
// Try to find markdown section or field
|
|
66
66
|
const fieldEscaped = section.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
67
67
|
|
|
68
|
-
// Check for **field:** value
|
|
69
|
-
const
|
|
70
|
-
const
|
|
71
|
-
if (
|
|
72
|
-
output({ [section]:
|
|
68
|
+
// Check for **field:** value (bold format)
|
|
69
|
+
const boldPattern = new RegExp(`\\*\\*${fieldEscaped}:\\*\\*\\s*(.*)`, 'i');
|
|
70
|
+
const boldMatch = content.match(boldPattern);
|
|
71
|
+
if (boldMatch) {
|
|
72
|
+
output({ [section]: boldMatch[1].trim() }, raw, boldMatch[1].trim());
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Check for field: value (plain format)
|
|
77
|
+
const plainPattern = new RegExp(`^${fieldEscaped}:\\s*(.*)`, 'im');
|
|
78
|
+
const plainMatch = content.match(plainPattern);
|
|
79
|
+
if (plainMatch) {
|
|
80
|
+
output({ [section]: plainMatch[1].trim() }, raw, plainMatch[1].trim());
|
|
73
81
|
return;
|
|
74
82
|
}
|
|
75
83
|
|
|
@@ -106,10 +114,15 @@ function cmdStatePatch(cwd, patches, raw) {
|
|
|
106
114
|
|
|
107
115
|
for (const [field, value] of Object.entries(patches)) {
|
|
108
116
|
const fieldEscaped = field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
109
|
-
|
|
117
|
+
// Try **Field:** bold format first, then plain Field: format
|
|
118
|
+
const boldPattern = new RegExp(`(\\*\\*${fieldEscaped}:\\*\\*\\s*)(.*)`, 'i');
|
|
119
|
+
const plainPattern = new RegExp(`(^${fieldEscaped}:\\s*)(.*)`, 'im');
|
|
110
120
|
|
|
111
|
-
if (
|
|
112
|
-
content = content.replace(
|
|
121
|
+
if (boldPattern.test(content)) {
|
|
122
|
+
content = content.replace(boldPattern, (_match, prefix) => `${prefix}${value}`);
|
|
123
|
+
results.updated.push(field);
|
|
124
|
+
} else if (plainPattern.test(content)) {
|
|
125
|
+
content = content.replace(plainPattern, (_match, prefix) => `${prefix}${value}`);
|
|
113
126
|
results.updated.push(field);
|
|
114
127
|
} else {
|
|
115
128
|
results.failed.push(field);
|
|
@@ -135,9 +148,15 @@ function cmdStateUpdate(cwd, field, value) {
|
|
|
135
148
|
try {
|
|
136
149
|
let content = fs.readFileSync(statePath, 'utf-8');
|
|
137
150
|
const fieldEscaped = field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
151
|
+
// Try **Field:** bold format first, then plain Field: format
|
|
152
|
+
const boldPattern = new RegExp(`(\\*\\*${fieldEscaped}:\\*\\*\\s*)(.*)`, 'i');
|
|
153
|
+
const plainPattern = new RegExp(`(^${fieldEscaped}:\\s*)(.*)`, 'im');
|
|
154
|
+
if (boldPattern.test(content)) {
|
|
155
|
+
content = content.replace(boldPattern, (_match, prefix) => `${prefix}${value}`);
|
|
156
|
+
writeStateMd(statePath, content, cwd);
|
|
157
|
+
output({ updated: true });
|
|
158
|
+
} else if (plainPattern.test(content)) {
|
|
159
|
+
content = content.replace(plainPattern, (_match, prefix) => `${prefix}${value}`);
|
|
141
160
|
writeStateMd(statePath, content, cwd);
|
|
142
161
|
output({ updated: true });
|
|
143
162
|
} else {
|
|
@@ -151,16 +170,27 @@ function cmdStateUpdate(cwd, field, value) {
|
|
|
151
170
|
// ─── State Progression Engine ────────────────────────────────────────────────
|
|
152
171
|
|
|
153
172
|
function stateExtractField(content, fieldName) {
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
|
|
173
|
+
const escaped = fieldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
174
|
+
// Try **Field:** bold format first
|
|
175
|
+
const boldPattern = new RegExp(`\\*\\*${escaped}:\\*\\*\\s*(.+)`, 'i');
|
|
176
|
+
const boldMatch = content.match(boldPattern);
|
|
177
|
+
if (boldMatch) return boldMatch[1].trim();
|
|
178
|
+
// Fall back to plain Field: format
|
|
179
|
+
const plainPattern = new RegExp(`^${escaped}:\\s*(.+)`, 'im');
|
|
180
|
+
const plainMatch = content.match(plainPattern);
|
|
181
|
+
return plainMatch ? plainMatch[1].trim() : null;
|
|
157
182
|
}
|
|
158
183
|
|
|
159
184
|
function stateReplaceField(content, fieldName, newValue) {
|
|
160
185
|
const escaped = fieldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
186
|
+
// Try **Field:** bold format first, then plain Field: format
|
|
187
|
+
const boldPattern = new RegExp(`(\\*\\*${escaped}:\\*\\*\\s*)(.*)`, 'i');
|
|
188
|
+
if (boldPattern.test(content)) {
|
|
189
|
+
return content.replace(boldPattern, (_match, prefix) => `${prefix}${newValue}`);
|
|
190
|
+
}
|
|
191
|
+
const plainPattern = new RegExp(`(^${escaped}:\\s*)(.*)`, 'im');
|
|
192
|
+
if (plainPattern.test(content)) {
|
|
193
|
+
return content.replace(plainPattern, (_match, prefix) => `${prefix}${newValue}`);
|
|
164
194
|
}
|
|
165
195
|
return null;
|
|
166
196
|
}
|
|
@@ -255,9 +285,15 @@ function cmdStateUpdateProgress(cwd, raw) {
|
|
|
255
285
|
const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(barWidth - filled);
|
|
256
286
|
const progressStr = `[${bar}] ${percent}%`;
|
|
257
287
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
288
|
+
// Try **Progress:** bold format first, then plain Progress: format
|
|
289
|
+
const boldProgressPattern = /(\*\*Progress:\*\*\s*).*/i;
|
|
290
|
+
const plainProgressPattern = /^(Progress:\s*).*/im;
|
|
291
|
+
if (boldProgressPattern.test(content)) {
|
|
292
|
+
content = content.replace(boldProgressPattern, (_match, prefix) => `${prefix}${progressStr}`);
|
|
293
|
+
writeStateMd(statePath, content, cwd);
|
|
294
|
+
output({ updated: true, percent, completed: totalSummaries, total: totalPlans, bar: progressStr }, raw, progressStr);
|
|
295
|
+
} else if (plainProgressPattern.test(content)) {
|
|
296
|
+
content = content.replace(plainProgressPattern, (_match, prefix) => `${prefix}${progressStr}`);
|
|
261
297
|
writeStateMd(statePath, content, cwd);
|
|
262
298
|
output({ updated: true, percent, completed: totalSummaries, total: totalPlans, bar: progressStr }, raw, progressStr);
|
|
263
299
|
} else {
|
|
@@ -413,11 +449,17 @@ function cmdStateSnapshot(cwd, raw) {
|
|
|
413
449
|
|
|
414
450
|
const content = fs.readFileSync(statePath, 'utf-8');
|
|
415
451
|
|
|
416
|
-
// Helper to extract **Field:**
|
|
452
|
+
// Helper to extract field values — supports both **Field:** bold format
|
|
453
|
+
// and plain Field: format (STATE.md may use either depending on version)
|
|
417
454
|
const extractField = (fieldName) => {
|
|
418
|
-
|
|
419
|
-
const
|
|
420
|
-
|
|
455
|
+
// Try **Field:** format first (bold markdown)
|
|
456
|
+
const boldPattern = new RegExp(`\\*\\*${fieldName}:\\*\\*\\s*(.+)`, 'i');
|
|
457
|
+
const boldMatch = content.match(boldPattern);
|
|
458
|
+
if (boldMatch) return boldMatch[1].trim();
|
|
459
|
+
// Fall back to plain Field: format
|
|
460
|
+
const plainPattern = new RegExp(`^${fieldName}:\\s*(.+)`, 'im');
|
|
461
|
+
const plainMatch = content.match(plainPattern);
|
|
462
|
+
return plainMatch ? plainMatch[1].trim() : null;
|
|
421
463
|
};
|
|
422
464
|
|
|
423
465
|
// Extract basic fields
|
|
@@ -476,9 +518,12 @@ function cmdStateSnapshot(cwd, raw) {
|
|
|
476
518
|
const sessionMatch = content.match(/##\s*Session\s*\n([\s\S]*?)(?=\n##|$)/i);
|
|
477
519
|
if (sessionMatch) {
|
|
478
520
|
const sessionSection = sessionMatch[1];
|
|
479
|
-
const lastDateMatch = sessionSection.match(/\*\*Last Date:\*\*\s*(.+)/i)
|
|
480
|
-
|
|
481
|
-
const
|
|
521
|
+
const lastDateMatch = sessionSection.match(/\*\*Last Date:\*\*\s*(.+)/i)
|
|
522
|
+
|| sessionSection.match(/^Last Date:\s*(.+)/im);
|
|
523
|
+
const stoppedAtMatch = sessionSection.match(/\*\*Stopped At:\*\*\s*(.+)/i)
|
|
524
|
+
|| sessionSection.match(/^Stopped At:\s*(.+)/im);
|
|
525
|
+
const resumeFileMatch = sessionSection.match(/\*\*Resume File:\*\*\s*(.+)/i)
|
|
526
|
+
|| sessionSection.match(/^Resume File:\s*(.+)/im);
|
|
482
527
|
|
|
483
528
|
if (lastDateMatch) session.last_date = lastDateMatch[1].trim();
|
|
484
529
|
if (stoppedAtMatch) session.stopped_at = stoppedAtMatch[1].trim();
|
|
@@ -512,10 +557,14 @@ function cmdStateSnapshot(cwd, raw) {
|
|
|
512
557
|
* reliably via `state json` instead of fragile regex parsing.
|
|
513
558
|
*/
|
|
514
559
|
function buildStateFrontmatter(bodyContent, cwd) {
|
|
560
|
+
// Supports both **Field:** bold and plain Field: format (see state-snapshot)
|
|
515
561
|
const extractField = (fieldName) => {
|
|
516
|
-
const
|
|
517
|
-
const
|
|
518
|
-
|
|
562
|
+
const boldPattern = new RegExp(`\\*\\*${fieldName}:\\*\\*\\s*(.+)`, 'i');
|
|
563
|
+
const boldMatch = bodyContent.match(boldPattern);
|
|
564
|
+
if (boldMatch) return boldMatch[1].trim();
|
|
565
|
+
const plainPattern = new RegExp(`^${fieldName}:\\s*(.+)`, 'im');
|
|
566
|
+
const plainMatch = bodyContent.match(plainPattern);
|
|
567
|
+
return plainMatch ? plainMatch[1].trim() : null;
|
|
519
568
|
};
|
|
520
569
|
|
|
521
570
|
const currentPhase = extractField('Current Phase');
|
|
@@ -548,8 +597,10 @@ function buildStateFrontmatter(bodyContent, cwd) {
|
|
|
548
597
|
try {
|
|
549
598
|
const phasesDir = path.join(cwd, '.planning', 'phases');
|
|
550
599
|
if (fs.existsSync(phasesDir)) {
|
|
600
|
+
const isDirInMilestone = getMilestonePhaseFilter(cwd);
|
|
551
601
|
const phaseDirs = fs.readdirSync(phasesDir, { withFileTypes: true })
|
|
552
|
-
.filter(e => e.isDirectory()).map(e => e.name)
|
|
602
|
+
.filter(e => e.isDirectory()).map(e => e.name)
|
|
603
|
+
.filter(isDirInMilestone);
|
|
553
604
|
let diskTotalPlans = 0;
|
|
554
605
|
let diskTotalSummaries = 0;
|
|
555
606
|
let diskCompletedPhases = 0;
|
|
@@ -562,7 +613,9 @@ function buildStateFrontmatter(bodyContent, cwd) {
|
|
|
562
613
|
diskTotalSummaries += summaries;
|
|
563
614
|
if (plans > 0 && summaries >= plans) diskCompletedPhases++;
|
|
564
615
|
}
|
|
565
|
-
|
|
616
|
+
totalPhases = isDirInMilestone.phaseCount > 0
|
|
617
|
+
? Math.max(phaseDirs.length, isDirInMilestone.phaseCount)
|
|
618
|
+
: phaseDirs.length;
|
|
566
619
|
completedPhases = diskCompletedPhases;
|
|
567
620
|
totalPlans = diskTotalPlans;
|
|
568
621
|
completedPlans = diskTotalSummaries;
|
|
@@ -8,7 +8,6 @@ Plans execute autonomously. Checkpoints formalize interaction points where human
|
|
|
8
8
|
2. **Codex sets up the verification environment** - Start dev servers, seed databases, configure env vars
|
|
9
9
|
3. **User only does what requires human judgment** - Visual checks, UX evaluation, "does this feel right?"
|
|
10
10
|
4. **Secrets come from user, automation comes from Codex** - Ask for API keys, then Codex uses them via CLI
|
|
11
|
-
5. **Auto-mode bypasses verification/decision checkpoints** — When `workflow.auto_advance` is true in config: human-verify auto-approves, decision auto-selects first option, human-action still stops (auth gates cannot be automated)
|
|
12
11
|
</overview>
|
|
13
12
|
|
|
14
13
|
<checkpoint_types>
|
|
@@ -14,21 +14,28 @@ Default: `balanced` if not set or config missing.
|
|
|
14
14
|
|
|
15
15
|
@~/.codex/get-shit-done/references/model-profiles.md
|
|
16
16
|
|
|
17
|
-
Look up the agent in the table for the resolved profile.
|
|
17
|
+
Look up the agent in the table for the resolved profile. Each entry returns:
|
|
18
|
+
|
|
19
|
+
```json
|
|
20
|
+
{ "model": "inherit", "thinking": "xhigh" }
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
All agents use `gpt-5.3-codex` (via `"inherit"`). The `thinking` field controls reasoning effort.
|
|
24
|
+
|
|
25
|
+
Pass both parameters to Task calls:
|
|
18
26
|
|
|
19
27
|
```
|
|
20
28
|
Task(
|
|
21
29
|
prompt="...",
|
|
22
30
|
subagent_type="gsd-planner",
|
|
23
|
-
model="
|
|
31
|
+
model="inherit",
|
|
32
|
+
thinking="{resolved_thinking}" # "xhigh", "high", "medium", or "low"
|
|
24
33
|
)
|
|
25
34
|
```
|
|
26
35
|
|
|
27
|
-
**Note:** Opus-tier agents resolve to `"inherit"` (not `"opus"`). This causes the agent to use the parent session's model, avoiding conflicts with organization policies that may block specific opus versions.
|
|
28
|
-
|
|
29
36
|
## Usage
|
|
30
37
|
|
|
31
38
|
1. Resolve once at orchestration start
|
|
32
39
|
2. Store the profile value
|
|
33
|
-
3. Look up each agent's model from the table
|
|
34
|
-
4. Pass model
|
|
40
|
+
3. Look up each agent's `{ model, thinking }` from the table
|
|
41
|
+
4. Pass both model and thinking parameters to each Task call
|
|
@@ -1,92 +1,101 @@
|
|
|
1
1
|
# Model Profiles
|
|
2
2
|
|
|
3
|
-
Model profiles control
|
|
3
|
+
Model profiles control the reasoning effort level for each GSD agent. All agents use `gpt-5.3-codex` (Codex-optimized for agentic coding); what varies is the **thinking level** — how much reasoning budget each agent gets based on its role.
|
|
4
4
|
|
|
5
5
|
## Profile Definitions
|
|
6
6
|
|
|
7
|
-
| Agent
|
|
8
|
-
|
|
9
|
-
| gsd-planner
|
|
10
|
-
| gsd-roadmapper
|
|
11
|
-
| gsd-executor
|
|
12
|
-
| gsd-phase-researcher
|
|
13
|
-
| gsd-project-researcher
|
|
14
|
-
| gsd-research-synthesizer |
|
|
15
|
-
| gsd-debugger
|
|
16
|
-
| gsd-codebase-mapper
|
|
17
|
-
| gsd-verifier
|
|
18
|
-
| gsd-plan-checker
|
|
19
|
-
| gsd-integration-checker
|
|
7
|
+
| Agent | `quality` | `balanced` | `budget` |
|
|
8
|
+
| ------------------------ | --------- | ---------- | --------- |
|
|
9
|
+
| gsd-planner | 🔴 xhigh | 🔴 xhigh | 🟢 high |
|
|
10
|
+
| gsd-roadmapper | 🔴 xhigh | 🟢 high | 🟡 medium |
|
|
11
|
+
| gsd-executor | 🔴 xhigh | 🟢 high | 🟡 medium |
|
|
12
|
+
| gsd-phase-researcher | 🟢 high | 🟡 medium | 🟡 medium |
|
|
13
|
+
| gsd-project-researcher | 🟢 high | 🟡 medium | 🟡 medium |
|
|
14
|
+
| gsd-research-synthesizer | 🟢 high | 🟡 medium | 🟡 medium |
|
|
15
|
+
| gsd-debugger | 🔴 xhigh | 🔴 xhigh | 🟢 high |
|
|
16
|
+
| gsd-codebase-mapper | 🟡 medium | 🟡 medium | 🟡 medium |
|
|
17
|
+
| gsd-verifier | 🟢 high | 🟢 high | 🟡 medium |
|
|
18
|
+
| gsd-plan-checker | 🟢 high | 🟡 medium | 🟡 medium |
|
|
19
|
+
| gsd-integration-checker | 🟢 high | 🟡 medium | 🟡 medium |
|
|
20
|
+
|
|
21
|
+
All entries resolve to `model: "inherit"` (uses the session's gpt-5.3-codex). The `thinking` field controls reasoning effort.
|
|
20
22
|
|
|
21
23
|
## Profile Philosophy
|
|
22
24
|
|
|
23
|
-
**quality** - Maximum reasoning
|
|
24
|
-
- Opus for all decision-making agents
|
|
25
|
-
- Sonnet for read-only verification
|
|
26
|
-
- Use when: quota available, critical architecture work
|
|
25
|
+
**quality** - Highest reasoning level suited to each role
|
|
27
26
|
|
|
28
|
-
**
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
-
|
|
32
|
-
- Use when: normal development, good balance of quality and cost
|
|
27
|
+
- 🔴 **xhigh** for decision-makers: planner, roadmapper, executor, debugger
|
|
28
|
+
- 🟢 **high** for analysis: researchers, verifiers, checkers
|
|
29
|
+
- 🟡 **medium** for read-only mapping
|
|
30
|
+
- Use when: critical architecture work, complex debugging
|
|
33
31
|
|
|
34
|
-
**
|
|
35
|
-
|
|
36
|
-
-
|
|
37
|
-
-
|
|
32
|
+
**balanced** (default) - Smart thinking allocation
|
|
33
|
+
|
|
34
|
+
- 🔴 **xhigh** only for planner and debugger (highest-impact decisions)
|
|
35
|
+
- 🟢 **high** for roadmapper, executor, and verifier (need solid reasoning, but below the planner/debugger tier)
|
|
36
|
+
- 🟡 **medium** for everything else (structured output, scanning)
|
|
37
|
+
- Use when: normal development
|
|
38
|
+
|
|
39
|
+
**budget** - Minimal reasoning budget
|
|
40
|
+
|
|
41
|
+
- 🟢 **high** for planner and debugger (always need some reasoning)
|
|
42
|
+
- 🟡 **medium** for everything else
|
|
43
|
+
- Use when: high-volume work, less critical phases
|
|
44
|
+
|
|
45
|
+
## Role-Based Thinking Rationale
|
|
46
|
+
|
|
47
|
+
**Why xhigh thinking for gsd-planner?**
|
|
48
|
+
Planning involves architecture decisions, goal decomposition, and task design. These decisions cascade through the entire phase — worth the extra reasoning budget.
|
|
49
|
+
|
|
50
|
+
**Why xhigh thinking for gsd-debugger even in balanced?**
|
|
51
|
+
Root cause analysis requires deep reasoning. A misdiagnosis wastes more tokens in re-runs than the extra reasoning would have cost.
|
|
52
|
+
|
|
53
|
+
**Why medium thinking for gsd-codebase-mapper?**
|
|
54
|
+
Read-only file scanning and pattern extraction. No decisions to make — just structured output from file contents.
|
|
55
|
+
|
|
56
|
+
**Why high thinking for gsd-verifier in balanced?**
|
|
57
|
+
Verification requires goal-backward reasoning — checking if code _delivers_ what the phase promised. Medium thinking may miss subtle gaps.
|
|
58
|
+
|
|
59
|
+
**Why medium thinking for researchers in balanced?**
|
|
60
|
+
Research agents scan and collect information. The synthesis happens elsewhere. They don't need deep reasoning for reading files.
|
|
38
61
|
|
|
39
62
|
## Resolution Logic
|
|
40
63
|
|
|
41
|
-
Orchestrators resolve model before spawning:
|
|
64
|
+
Orchestrators resolve model and thinking before spawning:
|
|
42
65
|
|
|
43
66
|
```
|
|
44
67
|
1. Read .planning/config.json
|
|
45
68
|
2. Check model_overrides for agent-specific override
|
|
46
69
|
3. If no override, look up agent in profile table
|
|
47
|
-
4. Pass model
|
|
70
|
+
4. Pass model + thinking to Task call
|
|
48
71
|
```
|
|
49
72
|
|
|
73
|
+
Returns: `{ model: "inherit", thinking: "xhigh"|"high"|"medium"|"low" }`
|
|
74
|
+
|
|
50
75
|
## Per-Agent Overrides
|
|
51
76
|
|
|
52
|
-
Override
|
|
77
|
+
Override the thinking level for specific agents (the config key is still named `model_overrides`, but its values are thinking levels):
|
|
53
78
|
|
|
54
79
|
```json
|
|
55
80
|
{
|
|
56
81
|
"model_profile": "balanced",
|
|
57
82
|
"model_overrides": {
|
|
58
|
-
"gsd-executor": "
|
|
59
|
-
"gsd-
|
|
83
|
+
"gsd-executor": "xhigh",
|
|
84
|
+
"gsd-codebase-mapper": "high"
|
|
60
85
|
}
|
|
61
86
|
}
|
|
62
87
|
```
|
|
63
88
|
|
|
64
|
-
|
|
89
|
+
Valid override values: `"xhigh"`, `"high"`, `"medium"`, `"low"`.
|
|
65
90
|
|
|
66
91
|
## Switching Profiles
|
|
67
92
|
|
|
68
93
|
Runtime: `$gsd-set-profile <profile>`
|
|
69
94
|
|
|
70
|
-
Per-project default
|
|
95
|
+
Per-project default in `.planning/config.json`:
|
|
96
|
+
|
|
71
97
|
```json
|
|
72
98
|
{
|
|
73
99
|
"model_profile": "balanced"
|
|
74
100
|
}
|
|
75
101
|
```
|
|
76
|
-
|
|
77
|
-
## Design Rationale
|
|
78
|
-
|
|
79
|
-
**Why Opus for gsd-planner?**
|
|
80
|
-
Planning involves architecture decisions, goal decomposition, and task design. This is where model quality has the highest impact.
|
|
81
|
-
|
|
82
|
-
**Why Sonnet for gsd-executor?**
|
|
83
|
-
Executors follow explicit PLAN.md instructions. The plan already contains the reasoning; execution is implementation.
|
|
84
|
-
|
|
85
|
-
**Why Sonnet (not Haiku) for verifiers in balanced?**
|
|
86
|
-
Verification requires goal-backward reasoning - checking if code *delivers* what the phase promised, not just pattern matching. Sonnet handles this well; Haiku may miss subtle gaps.
|
|
87
|
-
|
|
88
|
-
**Why Haiku for gsd-codebase-mapper?**
|
|
89
|
-
Read-only exploration and pattern extraction. No reasoning required, just structured output from file contents.
|
|
90
|
-
|
|
91
|
-
**Why `inherit` instead of passing `opus` directly?**
|
|
92
|
-
Codex CLI's `"opus"` alias maps to a specific model version. Organizations may block older opus versions while allowing newer ones. GSD returns `"inherit"` for opus-tier agents, causing them to use whatever opus version the user has configured in their session. This avoids version conflicts and silent fallbacks to Sonnet.
|
|
@@ -54,6 +54,20 @@ Template for `.planning/phases/XX-name/{phase_num}-CONTEXT.md` - captures implem
|
|
|
54
54
|
|
|
55
55
|
</specifics>
|
|
56
56
|
|
|
57
|
+
<code_context>
|
|
58
|
+
## Existing Code Insights
|
|
59
|
+
|
|
60
|
+
### Reusable Assets
|
|
61
|
+
- [Component/hook/utility]: [How it could be used in this phase]
|
|
62
|
+
|
|
63
|
+
### Established Patterns
|
|
64
|
+
- [Pattern]: [How it constrains/enables this phase]
|
|
65
|
+
|
|
66
|
+
### Integration Points
|
|
67
|
+
- [Where new code connects to existing system]
|
|
68
|
+
|
|
69
|
+
</code_context>
|
|
70
|
+
|
|
57
71
|
<deferred>
|
|
58
72
|
## Deferred Ideas
|
|
59
73
|
|
|
@@ -20,7 +20,6 @@ wave: N # Execution wave (1, 2, 3...). Pre-computed at plan
|
|
|
20
20
|
depends_on: [] # Plan IDs this plan requires (e.g., ["01-01"]).
|
|
21
21
|
files_modified: [] # Files this plan modifies.
|
|
22
22
|
autonomous: true # false if plan has checkpoints requiring user interaction
|
|
23
|
-
requirements: [] # REQUIRED — Requirement IDs from ROADMAP this plan addresses. MUST NOT be empty.
|
|
24
23
|
user_setup: [] # Human-required setup Codex cannot automate (see below)
|
|
25
24
|
|
|
26
25
|
# Goal-backward verification (derived during planning, verified after execution)
|
|
@@ -130,7 +129,6 @@ After completion, create `.planning/phases/XX-name/{phase}-{plan}-SUMMARY.md`
|
|
|
130
129
|
| `depends_on` | Yes | Array of plan IDs this plan requires. |
|
|
131
130
|
| `files_modified` | Yes | Files this plan touches. |
|
|
132
131
|
| `autonomous` | Yes | `true` if no checkpoints, `false` if has checkpoints |
|
|
133
|
-
| `requirements` | Yes | **MUST** list requirement IDs from ROADMAP. Every roadmap requirement MUST appear in at least one plan. |
|
|
134
132
|
| `user_setup` | No | Array of human-required setup items (external services) |
|
|
135
133
|
| `must_haves` | Yes | Goal-backward verification criteria (see below) |
|
|
136
134
|
|
|
@@ -57,8 +57,6 @@ If a phase is missing VERIFICATION.md, flag it as "unverified phase" — this is
|
|
|
57
57
|
|
|
58
58
|
With phase context collected:
|
|
59
59
|
|
|
60
|
-
Extract `MILESTONE_REQ_IDS` from REQUIREMENTS.md traceability table — all REQ-IDs assigned to phases in this milestone.
|
|
61
|
-
|
|
62
60
|
```
|
|
63
61
|
Task(
|
|
64
62
|
prompt="Check cross-phase integration and E2E flows.
|
|
@@ -67,11 +65,6 @@ Phases: {phase_dirs}
|
|
|
67
65
|
Phase exports: {from SUMMARYs}
|
|
68
66
|
API routes: {routes created}
|
|
69
67
|
|
|
70
|
-
Milestone Requirements:
|
|
71
|
-
{MILESTONE_REQ_IDS — list each REQ-ID with description and assigned phase}
|
|
72
|
-
|
|
73
|
-
MUST map each integration finding to affected requirement IDs where applicable.
|
|
74
|
-
|
|
75
68
|
Verify cross-phase wiring and E2E user flows.",
|
|
76
69
|
subagent_type="gsd-integration-checker",
|
|
77
70
|
model="{integration_checker_model}"
|
|
@@ -84,48 +77,12 @@ Combine:
|
|
|
84
77
|
- Phase-level gaps and tech debt (from step 2)
|
|
85
78
|
- Integration checker's report (wiring gaps, broken flows)
|
|
86
79
|
|
|
87
|
-
## 5. Check Requirements Coverage
|
|
88
|
-
|
|
89
|
-
MUST cross-reference three independent sources for each requirement:
|
|
90
|
-
|
|
91
|
-
### 5a. Parse REQUIREMENTS.md Traceability Table
|
|
92
|
-
|
|
93
|
-
Extract all REQ-IDs mapped to milestone phases from the traceability table:
|
|
94
|
-
- Requirement ID, description, assigned phase, current status, checked-off state (`[x]` vs `[ ]`)
|
|
95
|
-
|
|
96
|
-
### 5b. Parse Phase VERIFICATION.md Requirements Tables
|
|
97
|
-
|
|
98
|
-
For each phase's VERIFICATION.md, extract the expanded requirements table:
|
|
99
|
-
- Requirement | Source Plan | Description | Status | Evidence
|
|
100
|
-
- Map each entry back to its REQ-ID
|
|
101
|
-
|
|
102
|
-
### 5c. Extract SUMMARY.md Frontmatter Cross-Check
|
|
103
|
-
|
|
104
|
-
For each phase's SUMMARY.md, extract `requirements-completed` from YAML frontmatter:
|
|
105
|
-
```bash
|
|
106
|
-
for summary in .planning/phases/*-*/*-SUMMARY.md; do
|
|
107
|
-
node ~/.codex/get-shit-done/bin/gsd-tools.cjs summary-extract "$summary" --fields requirements_completed | jq -r '.requirements_completed'
|
|
108
|
-
done
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
### 5d. Status Determination Matrix
|
|
112
|
-
|
|
113
|
-
For each REQ-ID, determine status using all three sources:
|
|
114
|
-
|
|
115
|
-
| VERIFICATION.md Status | SUMMARY Frontmatter | REQUIREMENTS.md | → Final Status |
|
|
116
|
-
|------------------------|---------------------|-----------------|----------------|
|
|
117
|
-
| passed | listed | `[x]` | **satisfied** |
|
|
118
|
-
| passed | listed | `[ ]` | **satisfied** (update checkbox) |
|
|
119
|
-
| passed | missing | any | **partial** (verify manually) |
|
|
120
|
-
| gaps_found | any | any | **unsatisfied** |
|
|
121
|
-
| missing | listed | any | **partial** (verification gap) |
|
|
122
|
-
| missing | missing | any | **unsatisfied** |
|
|
123
|
-
|
|
124
|
-
### 5e. FAIL Gate and Orphan Detection
|
|
125
|
-
|
|
126
|
-
**REQUIRED:** Any `unsatisfied` requirement MUST force `gaps_found` status on the milestone audit.
|
|
80
|
+
## 5. Check Requirements Coverage
|
|
127
81
|
|
|
128
|
-
|
|
82
|
+
For each requirement in REQUIREMENTS.md mapped to this milestone:
|
|
83
|
+
- Find owning phase
|
|
84
|
+
- Check phase verification status
|
|
85
|
+
- Determine: satisfied | partial | unsatisfied
|
|
129
86
|
|
|
130
87
|
## 6. Aggregate into v{version}-MILESTONE-AUDIT.md
|
|
131
88
|
|
|
@@ -142,14 +99,7 @@ scores:
|
|
|
142
99
|
integration: N/M
|
|
143
100
|
flows: N/M
|
|
144
101
|
gaps: # Critical blockers
|
|
145
|
-
requirements:
|
|
146
|
-
- id: "{REQ-ID}"
|
|
147
|
-
status: "unsatisfied | partial | orphaned"
|
|
148
|
-
phase: "{assigned phase}"
|
|
149
|
-
claimed_by_plans: ["{plan files that reference this requirement}"]
|
|
150
|
-
completed_by_plans: ["{plan files whose SUMMARY marks it complete}"]
|
|
151
|
-
verification_status: "passed | gaps_found | missing | orphaned"
|
|
152
|
-
evidence: "{specific evidence or lack thereof}"
|
|
102
|
+
requirements: [...]
|
|
153
103
|
integration: [...]
|
|
154
104
|
flows: [...]
|
|
155
105
|
tech_debt: # Non-critical, deferred
|
|
@@ -285,13 +235,8 @@ $gsd-plan-milestone-gaps
|
|
|
285
235
|
<success_criteria>
|
|
286
236
|
- [ ] Milestone scope identified
|
|
287
237
|
- [ ] All phase VERIFICATION.md files read
|
|
288
|
-
- [ ] SUMMARY.md `requirements-completed` frontmatter extracted for each phase
|
|
289
|
-
- [ ] REQUIREMENTS.md traceability table parsed for all milestone REQ-IDs
|
|
290
|
-
- [ ] 3-source cross-reference completed (VERIFICATION + SUMMARY + traceability)
|
|
291
|
-
- [ ] Orphaned requirements detected (in traceability but absent from all VERIFICATIONs)
|
|
292
238
|
- [ ] Tech debt and deferred gaps aggregated
|
|
293
|
-
- [ ] Integration checker spawned
|
|
294
|
-
- [ ] v{version}-MILESTONE-AUDIT.md created
|
|
295
|
-
- [ ] FAIL gate enforced — any unsatisfied requirement forces gaps_found status
|
|
239
|
+
- [ ] Integration checker spawned for cross-phase wiring
|
|
240
|
+
- [ ] v{version}-MILESTONE-AUDIT.md created
|
|
296
241
|
- [ ] Results presented with actionable next steps
|
|
297
242
|
</success_criteria>
|
|
@@ -79,7 +79,7 @@ For each gap, fill the debug-subagent-prompt template and spawn:
|
|
|
79
79
|
|
|
80
80
|
```
|
|
81
81
|
Task(
|
|
82
|
-
prompt=filled_debug_subagent_prompt
|
|
82
|
+
prompt=filled_debug_subagent_prompt,
|
|
83
83
|
subagent_type="general-purpose",
|
|
84
84
|
description="Debug: {truth_short}"
|
|
85
85
|
)
|