pan-wizard 2.8.1 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/bin/install.js +23 -0
- package/commands/pan/focus-design.md +235 -12
- package/commands/pan/focus-doc-audit.md +530 -0
- package/commands/pan/focus-drift-walking.md +525 -0
- package/commands/pan/focus-plan.md +204 -12
- package/commands/pan/profile.md +2 -1
- package/package.json +1 -1
- package/pan-wizard-core/bin/lib/commands.cjs +29 -7
- package/pan-wizard-core/bin/lib/config.cjs +10 -0
- package/pan-wizard-core/bin/lib/core.cjs +168 -21
- package/pan-wizard-core/bin/lib/verify.cjs +283 -4
- package/pan-wizard-core/bin/pan-tools.cjs +11 -2
- package/pan-wizard-core/references/model-profiles.md +191 -62
- package/pan-wizard-core/workflows/help.md +11 -1
- package/pan-wizard-core/workflows/profile.md +8 -1
- package/pan-wizard-core/workflows/settings.md +14 -0
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: focus-plan
|
|
3
3
|
group: Focus
|
|
4
|
-
description: Create capacity-budgeted work batch with 4 execution modes
|
|
4
|
+
description: Create capacity-budgeted work batch with spec coverage verification and 4 execution modes
|
|
5
5
|
allowed-tools:
|
|
6
6
|
- Read
|
|
7
|
+
- Write
|
|
8
|
+
- Edit
|
|
7
9
|
- Bash
|
|
8
10
|
- Grep
|
|
9
11
|
- Glob
|
|
10
12
|
---
|
|
11
13
|
|
|
12
|
-
# /pan:focus-plan — Capacity-Budgeted Work Batch Planner
|
|
14
|
+
# /pan:focus-plan — Capacity-Budgeted Work Batch Planner with Spec Coverage Verification
|
|
13
15
|
|
|
14
|
-
Create a capacity-budgeted work batch from focus-scan results
|
|
16
|
+
Create a capacity-budgeted work batch from focus-scan results **with mandatory verification that planned work covers all relevant spec and ADR requirements.** $ARGUMENTS
|
|
15
17
|
|
|
16
|
-
**Goal:** Select a right-sized batch of work items that fits within the session's point budget, ordered for maximum impact with minimum risk.
|
|
18
|
+
**Goal:** Select a right-sized batch of work items that (a) fits within the session's point budget, (b) is ordered for maximum impact with minimum risk, and (c) demonstrably covers the requirements from any associated specs, ADRs, and success criteria — flagging coverage gaps BEFORE execution begins.
|
|
17
19
|
|
|
18
20
|
---
|
|
19
21
|
|
|
@@ -42,10 +44,67 @@ If no recent scan exists, run `/pan:focus-scan` automatically before proceeding.
|
|
|
42
44
|
- `full` — Full-spectrum: enhanced budget, all priorities equally weighted (60 pts)
|
|
43
45
|
- `--priority P0-P6` — Only pick items from these priority tiers
|
|
44
46
|
- `--lean` — Apply RS filtering: exclude items with RS < 1.5
|
|
47
|
+
- `--no-spec-check` — Skip spec coverage verification (NOT recommended — use only for pure bugfix batches)
|
|
45
48
|
|
|
46
49
|
---
|
|
47
50
|
|
|
48
|
-
##
|
|
51
|
+
## Phase 1: Spec & ADR Discovery (MANDATORY)
|
|
52
|
+
|
|
53
|
+
> *Before planning work, understand what has been designed and promised.*
|
|
54
|
+
|
|
55
|
+
### 1.1 Scan for Specifications
|
|
56
|
+
Search the project for feature specifications and design documents:
|
|
57
|
+
- `docs/specs/*.md` or `docs/specs/**/*.md`
|
|
58
|
+
- `.planning/specs/` or `.planning/designs/`
|
|
59
|
+
- Any `*_featureai.md`, `*_spec.md`, `*_design.md` files
|
|
60
|
+
- README sections describing planned features
|
|
61
|
+
|
|
62
|
+
For each spec found, extract:
|
|
63
|
+
|
|
64
|
+
| Spec File | Feature Name | Status | Requirements Count | Success Criteria Count |
|
|
65
|
+
|-----------|-------------|--------|-------------------|----------------------|
|
|
66
|
+
| [path] | [name] | Proposed/In Progress/Complete | [N] | [N] |
|
|
67
|
+
|
|
68
|
+
### 1.2 Scan for ADRs
|
|
69
|
+
Search for Architecture Decision Records:
|
|
70
|
+
- `docs/decisions/ADR-*.md`
|
|
71
|
+
- `.planning/decisions/`
|
|
72
|
+
|
|
73
|
+
For each ADR, extract:
|
|
74
|
+
|
|
75
|
+
| ADR | Decision | Status | Success Criteria | Implementation Tasks |
|
|
76
|
+
|-----|----------|--------|-----------------|---------------------|
|
|
77
|
+
| [ADR-NNNN] | [summary] | Proposed/Accepted/Implemented | [count or "none defined"] | [count or "none defined"] |
|
|
78
|
+
|
|
79
|
+
### 1.3 Extract Requirement Inventory
|
|
80
|
+
From every spec and ADR found, build a **master requirements list**:
|
|
81
|
+
|
|
82
|
+
| Req ID | Source | Requirement | Type | Implemented? |
|
|
83
|
+
|--------|--------|-------------|------|-------------|
|
|
84
|
+
| SC-1 | ADR-0015 | JWT auth with 4-role RBAC | Feature | Yes/No/Partial |
|
|
85
|
+
| SC-2 | spec/extraction.md | Image extraction for JPG/PNG | Feature | Yes/No/Partial |
|
|
86
|
+
| T-6 | ADR-0018 §Task 6 | Unmatched description table | Task | Yes/No/Partial |
|
|
87
|
+
| BRK-1 | ADR-0018 §Breaking | Hierarchy roll-up for backward compat | Migration | Yes/No/Partial |
|
|
88
|
+
|
|
89
|
+
**Verification method for "Implemented?":**
|
|
90
|
+
- Search the codebase for files, classes, functions, routes, or tests matching each requirement
|
|
91
|
+
- Check if tests exist that verify the requirement
|
|
92
|
+
- Mark as `Partial` if code exists but tests don't, or if the feature is stubbed
|
|
93
|
+
|
|
94
|
+
### 1.4 Identify Unimplemented Requirements
|
|
95
|
+
Filter the master list to requirements where `Implemented? = No` or `Partial`:
|
|
96
|
+
|
|
97
|
+
| Req ID | Source | Requirement | Gap Type | Estimated Effort |
|
|
98
|
+
|--------|--------|-------------|----------|-----------------|
|
|
99
|
+
| SC-2 | ADR-0018 | Keyword count >= 500 | Not started | M |
|
|
100
|
+
| T-6 | ADR-0018 | Unmatched description table | Not started | M |
|
|
101
|
+
| BRK-1 | ADR-0018 | Hierarchy roll-up | Partial (code, no tests) | S |
|
|
102
|
+
|
|
103
|
+
This becomes the **spec gap backlog** — items that specs/ADRs promised but the codebase doesn't deliver yet.
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Phase 2: Capacity Budget System
|
|
49
108
|
|
|
50
109
|
| Size | Points | Per Session | Meaning |
|
|
51
110
|
|------|--------|-------------|---------|
|
|
@@ -57,45 +116,178 @@ If no recent scan exists, run `/pan:focus-scan` automatically before proceeding.
|
|
|
57
116
|
|
|
58
117
|
---
|
|
59
118
|
|
|
60
|
-
## Execution Modes
|
|
119
|
+
## Phase 3: Execution Modes & Batch Selection
|
|
61
120
|
|
|
62
121
|
### `bugfix` — Stability-First
|
|
63
122
|
- **Budget:** 40 pts
|
|
64
123
|
- **Algorithm:** P0 mandatory -> P1 -> P2-P4 smallest-first
|
|
65
124
|
- **Feature allocation:** None
|
|
125
|
+
- **Spec coverage:** Verify P0/P1 items close spec gaps where applicable
|
|
66
126
|
|
|
67
127
|
### `balanced` — Mix of Fixes + Features (DEFAULT)
|
|
68
128
|
- **Budget:** 50 pts
|
|
69
129
|
- **Stability pass (60%):** 30 pts for P0-P2
|
|
70
130
|
- **Feature pass (40%):** 20 pts for P3-P6
|
|
131
|
+
- **Spec coverage:** Cross-reference feature items against spec gap backlog — prefer items that close gaps
|
|
71
132
|
|
|
72
133
|
### `features` — Feature-Focused Sprint
|
|
73
134
|
- **Budget:** 50 pts
|
|
74
135
|
- **Mandatory pass:** All P0 items
|
|
75
136
|
- **Feature pass (80%):** 40 pts for P3-P5
|
|
76
137
|
- **Stability pass (20%):** 10 pts for P1-P2 quick wins
|
|
138
|
+
- **Spec coverage:** Feature items MUST map to spec requirements — reject unspecified feature work
|
|
77
139
|
|
|
78
140
|
### `full` — Full-Spectrum Marathon
|
|
79
141
|
- **Budget:** 60 pts
|
|
80
142
|
- **All priorities weighted equally, largest-impact-first**
|
|
143
|
+
- **Spec coverage:** Full traceability — every item maps to a spec/ADR requirement or is flagged as unspecified
|
|
144
|
+
|
|
145
|
+
### Batch Selection Algorithm
|
|
146
|
+
1. Build candidate list from focus-scan results
|
|
147
|
+
2. **For each candidate, attempt to map it to a spec/ADR requirement** (by keyword match, file overlap, or feature area)
|
|
148
|
+
3. Score candidates: `impact_score = base_priority_score + spec_coverage_bonus`
|
|
149
|
+
- Items that close spec gaps get +2 priority bonus
|
|
150
|
+
- Items that close success criteria get +3 priority bonus
|
|
151
|
+
- Items with no spec mapping get +0 (no penalty, but no bonus)
|
|
152
|
+
4. Apply mode-specific budget allocation
|
|
153
|
+
5. Select items greedily by score until budget exhausted
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Phase 4: Spec Coverage Analysis (MANDATORY unless `--no-spec-check`)
|
|
158
|
+
|
|
159
|
+
> *The most important output of focus-plan: does the batch actually deliver against what was designed?*
|
|
160
|
+
|
|
161
|
+
### 4.1 Coverage Matrix
|
|
162
|
+
For each spec/ADR requirement, show whether the batch covers it:
|
|
163
|
+
|
|
164
|
+
| Req ID | Source | Requirement | Batch Item | Coverage |
|
|
165
|
+
|--------|--------|-------------|-----------|----------|
|
|
166
|
+
| SC-1 | ADR-0018 | Category count >= 65 | #3: Expand categories | COVERED |
|
|
167
|
+
| SC-2 | ADR-0018 | Keyword count >= 500 | #4: Expand keywords | COVERED |
|
|
168
|
+
| SC-3 | ADR-0018 | Unmatched queue API | — | **GAP** |
|
|
169
|
+
| SC-4 | ADR-0018 | NCA affordability output | — | **GAP (deferred to v1)** |
|
|
170
|
+
| SC-5 | ADR-0018 | No regression | #1: Run existing tests | COVERED |
|
|
171
|
+
|
|
172
|
+
### 4.2 Coverage Score
|
|
173
|
+
```
|
|
174
|
+
Spec Coverage: X / Y requirements covered (Z%)
|
|
175
|
+
├── Fully covered: N items
|
|
176
|
+
├── Partially covered: N items (code but no tests, or tests but incomplete)
|
|
177
|
+
├── Gaps: N items (not in batch)
|
|
178
|
+
└── Deferred: N items (explicitly deferred to future version)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### 4.3 Gap Analysis & Justification
|
|
182
|
+
For every **GAP** in the coverage matrix, provide:
|
|
183
|
+
|
|
184
|
+
| Gap | Requirement | Why Not In This Batch | When Will It Be Addressed |
|
|
185
|
+
|-----|------------|----------------------|--------------------------|
|
|
186
|
+
| SC-3 | Unmatched queue API | Exceeds budget (M=4pts, only 2pts remaining) | Next batch (features mode) |
|
|
187
|
+
| SC-4 | NCA affordability | Depends on SC-1 + SC-2 (must complete first) | After category expansion |
|
|
188
|
+
|
|
189
|
+
**CRITICAL:** If the coverage score is < 50% for a spec that has `Status: In Progress`, flag this prominently:
|
|
190
|
+
```
|
|
191
|
+
⚠️ WARNING: Batch covers only X% of [spec name] requirements.
|
|
192
|
+
Y requirements remain unaddressed. Consider:
|
|
193
|
+
- Increasing budget (--budget N)
|
|
194
|
+
- Switching to features mode (--mode features)
|
|
195
|
+
- Breaking spec into smaller milestones
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### 4.4 Dependency Verification
|
|
199
|
+
Check that batch items respect dependency ordering from specs:
|
|
200
|
+
|
|
201
|
+
| Batch Item | Depends On | Dependency In Batch? | Order Correct? |
|
|
202
|
+
|-----------|-----------|---------------------|----------------|
|
|
203
|
+
| #4: Keywords | #3: Categories | Yes | Yes (#3 before #4) |
|
|
204
|
+
| #6: Suggestions | #5: Unmatched API | No — #5 not in batch | **BLOCKED** |
|
|
205
|
+
|
|
206
|
+
**If any item is BLOCKED:** Either add the dependency to the batch (if budget allows) or remove the blocked item and flag it.
|
|
207
|
+
|
|
208
|
+
### 4.5 Success Criteria Verification Plan
|
|
209
|
+
For each success criterion in the batch, specify HOW it will be verified after execution:
|
|
210
|
+
|
|
211
|
+
| SC ID | Criterion | Verification Command | Expected Result |
|
|
212
|
+
|-------|-----------|---------------------|-----------------|
|
|
213
|
+
| SC-1 | Category count >= 65 | `SELECT COUNT(*) FROM stx_category` | >= 65 |
|
|
214
|
+
| SC-2 | Keywords >= 500 | `SELECT COUNT(*) FROM stx_keyword` | >= 500 |
|
|
215
|
+
| SC-5 | No regression | `dotnet test` | All pass, count >= N |
|
|
216
|
+
|
|
217
|
+
This becomes the post-execution checklist for `/pan:focus-exec`.
|
|
81
218
|
|
|
82
219
|
---
|
|
83
220
|
|
|
84
|
-
## Output
|
|
221
|
+
## Phase 5: Output
|
|
85
222
|
|
|
86
223
|
Produce a batch file at `.planning/focus/batch-<YYYY-MM-DD>.json` via `pan-tools focus plan`:
|
|
87
224
|
|
|
88
225
|
```markdown
|
|
89
226
|
## Focus Batch — <date>
|
|
90
227
|
**Mode:** balanced | **Budget:** 50 pts | **Allocated:** N pts
|
|
228
|
+
**Specs referenced:** N specs, M ADRs
|
|
229
|
+
**Spec coverage:** X/Y requirements (Z%)
|
|
230
|
+
|
|
231
|
+
### Batch Items
|
|
232
|
+
|
|
233
|
+
| # | ID | Title | Priority | Size | Pts | Tier | Track | Spec Req |
|
|
234
|
+
|---|----|-------|----------|------|-----|------|-------|----------|
|
|
235
|
+
| 1 | P0-1 | Fix crash in state cmd | P0 | S | 2 | MICRO | Stability | ADR-0005 SC-3 |
|
|
236
|
+
| 2 | P2-3 | Add tests for milestone | P2 | M | 4 | STANDARD | Stability | — |
|
|
237
|
+
| 3 | P3-1 | Expand category taxonomy | P3 | M | 4 | STANDARD | Feature | ADR-0018 SC-1 |
|
|
91
238
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
|
95
|
-
|
|
96
|
-
|
|
|
239
|
+
### Spec Coverage Summary
|
|
240
|
+
|
|
241
|
+
| Source | Total Reqs | Covered | Gaps | Deferred |
|
|
242
|
+
|--------|-----------|---------|------|----------|
|
|
243
|
+
| ADR-0018 | 7 | 3 | 2 | 2 |
|
|
244
|
+
| spec/extraction.md | 5 | 5 | 0 | 0 |
|
|
245
|
+
| **Total** | **12** | **8 (67%)** | **2** | **2** |
|
|
246
|
+
|
|
247
|
+
### Uncovered Requirements (Gaps)
|
|
248
|
+
|
|
249
|
+
| Req | Source | Reason | Next Batch? |
|
|
250
|
+
|-----|--------|--------|-------------|
|
|
251
|
+
| Unmatched queue API | ADR-0018 SC-3 | Budget exceeded | Yes — features mode |
|
|
252
|
+
| NCA affordability | ADR-0018 SC-4 | Blocked by SC-1, SC-2 | After this batch |
|
|
253
|
+
|
|
254
|
+
### Dependency Order
|
|
255
|
+
```
|
|
256
|
+
#1 (P0 crash fix) → independent
|
|
257
|
+
#3 (categories) → #4 (keywords) → #5 (match types)
|
|
258
|
+
#2 (tests) → independent
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Post-Execution Verification Checklist
|
|
262
|
+
- [ ] SC-1: Category count >= 65 → `SELECT COUNT(*) FROM stx_category`
|
|
263
|
+
- [ ] SC-2: Keywords >= 500 → `SELECT COUNT(*) FROM stx_keyword`
|
|
264
|
+
- [ ] SC-5: All existing tests pass → `dotnet test`
|
|
97
265
|
|
|
98
266
|
Execution Order: MICRO first, then STANDARD, then FULL
|
|
99
267
|
```
|
|
100
268
|
|
|
101
269
|
Ready for `/pan:focus-exec`.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## NEVER DO
|
|
274
|
+
|
|
275
|
+
- Plan a batch without checking specs and ADRs for coverage gaps (the only exception is an explicit `--no-spec-check` run)
|
|
276
|
+
- Include a feature item that contradicts or conflicts with an accepted ADR
|
|
277
|
+
- Ignore dependency ordering defined in specs (Task A before Task B)
|
|
278
|
+
- Claim 100% spec coverage without actually verifying each requirement against the codebase
|
|
279
|
+
- Include blocked items (items whose dependencies are not in the batch and not yet implemented)
|
|
280
|
+
- Silently drop spec requirements — every gap must be justified and scheduled
|
|
281
|
+
- Plan implementation tasks that aren't traceable to a spec, ADR, scan finding, or user request
|
|
282
|
+
- Exceed the capacity budget (hard limit — not "approximately")
|
|
283
|
+
|
|
284
|
+
## ALWAYS DO
|
|
285
|
+
|
|
286
|
+
- Discover ALL specs and ADRs before selecting batch items
|
|
287
|
+
- Cross-reference every batch item against spec requirements where applicable
|
|
288
|
+
- Flag coverage gaps prominently with justification and scheduling
|
|
289
|
+
- Verify dependency ordering matches spec-defined task dependencies
|
|
290
|
+
- Include a post-execution verification checklist with concrete commands
|
|
291
|
+
- Prefer items that close spec gaps over items with no spec mapping (when priority is equal)
|
|
292
|
+
- State the coverage score as a percentage in the batch header
|
|
293
|
+
- Report unimplemented success criteria that aren't addressed by this batch
|
package/commands/pan/profile.md
CHANGED
|
@@ -32,5 +32,6 @@ The workflow handles all logic including:
|
|
|
32
32
|
2. Config file ensuring
|
|
33
33
|
3. Config reading and updating
|
|
34
34
|
4. Model table generation from MODEL_PROFILES
|
|
35
|
-
5.
|
|
35
|
+
5. Cost estimation display (relative cost multiplier per profile)
|
|
36
|
+
6. Confirmation display
|
|
36
37
|
</process>
|
package/package.json
CHANGED
package/pan-wizard-core/bin/lib/commands.cjs
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
const fs = require('fs');
|
|
5
5
|
const path = require('path');
|
|
6
|
-
const { safeReadFile, loadConfig, isGitIgnored, isGitRepo, execGit, normalizePhaseName, comparePhaseNum, getArchivedPhaseDirs, generateSlugInternal, getMilestoneInfo, resolveModelInternal, MODEL_PROFILES, output, error, findPhaseInternal, scanPendingTodos, toPosix } = require('./core.cjs');
|
|
6
|
+
const { safeReadFile, loadConfig, isGitIgnored, isGitRepo, execGit, normalizePhaseName, comparePhaseNum, getArchivedPhaseDirs, generateSlugInternal, getMilestoneInfo, resolveModelInternal, detectProvider, resolveTierToModel, estimateCostMultiplier, MODEL_PROFILES, output, error, findPhaseInternal, scanPendingTodos, toPosix } = require('./core.cjs');
|
|
7
7
|
const { extractFrontmatter } = require('./frontmatter.cjs');
|
|
8
8
|
const { PLANNING_DIR, PHASES_DIR, MILESTONES_DIR, QUICK_DIR, STATE_FILE, ROADMAP_FILE, PROJECT_FILE, PATTERNS_FILE, SESSION_HISTORY_FILE, LEARNINGS_FILE, CONTEXT_SUFFIX, UAT_SUFFIX, VERIFICATION_SUFFIX, isPlanFile, isSummaryFile, ARCHIVE_DIR_RE, PHASE_DIR_RE, CONTEXT_WINDOW, WARNING_THRESHOLD, CRITICAL_THRESHOLD, VALID_COMMIT_TYPES, DEFAULT_SENSITIVE_PATTERNS } = require('./constants.cjs');
|
|
9
9
|
const { planningPath, phasesPath, filterPlanFiles, filterSummaryFiles } = require('./utils.cjs');
|
|
@@ -272,29 +272,50 @@ function cmdHistoryDigest(cwd, raw) {
|
|
|
272
272
|
* @param {string} cwd - Working directory path
|
|
273
273
|
* @param {string} agentType - Agent type identifier (e.g., "pan-executor", "pan-planner")
|
|
274
274
|
* @param {boolean} raw - If true, output raw model name instead of JSON
|
|
275
|
+
* @param {string} [metadataJson] - Optional JSON string with task metadata passed through to model resolution; invalid JSON is silently ignored and resolution proceeds without metadata
|
|
275
276
|
* @returns {void}
|
|
276
277
|
*/
|
|
277
|
-
function cmdResolveModel(cwd, agentType, raw) {
|
|
278
|
+
function cmdResolveModel(cwd, agentType, raw, metadataJson) {
|
|
278
279
|
if (!agentType) {
|
|
279
280
|
error('agent-type required');
|
|
280
281
|
}
|
|
281
282
|
|
|
283
|
+
let taskMetadata = null;
|
|
284
|
+
if (metadataJson) {
|
|
285
|
+
try { taskMetadata = JSON.parse(metadataJson); }
|
|
286
|
+
catch { /* ignore invalid metadata, use static routing */ }
|
|
287
|
+
}
|
|
288
|
+
|
|
282
289
|
const config = loadConfig(cwd);
|
|
283
290
|
const profile = config.model_profile || 'balanced';
|
|
291
|
+
const strategy = config.routing?.strategy || 'static';
|
|
284
292
|
|
|
285
293
|
const agentModels = MODEL_PROFILES[agentType];
|
|
286
294
|
if (!agentModels) {
|
|
287
|
-
const
|
|
288
|
-
|
|
295
|
+
const model = resolveTierToModel('mid', detectProvider(cwd, config));
|
|
296
|
+
const result = { model, profile, strategy, unknown_agent: true };
|
|
297
|
+
output(result, raw, model);
|
|
289
298
|
return;
|
|
290
299
|
}
|
|
291
300
|
|
|
292
|
-
const
|
|
293
|
-
const
|
|
294
|
-
const result = { model, profile };
|
|
301
|
+
const model = resolveModelInternal(cwd, agentType, taskMetadata);
|
|
302
|
+
const result = { model, profile, strategy };
|
|
295
303
|
output(result, raw, model);
|
|
296
304
|
}
|
|
297
305
|
|
|
306
|
+
/**
|
|
307
|
+
* Estimate cost multipliers for all profiles.
|
|
308
|
+
* @param {string} cwd - Working directory path
|
|
309
|
+
* @param {boolean} raw - If true, output formatted text instead of JSON
|
|
310
|
+
* @returns {void}
|
|
311
|
+
*/
|
|
312
|
+
function cmdEstimateCost(cwd, raw) {
|
|
313
|
+
const estimates = ['quality', 'balanced', 'budget'].map(estimateCostMultiplier);
|
|
314
|
+
output({ estimates }, raw, estimates.map(e =>
|
|
315
|
+
`${e.profile}: ~${e.average}x baseline (${e.agentCount} agents)`
|
|
316
|
+
).join('\n'));
|
|
317
|
+
}
|
|
318
|
+
|
|
298
319
|
|
|
299
320
|
/**
|
|
300
321
|
* Stage and commit planning files to git, respecting commit_docs config and gitignore.
|
|
@@ -1416,6 +1437,7 @@ module.exports = {
|
|
|
1416
1437
|
cmdVerifyPathExists,
|
|
1417
1438
|
cmdHistoryDigest,
|
|
1418
1439
|
cmdResolveModel,
|
|
1440
|
+
cmdEstimateCost,
|
|
1419
1441
|
cmdCommit,
|
|
1420
1442
|
cmdSummaryExtract,
|
|
1421
1443
|
cmdWebsearch,
|
|
@@ -70,6 +70,15 @@ function buildConfigDefaults(hasBraveSearch, userDefaults) {
|
|
|
70
70
|
rollback_snapshots: true,
|
|
71
71
|
error_pattern_learning: true,
|
|
72
72
|
},
|
|
73
|
+
routing: {
|
|
74
|
+
strategy: 'static',
|
|
75
|
+
provider: 'auto',
|
|
76
|
+
cascade_quality_gate: true,
|
|
77
|
+
complexity_thresholds: {
|
|
78
|
+
downgrade_max: 2,
|
|
79
|
+
upgrade_min: 6,
|
|
80
|
+
},
|
|
81
|
+
},
|
|
73
82
|
};
|
|
74
83
|
return {
|
|
75
84
|
...hardcoded,
|
|
@@ -78,6 +87,7 @@ function buildConfigDefaults(hasBraveSearch, userDefaults) {
|
|
|
78
87
|
budget: { ...hardcoded.budget, ...(userDefaults.budget || {}) },
|
|
79
88
|
commit: { ...hardcoded.commit, ...(userDefaults.commit || {}) },
|
|
80
89
|
execution: { ...hardcoded.execution, ...(userDefaults.execution || {}) },
|
|
90
|
+
routing: { ...hardcoded.routing, ...(userDefaults.routing || {}) },
|
|
81
91
|
};
|
|
82
92
|
}
|
|
83
93
|
|
|
@@ -25,21 +25,41 @@ const {
|
|
|
25
25
|
MILESTONE_VERSION_RE,
|
|
26
26
|
} = require('./constants.cjs');
|
|
27
27
|
|
|
28
|
+
// ─── Multi-Model Routing ─────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Provider-specific model name mapping for each tier alias.
|
|
32
|
+
* Each provider maps reasoning/mid/fast to its native model identifiers.
|
|
33
|
+
* "inherit" means the host runtime uses its own top-tier model selection.
|
|
34
|
+
*/
|
|
35
|
+
const PROVIDER_MODELS = {
|
|
36
|
+
anthropic: { reasoning: 'inherit', mid: 'sonnet', fast: 'haiku' },
|
|
37
|
+
openai: { reasoning: 'inherit', mid: 'mid', fast: 'fast' },
|
|
38
|
+
google: { reasoning: 'inherit', mid: 'mid', fast: 'fast' },
|
|
39
|
+
default: { reasoning: 'inherit', mid: 'sonnet', fast: 'haiku' },
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
/** Maps legacy Anthropic model names to provider-agnostic tier aliases. */
|
|
43
|
+
const LEGACY_ALIASES = { opus: 'reasoning', sonnet: 'mid', haiku: 'fast' };
|
|
44
|
+
|
|
45
|
+
/** Relative cost multipliers per tier (fast = 1× baseline). */
|
|
46
|
+
const COST_MULTIPLIERS = { reasoning: 15, mid: 3, fast: 1 };
|
|
47
|
+
|
|
28
48
|
// ─── Model Profile Table ─────────────────────────────────────────────────────
|
|
29
49
|
|
|
30
50
|
const MODEL_PROFILES = {
|
|
31
|
-
'pan-planner': { quality: '
|
|
32
|
-
'pan-roadmapper': { quality: '
|
|
33
|
-
'pan-executor': { quality: '
|
|
34
|
-
'pan-phase-researcher': { quality: '
|
|
35
|
-
'pan-project-researcher': { quality: '
|
|
36
|
-
'pan-research-synthesizer': { quality: '
|
|
37
|
-
'pan-debugger': { quality: '
|
|
38
|
-
'pan-document_code': { quality: '
|
|
39
|
-
'pan-verifier': { quality: '
|
|
40
|
-
'pan-plan-checker': { quality: '
|
|
41
|
-
'pan-integration-checker': { quality: '
|
|
42
|
-
'pan-reviewer': { quality: '
|
|
51
|
+
'pan-planner': { quality: 'reasoning', balanced: 'reasoning', budget: 'mid' },
|
|
52
|
+
'pan-roadmapper': { quality: 'reasoning', balanced: 'mid', budget: 'mid' },
|
|
53
|
+
'pan-executor': { quality: 'reasoning', balanced: 'mid', budget: 'mid' },
|
|
54
|
+
'pan-phase-researcher': { quality: 'reasoning', balanced: 'mid', budget: 'fast' },
|
|
55
|
+
'pan-project-researcher': { quality: 'reasoning', balanced: 'mid', budget: 'fast' },
|
|
56
|
+
'pan-research-synthesizer': { quality: 'reasoning', balanced: 'mid', budget: 'fast' },
|
|
57
|
+
'pan-debugger': { quality: 'reasoning', balanced: 'mid', budget: 'mid' },
|
|
58
|
+
'pan-document_code': { quality: 'reasoning', balanced: 'fast', budget: 'fast' },
|
|
59
|
+
'pan-verifier': { quality: 'reasoning', balanced: 'mid', budget: 'fast' },
|
|
60
|
+
'pan-plan-checker': { quality: 'reasoning', balanced: 'mid', budget: 'fast' },
|
|
61
|
+
'pan-integration-checker': { quality: 'reasoning', balanced: 'mid', budget: 'fast' },
|
|
62
|
+
'pan-reviewer': { quality: 'reasoning', balanced: 'fast', budget: 'fast' },
|
|
43
63
|
};
|
|
44
64
|
|
|
45
65
|
// ─── Output helpers ───────────────────────────────────────────────────────────
|
|
@@ -179,6 +199,8 @@ function loadConfig(cwd) {
|
|
|
179
199
|
commit: parsed.commit || { safety_checks: true, conventional_types: true, sensitive_patterns: ['\\.env$', '\\.pem$', '\\.key$', 'credentials', 'secret', 'password', 'token'] },
|
|
180
200
|
execution: parsed.execution || { default_mode: 'wave_order', rollback_snapshots: true, error_pattern_learning: true },
|
|
181
201
|
focus: parsed.focus || { auto_commit: true },
|
|
202
|
+
model_overrides: parsed.model_overrides || {},
|
|
203
|
+
routing: parsed.routing || { strategy: 'static', provider: 'auto' },
|
|
182
204
|
};
|
|
183
205
|
} catch { // Config missing or malformed — use defaults
|
|
184
206
|
return {
|
|
@@ -187,6 +209,8 @@ function loadConfig(cwd) {
|
|
|
187
209
|
commit: { safety_checks: true, conventional_types: true, sensitive_patterns: ['\\.env$', '\\.pem$', '\\.key$', 'credentials', 'secret', 'password', 'token'] },
|
|
188
210
|
execution: { default_mode: 'wave_order', rollback_snapshots: true, error_pattern_learning: true },
|
|
189
211
|
focus: { auto_commit: true },
|
|
212
|
+
model_overrides: {},
|
|
213
|
+
routing: { strategy: 'static', provider: 'auto' },
|
|
190
214
|
};
|
|
191
215
|
}
|
|
192
216
|
}
|
|
@@ -485,27 +509,142 @@ function getRoadmapPhaseInternal(cwd, phaseNum) {
|
|
|
485
509
|
}
|
|
486
510
|
|
|
487
511
|
/**
|
|
488
|
-
*
|
|
489
|
-
*
|
|
512
|
+
* Extract a model tier override from a roadmap phase section.
|
|
513
|
+
* Looks for `<!-- model_tier: <tier> -->` in the phase section text.
|
|
514
|
+
* @param {string} cwd - Project root directory
|
|
515
|
+
* @param {string|number} phaseNum - Phase number to look up
|
|
516
|
+
* @returns {string|null} Tier alias if found, null otherwise
|
|
517
|
+
*/
|
|
518
|
+
function getPhaseModelTier(cwd, phaseNum) {
|
|
519
|
+
const phaseData = getRoadmapPhaseInternal(cwd, phaseNum);
|
|
520
|
+
if (!phaseData?.section) return null;
|
|
521
|
+
const match = phaseData.section.match(/<!--\s*model_tier:\s*(\S+)\s*-->/i);
|
|
522
|
+
return match ? match[1] : null;
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
/**
|
|
526
|
+
* Resolve the model for a given agent type based on profile, provider, and routing strategy.
|
|
527
|
+
* Returns "inherit" for reasoning-tier to let the host runtime use its top-tier model.
|
|
490
528
|
* @param {string} cwd - Project root directory
|
|
491
529
|
* @param {string} agentType - Agent name (e.g., "pan-planner", "pan-executor")
|
|
492
|
-
* @
|
|
530
|
+
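* @param {Object} [taskMetadata] - Optional metadata: `phaseNum` enables the per-phase roadmap tier override; the whole object feeds complexity routing when `routing.strategy` is "complexity"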
|
|
531
|
+
* @returns {string} Model identifier: "inherit", "sonnet", "haiku", "mid", "fast", etc.
|
|
493
532
|
*/
|
|
494
|
-
function resolveModelInternal(cwd, agentType) {
|
|
533
|
+
function resolveModelInternal(cwd, agentType, taskMetadata) {
|
|
495
534
|
const config = loadConfig(cwd);
|
|
535
|
+
const provider = detectProvider(cwd, config);
|
|
496
536
|
|
|
497
|
-
// Check per-agent override first
|
|
537
|
+
// Check per-agent override first (highest priority)
|
|
498
538
|
const override = config.model_overrides?.[agentType];
|
|
499
539
|
if (override) {
|
|
500
|
-
return override
|
|
540
|
+
return resolveTierToModel(override, provider);
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
// Check per-phase override from roadmap (second priority)
|
|
544
|
+
if (taskMetadata?.phaseNum) {
|
|
545
|
+
const phaseTier = getPhaseModelTier(cwd, taskMetadata.phaseNum);
|
|
546
|
+
if (phaseTier) {
|
|
547
|
+
return resolveTierToModel(phaseTier, provider);
|
|
548
|
+
}
|
|
501
549
|
}
|
|
502
550
|
|
|
503
551
|
// Fall back to profile lookup
|
|
504
552
|
const profile = config.model_profile || 'balanced';
|
|
505
553
|
const agentModels = MODEL_PROFILES[agentType];
|
|
506
|
-
if (!agentModels) return '
|
|
507
|
-
|
|
508
|
-
|
|
554
|
+
if (!agentModels) return resolveTierToModel('mid', provider);
|
|
555
|
+
|
|
556
|
+
let tier = agentModels[profile] || agentModels['balanced'] || 'mid';
|
|
557
|
+
|
|
558
|
+
// Apply routing strategy
|
|
559
|
+
const strategy = config.routing?.strategy || 'static';
|
|
560
|
+
if (strategy === 'complexity' && taskMetadata) {
|
|
561
|
+
const thresholds = config.routing?.complexity_thresholds;
|
|
562
|
+
tier = resolveComplexityTier(tier, { ...taskMetadata, thresholds });
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
return resolveTierToModel(tier, provider);
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
/**
|
|
569
|
+
* Detect the LLM provider from config, environment, or runtime directory presence.
|
|
570
|
+
* @param {string} cwd - Project root directory
|
|
571
|
+
* @param {Object} config - Loaded config object
|
|
572
|
+
* @returns {string} Provider name: "anthropic", "openai", "google", or "default"
|
|
573
|
+
*/
|
|
574
|
+
function detectProvider(cwd, config) {
|
|
575
|
+
// 1. Explicit config
|
|
576
|
+
if (config.routing?.provider && config.routing.provider !== 'auto') {
|
|
577
|
+
const p = config.routing.provider;
|
|
578
|
+
return PROVIDER_MODELS[p] ? p : 'default';
|
|
579
|
+
}
|
|
580
|
+
// 2. Environment variable
|
|
581
|
+
const envProvider = process.env.PAN_PROVIDER;
|
|
582
|
+
if (envProvider) {
|
|
583
|
+
return PROVIDER_MODELS[envProvider] ? envProvider : 'default';
|
|
584
|
+
}
|
|
585
|
+
// 3. Runtime directory detection
|
|
586
|
+
const checks = [
|
|
587
|
+
['.claude', 'anthropic'], ['.codex', 'openai'],
|
|
588
|
+
['.gemini', 'google'], ['.opencode', 'openai'], ['.github', 'default'],
|
|
589
|
+
];
|
|
590
|
+
for (const [dir, provider] of checks) {
|
|
591
|
+
try { if (fs.statSync(path.join(cwd, dir)).isDirectory()) return provider; }
|
|
592
|
+
catch { /* continue */ }
|
|
593
|
+
}
|
|
594
|
+
return 'default';
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
/**
|
|
598
|
+
* Resolve a tier alias (or legacy model name) to a provider-specific model name.
|
|
599
|
+
* @param {string} tier - Tier alias ("reasoning", "mid", "fast") or legacy name ("opus", "sonnet", "haiku")
|
|
600
|
+
* @param {string} provider - Provider key from detectProvider()
|
|
601
|
+
* @returns {string} Provider-specific model name
|
|
602
|
+
*/
|
|
603
|
+
function resolveTierToModel(tier, provider) {
|
|
604
|
+
const normalizedTier = LEGACY_ALIASES[tier] || tier;
|
|
605
|
+
const providerMap = PROVIDER_MODELS[provider] || PROVIDER_MODELS['default'];
|
|
606
|
+
return providerMap[normalizedTier] || providerMap['mid'];
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
/**
|
|
610
|
+
* Adjust model tier based on task complexity metadata.
|
|
611
|
+
* @param {string} baseTier - Starting tier ("reasoning", "mid", "fast")
|
|
612
|
+
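* @param {Object} [taskMetadata] - Complexity indicators: fileCount, waveCount, requirementCount, isArchitectural, plus optional thresholds ({ downgrade_max, upgrade_min }, defaulting to 2 and 6)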
|
|
613
|
+
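* @returns {string} Tier moved at most one step down or up from baseTier, or baseTier unchanged (also when baseTier is not "fast", "mid", or "reasoning")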
|
|
614
|
+
*/
|
|
615
|
+
function resolveComplexityTier(baseTier, taskMetadata) {
|
|
616
|
+
if (!taskMetadata) return baseTier;
|
|
617
|
+
const { fileCount = 0, waveCount = 0, requirementCount = 0, isArchitectural = false } = taskMetadata;
|
|
618
|
+
|
|
619
|
+
const score =
|
|
620
|
+
(fileCount > 15 ? 2 : fileCount > 5 ? 1 : 0) +
|
|
621
|
+
(waveCount > 3 ? 2 : waveCount > 1 ? 1 : 0) +
|
|
622
|
+
(requirementCount > 5 ? 2 : requirementCount > 2 ? 1 : 0) +
|
|
623
|
+
(isArchitectural ? 3 : 0);
|
|
624
|
+
|
|
625
|
+
const thresholds = taskMetadata.thresholds || { downgrade_max: 2, upgrade_min: 6 };
|
|
626
|
+
const tiers = ['fast', 'mid', 'reasoning'];
|
|
627
|
+
const idx = tiers.indexOf(baseTier);
|
|
628
|
+
if (idx === -1) return baseTier;
|
|
629
|
+
|
|
630
|
+
if (score <= thresholds.downgrade_max && idx > 0) return tiers[idx - 1];
|
|
631
|
+
if (score >= thresholds.upgrade_min && idx < 2) return tiers[idx + 1];
|
|
632
|
+
return baseTier;
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
/**
|
|
636
|
+
* Estimate relative cost multiplier for a given profile.
|
|
637
|
+
* @param {string} profile - "quality", "balanced", or "budget"
|
|
638
|
+
* @returns {Object} Cost estimation with profile, total, average (rounded to one decimal), agentCount
|
|
639
|
+
*/
|
|
640
|
+
function estimateCostMultiplier(profile) {
|
|
641
|
+
let total = 0;
|
|
642
|
+
const agents = Object.keys(MODEL_PROFILES);
|
|
643
|
+
for (const agent of agents) {
|
|
644
|
+
const tier = MODEL_PROFILES[agent][profile] || 'mid';
|
|
645
|
+
total += COST_MULTIPLIERS[tier] || 3;
|
|
646
|
+
}
|
|
647
|
+
return { profile, total, average: +(total / agents.length).toFixed(1), agentCount: agents.length };
|
|
509
648
|
}
|
|
510
649
|
|
|
511
650
|
// ─── Misc utilities ───────────────────────────────────────────────────────────
|
|
@@ -625,6 +764,9 @@ function scanSourceTodos(cwd) {
|
|
|
625
764
|
|
|
626
765
|
module.exports = {
|
|
627
766
|
MODEL_PROFILES,
|
|
767
|
+
PROVIDER_MODELS,
|
|
768
|
+
LEGACY_ALIASES,
|
|
769
|
+
COST_MULTIPLIERS,
|
|
628
770
|
output,
|
|
629
771
|
error,
|
|
630
772
|
verbose,
|
|
@@ -641,6 +783,11 @@ module.exports = {
|
|
|
641
783
|
getArchivedPhaseDirs,
|
|
642
784
|
getRoadmapPhaseInternal,
|
|
643
785
|
resolveModelInternal,
|
|
786
|
+
detectProvider,
|
|
787
|
+
resolveTierToModel,
|
|
788
|
+
resolveComplexityTier,
|
|
789
|
+
estimateCostMultiplier,
|
|
790
|
+
getPhaseModelTier,
|
|
644
791
|
pathExistsInternal,
|
|
645
792
|
generateSlugInternal,
|
|
646
793
|
getMilestoneInfo,
|