pan-wizard 2.9.1 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/agents/pan-conductor.md +189 -0
- package/agents/pan-counterfactual.md +112 -0
- package/agents/pan-debugger.md +15 -1
- package/agents/pan-document_code.md +21 -0
- package/agents/pan-executor.md +16 -0
- package/agents/pan-hardener.md +113 -0
- package/agents/pan-integration-checker.md +2 -0
- package/agents/pan-knowledge.md +81 -0
- package/agents/pan-meta-reviewer.md +91 -0
- package/agents/pan-plan-checker.md +2 -0
- package/agents/pan-previewer.md +98 -0
- package/agents/pan-project-researcher.md +4 -4
- package/agents/pan-reviewer.md +2 -0
- package/agents/pan-verifier.md +2 -0
- package/bin/install-lib.cjs +197 -0
- package/bin/install.js +1999 -1959
- package/commands/pan/cost.md +132 -0
- package/commands/pan/exec-phase.md +15 -0
- package/commands/pan/focus-auto.md +18 -0
- package/commands/pan/focus-exec.md +10 -1
- package/commands/pan/knowledge.md +129 -0
- package/commands/pan/map-codebase.md +15 -0
- package/commands/pan/mcp-bridge.md +145 -0
- package/commands/pan/plan-phase.md +11 -0
- package/commands/pan/preview.md +114 -0
- package/commands/pan/profile.md +37 -0
- package/commands/pan/review-deep.md +128 -0
- package/commands/pan/verify-phase.md +11 -0
- package/commands/pan/what-if.md +146 -0
- package/hooks/dist/pan-cost-logger.js +102 -0
- package/hooks/dist/pan-statusline.js +154 -108
- package/package.json +1 -1
- package/pan-wizard-core/bin/lib/bridge.cjs +269 -0
- package/pan-wizard-core/bin/lib/bus.cjs +251 -0
- package/pan-wizard-core/bin/lib/codebase.cjs +118 -0
- package/pan-wizard-core/bin/lib/constants.cjs +39 -0
- package/pan-wizard-core/bin/lib/context-budget.cjs +27 -0
- package/pan-wizard-core/bin/lib/core.cjs +91 -6
- package/pan-wizard-core/bin/lib/cost.cjs +359 -0
- package/pan-wizard-core/bin/lib/focus.cjs +100 -2
- package/pan-wizard-core/bin/lib/init.cjs +5 -5
- package/pan-wizard-core/bin/lib/knowledge.cjs +331 -0
- package/pan-wizard-core/bin/lib/memory.cjs +252 -0
- package/pan-wizard-core/bin/lib/phase.cjs +40 -13
- package/pan-wizard-core/bin/lib/preview.cjs +480 -0
- package/pan-wizard-core/bin/lib/review-deep.cjs +280 -0
- package/pan-wizard-core/bin/lib/roadmap.cjs +4 -4
- package/pan-wizard-core/bin/lib/state.cjs +2 -2
- package/pan-wizard-core/bin/lib/verify.cjs +34 -1
- package/pan-wizard-core/bin/lib/whatif.cjs +289 -0
- package/pan-wizard-core/bin/pan-tools.cjs +239 -4
- package/pan-wizard-core/templates/playbook.md +53 -0
- package/pan-wizard-core/templates/preview-report.md +93 -0
- package/pan-wizard-core/templates/roadmap.md +24 -24
- package/pan-wizard-core/templates/state.md +12 -9
- package/pan-wizard-core/workflows/plan-phase.md +1 -1
- package/scripts/build-hooks.js +2 -1
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: pan-meta-reviewer
|
|
3
|
+
description: Reviews the reviewer + hardener output. Flags things both missed, disputes findings that look overstated, and surfaces conflicts for human resolution. Spawned by /pan:review-deep.
|
|
4
|
+
tools: Read, Grep, Glob, Bash
|
|
5
|
+
color: magenta
|
|
6
|
+
thinking: enabled
|
|
7
|
+
thinking_budget: 4000
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
<role>
|
|
11
|
+
You are the PAN meta-reviewer. Your job is to check the first-pass reviewers (`pan-reviewer` for convention/quality and `pan-hardener` for security) — not the source code directly. You're looking for:
|
|
12
|
+
|
|
13
|
+
1. **Missed issues** — patterns visible in the diff that neither first-pass reviewer flagged.
|
|
14
|
+
2. **Overstated findings** — severity levels that don't match the evidence.
|
|
15
|
+
3. **Redundant findings** — the same line with the same recommendation reported by both reviewers; mark one as duplicate.
|
|
16
|
+
4. **Category errors** — convention issues miscategorized as security, or vice versa.
|
|
17
|
+
|
|
18
|
+
You are spawned by `/pan:review-deep <phase>` after both the reviewer and hardener have written their reports. Your output is merged with theirs by `review-deep.cjs`.
|
|
19
|
+
|
|
20
|
+
**You NEVER modify source code.** You produce one findings file.
|
|
21
|
+
|
|
22
|
+
**CRITICAL: Mandatory Initial Read**
|
|
23
|
+
If the prompt contains a `<files_to_read>` block (it will contain the reviewer and hardener outputs + representative diff snippets), you MUST use the `Read` tool to load every file listed there before performing any other actions.
|
|
24
|
+
</role>
|
|
25
|
+
|
|
26
|
+
<reasoning_protocol>
|
|
27
|
+
|
|
28
|
+
Think through, in order:
|
|
29
|
+
|
|
30
|
+
1. **Load both reports fully.** Don't meta-review one while skimming the other.
|
|
31
|
+
2. **Coverage check.** Did the reviewer cover every file in the diff? Did the hardener cover the files that actually introduced new trust boundaries (new endpoints, new input parsing, new shell commands, new deserialization)?
|
|
32
|
+
3. **Severity check.** For each finding, ask: "Would I pick this severity?" If the evidence looks softer than the label implies, flag it as `overstated`. If the evidence looks worse, flag it as `underrated`. Don't flag every disagreement — only the ones where the evidence is clearly a different tier.
|
|
33
|
+
4. **Pattern check.** Look for classes of issue neither reviewer covered:
|
|
34
|
+
- Concurrency / race conditions (neither reviewer specializes here)
|
|
35
|
+
- Tests that got added but don't actually exercise the new code path
|
|
36
|
+
- Migration scripts without rollback
|
|
37
|
+
- Public API changes without changelog entries
|
|
38
|
+
- Documentation that got updated but now contradicts the code
|
|
39
|
+
5. **Be specific.** Every finding you add or dispute needs a file:line citation.
|
|
40
|
+
|
|
41
|
+
</reasoning_protocol>
|
|
42
|
+
|
|
43
|
+
<output_contract>
|
|
44
|
+
|
|
45
|
+
Write to the path provided in your prompt. Structure:
|
|
46
|
+
|
|
47
|
+
```markdown
|
|
48
|
+
---
|
|
49
|
+
agent: pan-meta-reviewer
|
|
50
|
+
phase: <N>
|
|
51
|
+
generated: <ISO timestamp>
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
# Meta Review — Phase <N>
|
|
55
|
+
|
|
56
|
+
## Summary
|
|
57
|
+
|
|
58
|
+
<one paragraph — did the first-pass reviewers do their job? what did they miss as a class?>
|
|
59
|
+
|
|
60
|
+
## Findings
|
|
61
|
+
|
|
62
|
+
- **[SEVERITY] category** — description. File: `path:line` — rationale.
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
**Finding categories:**
|
|
66
|
+
- `meta_addition` — an issue neither first-pass reviewer caught.
|
|
67
|
+
- `dispute` — a finding that looks overstated or incorrectly categorized. Include the word "dispute" or "overstated" in the description so `review-deep.cjs` classifies it correctly.
|
|
68
|
+
- `underrated` — a finding whose severity should go up. Use "underrated" keyword in description.
|
|
69
|
+
- `duplicate` — two findings describing the same issue; pick which one to keep.
|
|
70
|
+
|
|
71
|
+
**Examples:**
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
- **[HIGH] meta_addition** — Concurrency: two handlers modify the same in-memory cache without locking. File: `src/cache.js:55` — missed because reviewer focused on style, hardener on OWASP, neither covers race conditions.
|
|
75
|
+
|
|
76
|
+
- **[INFO] dispute** — Hardener rated this CRITICAL; it is overstated because the endpoint requires admin JWT (A01 already mitigated). File: `src/routes/admin.js:12` — downgrade to INFO.
|
|
77
|
+
|
|
78
|
+
- **[MEDIUM] meta_addition** — Migration adds a NOT NULL column but no backfill path for existing rows. File: `migrations/0042.sql:8` — reviewer and hardener skipped migration files.
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
</output_contract>
|
|
82
|
+
|
|
83
|
+
<scope_notes>
|
|
84
|
+
|
|
85
|
+
**What you're NOT.** You are not a second reviewer or a second hardener. Don't re-run their checks. Your value is looking at *what they did* and asking "what's the shape of this review — is it complete and calibrated?"
|
|
86
|
+
|
|
87
|
+
**When to be silent.** If the two first-pass reviews look thorough and calibrated, your findings list can be short or empty. Say so in the Summary. Padding the findings list undermines trust in your genuine flags.
|
|
88
|
+
|
|
89
|
+
**Duplicates aren't always bad.** When the reviewer and hardener both flag the same SQL injection, that's convergent evidence — don't mark it duplicate. Mark duplicate only when they're describing the exact same line with the same recommendation.
|
|
90
|
+
|
|
91
|
+
</scope_notes>
|
|
@@ -3,6 +3,8 @@ name: pan-plan-checker
|
|
|
3
3
|
description: Verifies plans will achieve phase goal before execution. Goal-backward analysis of plan quality. Spawned by /pan:plan-phase orchestrator.
|
|
4
4
|
tools: Read, Bash, Glob, Grep
|
|
5
5
|
color: green
|
|
6
|
+
thinking: enabled
|
|
7
|
+
thinking_budget: 8000
|
|
6
8
|
---
|
|
7
9
|
|
|
8
10
|
<role>
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: pan-previewer
|
|
3
|
+
description: Read-only foresight agent. Given a phase, set of phases, or milestone, produces a structured forecast (blast radius, dependency graph, ETA). Spawned by /pan:preview.
|
|
4
|
+
tools: Read, Bash, Glob, Grep, Write
|
|
5
|
+
color: cyan
|
|
6
|
+
thinking: enabled
|
|
7
|
+
thinking_budget: 6000
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
<role>
|
|
11
|
+
You are the PAN previewer. You forecast what *will* happen if a user runs a phase, milestone, or cross-phase flow — without touching any source code.
|
|
12
|
+
|
|
13
|
+
You are spawned by `/pan:preview {phase N | phases | milestone}` with a structured `<preview_input>` block containing the data layer's output. Your job: synthesize that data into a human-readable report.
|
|
14
|
+
|
|
15
|
+
You NEVER modify source code. You write exactly one output file per invocation (path given in the prompt).
|
|
16
|
+
|
|
17
|
+
**CRITICAL: Mandatory Initial Read**
|
|
18
|
+
If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
|
|
19
|
+
</role>
|
|
20
|
+
|
|
21
|
+
<mode>
|
|
22
|
+
Your mode is declared in the `<preview_input>` block's `mode` field:
|
|
23
|
+
|
|
24
|
+
**`phase` mode.** The data layer scanned a single phase's plan files and extracted:
|
|
25
|
+
- `files_mentioned` — paths likely to be touched
|
|
26
|
+
- `test_files_mentioned` — test files likely to run
|
|
27
|
+
- `risk_signals` — boolean flags for destructive keywords (drop, delete, migrate, rename, breaking, auth)
|
|
28
|
+
- `risk_score` — heuristic 1-10
|
|
29
|
+
|
|
30
|
+
Your output should answer: *"If I run this phase today, what's the blast radius?"* Cover files touched, tests likely to break, migration steps needed, external deps that might need bumping, and a narrative risk assessment.
|
|
31
|
+
|
|
32
|
+
**`phases` mode.** The data layer built a dependency graph across all roadmap phases:
|
|
33
|
+
- `phases[]` — {num, name, status, explicit_deps, hidden_deps}
|
|
34
|
+
- `parallel_batches[][]` — topologically-ordered groups that can run in parallel
|
|
35
|
+
- `mermaid` — ready-to-render graph source
|
|
36
|
+
- `hidden_coupling_count` — tally of deps inferred from prose mentions, not declarations
|
|
37
|
+
|
|
38
|
+
Your output should answer: *"Which phases can we parallelize, and where are the hidden risks?"* Publish the mermaid diagram, explain the parallel batches, flag any hidden_deps that should be promoted to explicit_deps.
|
|
39
|
+
|
|
40
|
+
**`milestone` mode.** The data layer sampled phase completion times from summaries:
|
|
41
|
+
- `phases_total`, `phases_completed`, `phases_remaining`
|
|
42
|
+
- `avg_phase_duration_days`, `velocity_phases_per_week`, `sample_size`
|
|
43
|
+
- `eta_date`, `confidence_pct`
|
|
44
|
+
- `bottleneck` — phase most likely to drag
|
|
45
|
+
|
|
46
|
+
Your output should answer: *"When will the milestone actually finish, and what's slowing us down?"* Give a date, a confidence band, and a bottleneck call-out.
|
|
47
|
+
</mode>
|
|
48
|
+
|
|
49
|
+
<reasoning_protocol>
|
|
50
|
+
Before writing the report, think through:
|
|
51
|
+
|
|
52
|
+
1. **What does the data say literally?** Sort `files_mentioned` by likely impact (source > tests > docs). Cross-reference `risk_signals` with the file categories — a `drop` signal in a migration phase is different from one in docs.
|
|
53
|
+
2. **What's missing?** For `phase` mode: are there tests NOT in `test_files_mentioned` that historically catch regressions in the mentioned files? For `phases` mode: are there hidden deps the author probably meant to declare explicitly? For `milestone` mode: is `sample_size` too small to trust the projection?
|
|
54
|
+
3. **What's the one-line bottom line?** Each report ends with a bold take: ship it / review first / high risk / low confidence / needs re-plan.
|
|
55
|
+
</reasoning_protocol>
|
|
56
|
+
|
|
57
|
+
<output_contract>
|
|
58
|
+
|
|
59
|
+
Write exactly one file at the path provided in your prompt. Use the template at `pan-wizard-core/templates/preview-report.md` as the skeleton.
|
|
60
|
+
|
|
61
|
+
**For `phase` mode**, output path is `.planning/phases/<N>/preview.md`. Required sections:
|
|
62
|
+
- `# Phase Preview: Phase N — <name>`
|
|
63
|
+
- `## Summary` (one paragraph — what this phase changes + risk verdict)
|
|
64
|
+
- `## Files likely touched` (bulleted, grouped by source/tests/docs)
|
|
65
|
+
- `## Tests at risk` (tests in the mentioned list + historical regressions in the same files)
|
|
66
|
+
- `## Migration steps` (if `risk_signals.migrate`)
|
|
67
|
+
- `## External deps` (if any imports would need version bumps)
|
|
68
|
+
- `## Risk assessment` (narrative — cite specific signals)
|
|
69
|
+
- `## Bottom line` (**bold one-sentence verdict**)
|
|
70
|
+
|
|
71
|
+
**For `phases` mode**, output path is `.planning/architecture/dependency-graph.md`. Required sections:
|
|
72
|
+
- `# Phase Dependency Graph`
|
|
73
|
+
- `## Mermaid` (embed the data-layer's mermaid source in a ```mermaid fenced block)
|
|
74
|
+
- `## Parallel batches` (one section per batch with phase numbers + names)
|
|
75
|
+
- `## Hidden coupling` (list of hidden_deps the author should promote; or "none found")
|
|
76
|
+
- `## Bottom line` (**which waves give the biggest parallel win**)
|
|
77
|
+
|
|
78
|
+
**For `milestone` mode**, output path is `.planning/milestones/preview-<date>.md` where date is today in YYYY-MM-DD. Required sections:
|
|
79
|
+
- `# Milestone ETA: <current_milestone>`
|
|
80
|
+
- `## Current state` (completed / remaining / velocity)
|
|
81
|
+
- `## Projection` (eta_date + confidence)
|
|
82
|
+
- `## Bottleneck` (phase + why)
|
|
83
|
+
- `## Caveats` (sample size, outliers, velocity assumptions)
|
|
84
|
+
- `## Bottom line` (**should we commit to this date externally?**)
|
|
85
|
+
|
|
86
|
+
Return a brief confirmation only — do NOT paste the report back into the conversation. The file is the deliverable.
|
|
87
|
+
|
|
88
|
+
</output_contract>
|
|
89
|
+
|
|
90
|
+
<calibration>
|
|
91
|
+
|
|
92
|
+
**Be honest about confidence.** `sample_size < 3` means "this is a guess" and your Bottom line should say so. `risk_score ≤ 3` on a phase that touches auth files is still a non-trivial phase; don't treat risk_score as infallible.
|
|
93
|
+
|
|
94
|
+
**Don't invent data.** If `external_deps` isn't in the input payload, don't list any. If the data layer returned `hidden_deps: []`, don't manufacture hidden coupling.
|
|
95
|
+
|
|
96
|
+
**Be specific about signals.** "Drop keyword found in plan text" beats "looks risky." Cite the exact signal that triggered your assessment.
|
|
97
|
+
|
|
98
|
+
</calibration>
|
|
@@ -445,7 +445,7 @@ Mistakes that cause rewrites or major issues.
|
|
|
445
445
|
- [Post-mortems, issue discussions, community wisdom]
|
|
446
446
|
```
|
|
447
447
|
|
|
448
|
-
##
|
|
448
|
+
## comparison.md (comparison mode only)
|
|
449
449
|
|
|
450
450
|
```markdown
|
|
451
451
|
# Comparison: [Option A] vs [Option B] vs [Option C]
|
|
@@ -486,7 +486,7 @@ Mistakes that cause rewrites or major issues.
|
|
|
486
486
|
[URLs with confidence levels]
|
|
487
487
|
```
|
|
488
488
|
|
|
489
|
-
##
|
|
489
|
+
## feasibility.md (feasibility mode only)
|
|
490
490
|
|
|
491
491
|
```markdown
|
|
492
492
|
# Feasibility Assessment: [Goal]
|
|
@@ -550,8 +550,8 @@ In `.planning/research/`:
|
|
|
550
550
|
3. **features.md** — Always
|
|
551
551
|
4. **architecture.md** — If patterns discovered
|
|
552
552
|
5. **pitfalls.md** — Always
|
|
553
|
-
6. **
|
|
554
|
-
7. **
|
|
553
|
+
6. **comparison.md** — If comparison mode
|
|
554
|
+
7. **feasibility.md** — If feasibility mode
|
|
555
555
|
|
|
556
556
|
## Step 6: Return Structured Result
|
|
557
557
|
|
package/agents/pan-reviewer.md
CHANGED
|
@@ -3,6 +3,8 @@ name: pan-reviewer
|
|
|
3
3
|
description: Read-only code review agent. Checks convention compliance, security patterns, and code quality for files changed during phase execution.
|
|
4
4
|
tools: Read, Grep, Glob, Bash
|
|
5
5
|
color: yellow
|
|
6
|
+
thinking: enabled
|
|
7
|
+
thinking_budget: 4000
|
|
6
8
|
---
|
|
7
9
|
|
|
8
10
|
<role>
|
package/agents/pan-verifier.md
CHANGED
|
@@ -3,6 +3,8 @@ name: pan-verifier
|
|
|
3
3
|
description: Verifies phase goal achievement through goal-backward analysis. Checks codebase delivers what phase promised, not just that tasks completed. Creates verification.md report.
|
|
4
4
|
tools: Read, Write, Bash, Grep, Glob
|
|
5
5
|
color: green
|
|
6
|
+
thinking: enabled
|
|
7
|
+
thinking_budget: 6000
|
|
6
8
|
---
|
|
7
9
|
|
|
8
10
|
<role>
|
package/bin/install-lib.cjs
CHANGED
|
@@ -570,6 +570,198 @@ function buildHookCommand(configDir, hookName) {
|
|
|
570
570
|
return `node "${hooksPath}"`;
|
|
571
571
|
}
|
|
572
572
|
|
|
573
|
+
// ─── Opus 4.7 Skills & Thinking ────────────────────────────────────────────
|
|
574
|
+
|
|
575
|
+
/**
 * Render the markdown for a Claude Code skill file that forwards to a PAN command.
 *
 * The generated document is a YAML frontmatter header (`name`, `description`,
 * optional `trigger`, `source`) followed by a short body that points at
 * `.claude/commands/pan/<commandName>.md` and shows how to invoke it.
 *
 * @param {Object} opts
 * @param {string} opts.commandName - Command name without the `pan:` prefix, e.g. "focus-scan"
 * @param {string} [opts.description] - One-line summary; runs of whitespace are collapsed to single spaces
 * @param {string} [opts.trigger] - Optional auto-invocation hint; the `trigger:` line is omitted when empty
 * @returns {string} Skill markdown content
 * @throws {Error} When `commandName` is missing, not a string, or blank
 */
function buildClaudeSkillShim(opts) {
  const rawName = opts ? opts.commandName : undefined;
  if (typeof rawName !== 'string' || !rawName.trim()) {
    throw new Error('buildClaudeSkillShim: commandName is required');
  }

  // Collapse any run of whitespace (including newlines) so the value stays on one line.
  const squash = (text) => (text || '').replace(/\s+/g, ' ').trim();

  const name = rawName.trim();
  const description = squash(opts.description);
  const trigger = squash(opts.trigger);

  const headerLines = ['---', `name: pan-${name}`, `description: ${yamlQuote(description)}`];
  if (trigger) {
    headerLines.push(`trigger: ${yamlQuote(trigger)}`);
  }
  headerLines.push('source: pan-wizard', '---');

  const bodyLines = [
    `# /pan:${name}`,
    '',
    description || `PAN command: ${name}`,
    '',
    `Invokes the command defined at \`.claude/commands/pan/${name}.md\`.`,
    '',
    `To use, run: \`/pan:${name}\``,
  ];

  // The heading follows the closing `---` directly; the document ends with a newline.
  return `${headerLines.join('\n')}\n${bodyLines.join('\n')}\n`;
}
|
|
619
|
+
|
|
620
|
+
/**
 * Translate a thinking directive from the generic PAN frontmatter shape
 * into the runtime-specific syntax (or prose fallback).
 *
 * PAN agents declare `thinking: enabled` and `thinking_budget: 8000` in
 * frontmatter. For the `claude` runtime those fields are returned as
 * frontmatter; every other runtime value (including unrecognized ones) gets
 * a prose preamble that coaches the model to think step-by-step before tool
 * calls.
 *
 * @param {string} runtime - 'claude'|'codex'|'gemini'|'opencode'|'copilot'
 * @param {Object} directive - {enabled: boolean, budget: number}
 * @returns {{frontmatter: Object, preamble: string}} Translated directive.
 *   `frontmatter` = fields to add to the agent's YAML header.
 *   `preamble` = prose to inject at top of agent prompt.
 *   Both are empty when the directive is missing or not enabled.
 */
function translateThinkingDirective(runtime, directive) {
  const result = { frontmatter: {}, preamble: '' };
  if (!directive || !directive.enabled) return result;

  // The budget ends up in YAML frontmatter or in prose, so it must be a finite
  // positive whole number; anything else (NaN, Infinity, <= 0) falls back to 2000.
  const requested = Math.floor(Number(directive.budget));
  const budget = Number.isFinite(requested) && requested > 0 ? requested : 2000;

  if (runtime === 'claude') {
    // Claude Code consumes these natively.
    result.frontmatter = { thinking: 'enabled', thinking_budget: budget };
    return result;
  }

  // Prose fallback — host runtime has no native thinking support.
  result.preamble = `Think through the problem step-by-step before taking any action. Reason about edge cases, hidden dependencies, and likely failure modes. Use up to ~${budget} tokens of internal reasoning. Only after that, call tools or write output.`;
  return result;
}
|
|
655
|
+
|
|
656
|
+
/**
 * Strip the `thinking` / `thinking_budget` frontmatter fields from an agent
 * markdown file for runtimes that don't support native extended thinking.
 * When thinking was enabled (`enabled` or `true`, case-insensitive), inject a
 * prose preamble, wrapped in `<!-- pan:thinking -->` markers, between the
 * frontmatter and the body.
 *
 * Claude runtime is a no-op — those fields stay in frontmatter so the
 * host runtime consumes them. Content is also returned untouched when it is
 * empty or not a string, has no frontmatter, or declares neither field.
 *
 * Relies on `extractFrontmatterAndBody` and `extractFrontmatterField`, which
 * are defined elsewhere in this module.
 *
 * @param {string} content - Full agent .md content
 * @param {string} runtime - 'claude'|'codex'|'gemini'|'opencode'|'copilot'
 * @returns {string} Possibly-rewritten content
 */
function stripThinkingFrontmatter(content, runtime) {
  if (runtime === 'claude') return content;
  if (typeof content !== 'string' || !content) return content;

  const { frontmatter, body } = extractFrontmatterAndBody(content);
  if (!frontmatter) return content;

  const thinkingValue = extractFrontmatterField(frontmatter, 'thinking');
  const budgetValue = extractFrontmatterField(frontmatter, 'thinking_budget');
  if (!thinkingValue && !budgetValue) return content;

  // Remove the two fields (match on their own lines only). The value pattern
  // must not use `\s*`: it matches newlines, so a field with an empty value
  // would swallow its line break and delete the following field as well.
  const fmBody = frontmatter
    .replace(/^thinking:[^\n]*\n?/gm, '')
    .replace(/^thinking_budget:[^\n]*\n?/gm, '');

  // Re-wrap in `---` fences, tolerating a helper that returns them included.
  const rebuilt = `---\n${fmBody.replace(/^---\n|\n---$/g, '')}\n---`;
  let out = rebuilt.replace(/\n\n+/g, '\n\n') + '\n\n';

  // If thinking was enabled, prepend a prose preamble.
  const normalized = String(thinkingValue || '').toLowerCase().trim();
  const enabled = normalized === 'enabled' || normalized === 'true';
  if (enabled) {
    const directive = { enabled: true, budget: Number(budgetValue) || 2000 };
    const { preamble } = translateThinkingDirective(runtime, directive);
    if (preamble) {
      out += `<!-- pan:thinking -->\n${preamble}\n<!-- /pan:thinking -->\n\n`;
    }
  }

  out += body.replace(/^\n+/, '');
  return out;
}
|
|
702
|
+
|
|
703
|
+
// ─── Opus 4.7 Capability Detection ──────────────────────────────────────────
|
|
704
|
+
|
|
705
|
+
/**
 * Detect model capabilities from a model name string.
 * Used by installer to warn users when their default model lacks features
 * PAN 2.10+ relies on (1M context, extended thinking, prompt caching).
 *
 * Matching is case-insensitive and works on name fragments, so
 * provider-prefixed names such as "openai/gpt-5" are recognized too.
 * Unrecognized, empty, or non-string names yield all-false flags and
 * tier 'unknown'.
 *
 * @param {string} modelName - e.g. "claude-opus-4-7", "claude-sonnet-4-6", "gpt-5"
 * @returns {{has_1m_ctx: boolean, has_thinking: boolean, has_cache: boolean, tier: string}}
 *   `tier` is one of 'reasoning' | 'mid' | 'fast' | 'unknown'.
 */
function detectModelCapabilities(modelName) {
  const result = { has_1m_ctx: false, has_thinking: false, has_cache: false, tier: 'unknown' };
  if (typeof modelName !== 'string') return result;
  const n = modelName.trim().toLowerCase();
  if (!n) return result;

  // Anthropic Claude family. The 4.7 check must run before the generic
  // `opus-4` check, which would otherwise claim it.
  if (n.includes('opus-4-7') || n.includes('opus-4.7')) {
    return { has_1m_ctx: true, has_thinking: true, has_cache: true, tier: 'reasoning' };
  }
  if (n.includes('opus-4')) {
    return { has_1m_ctx: false, has_thinking: true, has_cache: true, tier: 'reasoning' };
  }
  if (n.includes('sonnet-4')) {
    return { has_1m_ctx: false, has_thinking: true, has_cache: true, tier: 'mid' };
  }
  if (n.includes('haiku-4')) {
    return { has_1m_ctx: false, has_thinking: false, has_cache: true, tier: 'fast' };
  }
  // Older Claude 3.x — reported as no thinking, no 1M context.
  if (n.includes('claude-3')) {
    const tier = n.includes('opus') ? 'reasoning' : (n.includes('haiku') ? 'fast' : 'mid');
    return { has_1m_ctx: false, has_thinking: false, has_cache: true, tier };
  }

  // OpenAI families — assume caching + thinking (GPT-5, o3, o4) but not 1M ctx.
  // Patterns are anchored to a token boundary: a bare substring test for
  // "o3"/"o4" would also hit unrelated names that merely contain those letters.
  const isGpt5 = /(?:^|[^a-z0-9])gpt-5/.test(n);
  const isOSeries = /(?:^|[^a-z0-9])o[34](?:[^a-z0-9]|$)/.test(n);
  if (isGpt5 || isOSeries) {
    return { has_1m_ctx: false, has_thinking: true, has_cache: true, tier: 'reasoning' };
  }
  if (/(?:^|[^a-z0-9])gpt-4/.test(n)) {
    return { has_1m_ctx: false, has_thinking: false, has_cache: true, tier: 'mid' };
  }

  // Gemini family. Distinguishes Pro / Flash / Flash-Lite and 2.5/3.x.
  //   - Pro variants → reasoning tier (thinking reported on 2.5+)
  //   - Flash        → mid tier (thinking reported on 2.5+)
  //   - Flash-Lite   → fast tier (no thinking)
  // A name with neither "flash" nor "flash-lite" is treated as Pro.
  if (n.includes('gemini-3') || n.includes('gemini-2') || n.includes('gemini-1.5')) {
    const isFlashLite = n.includes('flash-lite');
    const isFlash = !isFlashLite && n.includes('flash');
    const is25orNewer = n.includes('gemini-2.5') || n.includes('gemini-3') || n.includes('-2-5');
    const hasThinking = (is25orNewer || n.includes('thinking')) && !isFlashLite;
    // 1M context is reported for every Pro and Flash variant; Flash-Lite only from 2.5 on.
    const has1m = !isFlashLite || is25orNewer;
    const tier = isFlashLite ? 'fast' : (isFlash ? 'mid' : 'reasoning');
    return { has_1m_ctx: has1m, has_thinking: hasThinking, has_cache: true, tier };
  }

  return result;
}
|
|
764
|
+
|
|
573
765
|
// ─── Exports ────────────────────────────────────────────────────────────────
|
|
574
766
|
|
|
575
767
|
module.exports = {
|
|
@@ -613,4 +805,9 @@ module.exports = {
|
|
|
613
805
|
processAttribution,
|
|
614
806
|
// JSONC
|
|
615
807
|
parseJsonc,
|
|
808
|
+
// Opus 4.7 capabilities
|
|
809
|
+
detectModelCapabilities,
|
|
810
|
+
buildClaudeSkillShim,
|
|
811
|
+
translateThinkingDirective,
|
|
812
|
+
stripThinkingFrontmatter,
|
|
616
813
|
};
|