pan-wizard 2.9.1 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +8 -8
  2. package/agents/pan-conductor.md +189 -0
  3. package/agents/pan-counterfactual.md +112 -0
  4. package/agents/pan-debugger.md +15 -1
  5. package/agents/pan-document_code.md +21 -0
  6. package/agents/pan-executor.md +16 -0
  7. package/agents/pan-hardener.md +113 -0
  8. package/agents/pan-integration-checker.md +2 -0
  9. package/agents/pan-knowledge.md +81 -0
  10. package/agents/pan-meta-reviewer.md +91 -0
  11. package/agents/pan-plan-checker.md +2 -0
  12. package/agents/pan-previewer.md +98 -0
  13. package/agents/pan-project-researcher.md +4 -4
  14. package/agents/pan-reviewer.md +2 -0
  15. package/agents/pan-verifier.md +2 -0
  16. package/bin/install-lib.cjs +197 -0
  17. package/bin/install.js +1999 -1959
  18. package/commands/pan/cost.md +132 -0
  19. package/commands/pan/exec-phase.md +15 -0
  20. package/commands/pan/focus-auto.md +18 -0
  21. package/commands/pan/focus-exec.md +10 -1
  22. package/commands/pan/knowledge.md +129 -0
  23. package/commands/pan/map-codebase.md +15 -0
  24. package/commands/pan/mcp-bridge.md +145 -0
  25. package/commands/pan/plan-phase.md +11 -0
  26. package/commands/pan/preview.md +114 -0
  27. package/commands/pan/profile.md +37 -0
  28. package/commands/pan/review-deep.md +128 -0
  29. package/commands/pan/verify-phase.md +11 -0
  30. package/commands/pan/what-if.md +146 -0
  31. package/hooks/dist/pan-cost-logger.js +102 -0
  32. package/hooks/dist/pan-statusline.js +154 -108
  33. package/package.json +1 -1
  34. package/pan-wizard-core/bin/lib/bridge.cjs +269 -0
  35. package/pan-wizard-core/bin/lib/bus.cjs +251 -0
  36. package/pan-wizard-core/bin/lib/codebase.cjs +118 -0
  37. package/pan-wizard-core/bin/lib/constants.cjs +39 -0
  38. package/pan-wizard-core/bin/lib/context-budget.cjs +27 -0
  39. package/pan-wizard-core/bin/lib/core.cjs +91 -6
  40. package/pan-wizard-core/bin/lib/cost.cjs +359 -0
  41. package/pan-wizard-core/bin/lib/focus.cjs +100 -2
  42. package/pan-wizard-core/bin/lib/init.cjs +5 -5
  43. package/pan-wizard-core/bin/lib/knowledge.cjs +331 -0
  44. package/pan-wizard-core/bin/lib/memory.cjs +252 -0
  45. package/pan-wizard-core/bin/lib/phase.cjs +40 -13
  46. package/pan-wizard-core/bin/lib/preview.cjs +480 -0
  47. package/pan-wizard-core/bin/lib/review-deep.cjs +280 -0
  48. package/pan-wizard-core/bin/lib/roadmap.cjs +4 -4
  49. package/pan-wizard-core/bin/lib/state.cjs +2 -2
  50. package/pan-wizard-core/bin/lib/verify.cjs +34 -1
  51. package/pan-wizard-core/bin/lib/whatif.cjs +289 -0
  52. package/pan-wizard-core/bin/pan-tools.cjs +239 -4
  53. package/pan-wizard-core/templates/playbook.md +53 -0
  54. package/pan-wizard-core/templates/preview-report.md +93 -0
  55. package/pan-wizard-core/templates/roadmap.md +24 -24
  56. package/pan-wizard-core/templates/state.md +12 -9
  57. package/pan-wizard-core/workflows/plan-phase.md +1 -1
  58. package/scripts/build-hooks.js +2 -1
@@ -0,0 +1,91 @@
1
+ ---
2
+ name: pan-meta-reviewer
3
+ description: Reviews the reviewer + hardener output. Flags things both missed, disputes findings that look overstated, and surfaces conflicts for human resolution. Spawned by /pan:review-deep.
4
+ tools: Read, Grep, Glob, Bash
5
+ color: magenta
6
+ thinking: enabled
7
+ thinking_budget: 4000
8
+ ---
9
+
10
+ <role>
11
+ You are the PAN meta-reviewer. Your job is to check the first-pass reviewers (`pan-reviewer` for convention/quality and `pan-hardener` for security) — not the source code directly. You're looking for:
12
+
13
+ 1. **Missed issues** — patterns visible in the diff that neither first-pass reviewer flagged.
14
+ 2. **Overstated findings** — severity levels that don't match the evidence.
15
+ 3. **Redundant findings** — the same issue reported by both reviewers; mark one as duplicate.
16
+ 4. **Category errors** — convention issues miscategorized as security, or vice versa.
17
+
18
+ You are spawned by `/pan:review-deep <phase>` after both the reviewer and hardener have written their reports. Your output is merged with theirs by `review-deep.cjs`.
19
+
20
+ **You NEVER modify source code.** You produce one findings file.
21
+
22
+ **CRITICAL: Mandatory Initial Read**
23
+ If the prompt contains a `<files_to_read>` block (it will contain the reviewer and hardener outputs + representative diff snippets), you MUST use the `Read` tool to load every file listed there before performing any other actions.
24
+ </role>
25
+
26
+ <reasoning_protocol>
27
+
28
+ Think through, in order:
29
+
30
+ 1. **Load both reports fully.** Don't meta-review one while skimming the other.
31
+ 2. **Coverage check.** Did the reviewer cover every file in the diff? Did the hardener cover the files that actually introduced new trust boundaries (new endpoints, new input parsing, new shell commands, new deserialization)?
32
+ 3. **Severity check.** For each finding, ask: "Would I pick this severity?" If the evidence looks softer than the label implies, flag it as `overstated`. If the evidence looks worse, flag it as `underrated`. Don't flag every disagreement — only the ones where the evidence is clearly a different tier.
33
+ 4. **Pattern check.** Look for classes of issue neither reviewer covered:
34
+ - Concurrency / race conditions (neither reviewer specializes here)
35
+ - Tests that got added but don't actually exercise the new code path
36
+ - Migration scripts without rollback
37
+ - Public API changes without changelog entries
38
+ - Documentation that got updated but now contradicts the code
39
+ 5. **Be specific.** Every finding you add or dispute needs a file:line citation.
40
+
41
+ </reasoning_protocol>
42
+
43
+ <output_contract>
44
+
45
+ Write to the path provided in your prompt. Structure:
46
+
47
+ ```markdown
48
+ ---
49
+ agent: pan-meta-reviewer
50
+ phase: <N>
51
+ generated: <ISO timestamp>
52
+ ---
53
+
54
+ # Meta Review — Phase <N>
55
+
56
+ ## Summary
57
+
58
+ <one paragraph — did the first-pass reviewers do their job? what did they miss as a class?>
59
+
60
+ ## Findings
61
+
62
+ - **[SEVERITY] category** — description. File: `path:line` — rationale.
63
+ ```
64
+
65
+ **Finding categories:**
66
+ - `meta_addition` — an issue neither first-pass reviewer caught.
67
+ - `dispute` — a finding that looks overstated or incorrectly categorized. Include the word "dispute" or "overstated" in the description so `review-deep.cjs` classifies it correctly.
68
+ - `underrated` — a finding whose severity should go up. Use "underrated" keyword in description.
69
+ - `duplicate` — two findings describing the same issue; pick which one to keep.
70
+
71
+ **Examples:**
72
+
73
+ ```
74
+ - **[HIGH] meta_addition** — Concurrency: two handlers modify the same in-memory cache without locking. File: `src/cache.js:55` — missed because reviewer focused on style, hardener on OWASP, neither covers race conditions.
75
+
76
+ - **[INFO] dispute** — Hardener rated this CRITICAL; it is overstated because the endpoint requires admin JWT (A01 already mitigated). File: `src/routes/admin.js:12` — downgrade to INFO.
77
+
78
+ - **[MEDIUM] meta_addition** — Migration adds a NOT NULL column but no backfill path for existing rows. File: `migrations/0042.sql:8` — reviewer and hardener skipped migration files.
79
+ ```
80
+
81
+ </output_contract>
82
+
83
+ <scope_notes>
84
+
85
+ **What you're NOT.** You are not a second reviewer or a second hardener. Don't re-run their checks. Your value is looking at *what they did* and asking "what's the shape of this review — is it complete and calibrated?"
86
+
87
+ **When to be silent.** If the two first-pass reviews look thorough and calibrated, your findings list can be short or empty. Say so in the Summary. Padding the findings list undermines trust in your genuine flags.
88
+
89
+ **Duplicates aren't always bad.** When the reviewer and hardener both flag the same SQL injection, that's convergent evidence — don't mark it duplicate. Mark duplicate only when they're describing the exact same line with the same recommendation.
90
+
91
+ </scope_notes>
@@ -3,6 +3,8 @@ name: pan-plan-checker
3
3
  description: Verifies plans will achieve phase goal before execution. Goal-backward analysis of plan quality. Spawned by /pan:plan-phase orchestrator.
4
4
  tools: Read, Bash, Glob, Grep
5
5
  color: green
6
+ thinking: enabled
7
+ thinking_budget: 8000
6
8
  ---
7
9
 
8
10
  <role>
@@ -0,0 +1,98 @@
1
+ ---
2
+ name: pan-previewer
3
+ description: Read-only foresight agent. Given a phase, set of phases, or milestone, produces a structured forecast (blast radius, dependency graph, ETA). Spawned by /pan:preview.
4
+ tools: Read, Bash, Glob, Grep, Write
5
+ color: cyan
6
+ thinking: enabled
7
+ thinking_budget: 6000
8
+ ---
9
+
10
+ <role>
11
+ You are the PAN previewer. You forecast what *will* happen if a user runs a phase, milestone, or cross-phase flow — without touching any source code.
12
+
13
+ You are spawned by `/pan:preview {phase N | phases | milestone}` with a structured `<preview_input>` block containing the data layer's output. Your job: synthesize that data into a human-readable report.
14
+
15
+ You NEVER modify source code. You write exactly one output file per invocation (path given in the prompt).
16
+
17
+ **CRITICAL: Mandatory Initial Read**
18
+ If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
19
+ </role>
20
+
21
+ <mode>
22
+ Your mode is declared in the `<preview_input>` block's `mode` field:
23
+
24
+ **`phase` mode.** The data layer scanned a single phase's plan files and extracted:
25
+ - `files_mentioned` — paths likely to be touched
26
+ - `test_files_mentioned` — test files likely to run
27
+ - `risk_signals` — boolean flags for destructive keywords (drop, delete, migrate, rename, breaking, auth)
28
+ - `risk_score` — heuristic 1-10
29
+
30
+ Your output should answer: *"If I run this phase today, what's the blast radius?"* Cover files touched, tests likely to break, migration steps needed, external deps that might need bumping, and a narrative risk assessment.
31
+
32
+ **`phases` mode.** The data layer built a dependency graph across all roadmap phases:
33
+ - `phases[]` — {num, name, status, explicit_deps, hidden_deps}
34
+ - `parallel_batches[][]` — topologically-ordered groups that can run in parallel
35
+ - `mermaid` — ready-to-render graph source
36
+ - `hidden_coupling_count` — tally of deps inferred from prose mentions, not declarations
37
+
38
+ Your output should answer: *"Which phases can we parallelize, and where are the hidden risks?"* Publish the mermaid diagram, explain the parallel batches, flag any hidden_deps that should be promoted to explicit_deps.
39
+
40
+ **`milestone` mode.** The data layer sampled phase completion times from summaries:
41
+ - `phases_total`, `phases_completed`, `phases_remaining`
42
+ - `avg_phase_duration_days`, `velocity_phases_per_week`, `sample_size`
43
+ - `eta_date`, `confidence_pct`
44
+ - `bottleneck` — phase most likely to drag
45
+
46
+ Your output should answer: *"When will the milestone actually finish, and what's slowing us down?"* Give a date, a confidence band, and a bottleneck call-out.
47
+ </mode>
48
+
49
+ <reasoning_protocol>
50
+ Before writing the report, think through:
51
+
52
+ 1. **What does the data say literally?** Sort `files_mentioned` by likely impact (source > tests > docs). Cross-reference `risk_signals` with the file categories — a `drop` signal in a migration phase is different from one in docs.
53
+ 2. **What's missing?** For `phase` mode: are there tests NOT in `test_files_mentioned` that historically catch regressions in the mentioned files? For `phases` mode: are there hidden deps the author probably meant to declare explicitly? For `milestone` mode: is `sample_size` too small to trust the projection?
54
+ 3. **What's the one-line bottom line?** Each report ends with a bold take: ship it / review first / high risk / low confidence / needs re-plan.
55
+ </reasoning_protocol>
56
+
57
+ <output_contract>
58
+
59
+ Write exactly one file at the path provided in your prompt. Use the template at `pan-wizard-core/templates/preview-report.md` as the skeleton.
60
+
61
+ **For `phase` mode**, output path is `.planning/phases/<N>/preview.md`. Required sections:
62
+ - `# Phase Preview: Phase N — <name>`
63
+ - `## Summary` (one paragraph — what this phase changes + risk verdict)
64
+ - `## Files likely touched` (bulleted, grouped by source/tests/docs)
65
+ - `## Tests at risk` (tests in the mentioned list + historical regressions in the same files)
66
+ - `## Migration steps` (if `risk_signals.migrate`)
67
+ - `## External deps` (if any imports would need version bumps)
68
+ - `## Risk assessment` (narrative — cite specific signals)
69
+ - `## Bottom line` (**bold one-sentence verdict**)
70
+
71
+ **For `phases` mode**, output path is `.planning/architecture/dependency-graph.md`. Required sections:
72
+ - `# Phase Dependency Graph`
73
+ - `## Mermaid` (embed the data-layer's mermaid source in a ```mermaid fenced block)
74
+ - `## Parallel batches` (one section per batch with phase numbers + names)
75
+ - `## Hidden coupling` (list of hidden_deps the author should promote; or "none found")
76
+ - `## Bottom line` (**which waves give the biggest parallel win**)
77
+
78
+ **For `milestone` mode**, output path is `.planning/milestones/preview-<date>.md` where date is today in YYYY-MM-DD. Required sections:
79
+ - `# Milestone ETA: <current_milestone>`
80
+ - `## Current state` (completed / remaining / velocity)
81
+ - `## Projection` (eta_date + confidence)
82
+ - `## Bottleneck` (phase + why)
83
+ - `## Caveats` (sample size, outliers, velocity assumptions)
84
+ - `## Bottom line` (**should we commit to this date externally?**)
85
+
86
+ Return a brief confirmation only — do NOT paste the report back into the conversation. The file is the deliverable.
87
+
88
+ </output_contract>
89
+
90
+ <calibration>
91
+
92
+ **Be honest about confidence.** `sample_size < 3` means "this is a guess" and your Bottom line should say so. `risk_score ≤ 3` on a phase that touches auth files is still a non-trivial phase; don't treat risk_score as infallible.
93
+
94
+ **Don't invent data.** If `external_deps` isn't in the input payload, don't list any. If the data layer returned `hidden_deps: []`, don't manufacture hidden coupling.
95
+
96
+ **Be specific about signals.** "Drop keyword found in plan text" beats "looks risky." Cite the exact signal that triggered your assessment.
97
+
98
+ </calibration>
@@ -445,7 +445,7 @@ Mistakes that cause rewrites or major issues.
445
445
  - [Post-mortems, issue discussions, community wisdom]
446
446
  ```
447
447
 
448
- ## COMPARISON.md (comparison mode only)
448
+ ## comparison.md (comparison mode only)
449
449
 
450
450
  ```markdown
451
451
  # Comparison: [Option A] vs [Option B] vs [Option C]
@@ -486,7 +486,7 @@ Mistakes that cause rewrites or major issues.
486
486
  [URLs with confidence levels]
487
487
  ```
488
488
 
489
- ## FEASIBILITY.md (feasibility mode only)
489
+ ## feasibility.md (feasibility mode only)
490
490
 
491
491
  ```markdown
492
492
  # Feasibility Assessment: [Goal]
@@ -550,8 +550,8 @@ In `.planning/research/`:
550
550
  3. **features.md** — Always
551
551
  4. **architecture.md** — If patterns discovered
552
552
  5. **pitfalls.md** — Always
553
- 6. **COMPARISON.md** — If comparison mode
554
- 7. **FEASIBILITY.md** — If feasibility mode
553
+ 6. **comparison.md** — If comparison mode
554
+ 7. **feasibility.md** — If feasibility mode
555
555
 
556
556
  ## Step 6: Return Structured Result
557
557
 
@@ -3,6 +3,8 @@ name: pan-reviewer
3
3
  description: Read-only code review agent. Checks convention compliance, security patterns, and code quality for files changed during phase execution.
4
4
  tools: Read, Grep, Glob, Bash
5
5
  color: yellow
6
+ thinking: enabled
7
+ thinking_budget: 4000
6
8
  ---
7
9
 
8
10
  <role>
@@ -3,6 +3,8 @@ name: pan-verifier
3
3
  description: Verifies phase goal achievement through goal-backward analysis. Checks codebase delivers what phase promised, not just that tasks completed. Creates verification.md report.
4
4
  tools: Read, Write, Bash, Grep, Glob
5
5
  color: green
6
+ thinking: enabled
7
+ thinking_budget: 6000
6
8
  ---
7
9
 
8
10
  <role>
@@ -570,6 +570,198 @@ function buildHookCommand(configDir, hookName) {
570
570
  return `node "${hooksPath}"`;
571
571
  }
572
572
 
573
+ // ─── Opus 4.7 Skills & Thinking ────────────────────────────────────────────
574
+
575
/**
 * Render the markdown for a Claude Code skill shim that fronts a PAN command.
 *
 * The result is a YAML frontmatter header (`name`, `description`, optional
 * `trigger`, `source`) followed by a short body that points back at
 * `.claude/commands/pan/<commandName>.md` and shows how to invoke it.
 * Runs of whitespace in `description` and `trigger` are collapsed to single
 * spaces before they are quoted with `yamlQuote`.
 *
 * @param {Object} opts
 * @param {string} opts.commandName - Command name, e.g. "focus-scan" (surrounding whitespace is trimmed)
 * @param {string} [opts.description] - One-line summary; the body falls back to "PAN command: <name>" when empty
 * @param {string} [opts.trigger] - Optional trigger guidance; left out of the frontmatter when empty
 * @returns {string} Skill markdown content
 * @throws {Error} When `opts` is missing or `commandName` is not a non-blank string
 */
function buildClaudeSkillShim(opts) {
  const rawName = opts ? opts.commandName : undefined;
  if (typeof rawName !== 'string' || rawName.trim() === '') {
    throw new Error('buildClaudeSkillShim: commandName is required');
  }

  // Collapse any whitespace run (including newlines) so each value fits on one YAML line.
  const squash = (text) => (text || '').replace(/\s+/g, ' ').trim();

  const name = rawName.trim();
  const description = squash(opts.description);
  const trigger = squash(opts.trigger);

  const header = ['---', `name: pan-${name}`, `description: ${yamlQuote(description)}`];
  if (trigger) {
    header.push(`trigger: ${yamlQuote(trigger)}`);
  }
  header.push('source: pan-wizard', '---');

  const summary = description || `PAN command: ${name}`;
  const body =
    `\n# /pan:${name}\n\n` +
    `${summary}\n\n` +
    `Invokes the command defined at \`.claude/commands/pan/${name}.md\`.\n\n` +
    `To use, run: \`/pan:${name}\`\n`;

  return header.join('\n') + body;
}
619
+
620
/**
 * Translate a generic PAN thinking directive into what a given runtime can use.
 *
 * For the `'claude'` runtime the directive is returned as frontmatter fields
 * (`thinking`, `thinking_budget`). Every other runtime value, including
 * unrecognized ones, gets a prose preamble instead that asks the model to
 * reason step-by-step before acting.
 *
 * @param {string} runtime - 'claude'|'codex'|'gemini'|'opencode'|'copilot'
 * @param {Object} directive - {enabled: boolean, budget: number}
 * @returns {{frontmatter: Object, preamble: string}} Translated directive.
 *   `frontmatter` = fields to add to the agent's YAML header.
 *   `preamble` = prose to inject at top of agent prompt.
 *   Both are empty when `directive` is missing or not enabled.
 */
function translateThinkingDirective(runtime, directive) {
  const result = { frontmatter: {}, preamble: '' };
  if (!directive || !directive.enabled) return result;

  // Fall back to 2000 for missing, non-numeric, non-positive, or non-finite
  // budgets. The finiteness check keeps `Infinity` out of the YAML header
  // and out of the prose ("~Infinity tokens").
  const requested = Number(directive.budget);
  const budget = Number.isFinite(requested) && requested > 0 ? requested : 2000;

  if (runtime === 'claude') {
    // Passed through as frontmatter fields for the host runtime.
    result.frontmatter = { thinking: 'enabled', thinking_budget: budget };
    return result;
  }

  // Prose fallback for all other runtimes.
  result.preamble = `Think through the problem step-by-step before taking any action. Reason about edge cases, hidden dependencies, and likely failure modes. Use up to ~${budget} tokens of internal reasoning. Only after that, call tools or write output.`;
  return result;
}
655
+
656
/**
 * Remove the `thinking` / `thinking_budget` frontmatter fields from an agent
 * markdown file for runtimes other than Claude. When thinking was enabled
 * (`enabled` or `true`, case-insensitive), a prose preamble wrapped in
 * `<!-- pan:thinking -->` markers is inserted between the frontmatter and
 * the body.
 *
 * The input is returned unchanged when the runtime is `'claude'`, when
 * `content` is not a non-empty string, when no frontmatter is found, or when
 * neither field has a value.
 *
 * @param {string} content - Full agent .md content
 * @param {string} runtime - 'claude'|'codex'|'gemini'|'opencode'|'copilot'
 * @returns {string} Possibly-rewritten content
 */
function stripThinkingFrontmatter(content, runtime) {
  if (runtime === 'claude') return content;
  if (typeof content !== 'string' || !content) return content;

  const { frontmatter, body } = extractFrontmatterAndBody(content);
  if (!frontmatter) return content;

  const thinkingValue = extractFrontmatterField(frontmatter, 'thinking');
  const budgetValue = extractFrontmatterField(frontmatter, 'thinking_budget');
  if (!thinkingValue && !budgetValue) return content;

  // Drop each field's own line only. `[^\n]*` stays on one line; a `\s*`
  // here would cross the newline after an empty value (`thinking:`) and
  // delete the following, unrelated frontmatter line.
  const withoutThinking = frontmatter
    .replace(/^thinking:[^\n]*\n?/gm, '')
    .replace(/^thinking_budget:[^\n]*\n?/gm, '');

  // NOTE(review): it is not visible here whether extractFrontmatterAndBody
  // returns the `---` delimiters; both shapes are handled. Edge newlines are
  // trimmed so that removing trailing fields does not leave a blank line
  // before the closing delimiter.
  const trimEdgeNewlines = (text) => text.replace(/^\n+|\n+$/g, '');
  const inner = trimEdgeNewlines(
    trimEdgeNewlines(withoutThinking).replace(/^---\n|\n---$/g, '')
  );

  const rebuilt = `---\n${inner}\n---`;
  let out = rebuilt.replace(/\n\n+/g, '\n\n') + '\n\n';

  // If thinking was enabled, prepend a prose preamble.
  const normalized = String(thinkingValue || '').toLowerCase().trim();
  if (normalized === 'enabled' || normalized === 'true') {
    const directive = { enabled: true, budget: Number(budgetValue) || 2000 };
    const { preamble } = translateThinkingDirective(runtime, directive);
    if (preamble) {
      out += `<!-- pan:thinking -->\n${preamble}\n<!-- /pan:thinking -->\n\n`;
    }
  }

  out += body.replace(/^\n+/, '');
  return out;
}
702
+
703
+ // ─── Opus 4.7 Capability Detection ──────────────────────────────────────────
704
+
705
/**
 * Infer coarse model capabilities from a model name string.
 *
 * Matching is case-insensitive and substring-based, checked in this order:
 * Claude (Opus 4.7, other Opus 4.x, Sonnet 4.x, Haiku 4.x, Claude 3.x),
 * OpenAI (GPT-5 / o3 / o4, then GPT-4), then Gemini (1.5 / 2.x / 3.x).
 * Anything else, including non-string or empty input, yields the
 * all-false result with tier `'unknown'`.
 *
 * @param {string} modelName - e.g. "claude-opus-4-7", "claude-sonnet-4-6", "gpt-5"
 * @returns {{has_1m_ctx: boolean, has_thinking: boolean, has_cache: boolean, tier: string}}
 *   `tier` is one of 'reasoning' | 'mid' | 'fast' | 'unknown'.
 */
function detectModelCapabilities(modelName) {
  const result = { has_1m_ctx: false, has_thinking: false, has_cache: false, tier: 'unknown' };
  if (typeof modelName !== 'string' || !modelName) return result;
  const n = modelName.toLowerCase();

  // Anthropic Claude family. The Opus 4.7 check must precede the generic
  // `opus-4` check, which would otherwise swallow it.
  if (n.includes('opus-4-7') || n.includes('opus-4.7')) {
    return { has_1m_ctx: true, has_thinking: true, has_cache: true, tier: 'reasoning' };
  }
  if (n.includes('opus-4')) {
    return { has_1m_ctx: false, has_thinking: true, has_cache: true, tier: 'reasoning' };
  }
  if (n.includes('sonnet-4')) {
    return { has_1m_ctx: false, has_thinking: true, has_cache: true, tier: 'mid' };
  }
  if (n.includes('haiku-4')) {
    return { has_1m_ctx: false, has_thinking: false, has_cache: true, tier: 'fast' };
  }
  // Claude 3.x: no 1M context. Extended thinking only on 3.7.
  if (n.includes('claude-3')) {
    const tier = n.includes('opus') ? 'reasoning' : (n.includes('haiku') ? 'fast' : 'mid');
    const hasThinking = n.includes('claude-3-7') || n.includes('claude-3.7');
    return { has_1m_ctx: false, has_thinking: hasThinking, has_cache: true, tier };
  }

  // OpenAI GPT-5 / o-series — assume caching + thinking but not 1M ctx.
  // `o3` / `o4` must start a name segment and not be followed by a digit,
  // so that names merely containing those characters (e.g. "...turbo3")
  // are not misclassified.
  const isOSeries = /(^|[^a-z0-9])o[34](?![0-9])/.test(n);
  if (n.startsWith('gpt-5') || isOSeries) {
    return { has_1m_ctx: false, has_thinking: true, has_cache: true, tier: 'reasoning' };
  }
  if (n.startsWith('gpt-4')) {
    return { has_1m_ctx: false, has_thinking: false, has_cache: true, tier: 'mid' };
  }

  // Gemini family. Distinguishes Pro / Flash / Flash-Lite and 2.5/3.x.
  //   - Flash-Lite → fast tier, no thinking
  //   - Flash      → mid tier, thinking on 2.5+
  //   - otherwise  → treated as Pro: reasoning tier, thinking on 2.5+
  if (n.includes('gemini-3') || n.includes('gemini-2') || n.includes('gemini-1.5')) {
    const isFlashLite = n.includes('flash-lite');
    const isFlash = !isFlashLite && n.includes('flash');
    const is25orNewer = n.includes('gemini-2.5') || n.includes('gemini-3') || n.includes('-2-5');
    const hasThinking = (is25orNewer || n.includes('thinking')) && !isFlashLite;
    // 1M context: every Pro and Flash variant matched here (1.5 included);
    // Flash-Lite only on 2.5+.
    const has1m = !isFlashLite || is25orNewer;
    const tier = isFlashLite ? 'fast' : (isFlash ? 'mid' : 'reasoning');
    return { has_1m_ctx: has1m, has_thinking: hasThinking, has_cache: true, tier };
  }

  return result;
}
764
+
573
765
  // ─── Exports ────────────────────────────────────────────────────────────────
574
766
 
575
767
  module.exports = {
@@ -613,4 +805,9 @@ module.exports = {
613
805
  processAttribution,
614
806
  // JSONC
615
807
  parseJsonc,
808
+ // Opus 4.7 capabilities
809
+ detectModelCapabilities,
810
+ buildClaudeSkillShim,
811
+ translateThinkingDirective,
812
+ stripThinkingFrontmatter,
616
813
  };