hail-hydra-cc 2.3.2 → 2.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,128 +1,131 @@
1
- #!/usr/bin/env node
2
-
3
- // Hydra StatusLine — persistent status bar at bottom of Claude Code
4
- // Receives session JSON via stdin, outputs one formatted line to stdout.
5
- //
6
- // Display format:
7
- // 🐉 │ Opus │ Ctx: 37% ████░░░░░░ │ $0.42 │ my-project │ ⚡ Update available
8
- //
9
- // Context bar is color-coded:
10
- // Green (0-49%) → Yellow (50-79%) → Red (80%+)
11
-
12
- const fs = require('fs');
13
- const path = require('path');
14
- const os = require('os');
15
-
16
- const cacheFile = path.join(os.homedir(), '.claude', 'cache', 'hydra-update-check.json');
17
-
18
- let input = '';
19
- process.stdin.on('data', (chunk) => (input += chunk));
20
- process.stdin.on('end', () => {
21
- try {
22
- const data = JSON.parse(input);
23
-
24
- // === Model ===
25
- const model = data.model?.display_name || 'Unknown';
26
-
27
- // === Context Usage ===
28
- // Use precomputed used_percentage from Claude Code (most reliable)
29
- const ctxPct = Math.round(data.context_window?.used_percentage || 0);
30
-
31
- // Build visual context bar (10 chars wide)
32
- const filled = Math.round(ctxPct / 10);
33
- const empty = 10 - filled;
34
- const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(empty);
35
-
36
- // Color-code: Green <50%, Yellow 50-79%, Red 80%+
37
- let ctxColor;
38
- if (ctxPct < 50) {
39
- ctxColor = '\x1b[32m'; // Green
40
- } else if (ctxPct < 80) {
41
- ctxColor = '\x1b[33m'; // Yellow
42
- } else {
43
- ctxColor = '\x1b[31m'; // Red
44
- }
45
- const reset = '\x1b[0m';
46
- const dim = '\x1b[2m';
47
-
48
- const ctxDisplay = `${ctxColor}Ctx: ${ctxPct}% ${bar}${reset}`;
49
-
50
- // === Session Cost ===
51
- const cost = (data.cost?.total_cost_usd || 0).toFixed(2);
52
-
53
- // === Savings vs all-Opus baseline (cached, silent on failure) ===
54
- let savingsStr = '';
55
- try {
56
- const tokenMath = require('./hydra-token-math');
57
- const summary = tokenMath.computeSummaryCached();
58
- if (summary.available && summary.savedUSD >= 0.01) {
59
- savingsStr = ` \x1b[32m↓$${summary.savedUSD.toFixed(2)}\x1b[0m`;
60
- }
61
- } catch (e) { /* silent fallback */ }
62
-
63
- // === Working Directory ===
64
- const dirName = path.basename(data.workspace?.current_dir || data.cwd || '');
65
-
66
- // === Update Check (read from cache) ===
67
- let updateNotice = '';
68
- try {
69
- const cache = JSON.parse(fs.readFileSync(cacheFile, 'utf8'));
70
- if (cache.update_available) {
71
- updateNotice = ` \x1b[33m\u26A1 v${cache.latest} available${reset}`;
72
- }
73
- } catch (e) {
74
- // No cache — skip update notice
75
- }
76
-
77
- // === Compose Status Line ===
78
- const parts = [
79
- '\x1b[32m\uD83D\uDC32\x1b[0m', // Green dragon emoji (🐉)
80
- `${dim}${model}${reset}`, // Dim model name
81
- ctxDisplay, // Color-coded context bar
82
- `${dim}$${cost}${reset}${savingsStr}`, // Dim cost + green ↓savings
83
- `${dim}${dirName}${reset}`, // Dim directory
84
- ];
85
-
86
- // Append update notice if available
87
- if (updateNotice) {
88
- parts.push(updateNotice);
89
- }
90
-
91
- // Compaction warning — only show at 70%+ context usage
92
- if (ctxPct >= 80) {
93
- parts.push(`\x1b[31m\u26A0 Compacting soon!\x1b[0m`);
94
- } else if (ctxPct >= 70) {
95
- parts.push(`\x1b[31m\u26A0 Auto-compact at 85%\x1b[0m`);
96
- }
97
-
98
- // === Sentinel Pending Warning ===
99
- // Check if code changes were made but sentinel hasn't run yet
100
- let sentinelWarning = '';
101
- try {
102
- const sentinelDir = path.join(os.tmpdir(), 'hydra-sentinel');
103
- const sessionId = data.session_id || 'unknown';
104
- const sentinelFlag = path.join(sentinelDir, `${sessionId}-pending.json`);
105
- const pendingData = JSON.parse(fs.readFileSync(sentinelFlag, 'utf8'));
106
-
107
- // Only show if flag is recent (within last 10 minutes)
108
- // and has files pending
109
- const age = Date.now() - (pendingData.updated_at || 0);
110
- if (pendingData.files?.length > 0 && age < 600000) {
111
- const count = pendingData.files.length;
112
- sentinelWarning = ` \x1b[31m\u26A0 Sentinel pending (${count} files)\x1b[0m`;
113
- }
114
- } catch (e) {
115
- // No flag file — sentinel is clean or hasn't been needed
116
- }
117
-
118
- if (sentinelWarning) {
119
- parts.push(sentinelWarning);
120
- }
121
-
122
- process.stdout.write(parts.join(' \u2502 '));
123
-
124
- } catch (e) {
125
- // Fallback if JSON parse fails
126
- process.stdout.write('\uD83D\uDC32 Hydra');
127
- }
128
- });
1
+ #!/usr/bin/env node
2
+
3
+ // Hydra StatusLine — persistent status bar at bottom of Claude Code
4
+ // Receives session JSON via stdin, outputs one formatted line to stdout.
5
+ //
6
+ // Display format:
7
+ // 🐉 │ Opus │ Ctx: 37% ████░░░░░░ │ $0.42 │ my-project │ ⚡ Update available
8
+ //
9
+ // Context bar is color-coded:
10
+ // Green (0-49%) → Yellow (50-79%) → Red (80%+)
11
+
12
+ const fs = require('fs');
13
+ const path = require('path');
14
+ const os = require('os');
15
+
16
+ const cacheFile = path.join(os.homedir(), '.claude', 'cache', 'hydra-update-check.json');
17
+
18
+ let input = '';
19
+ process.stdin.on('data', (chunk) => (input += chunk));
20
+ process.stdin.on('end', () => {
21
+ try {
22
+ const data = JSON.parse(input);
23
+
24
+ // === Model ===
25
+ const model = data.model?.display_name || 'Unknown';
26
+
27
+ // === Context Usage ===
28
+ // Use precomputed used_percentage from Claude Code (most reliable)
29
+ const ctxPct = Math.round(data.context_window?.used_percentage || 0);
30
+
31
+ // Build visual context bar (10 chars wide)
32
+ const filled = Math.round(ctxPct / 10);
33
+ const empty = 10 - filled;
34
+ const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(empty);
35
+
36
+ // Color-code: Green <50%, Yellow 50-79%, Red 80%+
37
+ let ctxColor;
38
+ if (ctxPct < 50) {
39
+ ctxColor = '\x1b[32m'; // Green
40
+ } else if (ctxPct < 80) {
41
+ ctxColor = '\x1b[33m'; // Yellow
42
+ } else {
43
+ ctxColor = '\x1b[31m'; // Red
44
+ }
45
+ const reset = '\x1b[0m';
46
+ const dim = '\x1b[2m';
47
+
48
+ const ctxDisplay = `${ctxColor}Ctx: ${ctxPct}% ${bar}${reset}`;
49
+
50
+ // === Session Cost ===
51
+ const cost = (data.cost?.total_cost_usd || 0).toFixed(2);
52
+
53
+ // === Savings vs all-Opus baseline (cached, silent on failure) ===
54
+ let savingsStr = '';
55
+ try {
56
+ const tokenMath = require('./hydra-token-math');
57
+ const summary = tokenMath.computeSummaryCached();
58
+ if (summary.available && summary.savedUSD >= 0.01) {
59
+ savingsStr = ` \x1b[32m↓$${summary.savedUSD.toFixed(2)}\x1b[0m`;
60
+ }
61
+ } catch (e) { /* silent fallback */ }
62
+
63
+ // === Working Directory ===
64
+ const dirName = path.basename(data.workspace?.current_dir || data.cwd || '');
65
+
66
+ // === Update Check (read from cache) ===
67
+ let updateNotice = '';
68
+ try {
69
+ const cache = JSON.parse(fs.readFileSync(cacheFile, 'utf8'));
70
+ if (cache.update_available) {
71
+ updateNotice = ` \x1b[33m\u26A1 v${cache.latest} available${reset}`;
72
+ }
73
+ } catch (e) {
74
+ // No cache — skip update notice
75
+ }
76
+
77
+ // === Compose Status Line ===
78
+ const parts = [
79
+ '\x1b[32m\uD83D\uDC32\x1b[0m', // Green dragon emoji (🐉)
80
+ `${dim}${model}${reset}`, // Dim model name
81
+ ctxDisplay, // Color-coded context bar
82
+ `${dim}$${cost}${reset}${savingsStr}`, // Dim cost + green ↓savings
83
+ `${dim}${dirName}${reset}`, // Dim directory
84
+ ];
85
+
86
+ // Append update notice if available
87
+ if (updateNotice) {
88
+ parts.push(updateNotice);
89
+ }
90
+
91
+ // Compaction warning — only show at 70%+ context usage
92
+ if (ctxPct >= 80) {
93
+ parts.push(`\x1b[31m\u26A0 Compacting soon!\x1b[0m`);
94
+ } else if (ctxPct >= 70) {
95
+ parts.push(`\x1b[31m\u26A0 Auto-compact at 85%\x1b[0m`);
96
+ }
97
+
98
+ // === Sentinel Indicator: 3 states (pending / clean / quiet) ===
99
+ try {
100
+ const sessionId = data.session_id || 'unknown';
101
+ const sentinelDir = path.join(os.tmpdir(), 'hydra-sentinel');
102
+ const pendingFile = path.join(sentinelDir, `${sessionId}-pending.json`);
103
+ const scanMarker = path.join(sentinelDir, `${sessionId}-last-scan`);
104
+
105
+ const pendingExists = fs.existsSync(pendingFile);
106
+ const markerExists = fs.existsSync(scanMarker);
107
+
108
+ if (pendingExists) {
109
+ const pendingData = JSON.parse(fs.readFileSync(pendingFile, 'utf8'));
110
+ const count = pendingData.files?.length || 0;
111
+ const age = Date.now() - (pendingData.updated_at || 0);
112
+ if (count > 0 && age < 600000) {
113
+ parts.push(`\x1b[33m\u26A0 Sentinel pending (${count} file${count === 1 ? '' : 's'})\x1b[0m`);
114
+ }
115
+ } else if (markerExists) {
116
+ const markerMs = parseInt(fs.readFileSync(scanMarker, 'utf8').trim(), 10) * 1000;
117
+ if (Date.now() - markerMs < 60000) {
118
+ parts.push(`\x1b[32m\u2705 Sentinel clean\x1b[0m`);
119
+ }
120
+ }
121
+ } catch (e) {
122
+ // No flag — silent quiet state
123
+ }
124
+
125
+ process.stdout.write(parts.join(' \u2502 '));
126
+
127
+ } catch (e) {
128
+ // Fallback if JSON parse fails
129
+ process.stdout.write('\uD83D\uDC32 Hydra');
130
+ }
131
+ });
@@ -1,164 +1,164 @@
1
- # Model Capabilities Reference
2
-
3
- Understanding what each model does well (and where it struggles) is key to effective routing.
4
- This reference helps calibrate delegation decisions.
5
-
6
- ## Claude Haiku 4.5
7
-
8
- ### Strengths
9
- - Extremely fast response times (~10× faster than Opus)
10
- - Very low cost per token (~5× cheaper than Opus 4.6 — $1/$5 vs $5/$25 per MTok)
11
- - Excellent at following clear, well-defined instructions
12
- - Strong at text extraction, search, and pattern matching
13
- - Good at generating code from templates and clear patterns
14
- - Reliable for mechanical tasks with unambiguous specifications
15
- - Great at summarization and information retrieval
16
-
17
- ### Limitations
18
- - Weaker at multi-step reasoning chains
19
- - Can miss subtle bugs or edge cases in code review
20
- - Less reliable with complex architectural decisions
21
- - May produce simpler solutions when a nuanced approach is needed
22
- - Can struggle with ambiguous or underspecified requirements
23
- - Less creative in problem-solving approaches
24
-
25
- ### Ideal Task Profile
26
- Short context, clear instructions, well-defined output, no judgment calls needed.
27
-
28
- ### Auto-Accept Thresholds
29
- Haiku outputs qualify for auto-accept when they are raw, factual, and unambiguous:
30
- - **hydra-scout**: File paths, grep results, directory listings, code snippets with location markers
31
- - **hydra-runner**: All-pass results, clean build/lint output, git status output
32
- - **hydra-scribe**: Internal docstrings, inline comments, changelog entries
33
- - **Requires verify**: Any analysis, interpretation, or user-facing documentation
34
-
35
- ### hydra-scout (Haiku 4.5) — Updated in v2.1.0
36
- - **Strengths**: Codebase exploration, file search, reading, AND codebase
37
- map building/maintenance
38
- - **New capability**: Builds and incrementally updates the codebase dependency
39
- map using grep-based import extraction. No external parsers required.
40
- - **Memory focus**: Codebase structure, key file locations, module boundaries,
41
- map build history, files that failed to parse
42
-
43
- ### hydra-sentinel-scan (Haiku 4.5) — Updated in v2.1.0
44
- - **Strengths**: Pattern matching, grep-level analysis, import tracing,
45
- fast structural checks, AND map-based instant blast-radius lookups
46
- - **New capability**: Reads codebase map for instant dependency lookups
47
- instead of grepping. Falls back to grep if map doesn't exist.
48
- - **Map-aware checks**: Risk-based severity, test coverage warnings,
49
- env var index lookups, blast radius reporting
50
- - **Limitations**: Cannot understand semantic meaning of data shapes,
51
- may produce false positives on complex contract changes
52
- - **Memory focus**: Codebase dependency graph, coupling patterns,
53
- false positive history
54
-
55
- ---
56
-
57
- ## Claude Sonnet 4.6
58
-
59
- ### Strengths
60
- - Strong code generation across most languages and frameworks
61
- - Good reasoning about code structure and patterns
62
- - Reliable bug fixing when errors are identifiable
63
- - Effective code review for common issues
64
- - Good at test writing with understanding of business logic
65
- - Handles refactoring with awareness of dependencies
66
- - Balances speed and capability well
67
-
68
- ### Limitations
69
- - May not catch the most subtle architectural issues
70
- - Less reliable than Opus for novel algorithm design
71
- - Can sometimes miss non-obvious security implications
72
- - May not fully optimize complex performance bottlenecks
73
- - Less effective at synthesizing large amounts of disparate information
74
-
75
- ### Ideal Task Profile
76
- Standard software engineering tasks: implementation, testing, debugging, review. Tasks where
77
- the approach is established even if the specific implementation requires thought.
78
-
79
- ### Auto-Accept Thresholds
80
- Sonnet outputs always require orchestrator review — code changes and analysis are never auto-accepted:
81
- - **hydra-coder**: ALWAYS verify — scan for correctness, edge cases, project pattern alignment
82
- - **hydra-analyst**: ALWAYS verify — validate reasoning, check suggested fix against actual code
83
-
84
- ### hydra-sentinel (Sonnet 4.6)
85
- - **Strengths**: Semantic understanding of data flow, contract validation
86
- across component boundaries, accurate false positive filtering,
87
- specific fix suggestions
88
- - **Limitations**: Slower and more expensive — only triggered when needed
89
- - **Memory focus**: API patterns, architectural boundaries, historical
90
- breakage patterns, component communication flows
91
-
92
- ---
93
-
94
- ## Claude Opus 4.6
95
-
96
- ### Strengths
97
- - Deepest reasoning and analysis capability
98
- - Best at novel problem-solving and architecture design
99
- - Most reliable for subtle bug detection
100
- - Strongest at synthesizing complex, multi-source information
101
- - Best judgment on ambiguous tradeoffs
102
- - Most creative in approach selection
103
- - Highest accuracy on edge cases
104
-
105
- ### Limitations
106
- - Slowest response time
107
- - Highest cost per token
108
- - Overkill for routine tasks (same quality as Sonnet on standard work)
109
-
110
- ### Ideal Task Profile
111
- Hard problems: architecture design, subtle debugging, complex tradeoffs, novel implementations,
112
- security analysis, anything where getting it wrong is costly.
113
-
114
- ### Auto-Accept Thresholds
115
- N/A — Opus is the orchestrator, not a delegated head. Opus output goes directly to the user.
116
-
117
- ---
118
-
119
- ## Cost and Speed Comparison (February 2026 Pricing)
120
-
121
- | Model | Input Cost | Output Cost | Relative Speed | Input Cost vs Opus 4.6 | Output Cost vs Opus 4.6 |
122
- |-------|-----------|-------------|----------------|----------------------|------------------------|
123
- | Haiku 4.5 | $1 / MTok | $5 / MTok | ~10× faster | 5× cheaper | 5× cheaper |
124
- | Sonnet 4.6 | $3 / MTok | $15 / MTok | ~3× faster | ~1.7× cheaper | ~1.7× cheaper |
125
- | Opus 4.6 | $5 / MTok | $25 / MTok | 1× (baseline) | 1× (baseline) | 1× (baseline) |
126
-
127
- Source: https://platform.claude.com/docs/en/about-claude/pricing
128
-
129
- ### Blended Cost with Hydra (typical 50/30/20 task split)
130
-
131
- | Metric | All Opus 4.6 | With Hydra | Savings |
132
- |--------|-------------|------------|---------|
133
- | Input cost / MTok | $5.00 | $2.40 | 52% |
134
- | Output cost / MTok | $25.00 | $12.00 | 52% |
135
- | Blended effective cost | $30.00 / MTok | $14.40 / MTok | ~50% |
136
-
137
- Note: Savings calculated against Opus 4.6 pricing ($5/$25 per MTok) as of February 2026.
138
- Savings would be significantly higher when compared to Opus 4.1/4.0 pricing ($15/$75 per MTok).
139
-
140
- These are approximate ratios. The key insight: for 60-70% of coding tasks, Haiku 4.5 or
141
- Sonnet 4.6 produces output identical in quality to what Opus 4.6 would produce, but
142
- dramatically faster and cheaper. The skill is in identifying the 30-40% where Opus 4.6
143
- is genuinely needed.
144
-
145
- ---
146
-
147
- ## Acceptance Rate Expectations
148
-
149
- Drawing from speculative decoding theory, track these metrics mentally:
150
-
151
- | Draft Model | Expected Acceptance Rate | Notes |
152
- |-------------|------------------------|-------|
153
- | Haiku → Opus verification | ~85-90% | For well-classified Tier 1 tasks |
154
- | Sonnet → Opus verification | ~90-95% | For well-classified Tier 2 tasks |
155
- | sentinel-scan → sentinel escalation | ~20% | ~80%+ of scans return clean — only ~20% escalate to deep analysis |
156
- | sentinel → Opus verification | ~95% | Sonnet's deep analysis is highly accurate; Opus rarely overrides |
157
-
158
- If your acceptance rate drops below 80%, you're likely misclassifying tasks — shift borderline
159
- tasks to a higher tier. If it's consistently above 95%, you might be too conservative.
160
-
161
- The analogy to speculative decoding is direct: just as the paper found acceptance rates of
162
- ~0.7-0.9 for draft tokens depending on domain, our task-level acceptance rates should be
163
- similar or better, since we have more context for classification than a draft model has for
164
- next-token prediction.
1
+ # Model Capabilities Reference
2
+
3
+ Understanding what each model does well (and where it struggles) is key to effective routing.
4
+ This reference helps calibrate delegation decisions.
5
+
6
+ ## Claude Haiku 4.5
7
+
8
+ ### Strengths
9
+ - Extremely fast response times (~10× faster than Opus)
10
+ - Very low cost per token (~5× cheaper than Opus 4.6 — $1/$5 vs $5/$25 per MTok)
11
+ - Excellent at following clear, well-defined instructions
12
+ - Strong at text extraction, search, and pattern matching
13
+ - Good at generating code from templates and clear patterns
14
+ - Reliable for mechanical tasks with unambiguous specifications
15
+ - Great at summarization and information retrieval
16
+
17
+ ### Limitations
18
+ - Weaker at multi-step reasoning chains
19
+ - Can miss subtle bugs or edge cases in code review
20
+ - Less reliable with complex architectural decisions
21
+ - May produce simpler solutions when a nuanced approach is needed
22
+ - Can struggle with ambiguous or underspecified requirements
23
+ - Less creative in problem-solving approaches
24
+
25
+ ### Ideal Task Profile
26
+ Short context, clear instructions, well-defined output, no judgment calls needed.
27
+
28
+ ### Auto-Accept Thresholds
29
+ Haiku outputs qualify for auto-accept when they are raw, factual, and unambiguous:
30
+ - **hydra-scout**: File paths, grep results, directory listings, code snippets with location markers
31
+ - **hydra-runner**: All-pass results, clean build/lint output, git status output
32
+ - **hydra-scribe**: Internal docstrings, inline comments, changelog entries
33
+ - **Requires verify**: Any analysis, interpretation, or user-facing documentation
34
+
35
+ ### hydra-scout (Haiku 4.5) — Updated in v2.1.0
36
+ - **Strengths**: Codebase exploration, file search, reading, AND codebase
37
+ map building/maintenance
38
+ - **New capability**: Builds and incrementally updates the codebase dependency
39
+ map using grep-based import extraction. No external parsers required.
40
+ - **Memory focus**: Codebase structure, key file locations, module boundaries,
41
+ map build history, files that failed to parse
42
+
43
+ ### hydra-sentinel-scan (Haiku 4.5) — Updated in v2.1.0
44
+ - **Strengths**: Pattern matching, grep-level analysis, import tracing,
45
+ fast structural checks, AND map-based instant blast-radius lookups
46
+ - **New capability**: Reads codebase map for instant dependency lookups
47
+ instead of grepping. Falls back to grep if map doesn't exist.
48
+ - **Map-aware checks**: Risk-based severity, test coverage warnings,
49
+ env var index lookups, blast radius reporting
50
+ - **Limitations**: Cannot understand semantic meaning of data shapes,
51
+ may produce false positives on complex contract changes
52
+ - **Memory focus**: Codebase dependency graph, coupling patterns,
53
+ false positive history
54
+
55
+ ---
56
+
57
+ ## Claude Sonnet 4.6
58
+
59
+ ### Strengths
60
+ - Strong code generation across most languages and frameworks
61
+ - Good reasoning about code structure and patterns
62
+ - Reliable bug fixing when errors are identifiable
63
+ - Effective code review for common issues
64
+ - Good at test writing with understanding of business logic
65
+ - Handles refactoring with awareness of dependencies
66
+ - Balances speed and capability well
67
+
68
+ ### Limitations
69
+ - May not catch the most subtle architectural issues
70
+ - Less reliable than Opus for novel algorithm design
71
+ - Can sometimes miss non-obvious security implications
72
+ - May not fully optimize complex performance bottlenecks
73
+ - Less effective at synthesizing large amounts of disparate information
74
+
75
+ ### Ideal Task Profile
76
+ Standard software engineering tasks: implementation, testing, debugging, review. Tasks where
77
+ the approach is established even if the specific implementation requires thought.
78
+
79
+ ### Auto-Accept Thresholds
80
+ Sonnet outputs always require orchestrator review — code changes and analysis are never auto-accepted:
81
+ - **hydra-coder**: ALWAYS verify — scan for correctness, edge cases, project pattern alignment
82
+ - **hydra-analyst**: ALWAYS verify — validate reasoning, check suggested fix against actual code
83
+
84
+ ### hydra-sentinel (Sonnet 4.6)
85
+ - **Strengths**: Semantic understanding of data flow, contract validation
86
+ across component boundaries, accurate false positive filtering,
87
+ specific fix suggestions
88
+ - **Limitations**: Slower and more expensive — only triggered when needed
89
+ - **Memory focus**: API patterns, architectural boundaries, historical
90
+ breakage patterns, component communication flows
91
+
92
+ ---
93
+
94
+ ## Claude Opus 4.6
95
+
96
+ ### Strengths
97
+ - Deepest reasoning and analysis capability
98
+ - Best at novel problem-solving and architecture design
99
+ - Most reliable for subtle bug detection
100
+ - Strongest at synthesizing complex, multi-source information
101
+ - Best judgment on ambiguous tradeoffs
102
+ - Most creative in approach selection
103
+ - Highest accuracy on edge cases
104
+
105
+ ### Limitations
106
+ - Slowest response time
107
+ - Highest cost per token
108
+ - Overkill for routine tasks (same quality as Sonnet on standard work)
109
+
110
+ ### Ideal Task Profile
111
+ Hard problems: architecture design, subtle debugging, complex tradeoffs, novel implementations,
112
+ security analysis, anything where getting it wrong is costly.
113
+
114
+ ### Auto-Accept Thresholds
115
+ N/A — Opus is the orchestrator, not a delegated head. Opus output goes directly to the user.
116
+
117
+ ---
118
+
119
+ ## Cost and Speed Comparison (February 2026 Pricing)
120
+
121
+ | Model | Input Cost | Output Cost | Relative Speed | Input Cost vs Opus 4.6 | Output Cost vs Opus 4.6 |
122
+ |-------|-----------|-------------|----------------|----------------------|------------------------|
123
+ | Haiku 4.5 | $1 / MTok | $5 / MTok | ~10× faster | 5× cheaper | 5× cheaper |
124
+ | Sonnet 4.6 | $3 / MTok | $15 / MTok | ~3× faster | ~1.7× cheaper | ~1.7× cheaper |
125
+ | Opus 4.6 | $5 / MTok | $25 / MTok | 1× (baseline) | 1× (baseline) | 1× (baseline) |
126
+
127
+ Source: https://platform.claude.com/docs/en/about-claude/pricing
128
+
129
+ ### Blended Cost with Hydra (typical 50/30/20 task split)
130
+
131
+ | Metric | All Opus 4.6 | With Hydra | Savings |
132
+ |--------|-------------|------------|---------|
133
+ | Input cost / MTok | $5.00 | $2.40 | 52% |
134
+ | Output cost / MTok | $25.00 | $12.00 | 52% |
135
+ | Blended effective cost | $30.00 / MTok | $14.40 / MTok | ~50% |
136
+
137
+ Note: Savings calculated against Opus 4.6 pricing ($5/$25 per MTok) as of February 2026.
138
+ Savings would be significantly higher when compared to Opus 4.1/4.0 pricing ($15/$75 per MTok).
139
+
140
+ These are approximate ratios. The key insight: for 60-70% of coding tasks, Haiku 4.5 or
141
+ Sonnet 4.6 produces output identical in quality to what Opus 4.6 would produce, but
142
+ dramatically faster and cheaper. The skill is in identifying the 30-40% where Opus 4.6
143
+ is genuinely needed.
144
+
145
+ ---
146
+
147
+ ## Acceptance Rate Expectations
148
+
149
+ Drawing from speculative decoding theory, track these metrics mentally:
150
+
151
+ | Draft Model | Expected Acceptance Rate | Notes |
152
+ |-------------|------------------------|-------|
153
+ | Haiku → Opus verification | ~85-90% | For well-classified Tier 1 tasks |
154
+ | Sonnet → Opus verification | ~90-95% | For well-classified Tier 2 tasks |
155
+ | sentinel-scan → sentinel escalation | ~20% | ~80%+ of scans return clean — only ~20% escalate to deep analysis |
156
+ | sentinel → Opus verification | ~95% | Sonnet's deep analysis is highly accurate; Opus rarely overrides |
157
+
158
+ If your acceptance rate drops below 80%, you're likely misclassifying tasks — shift borderline
159
+ tasks to a higher tier. If it's consistently above 95%, you might be too conservative.
160
+
161
+ The analogy to speculative decoding is direct: just as the paper found acceptance rates of
162
+ ~0.7-0.9 for draft tokens depending on domain, our task-level acceptance rates should be
163
+ similar or better, since we have more context for classification than a draft model has for
164
+ next-token prediction.