dual-brain 7.1.21 → 7.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/dual-brain.mjs +2580 -717
- package/hooks/budget-balancer.mjs +104 -266
- package/hooks/wave-orchestrator.mjs +29 -26
- package/package.json +13 -3
- package/scripts/verify-publish.mjs +26 -0
- package/src/context.mjs +389 -0
- package/src/decide.mjs +283 -60
- package/src/detect.mjs +133 -1
- package/src/dispatch.mjs +175 -30
- package/src/doctor.mjs +577 -0
- package/src/failure-memory.mjs +178 -0
- package/src/nextstep.mjs +100 -0
- package/src/observer.mjs +241 -0
- package/src/outcome.mjs +256 -0
- package/src/pipeline.mjs +759 -0
- package/src/profile.mjs +357 -485
- package/src/receipt.mjs +131 -0
- package/src/session.mjs +358 -10
package/src/decide.mjs
CHANGED
|
@@ -6,21 +6,69 @@
|
|
|
6
6
|
* to use and explains why in one sentence.
|
|
7
7
|
*
|
|
8
8
|
* Exports: decideRoute, getModelCapabilities, getAvailableModels,
|
|
9
|
-
*
|
|
9
|
+
* WORK_STYLES, getWorkStyle, estimateBudgetPressure,
|
|
10
|
+
* shouldDualBrain, explainDecision, getFailoverOrder
|
|
10
11
|
*
|
|
11
12
|
* CLI: node src/decide.mjs --profile /path/to/profile.json \
|
|
12
13
|
* --detection '{"intent":"edit","risk":"low","complexity":"simple","effort":"medium","tier":"execute"}'
|
|
13
14
|
*/
|
|
14
15
|
|
|
15
|
-
import {
|
|
16
|
+
import { readFileSync } from 'fs';
|
|
16
17
|
import { join, dirname } from 'path';
|
|
17
18
|
import { fileURLToPath } from 'url';
|
|
18
19
|
import { getProviderScore, checkCooldown } from './health.mjs';
|
|
19
20
|
|
|
20
|
-
const __dirname
|
|
21
|
-
const WORKSPACE
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
22
|
+
const WORKSPACE = join(__dirname, '..');
|
|
23
|
+
|
|
24
|
+
// ─── Work Styles ─────────────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
/**
 * Work styles control how aggressively the router uses stronger models,
 * challenger (dual-brain) reviews, and checkpoints.
 * The user picks a style regardless of provider or plan — no price gating.
 *
 * Every entry has the same shape:
 *   label            – display name (used as the "<label> mode:" prefix in explanations)
 *   defaultWorker    – model ID listed for ordinary work
 *   complexWorker    – model ID listed for complex work
 *   challengerPolicy – 'never' | 'high-risk' | 'medium-risk'
 *   checkpointPolicy – 'never' | 'risky-ops' | 'all-edits'
 *   reviewPolicy     – 'skip' | 'important' | 'non-trivial'
 *   description      – one-line summary of the style
 */
export const WORK_STYLES = {
  fast: {
    label: 'Fast',
    defaultWorker: 'claude-sonnet-4-6',
    complexWorker: 'claude-sonnet-4-6',
    challengerPolicy: 'never',
    checkpointPolicy: 'never',
    reviewPolicy: 'skip',
    description: 'Quick answers, single model, minimal reviews',
  },
  balanced: {
    label: 'Balanced',
    defaultWorker: 'claude-sonnet-4-6',
    complexWorker: 'claude-opus-4-6',
    challengerPolicy: 'high-risk',   // only on high/critical risk
    checkpointPolicy: 'risky-ops',   // before risky operations
    reviewPolicy: 'important',       // important changes only
    description: 'Smart routing, reviews on important changes',
  },
  fullpower: {
    label: 'Full Power',
    defaultWorker: 'claude-sonnet-4-6',
    complexWorker: 'claude-opus-4-6',
    challengerPolicy: 'medium-risk', // medium+ risk
    checkpointPolicy: 'all-edits',   // before all edits
    reviewPolicy: 'non-trivial',     // everything non-trivial
    description: 'Deep reasoning, dual-brain on everything that matters',
  },
};

/**
 * Read the active work style from the profile.
 *
 * The style name is taken from `profile.workStyle`, then `profile.work_style`.
 * Falls back to 'balanced' when the profile is missing, the name is not set,
 * or the name is not one of the WORK_STYLES keys.
 *
 * @param {object} [profile]
 * @returns {object} A shallow copy of the matching WORK_STYLES entry, with a `key`
 *                   property holding the resolved style name.
 */
export function getWorkStyle(profile) {
  const requested = profile?.workStyle || profile?.work_style || 'balanced';

  // Own-property check on purpose: a bare `WORK_STYLES[requested]` lookup also
  // resolves inherited names such as 'constructor' or 'toString', which would be
  // accepted as a "known" style and yield an entry with no label or policies.
  const isKnown =
    typeof requested === 'string' &&
    Object.prototype.hasOwnProperty.call(WORK_STYLES, requested);

  const key = isKnown ? requested : 'balanced';
  return { ...WORK_STYLES[key], key };
}
|
|
24
72
|
|
|
25
73
|
// ─── Slim Model Capabilities (routing-relevant only) ─────────────────────────
|
|
26
74
|
|
|
@@ -100,22 +148,32 @@ const MODEL_CAPABILITIES = {
|
|
|
100
148
|
},
|
|
101
149
|
};
|
|
102
150
|
|
|
103
|
-
// ───
|
|
151
|
+
// ─── Canonical Work Model Names ──────────────────────────────────────────────
|
|
104
152
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
153
|
+
/**
 * Canonical model IDs handed to work agents, keyed by role.
 *
 * These are independent of the session model (the one the user launched
 * Claude Code with); the router assigns work models on its own and never
 * requires the session model to change.
 *
 *   execute               claude-sonnet-4-6          native tool use, reliable workhorse
 *   think                 claude-opus-4-6            deep reasoning, complex single-brain tasks
 *   search                claude-haiku-4-5-20251001  cheap, fast, disposable
 *   challengerGpt         o3                         preferred cross-provider challenger
 *   challengerGptFallback gpt-4o                     challenger when o3 is unavailable
 *   searchGpt             gpt-4o-mini                GPT-side search/classify
 *
 * The challenger deliberately comes from a different model family:
 * different training = different blind spots.
 */
const WORK_MODELS = {
  execute: 'claude-sonnet-4-6',
  think: 'claude-opus-4-6',
  search: 'claude-haiku-4-5-20251001',
  challengerGpt: 'o3',
  challengerGptFallback: 'gpt-4o',
  searchGpt: 'gpt-4o-mini',
};

/** Session model that is always recommended, regardless of the work being routed. */
const RECOMMENDED_SESSION_MODEL = 'claude-sonnet-4-6';

/** Human-readable justification shown alongside RECOMMENDED_SESSION_MODEL. */
const RECOMMENDED_SESSION_REASON = 'Sonnet has native tool use and is the most cost-effective session model for orchestrating work agents.';
|
|
119
177
|
|
|
120
178
|
// ─── Exported: getModelCapabilities ──────────────────────────────────────────
|
|
121
179
|
|
|
@@ -131,19 +189,67 @@ export function getModelCapabilities(model) {
|
|
|
131
189
|
// ─── Exported: getAvailableModels ─────────────────────────────────────────────
|
|
132
190
|
|
|
133
191
|
/**
 * Report the model short names the router may choose from, per provider.
 *
 * Every known model is offered unless the profile carries an explicit allow-list
 * at profile.providers.<provider>.models. When that value is an array it is
 * returned as-is (the same array instance, even when empty); any non-array value
 * is ignored and the full default list is used.
 * Price, configured plan, and the provider's `enabled` flag are not consulted
 * here — they cannot be verified from this module.
 *
 * @param {{ providers?: { claude?: { enabled?: boolean, models?: string[] }, openai?: { enabled?: boolean, models?: string[] } } }} profile
 * @returns {{ claude: string[], openai: string[] }}
 */
export function getAvailableModels(profile) {
  const providers = profile?.providers;

  // Use the configured allow-list only when it really is an array.
  const allowListOr = (configured, everything) =>
    (Array.isArray(configured) ? configured : everything);

  return {
    claude: allowListOr(providers?.claude?.models, ['haiku', 'sonnet', 'opus']),
    openai: allowListOr(
      providers?.openai?.models,
      ['gpt-4o-mini', 'gpt-4.1-mini', 'gpt-4.1', 'gpt-4o', 'o4-mini', 'o3'],
    ),
  };
}
|
|
146
211
|
|
|
212
|
+
// ─── Internal: challenger model selection ────────────────────────────────────
|
|
213
|
+
|
|
214
|
+
/**
 * Choose the challenger model, always from the provider opposite to the primary.
 *
 *   primary 'claude'   → WORK_MODELS.challengerGpt if listed in available.openai,
 *                        otherwise WORK_MODELS.challengerGptFallback if listed
 *   any other primary  → WORK_MODELS.think if 'opus' is listed in available.claude,
 *                        otherwise WORK_MODELS.execute if 'sonnet' is listed
 *
 * Returns null when none of the candidates is listed for the opposing provider.
 *
 * @param {string} primaryProvider  'claude'|'openai'
 * @param {object} available        Result of getAvailableModels()
 * @returns {string|null}
 */
function pickChallengerModel(primaryProvider, available) {
  if (primaryProvider === 'claude') {
    // GPT challengers are looked up by the same ID that is dispatched.
    const gptCandidates = [WORK_MODELS.challengerGpt, WORK_MODELS.challengerGptFallback];
    return gptCandidates.find((id) => available.openai.includes(id)) ?? null;
  }

  // Claude challengers are listed by short name but dispatched by canonical ID.
  const claudeCandidates = [
    ['opus', WORK_MODELS.think],
    ['sonnet', WORK_MODELS.execute],
  ];
  const match = claudeCandidates.find(([shortName]) => available.claude.includes(shortName));
  return match ? match[1] : null;
}
|
|
237
|
+
|
|
238
|
+
/**
 * Tell whether a challenger review should run for this task.
 *
 * A challenger needs two providers, so the answer is always false when
 * `hasBothProviders` is falsy. Otherwise the policy sets the risk threshold:
 *   'high-risk'   → high or critical risk
 *   'medium-risk' → medium, high or critical risk
 *   'never' and any unrecognised policy → never
 *
 * @param {string} challengerPolicy  'never'|'high-risk'|'medium-risk'
 * @param {'low'|'medium'|'high'|'critical'} risk
 * @param {boolean} hasBothProviders
 * @returns {boolean}
 */
function shouldTriggerChallenger(challengerPolicy, risk, hasBothProviders) {
  if (!hasBothProviders) return false;

  switch (challengerPolicy) {
    case 'high-risk':
      return risk === 'high' || risk === 'critical';
    case 'medium-risk':
      return risk === 'medium' || risk === 'high' || risk === 'critical';
    default:
      return false;
  }
}
|
|
252
|
+
|
|
147
253
|
// ─── Exported: estimateBudgetPressure (deprecated stub) ──────────────────────
|
|
148
254
|
|
|
149
255
|
/**
|
|
@@ -358,7 +464,7 @@ function chooseProvider(detection, profile, healthScores) {
|
|
|
358
464
|
const openaiScore = healthScores.openai;
|
|
359
465
|
|
|
360
466
|
// OpenAI not configured or not enabled → always use Claude
|
|
361
|
-
if (!profile?.providers?.openai?.enabled
|
|
467
|
+
if (!profile?.providers?.openai?.enabled) return 'claude';
|
|
362
468
|
|
|
363
469
|
// Both hot (score=0) → pick the one with the higher score; if tied, prefer Claude
|
|
364
470
|
if (claudeScore === 0 && openaiScore === 0) {
|
|
@@ -390,38 +496,43 @@ function chooseProvider(detection, profile, healthScores) {
|
|
|
390
496
|
* @returns {string}
|
|
391
497
|
*/
|
|
392
498
|
export function explainDecision(decision, detection, profile) {
  // One-sentence, human-readable reason for a routing decision. The first
  // matching rule wins, in this order: dual-brain, Claude rate-limited,
  // degraded provider, profile bias, think intent, search lookup, default.
  const { provider, model, effort, dualBrain, workStyle, challengerModel } = decision;
  const { intent = 'task', risk = 'low', complexity = 'simple', tier = 'execute' } = detection;

  // Prefer the resolved style attached to the decision; otherwise resolve it from the profile.
  const style = decision._workStyle ?? getWorkStyle(profile);
  const prefix = `${style.label ?? workStyle ?? 'Balanced'} mode:`;
  const worker = effort ? `${model} ${effort}` : model;

  if (dualBrain) {
    return challengerModel
      ? `${prefix} ${worker} for ${intent}, ${challengerModel} challenger on ${risk}-risk changes.`
      : `${prefix} ${worker} with dual-brain review because this ${intent} change is ${risk} risk.`;
  }

  // Health-based explanations; a provider with no recorded score counts as fully healthy (100).
  const scores = decision._healthScores || {};
  if (provider === 'openai' && (scores.claude ?? 100) === 0) {
    return `${prefix} using ${worker} because Claude is rate-limited and this is an isolated ${tier} task.`;
  }
  if ((scores[provider] ?? 100) < 50) {
    return `${prefix} using ${worker} (downgraded due to rate-limit cooldown) for this ${complexity} ${intent}.`;
  }

  // Profile bias: `mode` takes precedence over the `profile` field.
  const bias = profile?.mode || profile?.profile || 'auto';
  if (bias === 'cost-saver') {
    return `${prefix} using ${worker} (cost-saver bias) for ${risk}-risk ${intent}.`;
  }
  if (bias === 'quality-first') {
    return `${prefix} using ${worker} (quality-first bias) for ${intent}.`;
  }

  if (THINK_INTENTS.includes(intent)) {
    return `${prefix} ${worker} for ${intent} — deep reasoning needed.`;
  }
  if (tier === 'search' || SEARCH_INTENTS.includes(intent)) {
    return `${prefix} ${worker} for lightweight ${intent} lookup.`;
  }
  return `${prefix} ${worker} for ${intent} (${risk} risk, ${provider} healthy).`;
}
|
|
426
537
|
|
|
427
538
|
// ─── Exported: parsePreferences ──────────────────────────────────────────────
|
|
@@ -507,7 +618,10 @@ function applyCriticalRiskFloor(model, provider, available, risk) {
|
|
|
507
618
|
* @returns {object} Routing decision
|
|
508
619
|
*/
|
|
509
620
|
export function decideRoute({ profile = {}, detection = {}, cwd } = {}) {
|
|
510
|
-
const available
|
|
621
|
+
const available = getAvailableModels(profile);
|
|
622
|
+
|
|
623
|
+
// Resolve active work style
|
|
624
|
+
const workStyle = getWorkStyle(profile);
|
|
511
625
|
|
|
512
626
|
// Parse free-text user preferences into routing signals
|
|
513
627
|
const prefSignals = parsePreferences(profile.preferences);
|
|
@@ -517,13 +631,16 @@ export function decideRoute({ profile = {}, detection = {}, cwd } = {}) {
|
|
|
517
631
|
? { ...profile, mode: prefSignals.biasOverride }
|
|
518
632
|
: profile;
|
|
519
633
|
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
if (prefSignals.alwaysDualBrain) dual = true;
|
|
523
|
-
if (prefSignals.neverDualBrain) dual = false;
|
|
634
|
+
const { tier = 'execute', risk = 'low', complexity = 'simple', effort: detectionEffort } = detection;
|
|
635
|
+
const isHighStakes = ['critical', 'high'].includes(risk);
|
|
524
636
|
|
|
525
|
-
|
|
526
|
-
|
|
637
|
+
// Determine whether to use the complexWorker (Opus) or defaultWorker (Sonnet).
|
|
638
|
+
// "High reasoning depth" means: think-tier intent, critical risk, complex work at high/critical risk, or an explicit 'xhigh' effort.
|
|
639
|
+
const needsDeepReasoning =
|
|
640
|
+
THINK_INTENTS.includes(detection.intent || '') ||
|
|
641
|
+
risk === 'critical' ||
|
|
642
|
+
(complexity === 'complex' && ['high', 'critical'].includes(risk)) ||
|
|
643
|
+
detectionEffort === 'xhigh';
|
|
527
644
|
|
|
528
645
|
// Get health scores for current tier
|
|
529
646
|
const healthScores = getHealthScores(tier, cwd);
|
|
@@ -534,23 +651,31 @@ export function decideRoute({ profile = {}, detection = {}, cwd } = {}) {
|
|
|
534
651
|
// Apply preferProvider / avoidProvider signals from preferences
|
|
535
652
|
if (prefSignals.preferProvider) {
|
|
536
653
|
const preferred = prefSignals.preferProvider;
|
|
537
|
-
const prefEnabled = profile?.providers?.[preferred]?.enabled
|
|
654
|
+
const prefEnabled = profile?.providers?.[preferred]?.enabled;
|
|
538
655
|
const prefScore = healthScores[preferred] ?? 0;
|
|
539
|
-
// Use preferred provider if it is configured and has any health score (even degraded)
|
|
540
656
|
if (prefEnabled && prefScore > 0) provider = preferred;
|
|
541
657
|
}
|
|
542
658
|
if (prefSignals.avoidProvider && provider === prefSignals.avoidProvider) {
|
|
543
|
-
// Switch to the other provider only if it is configured and healthy
|
|
544
659
|
const other = prefSignals.avoidProvider === 'claude' ? 'openai' : 'claude';
|
|
545
|
-
const otherEnabled = profile?.providers?.[other]?.enabled
|
|
660
|
+
const otherEnabled = profile?.providers?.[other]?.enabled;
|
|
546
661
|
const otherScore = healthScores[other] ?? 0;
|
|
547
662
|
if (otherEnabled && otherScore > 0) provider = other;
|
|
548
663
|
}
|
|
549
664
|
|
|
550
|
-
// Select base model
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
665
|
+
// Select base model using work style worker assignments.
|
|
666
|
+
// For Claude primary: use complexWorker (opus) on deep reasoning, defaultWorker (sonnet) otherwise.
|
|
667
|
+
// For OpenAI primary: mirror the same logic using GPT equivalents.
|
|
668
|
+
let model;
|
|
669
|
+
if (provider === 'claude') {
|
|
670
|
+
const wantOpus = needsDeepReasoning && workStyle.key !== 'fast';
|
|
671
|
+
model = wantOpus && available.claude.includes('opus') ? 'opus' : 'sonnet';
|
|
672
|
+
if (!available.claude.includes(model)) model = available.claude[available.claude.length - 1] ?? 'sonnet';
|
|
673
|
+
} else {
|
|
674
|
+
// OpenAI primary — use o3 for deep reasoning in fullpower, gpt-4o otherwise
|
|
675
|
+
const wantO3 = needsDeepReasoning && workStyle.key === 'fullpower';
|
|
676
|
+
model = wantO3 && available.openai.includes('o3') ? 'o3' : 'gpt-4o';
|
|
677
|
+
if (!available.openai.includes(model)) model = available.openai[available.openai.length - 1] ?? 'gpt-4o';
|
|
678
|
+
}
|
|
554
679
|
|
|
555
680
|
// Apply health-based downgrade (only if score < 50 and not high-stakes)
|
|
556
681
|
model = applyHealthDowngrade(model, healthScores[provider], provider, available[provider], isHighStakes);
|
|
@@ -569,19 +694,40 @@ export function decideRoute({ profile = {}, detection = {}, cwd } = {}) {
|
|
|
569
694
|
}
|
|
570
695
|
}
|
|
571
696
|
|
|
697
|
+
// ── Challenger / dual-brain decision ─────────────────────────────────────
|
|
698
|
+
const hasBothProviders = !!(
|
|
699
|
+
profile?.providers?.claude?.enabled &&
|
|
700
|
+
profile?.providers?.openai?.enabled
|
|
701
|
+
);
|
|
702
|
+
|
|
703
|
+
// Work-style challenger: triggered by challengerPolicy + risk level
|
|
704
|
+
const challengerTriggered = shouldTriggerChallenger(
|
|
705
|
+
workStyle.challengerPolicy,
|
|
706
|
+
risk,
|
|
707
|
+
hasBothProviders,
|
|
708
|
+
);
|
|
709
|
+
|
|
710
|
+
// Legacy designImpact dual-brain gate (mandatory review, bypass hasBothProviders check)
|
|
711
|
+
const legacyDualBrain = !!(detection.designImpact && profile?.dual_brain_enabled !== false);
|
|
712
|
+
|
|
713
|
+
// Preference overrides
|
|
714
|
+
let dual = challengerTriggered || legacyDualBrain || shouldDualBrain(detection, profile);
|
|
715
|
+
if (prefSignals.alwaysDualBrain) dual = true;
|
|
716
|
+
if (prefSignals.neverDualBrain) dual = false;
|
|
717
|
+
|
|
718
|
+
// When only one provider available and challenger was the reason, downgrade to single-brain
|
|
719
|
+
if (dual && !hasBothProviders && !legacyDualBrain) dual = false;
|
|
720
|
+
|
|
721
|
+
const degradedDualBrain = !!(legacyDualBrain && !hasBothProviders);
|
|
722
|
+
|
|
723
|
+
// Pick challenger model (from the opposing provider)
|
|
724
|
+
const challengerModel = dual ? pickChallengerModel(provider, available) : null;
|
|
725
|
+
|
|
572
726
|
// Determine effort, modes, sandbox
|
|
573
727
|
const effort = pickEffort(model, detection);
|
|
574
728
|
const modes = pickModes(model, detection);
|
|
575
729
|
const sandbox = pickSandbox(model, detection);
|
|
576
730
|
|
|
577
|
-
const hasBothProviders = !!(
|
|
578
|
-
profile?.providers?.claude?.enabled &&
|
|
579
|
-
profile?.providers?.claude?.plan &&
|
|
580
|
-
profile?.providers?.openai?.enabled &&
|
|
581
|
-
profile?.providers?.openai?.plan
|
|
582
|
-
);
|
|
583
|
-
const degradedDualBrain = !!(dual && detection.designImpact && !hasBothProviders);
|
|
584
|
-
|
|
585
731
|
const decision = {
|
|
586
732
|
provider,
|
|
587
733
|
model,
|
|
@@ -589,19 +735,96 @@ export function decideRoute({ profile = {}, detection = {}, cwd } = {}) {
|
|
|
589
735
|
tier,
|
|
590
736
|
dualBrain: dual,
|
|
591
737
|
...(degradedDualBrain && { degradedDualBrain: true }),
|
|
738
|
+
...(challengerModel && { challengerModel }),
|
|
739
|
+
workStyle: workStyle.key,
|
|
592
740
|
modes,
|
|
593
741
|
sandbox,
|
|
594
742
|
explanation: '',
|
|
595
743
|
_healthScores: healthScores,
|
|
744
|
+
_workStyle: workStyle,
|
|
596
745
|
};
|
|
597
746
|
|
|
598
747
|
decision.explanation = explainDecision(decision, detection, profileWithEffectiveBias);
|
|
599
748
|
|
|
600
|
-
// Remove internal
|
|
601
|
-
const { _healthScores, ...result } = decision;
|
|
749
|
+
// Remove internal fields from public output
|
|
750
|
+
const { _healthScores, _workStyle, ...result } = decision;
|
|
602
751
|
return result;
|
|
603
752
|
}
|
|
604
753
|
|
|
754
|
+
// ─── Exported: getFailoverOrder ──────────────────────────────────────────────
|
|
755
|
+
|
|
756
|
+
/**
 * Given a failed routing decision and the active profile, return an ordered list
 * of fallback options to try next.
 *
 * Priority order:
 *   1. Other models of the provider that just failed (the failed model is skipped).
 *      Any provider other than 'claude' is treated as OpenAI.
 *   2. Models of the other provider, only when that provider is enabled in the profile.
 *
 * Within each group, models follow a per-tier ranking (best fit for the tier first);
 * an unknown tier uses the 'execute' ranking. Available models that the ranking
 * tables do not mention (for example 'o4-mini', or custom entries from
 * profile.providers.<provider>.models) are appended after the ranked ones, so they
 * are still offered as a last resort instead of being silently dropped.
 *
 * NOTE(review): entries carry no `plan` field and subscription-level failover is
 * not implemented here — confirm whether any caller expects it.
 *
 * @param {object} decision  The routing decision that just failed (provider, model, tier)
 * @param {object} profile   Active profile with providers info
 * @returns {Array<{ provider: string, model: string, label: string }>}
 */
export function getFailoverOrder(decision, profile) {
  const { provider: failedProvider, model: failedModel, tier = 'execute' } = decision ?? {};
  const available = getAvailableModels(profile);

  // Ranked model lists per provider and tier (best capability for the tier → cheapest).
  const rankByTier = {
    claude: {
      think:   ['opus', 'sonnet', 'haiku'],
      execute: ['sonnet', 'opus', 'haiku'],
      search:  ['haiku', 'sonnet', 'opus'],
    },
    openai: {
      think:   ['o3', 'gpt-4o', 'gpt-4.1', 'gpt-4.1-mini', 'gpt-4o-mini'],
      execute: ['gpt-4o', 'gpt-4.1', 'o3', 'gpt-4.1-mini', 'gpt-4o-mini'],
      search:  ['gpt-4o-mini', 'gpt-4.1-mini', 'gpt-4.1', 'gpt-4o', 'o3'],
    },
  };
  const labelPrefix = { claude: 'Claude', openai: 'OpenAI' };

  // Build the fallback entries for one provider, optionally skipping one model.
  const optionsFor = (provider, skipModel) => {
    const tiers = rankByTier[provider];
    // Own-property check so a tier such as 'constructor' falls back to 'execute'.
    const ranked = Object.prototype.hasOwnProperty.call(tiers, tier) ? tiers[tier] : tiers.execute;
    const offered = available[provider];

    const ordered = [
      ...ranked.filter((m) => offered.includes(m)),
      // Unranked-but-available models go last, de-duplicated, in allow-list order.
      ...new Set(offered.filter((m) => !ranked.includes(m))),
    ];

    return ordered
      .filter((m) => m !== skipModel)
      .map((m) => ({ provider, model: m, label: `${labelPrefix[provider]} ${m}` }));
  };

  const sameProvider = failedProvider === 'claude' ? 'claude' : 'openai';
  const otherProvider = sameProvider === 'claude' ? 'openai' : 'claude';
  const otherEnabled = Boolean(profile?.providers?.[otherProvider]?.enabled);

  return [
    ...optionsFor(sameProvider, failedModel),
    // The failed model is only skipped within its own provider.
    ...(otherEnabled ? optionsFor(otherProvider, undefined) : []),
  ];
}
|
|
827
|
+
|
|
605
828
|
// ─── CLI ──────────────────────────────────────────────────────────────────────
|
|
606
829
|
|
|
607
830
|
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
|
package/src/detect.mjs
CHANGED
|
@@ -157,6 +157,129 @@ function buildExplanation({ intent, risk, complexity, fileCount, priorFailures }
|
|
|
157
157
|
return parts.join(' ') + '.';
|
|
158
158
|
}
|
|
159
159
|
|
|
160
|
+
// ─── Reasoning depth classification ───────────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
// Prompt patterns, one per kind of signal. All are case-insensitive and match whole words.
const ULTRA_UNCERTAINTY = /\b(not sure|maybe|should we|architect|design|trade-?off|approach)\b/i;
const ULTRA_DEEP_ANALYSIS = /\b(think about|analyze|analyse|evaluate|compare options)\b/i;
const HIGH_CROSS_CUTTING = /\b(refactor|rename across|update all|migration)\b/i;
const LOW_SIMPLE = /\b(grep|find|search|list|show|what is|where is)\b/i;

/**
 * Classify the reasoning depth needed for a task.
 *
 * Tiers are evaluated from most to least demanding (ultra → high → medium → low);
 * the first tier with at least one signal wins and only that tier's signals are
 * reported.
 *
 * @param {string} prompt          Task description; matched against the keyword patterns.
 * @param {string[]} [files]       File paths involved in the task.
 * @param {object[]} [priorOutcomes]  Earlier attempts; an entry counts as a failure when
 *                                 `failed` is truthy, `status` or `outcome` is 'failed',
 *                                 or `success` is exactly false.
 * @returns {{ depth: 'low'|'medium'|'high'|'ultra', signals: string[] }}
 */
function classifyReasoningDepth(prompt, files = [], priorOutcomes = []) {
  // Matched keyword text for a pattern, or null when the prompt does not match.
  const keywordIn = (pattern) => (pattern.test(prompt) ? prompt.match(pattern)[0] : null);

  const isFailure = (o) =>
    o && (o.failed || o.status === 'failed' || o.outcome === 'failed' || o.success === false);
  const failures = priorOutcomes.filter(isFailure).length;

  // Overall risk = the higher of file-based risk and the first keyword risk that matches.
  const { level: fileRisk } = classifyRisk(files);
  let keywordRisk = 'low';
  for (const { level, regex } of RISK_KEYWORDS) {
    if (regex.test(prompt)) {
      keywordRisk = level;
      break;
    }
  }
  const risk = higherRisk(fileRisk, keywordRisk);

  // Directory spread: distinct first path segments ('.' for files with no directory part).
  const topLevelDirs = new Set();
  for (const file of files) {
    const segments = file.replace(/^\//, '').split('/');
    topLevelDirs.add(segments.length > 1 ? segments[0] : '.');
  }
  const dirCount = topLevelDirs.size;
  const fileCount = files.length;

  // ── Ultra ──────────────────────────────────────────────────────────────────
  const ultra = [];
  const uncertainty = keywordIn(ULTRA_UNCERTAINTY);
  if (uncertainty !== null) ultra.push(`prompt contains '${uncertainty}'`);
  const deepAnalysis = keywordIn(ULTRA_DEEP_ANALYSIS);
  if (deepAnalysis !== null) ultra.push(`prompt requests deep analysis ('${deepAnalysis}')`);
  if (risk === 'critical') ultra.push('risk classified as critical');
  if (failures >= 2) ultra.push(`${failures} prior failures on similar task`);
  if (fileRisk === 'critical') ultra.push('files include auth/security/billing/migration patterns');
  if (ultra.length > 0) return { depth: 'ultra', signals: ultra };

  // ── High ───────────────────────────────────────────────────────────────────
  const high = [];
  if (risk === 'high') high.push('risk classified as high');
  if (fileCount > 5) high.push(`${fileCount} files provided`);
  if (failures === 1) high.push('1 prior failure on similar task');
  const crossCutting = keywordIn(HIGH_CROSS_CUTTING);
  if (crossCutting !== null) high.push(`prompt mentions cross-cutting concern ('${crossCutting}')`);
  if (dirCount >= 3) high.push(`files span ${dirCount} directories`);
  if (high.length > 0) return { depth: 'high', signals: high };

  // ── Medium ─────────────────────────────────────────────────────────────────
  const MEDIUM_IMPL = /\b(add|implement|build|create|fix|update)\b/i;
  const medium = [];
  if (risk === 'medium') medium.push('risk classified as medium');
  if (fileCount >= 2 && fileCount <= 5) medium.push(`${fileCount} files provided`);
  const implementation = keywordIn(MEDIUM_IMPL);
  if (implementation !== null) medium.push(`prompt contains implementation keyword ('${implementation}')`);
  if (medium.length > 0) return { depth: 'medium', signals: medium };

  // ── Low ────────────────────────────────────────────────────────────────────
  const low = [];
  if (risk === 'low') low.push('risk classified as low');
  if (fileCount <= 1) low.push(fileCount === 0 ? 'no files provided' : '1 file provided');
  const lookup = keywordIn(LOW_SIMPLE);
  if (lookup !== null) low.push(`prompt is a simple lookup ('${lookup}')`);
  if (failures === 0) low.push('no prior failures');

  return { depth: 'low', signals: low.length > 0 ? low : ['no elevated signals detected'] };
}
|
|
282
|
+
|
|
160
283
|
/** Main detection function. Input: { prompt, files?, priorFailures? } */
|
|
161
284
|
function detectTask(input) {
|
|
162
285
|
const { prompt = '', files = [], priorFailures = 0 } = input;
|
|
@@ -213,6 +336,12 @@ function detectTask(input) {
|
|
|
213
336
|
// 8. Explanation
|
|
214
337
|
const explanation = buildExplanation({ intent, risk, complexity, fileCount, priorFailures });
|
|
215
338
|
|
|
339
|
+
// 9. Reasoning depth
|
|
340
|
+
const priorOutcomes = priorFailures > 0
|
|
341
|
+
? Array.from({ length: priorFailures }, () => ({ failed: true }))
|
|
342
|
+
: [];
|
|
343
|
+
const { depth: reasoningDepth, signals: reasoningSignals } = classifyReasoningDepth(prompt, files, priorOutcomes);
|
|
344
|
+
|
|
216
345
|
return {
|
|
217
346
|
intent,
|
|
218
347
|
risk,
|
|
@@ -225,6 +354,8 @@ function detectTask(input) {
|
|
|
225
354
|
requiresWrite: requiresWrite(intent),
|
|
226
355
|
explanation,
|
|
227
356
|
specialist: specialistResult,
|
|
357
|
+
reasoningDepth,
|
|
358
|
+
reasoningSignals,
|
|
228
359
|
};
|
|
229
360
|
}
|
|
230
361
|
|
|
@@ -238,6 +369,7 @@ const DEFAULT_SPECIALISTS = {
|
|
|
238
369
|
html: { triggers: { extensions: ['.html', '.css', '.scss', '.svg'], keywords: ['html', 'css', 'accessibility', 'a11y', 'aria', 'responsive', 'tailwind'] } },
|
|
239
370
|
linux: { triggers: { extensions: ['.sh', '.bash', '.conf', '.service', '.dockerfile'], keywords: ['linux', 'bash', 'shell', 'systemd', 'nginx', 'docker', 'ssh', 'deploy'] } },
|
|
240
371
|
security: { triggers: { extensions: [], keywords: ['auth', 'oauth', 'jwt', 'credential', 'secret', 'encrypt', 'vulnerability', 'vulnerabilities', 'audit', 'owasp', 'xss', 'csrf'] }, tier_bias: 'think' },
|
|
372
|
+
doctor: { triggers: { extensions: [], keywords: ['doctor', 'health', 'diagnose', 'diagnosis', 'checkup', 'drift', 'completeness', 'broken', 'regression', 'audit health', 'package health', 'health check', 'health report', 'health-manifest'] }, tier_bias: 'think' },
|
|
241
373
|
};
|
|
242
374
|
|
|
243
375
|
function loadSpecialistRegistry() {
|
|
@@ -342,4 +474,4 @@ if (process.argv[1] && new URL(import.meta.url).pathname === process.argv[1]) {
|
|
|
342
474
|
console.log(JSON.stringify(result, null, 2));
|
|
343
475
|
}
|
|
344
476
|
|
|
345
|
-
export { detectTask, classifyIntent, classifyRisk, estimateComplexity, inferTier, extractPaths, classifySpecialist };
|
|
477
|
+
export { detectTask, classifyIntent, classifyRisk, estimateComplexity, inferTier, extractPaths, classifySpecialist, classifyReasoningDepth };
|