dual-brain 7.1.27 → 7.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/doctor.mjs +716 -1
package/package.json
CHANGED
package/src/doctor.mjs
CHANGED
|
@@ -15,9 +15,12 @@
|
|
|
15
15
|
* checkFoundations, checkRoleBoundaries, checkEvidence, checkTokenWaste,
|
|
16
16
|
* runHealthCheck, formatHealthReport, compareHealth,
|
|
17
17
|
* doctorDiagnose, doctorRecover
|
|
18
|
+
*
|
|
19
|
+
* VERIFY system (runtime assumption verification):
|
|
20
|
+
* verify, verifyAll, getVerificationCache, getStaleAssumptions, formatVerifications
|
|
18
21
|
*/
|
|
19
22
|
|
|
20
|
-
import { existsSync, readFileSync, writeFileSync, renameSync } from 'fs';
|
|
23
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'fs';
|
|
21
24
|
import { join } from 'path';
|
|
22
25
|
import { readdir, readFile } from 'fs/promises';
|
|
23
26
|
import { exec, execSync } from 'child_process';
|
|
@@ -549,6 +552,718 @@ export async function doctorRecover(run, failure = {}) {
|
|
|
549
552
|
return { proposal, avoidApproaches, escalation };
|
|
550
553
|
}
|
|
551
554
|
|
|
555
|
+
// ─── VERIFY System ────────────────────────────────────────────────────────────
|
|
556
|
+
|
|
557
|
+
// TTL constants (ms) — how long a verification result is considered valid.
const TTL_RUNTIME = 5 * 60 * 1000; // 5 minutes — env/key checks
const TTL_TOOL = 24 * 60 * 60 * 1000; // 24 hours — installed tool checks
const TTL_REGISTRY = 7 * 24 * 60 * 60 * 1000; // 7 days — registry freshness

// Registry freshness parameters: reference date of the registry and the age
// (in days) beyond which the 'model-registry-fresh' claim fails.
const REGISTRY_SNAPSHOT_DATE = '2026-05-15';
const REGISTRY_MAX_AGE_DAYS = 30;
const MS_PER_DAY = 86400000;

/**
 * whichVerifier(binary, label) — build a TTL_TOOL verifier that looks `binary`
 * up with `which`. `label` is the human-readable name used in the evidence text.
 */
function whichVerifier(binary, label) {
  const probe = `which ${binary}`;
  return {
    ttl: TTL_TOOL,
    fn: () => {
      try {
        execSync(probe, { stdio: 'pipe', timeout: 2000 });
        return { status: 'verified', evidence: `${label} found`, probe };
      } catch {
        // Non-zero exit, timeout, or a missing `which` all count as "not found".
        return { status: 'failed', evidence: `${label} not found`, probe };
      }
    },
  };
}

/**
 * Registry of assumption verifiers, keyed by claim identifier.
 * Each entry is { ttl, fn } where fn(cwd) returns { status, evidence, probe }.
 */
const VERIFIERS = {
  'claude-available': whichVerifier('claude', 'claude CLI'),
  'openai-key': { ttl: TTL_RUNTIME, fn: () => {
    // Presence check only — the key value is never read into the result.
    const has = !!process.env.OPENAI_API_KEY;
    return { status: has ? 'verified' : 'failed', evidence: has ? 'OPENAI_API_KEY present' : 'OPENAI_API_KEY missing', probe: 'env check' };
  }},
  'anthropic-key': { ttl: TTL_RUNTIME, fn: () => {
    const has = !!(process.env.ANTHROPIC_API_KEY || process.env.CLAUDE_API_KEY);
    return { status: has ? 'verified' : 'failed', evidence: has ? 'API key present' : 'API key missing', probe: 'env check' };
  }},
  'git-available': { ttl: TTL_TOOL, fn: () => {
    try {
      const version = execSync('git --version', { stdio: 'pipe', timeout: 2000 }).toString().trim();
      return { status: 'verified', evidence: version, probe: 'git --version' };
    } catch {
      return { status: 'failed', evidence: 'git not found', probe: 'git --version' };
    }
  }},
  'npm-auth': { ttl: TTL_RUNTIME, fn: () => {
    try {
      const who = execSync('npm whoami', { stdio: 'pipe', timeout: 5000 }).toString().trim();
      return { status: 'verified', evidence: `logged in as ${who}`, probe: 'npm whoami' };
    } catch {
      return { status: 'failed', evidence: 'npm auth failed', probe: 'npm whoami' };
    }
  }},
  'database-reachable': { ttl: TTL_RUNTIME, fn: () => {
    // Only checks that DATABASE_URL is set; no connection is attempted.
    const url = process.env.DATABASE_URL;
    if (!url) return { status: 'failed', evidence: 'DATABASE_URL not set', probe: 'env check' };
    return { status: 'verified', evidence: 'DATABASE_URL configured (not connection-tested)', probe: 'env check' };
  }},
  'codex-available': whichVerifier('codex', 'codex CLI'),
  'rg-available': whichVerifier('rg', 'ripgrep'),
  'living-docs-init': { ttl: TTL_RUNTIME, fn: (cwd) => {
    const exists = existsSync(join(cwd || process.cwd(), '.dual-brain'));
    return { status: exists ? 'verified' : 'failed', evidence: exists ? '.dual-brain/ exists' : '.dual-brain/ not initialized', probe: 'fs check' };
  }},
  'model-registry-fresh': { ttl: TTL_REGISTRY, fn: () => {
    const elapsedMs = Date.now() - new Date(REGISTRY_SNAPSHOT_DATE).getTime();
    // Clamp at 0 so a clock set before the snapshot date never reports a negative age.
    const age = Math.max(0, Math.floor(elapsedMs / MS_PER_DAY));
    return { status: age <= REGISTRY_MAX_AGE_DAYS ? 'verified' : 'failed', evidence: `Registry ${age} days old`, probe: 'registry age check' };
  }},
};
|
|
607
|
+
|
|
608
|
+
/**
 * verify(claim, cwd) — test a single assumption by claim identifier.
 *
 * @param {string} claim - Key of a verifier registered in VERIFIERS.
 * @param {string} [cwd] - Passed through unchanged to the verifier function.
 * @returns {{claim: string, status: string, evidence: string, checkedAt: string, expiresAt: string, probe: string}}
 *   Verification result. Never throws: an unregistered claim or a throwing
 *   verifier yields status 'unknown' with a TTL_RUNTIME expiry.
 */
export function verify(claim, cwd) {
  const checkedAt = new Date().toISOString();
  // Expiry is measured from the moment the probe finished, not from checkedAt.
  const expiryAfter = (ttl) => new Date(Date.now() + ttl).toISOString();

  // Own-property lookup only: inherited keys such as 'constructor' or
  // 'toString' are not verifiers and must be reported as unregistered.
  const verifier = Object.prototype.hasOwnProperty.call(VERIFIERS, claim) ? VERIFIERS[claim] : undefined;
  if (!verifier) {
    return { claim, status: 'unknown', evidence: `No verifier registered for "${claim}"`, checkedAt, expiresAt: expiryAfter(TTL_RUNTIME), probe: 'none' };
  }

  try {
    const result = verifier.fn(cwd);
    return { claim, status: result.status, evidence: result.evidence, checkedAt, expiresAt: expiryAfter(verifier.ttl), probe: result.probe };
  } catch (err) {
    // `err` may be any thrown value (including null), so guard the property read.
    const reason = (err && err.message) || String(err);
    return { claim, status: 'unknown', evidence: `Verifier threw: ${reason}`, checkedAt, expiresAt: expiryAfter(TTL_RUNTIME), probe: 'error' };
  }
}
|
|
628
|
+
|
|
629
|
+
/**
 * verifyAll(cwd) — run every registered verifier and, when `.dual-brain/`
 * exists under cwd, append the results to `.dual-brain/verifications.jsonl`
 * (one JSON object per line).
 *
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {Array<object>} One verification result per registered claim.
 */
export function verifyAll(cwd = process.cwd()) {
  const results = [];
  for (const claim of Object.keys(VERIFIERS)) {
    results.push(verify(claim, cwd));
  }

  // Append-only persistence; the directory is never created here.
  try {
    const storeDir = join(cwd, '.dual-brain');
    if (existsSync(storeDir)) {
      const payload = `${results.map(r => JSON.stringify(r)).join('\n')}\n`;
      appendFileSync(join(storeDir, 'verifications.jsonl'), payload, 'utf8');
    }
  } catch { /* storage failure is non-fatal */ }

  return results;
}
|
|
648
|
+
|
|
649
|
+
/**
 * getVerificationCache(cwd) — read .dual-brain/verifications.jsonl and return the
 * most recent non-expired result per claim. Expired entries are skipped.
 *
 * Lines that are not valid JSON, that do not parse to an object, or that lack
 * `claim`/`expiresAt` are ignored.
 *
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {Array<object>} Cached results in first-seen claim order; [] when the
 *   log is missing or unreadable.
 */
export function getVerificationCache(cwd = process.cwd()) {
  const logPath = join(cwd, '.dual-brain', 'verifications.jsonl');
  if (!existsSync(logPath)) return [];

  let lines;
  try { lines = readFileSync(logPath, 'utf8').split('\n'); }
  catch { return []; }

  // ISO-8601 UTC strings produced by toISOString() sort chronologically as text.
  const now = new Date().toISOString();
  // A Map (not a plain object) so a claim named '__proto__' is stored like any other key.
  const latest = new Map();

  for (const line of lines) {
    if (!line.trim()) continue;
    let entry;
    try { entry = JSON.parse(line); } catch { continue; }
    // JSON.parse can legitimately return null or a primitive — those are not results.
    if (entry === null || typeof entry !== 'object') continue;
    if (!entry.claim || !entry.expiresAt) continue;
    if (entry.expiresAt < now) continue; // expired — skip

    const current = latest.get(entry.claim);
    if (!current || entry.checkedAt > current.checkedAt) {
      latest.set(entry.claim, entry);
    }
  }

  return [...latest.values()];
}
|
|
677
|
+
|
|
678
|
+
/**
 * getStaleAssumptions(cwd) — list registered claims that need attention.
 *
 * A claim is reported when it has no valid (non-expired) cached result, or when
 * its cached result has status 'failed'. Claims cached as 'verified' or
 * 'unknown' are not reported.
 *
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {Array<object>} Stale entries in VERIFIERS registration order.
 */
export function getStaleAssumptions(cwd = process.cwd()) {
  const cacheByClaim = new Map();
  for (const record of getVerificationCache(cwd)) {
    cacheByClaim.set(record.claim, record);
  }

  return Object.keys(VERIFIERS).reduce((stale, claim) => {
    const hit = cacheByClaim.get(claim);
    if (!hit) {
      // Nothing usable in the cache for this claim.
      stale.push({ claim, reason: 'no-cache', status: 'unknown', evidence: 'Never verified or all results expired' });
    } else if (hit.status === 'failed') {
      stale.push({ claim, reason: 'failed', status: 'failed', evidence: hit.evidence, checkedAt: hit.checkedAt });
    }
    return stale;
  }, []);
}
|
|
700
|
+
|
|
701
|
+
/**
 * formatVerifications(results) — display string for a list of verification results.
 *
 * @param {Array<{claim: string, status: string, evidence: string}>} [results]
 *   Results to render; null/undefined is treated as an empty list.
 * @returns {string} Header line followed by one line per result, joined with '\n'.
 */
export function formatVerifications(results) {
  const iconFor = (status) => {
    switch (status) {
      case 'verified': return '✓';
      case 'failed': return '✗';
      default: return '⚠'; // 'unknown' and anything unrecognised
    }
  };

  const rows = (results || []).map(r => ` ${iconFor(r.status)} ${r.claim}: ${r.evidence}`);
  return ['SYSTEM VERIFICATION', ...rows].join('\n');
}
|
|
712
|
+
|
|
713
|
+
// ─── LEARN System ─────────────────────────────────────────────────────────────
|
|
714
|
+
|
|
715
|
+
// Model/task classification sets consulted by the routing heuristics.
const THINK_TIER_MODELS = new Set([
  'claude-opus-4-6',
  'o3',
  'gpt-5.5',
]);
const FAST_TIER_MODELS = new Set([
  'claude-haiku-4-5-20251001',
  'gpt-4o-mini',
]);
// Task types that count as code tasks.
const CODE_TASK_TYPES = new Set([
  'fix',
  'feature',
  'refactor',
  'implement',
  'test',
  'build',
  'edit',
]);
const REASONING_MODELS = new Set([
  'o3',
]);
|
|
719
|
+
|
|
720
|
+
/** learningsPath(cwd) — location of the learnings log: `<cwd>/.dual-brain/learnings.jsonl`. */
function learningsPath(cwd) {
  const segments = [cwd, '.dual-brain', 'learnings.jsonl'];
  return join(...segments);
}
|
|
723
|
+
|
|
724
|
+
/**
 * readLearnings(cwd) — load every record from the learnings log.
 *
 * Lines that are not valid JSON, or that parse to something other than a plain
 * object (null, numbers, strings, arrays), are skipped so callers can read
 * record properties without guarding.
 *
 * @param {string} cwd - Project directory.
 * @returns {Array<object>} Parsed records in file order; [] when the log is
 *   missing or unreadable.
 */
function readLearnings(cwd) {
  const logPath = learningsPath(cwd);
  if (!existsSync(logPath)) return [];

  let raw;
  try { raw = readFileSync(logPath, 'utf8'); } catch { return []; }

  const records = [];
  for (const line of raw.split('\n')) {
    if (!line.trim()) continue;
    try {
      const parsed = JSON.parse(line);
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        records.push(parsed);
      }
    } catch { /* malformed line — skip */ }
  }
  return records;
}
|
|
733
|
+
|
|
734
|
+
/**
 * deriveModelFit(taskResult) — classify how well the model suited the task.
 *
 * @param {{success: boolean, model: string, tier: string, taskType: string, duration: number, filesChanged: number}} taskResult
 * @returns {'wrong_type'|'underpowered'|'overkill'|'good'}
 */
function deriveModelFit(taskResult) {
  const { success, model, tier, taskType, duration, filesChanged } = taskResult;
  const thinkTier = THINK_TIER_MODELS.has(model);
  const fastTier = FAST_TIER_MODELS.has(model);
  const reasoning = REASONING_MODELS.has(model);
  const codeTask = CODE_TASK_TYPES.has(taskType);
  const searchTier = tier === 'search';

  // Type mismatches are decided before success/failure is considered.
  if (reasoning && codeTask) return 'wrong_type';
  if (thinkTier && searchTier && !codeTask && !reasoning) return 'wrong_type';

  if (!success) {
    return fastTier && !searchTier ? 'underpowered' : 'good';
  }

  // Successful run: a think-tier model on search work or on a small, quick change is overkill.
  const smallQuickChange = filesChanged <= 1 && duration < 30000;
  if (thinkTier && (searchTier || smallQuickChange)) return 'overkill';
  if (fastTier && filesChanged > 3) return 'underpowered';

  return 'good';
}
|
|
754
|
+
|
|
755
|
+
/**
 * deriveRoutingAccuracy(taskResult) — judge the routing decision after the fact.
 *
 * Only `success` and `modelFit` influence the verdict; other fields callers
 * pass (tier, duration, model) are accepted but not needed.
 *
 * @param {{success: boolean, modelFit: string}} taskResult
 * @returns {'correct'|'should_have_escalated'|'should_have_simplified'}
 *   - any failed task                → 'should_have_escalated'
 *   - success with modelFit overkill → 'should_have_simplified'
 *   - every other success            → 'correct'
 */
function deriveRoutingAccuracy(taskResult) {
  const { success, modelFit } = taskResult;

  if (!success) return 'should_have_escalated';
  if (modelFit === 'overkill') return 'should_have_simplified';
  return 'correct';
}
|
|
767
|
+
|
|
768
|
+
/**
 * recordLearning(taskResult, cwd) — build a learning record for a finished task
 * and append it to the learnings log when `.dual-brain/` exists.
 *
 * Missing fields of `taskResult` fall back to neutral defaults; the prompt is
 * truncated to 200 characters. `modelFit` and `routingAccuracy` are derived.
 *
 * @param {object} taskResult - Task outcome fields (taskType, prompt, model, provider, tier, …).
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {object|null} The record (returned even when nothing was written),
 *   or null if anything threw.
 */
export function recordLearning(taskResult, cwd = process.cwd()) {
  try {
    const {
      taskType = 'unknown',
      prompt = '',
      model = '',
      provider = '',
      tier = '',
      reasoningDepth = 'low',
      wasEnriched = false,
      wasDualBrain = false,
      success = false,
      duration = 0,
      filesChanged = 0,
    } = taskResult;

    const modelFit = deriveModelFit({ success, model, tier, taskType, duration, filesChanged });
    const id = `learn_${Date.now()}`;
    const timestamp = new Date().toISOString();
    const routingAccuracy = deriveRoutingAccuracy({ success, modelFit, tier, duration, model });

    // Key order is the on-disk field order of each JSONL line.
    const record = {
      id,
      timestamp,
      taskType,
      prompt: String(prompt).slice(0, 200),
      model,
      provider,
      tier,
      reasoningDepth,
      wasEnriched,
      wasDualBrain,
      success,
      duration,
      filesChanged,
      modelFit,
      routingAccuracy,
    };

    const logPath = learningsPath(cwd);
    // Persist only into an already-initialised .dual-brain directory.
    if (existsSync(join(cwd, '.dual-brain'))) {
      appendFileSync(logPath, `${JSON.stringify(record)}\n`, 'utf8');
    }
    return record;
  } catch {
    return null;
  }
}
|
|
812
|
+
|
|
813
|
+
/**
 * getModelSuccessRates(cwd, days) — per-model success statistics over a recent window.
 *
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @param {number} [days=7] - Window size; records with an older timestamp are ignored.
 * @returns {Object<string, {total: number, success: number, rate: number, avgDuration: number, bestFor: string[]}>}
 *   Keyed by model. `rate` is rounded to 2 decimals, `avgDuration` to an integer,
 *   and `bestFor` lists the distinct tiers of the model's three most frequent
 *   tier:taskType combinations.
 */
export function getModelSuccessRates(cwd = process.cwd(), days = 7) {
  const cutoff = new Date(Date.now() - days * 86400000).toISOString();
  // `l &&` guards against non-object entries in the log.
  const learnings = readLearnings(cwd).filter(l => l && l.timestamp >= cutoff);

  const stats = {};
  for (const l of learnings) {
    if (!l.model) continue;
    if (!stats[l.model]) stats[l.model] = { total: 0, success: 0, totalDuration: 0, tierCounts: {} };
    const s = stats[l.model];
    s.total += 1;
    if (l.success) s.success += 1;
    s.totalDuration += (l.duration || 0);
    const tierKey = `${l.tier || 'unknown'}:${l.taskType || 'unknown'}`;
    s.tierCounts[tierKey] = (s.tierCounts[tierKey] || 0) + 1;
  }

  const result = {};
  for (const [model, s] of Object.entries(stats)) {
    // Top three combinations by count; only the tier part (before the first ':') is reported.
    const topTiers = Object.entries(s.tierCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 3)
      .map(([key]) => key.split(':')[0]);
    result[model] = {
      total: s.total,
      success: s.success,
      rate: s.total > 0 ? Math.round((s.success / s.total) * 100) / 100 : 0,
      avgDuration: s.total > 0 ? Math.round(s.totalDuration / s.total) : 0,
      bestFor: [...new Set(topTiers)],
    };
  }
  return result;
}
|
|
844
|
+
|
|
845
|
+
/**
 * getRoutingInsights(cwd) — derive human-readable routing insights from the learnings log.
 *
 * Produces, when at least MIN_POINTS records support it:
 *  - per (model, taskType): "reliable", "overkill" and/or "struggles" insights;
 *  - an enrichment insight when enriched vs raw success differs by ≥ 10 points;
 *  - a dual-brain insight when dual-brain success leads single-brain by ≥ 10 points.
 *
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {Array<{insight: string, confidence: number, evidence: string}>}
 */
export function getRoutingInsights(cwd = process.cwd()) {
  // Guard against non-object entries in the log.
  const learnings = readLearnings(cwd).filter(l => l !== null && typeof l === 'object');
  if (learnings.length === 0) return [];

  const insights = [];
  const MIN_POINTS = 5;
  const successRateOf = (records) => records.filter(l => l.success).length / records.length;

  // Group by (model, taskType). Both values are stored in the bucket rather than
  // recovered by splitting a "model:taskType" string, so model ids that contain
  // ':' are attributed correctly.
  const buckets = new Map();
  for (const l of learnings) {
    const key = JSON.stringify([l.model, l.taskType]);
    let bucket = buckets.get(key);
    if (!bucket) {
      bucket = { model: l.model, taskType: l.taskType, success: 0, total: 0, overkill: 0, underpowered: 0 };
      buckets.set(key, bucket);
    }
    bucket.total += 1;
    if (l.success) bucket.success += 1;
    if (l.modelFit === 'overkill') bucket.overkill += 1;
    if (l.modelFit === 'underpowered') bucket.underpowered += 1;
  }

  for (const s of buckets.values()) {
    if (s.total < MIN_POINTS) continue;
    const { model, taskType } = s;
    const rate = s.success / s.total;
    const overkillRate = s.overkill / s.total;
    const underpoweredRate = s.underpowered / s.total;

    if (rate >= 0.9 && overkillRate < 0.1) {
      insights.push({
        insight: `${model} succeeds ${Math.round(rate * 100)}% on ${taskType} tasks — reliable for this work`,
        confidence: Math.min(0.95, 0.6 + s.total * 0.01),
        evidence: `${s.success}/${s.total} tasks`,
      });
    }

    if (overkillRate > 0.3 && rate >= 0.85) {
      insights.push({
        insight: `${model} is overkill for ${taskType} — a cheaper model likely sufficient`,
        confidence: Math.min(0.9, 0.5 + s.total * 0.01),
        evidence: `${s.overkill}/${s.total} tasks flagged overkill`,
      });
    }

    if (underpoweredRate > 0.3 || rate < 0.7) {
      insights.push({
        insight: `${model} struggles on ${taskType} (${Math.round(rate * 100)}% success) — consider escalating`,
        confidence: Math.min(0.9, 0.5 + s.total * 0.01),
        evidence: `${s.success}/${s.total} tasks`,
      });
    }
  }

  const enriched = learnings.filter(l => l.wasEnriched);
  const notEnriched = learnings.filter(l => !l.wasEnriched);
  if (enriched.length >= MIN_POINTS && notEnriched.length >= MIN_POINTS) {
    const delta = Math.round((successRateOf(enriched) - successRateOf(notEnriched)) * 100);
    // Reported in both directions: enrichment helping or hurting by ≥ 10 points.
    if (Math.abs(delta) >= 10) {
      insights.push({
        insight: delta > 0
          ? `Prompt enrichment improved success rate by ${delta}%`
          : `Prompt enrichment had no benefit — success rate ${Math.abs(delta)}% lower`,
        confidence: Math.min(0.9, 0.5 + Math.min(enriched.length, notEnriched.length) * 0.01),
        evidence: `${enriched.length} enriched vs ${notEnriched.length} raw`,
      });
    }
  }

  const dualBrain = learnings.filter(l => l.wasDualBrain);
  const singleBrain = learnings.filter(l => !l.wasDualBrain);
  if (dualBrain.length >= MIN_POINTS && singleBrain.length >= MIN_POINTS) {
    const delta = Math.round((successRateOf(dualBrain) - successRateOf(singleBrain)) * 100);
    // Only a positive lead is reported for dual-brain.
    if (delta >= 10) {
      insights.push({
        insight: `Dual-brain review improves success rate by ${delta}% over single-brain`,
        confidence: Math.min(0.85, 0.5 + Math.min(dualBrain.length, singleBrain.length) * 0.015),
        evidence: `${dualBrain.length} dual vs ${singleBrain.length} single`,
      });
    }
  }

  return insights;
}
|
|
928
|
+
|
|
929
|
+
/**
 * suggestRoutingAdjustment(taskType, currentModel, cwd) — recommend keeping,
 * simplifying or escalating the model used for a task type, based on past
 * records for exactly that (taskType, model) pair.
 *
 * @param {string} taskType
 * @param {string} currentModel
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {{suggestion: 'keep'|'simplify'|'escalate', reason: string, confidence: number, evidenceCount: number, suggestedModel: string|null}}
 *   With fewer than 5 matching records the answer is 'keep' with confidence 0.
 */
export function suggestRoutingAdjustment(taskType, currentModel, cwd = process.cwd()) {
  const history = readLearnings(cwd).filter(
    l => l.taskType === taskType && l.model === currentModel
  );
  const total = history.length;
  const respond = (suggestion, reason, confidence, suggestedModel) => ({
    suggestion,
    reason,
    confidence,
    evidenceCount: total,
    suggestedModel,
  });

  if (total < 5) return respond('keep', 'insufficient data', 0, null);

  const successRate = history.filter(l => l.success).length / total;
  const overkillRate = history.filter(l => l.modelFit === 'overkill').length / total;
  const successPct = Math.round(successRate * 100);
  const confidence = Math.min(0.9, 0.5 + total * 0.01);
  const isThinkModel = THINK_TIER_MODELS.has(currentModel);
  // Evaluated lazily: only branches that actually pick a replacement inspect the name.
  const isClaude = () => currentModel.startsWith('claude');

  if (successRate > 0.9 && overkillRate > 0.3) {
    let suggestedModel = null;
    if (isThinkModel) {
      suggestedModel = isClaude() ? 'claude-sonnet-4-6' : 'gpt-4o';
    } else if (!FAST_TIER_MODELS.has(currentModel)) {
      suggestedModel = isClaude() ? 'claude-haiku-4-5-20251001' : 'gpt-4o-mini';
    }
    // A fast-tier model has no cheaper replacement, so suggestedModel stays null.
    return respond(
      'simplify',
      `${successPct}% success rate with ${Math.round(overkillRate * 100)}% overkill signal`,
      confidence,
      suggestedModel
    );
  }

  if (successRate < 0.7) {
    let suggestedModel = null;
    if (!isThinkModel) {
      suggestedModel = isClaude() ? 'claude-opus-4-6' : 'o3';
    }
    return respond(
      'escalate',
      `${successPct}% success rate on ${taskType} — below acceptable threshold`,
      confidence,
      suggestedModel
    );
  }

  return respond('keep', `${successPct}% success rate — routing is appropriate`, confidence, null);
}
|
|
985
|
+
|
|
986
|
+
/**
 * formatLearnings(insights, cwd) — display string summarising routing intelligence.
 *
 * The header counts every record in the learnings log. Model lines come from
 * getModelSuccessRates(cwd) and are shown only for models with at least 3
 * records in that result. Each insight is then listed with a warning or
 * light-bulb icon.
 *
 * @param {Array<{insight: string}>} [insights] - Insights to list; null/undefined means none.
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {string} Lines joined with '\n'.
 */
export function formatLearnings(insights, cwd = process.cwd()) {
  const total = readLearnings(cwd).length;
  const rates = getModelSuccessRates(cwd);

  const lines = [`ROUTING INTELLIGENCE (${total} task${total === 1 ? '' : 's'} analyzed)`];

  for (const [model, s] of Object.entries(rates)) {
    if (s.total < 3) continue;
    const pct = Math.round(s.rate * 100);
    const tasks = s.bestFor.join('/') || 'various';
    const icon = pct >= 85 ? '📈' : pct >= 70 ? '📊' : '⚠️ ';
    lines.push(` ${icon} ${model}: ${pct}% success on ${tasks} tasks (${s.total} tasks)`);
  }

  // Insight text containing any of these markers is shown as a warning.
  const warningMarkers = ['struggle', 'below', 'no benefit'];
  for (const ins of (insights || [])) {
    const text = ins.insight.toLowerCase();
    const isWarning = warningMarkers.some(marker => text.includes(marker));
    const icon = isWarning ? '⚠️ ' : '💡';
    lines.push(` ${icon} ${ins.insight}`);
  }

  return lines.join('\n');
}
|
|
1010
|
+
|
|
1011
|
+
/**
 * getLearningStats(cwd) — summary statistics over the whole learnings log.
 *
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {{totalLearnings: number, oldestEntry: string|null, newestEntry: string|null, modelsTracked: number, avgSuccessRate: number}}
 *   `oldestEntry`/`newestEntry` are the smallest/largest timestamp strings found,
 *   or null when no record carries a timestamp. `avgSuccessRate` is rounded to
 *   2 decimals.
 */
export function getLearningStats(cwd = process.cwd()) {
  // Guard against non-object entries in the log.
  const learnings = readLearnings(cwd).filter(l => l !== null && typeof l === 'object');
  if (learnings.length === 0) {
    return { totalLearnings: 0, oldestEntry: null, newestEntry: null, modelsTracked: 0, avgSuccessRate: 0 };
  }

  // Records without a usable timestamp are left out of the oldest/newest
  // calculation so the result is always a string or null, never undefined.
  const timestamps = learnings
    .map(l => l.timestamp)
    .filter(t => typeof t === 'string' && t !== '')
    .sort();
  const models = new Set(learnings.map(l => l.model).filter(Boolean));
  const successCount = learnings.filter(l => l.success).length;

  return {
    totalLearnings: learnings.length,
    oldestEntry: timestamps.length > 0 ? timestamps[0] : null,
    newestEntry: timestamps.length > 0 ? timestamps[timestamps.length - 1] : null,
    modelsTracked: models.size,
    avgSuccessRate: Math.round((successCount / learnings.length) * 100) / 100,
  };
}
|
|
1027
|
+
|
|
1028
|
+
// ─── DISCOVER System ─────────────────────────────────────────────────────────
|
|
1029
|
+
|
|
1030
|
+
// CLI binaries probed on PATH.
const KNOWN_TOOLS = [
  'git', 'node', 'npm', 'codex', 'claude', 'rg', 'gh', 'replit',
  'docker', 'python', 'python3', 'pip', 'cargo', 'go', 'java', 'ruby',
  'deno', 'bun', 'pnpm', 'yarn',
];

// Tools in the standard awareness set; these are not reported as discoveries.
const STANDARD_AWARENESS = new Set([
  'git', 'node', 'npm', 'codex', 'claude', 'rg', 'gh', 'replit',
]);

// Environment variable name → service it indicates. Declaration order matters:
// when several variables map to the same service, the first one present wins.
const SERVICE_PATTERNS = {
  REDIS_URL: 'Redis',
  MONGODB_URI: 'MongoDB',
  MONGO_URL: 'MongoDB',
  ELASTICSEARCH_URL: 'Elasticsearch',
  RABBITMQ_URL: 'RabbitMQ',
  S3_BUCKET: 'S3 Storage',
  AWS_ACCESS_KEY_ID: 'AWS',
  GCP_PROJECT: 'Google Cloud',
  STRIPE_SECRET_KEY: 'Stripe',
  SENDGRID_API_KEY: 'SendGrid',
  TWILIO_ACCOUNT_SID: 'Twilio',
  SENTRY_DSN: 'Sentry',
  DATADOG_API_KEY: 'Datadog',
  SUPABASE_URL: 'Supabase',
  FIREBASE_PROJECT_ID: 'Firebase',
  NEON_DATABASE_URL: 'Neon DB',
};

// Dependency names looked up in package.json to detect frameworks.
const KNOWN_FRAMEWORKS = [
  'express', 'next', 'react', 'vue', 'fastify',
  'prisma', 'drizzle', 'nestjs', 'koa', 'hapi',
  'svelte', 'nuxt', 'remix', 'astro', 'trpc',
];
|
|
1053
|
+
|
|
1054
|
+
/**
 * safeExecSyncDiscover(cmd) — run a shell command with a 2 s timeout.
 *
 * @param {string} cmd - Command line passed to execSync.
 * @returns {string|null} Trimmed stdout, or null if the command fails for any
 *   reason (non-zero exit, timeout, spawn error).
 */
function safeExecSyncDiscover(cmd) {
  const options = {
    timeout: 2000,
    encoding: 'utf8',
    stdio: ['pipe', 'pipe', 'pipe'], // capture stderr instead of inheriting it
  };
  try {
    const stdout = execSync(cmd, options);
    return stdout.trim();
  } catch {
    return null;
  }
}
|
|
1059
|
+
|
|
1060
|
+
/**
 * discoverCLITools() — find known CLI tools on PATH that are outside the
 * standard awareness set.
 *
 * @returns {Array<{type: 'tool', name: string, detail: string, source: string}>}
 */
function discoverCLITools() {
  const found = [];
  for (const tool of KNOWN_TOOLS) {
    // Standard tools are never reported, so skip them before spawning `which`.
    if (STANDARD_AWARENESS.has(tool)) continue;
    const toolPath = safeExecSyncDiscover(`which ${tool}`);
    if (toolPath) {
      found.push({ type: 'tool', name: tool, detail: `${tool} CLI available at ${toolPath}`, source: 'PATH scan' });
    }
  }
  return found;
}
|
|
1070
|
+
|
|
1071
|
+
/**
 * discoverMCPTools(cwd) — scan for MCP servers across known config locations.
 *
 * Locations, in priority order (the first definition of a server name wins):
 *   1. $HOME/.claude/claude_desktop_config.json ('/root' when HOME is unset)
 *   2. <cwd>/.claude/settings.json
 *   3. <cwd>/.claude/settings.local.json
 * Servers are read from `mcpServers`, falling back to `mcp.servers`.
 * Missing, unparsable or non-object config files are skipped; a server entry
 * that is not an object is reported with command null and no args.
 *
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {Array<{name: string, command: string|null, args: Array}>}
 */
export function discoverMCPTools(cwd = process.cwd()) {
  const locations = [
    join(process.env.HOME || '/root', '.claude', 'claude_desktop_config.json'),
    join(cwd, '.claude', 'settings.json'),
    join(cwd, '.claude', 'settings.local.json'),
  ];
  const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

  const servers = [];
  const seen = new Set();
  for (const loc of locations) {
    if (!existsSync(loc)) continue;
    let cfg;
    try { cfg = JSON.parse(readFileSync(loc, 'utf8')); } catch { continue; }
    // Valid JSON is not necessarily an object (e.g. a file containing `null`).
    if (!isRecord(cfg)) continue;

    const mcpServers = cfg.mcpServers || (cfg.mcp && cfg.mcp.servers) || {};
    if (!isRecord(mcpServers)) continue;

    for (const [name, conf] of Object.entries(mcpServers)) {
      if (seen.has(name)) continue;
      seen.add(name);
      const entry = isRecord(conf) ? conf : {};
      servers.push({
        name,
        command: entry.command || null,
        // Normalise to an array so callers can spread/join it safely.
        args: Array.isArray(entry.args) ? entry.args : [],
      });
    }
  }
  return servers;
}
|
|
1096
|
+
|
|
1097
|
+
/**
 * discoverMCPCapabilities(cwd) — express each configured MCP server as a
 * capability record whose detail shows the server's command line.
 *
 * @param {string} cwd - Project directory.
 * @returns {Array<{type: 'mcp', name: string, detail: string, source: string}>}
 */
function discoverMCPCapabilities(cwd) {
  const capabilities = [];
  for (const server of discoverMCPTools(cwd)) {
    const commandLine = [server.command, ...(server.args || [])].filter(Boolean).join(' ');
    capabilities.push({
      type: 'mcp',
      name: server.name,
      // trim() removes the trailing space left when there is no command line.
      detail: `MCP server: ${commandLine}`.trim(),
      source: 'MCP config scan',
    });
  }
  return capabilities;
}
|
|
1106
|
+
|
|
1107
|
+
/**
 * discoverEnvServices() — infer configured services from environment variable
 * names listed in SERVICE_PATTERNS. Each service is reported at most once, for
 * the first matching variable.
 *
 * @returns {Array<{type: 'env', name: string, detail: string, source: string}>}
 */
function discoverEnvServices() {
  const reported = new Set();
  const found = [];

  Object.keys(SERVICE_PATTERNS).forEach((envKey) => {
    const service = SERVICE_PATTERNS[envKey];
    if (process.env[envKey] === undefined || reported.has(service)) return;
    reported.add(service);
    // Report presence only — NEVER expose values
    found.push({ type: 'env', name: service, detail: `${service} configured via ${envKey}`, source: 'env scan' });
  });

  return found;
}
|
|
1119
|
+
|
|
1120
|
+
/**
 * discoverProjectTools(cwd) — read <cwd>/package.json and report its npm
 * scripts and any known frameworks among its dependencies/devDependencies.
 *
 * @param {string} cwd - Project directory.
 * @returns {Array<{type: 'cli'|'config', name: string, detail: string, source: string}>}
 *   [] when package.json is missing, unreadable, unparsable or not a JSON object.
 */
function discoverProjectTools(cwd) {
  const pkgPath = join(cwd, 'package.json');
  if (!existsSync(pkgPath)) return [];
  let pkg;
  try { pkg = JSON.parse(readFileSync(pkgPath, 'utf8')); } catch { return []; }

  const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
  // Valid JSON that is not an object (null, a string, an array) has nothing to report.
  if (!isRecord(pkg)) return [];

  const found = [];
  const scripts = isRecord(pkg.scripts) ? pkg.scripts : {};
  for (const name of Object.keys(scripts)) {
    found.push({ type: 'cli', name: `npm run ${name}`, detail: `Project script: ${name}`, source: 'package.json scripts' });
  }

  // devDependencies override dependencies when both declare the same package.
  const allDeps = { ...(pkg.dependencies || {}), ...(pkg.devDependencies || {}) };
  for (const fw of KNOWN_FRAMEWORKS) {
    if (allDeps[fw]) {
      found.push({ type: 'config', name: fw, detail: `${fw} framework detected (${allDeps[fw]})`, source: 'package.json deps' });
    }
  }
  return found;
}
|
|
1138
|
+
|
|
1139
|
+
/**
 * discoverReplitFeatures(cwd) — inspect the <cwd>/.replit config file.
 *
 * Reports, in this order: a `[deployment]` table, an `[auth]` table, every
 * entry of a top-level `modules = [...]` list, and the `channel` value of the
 * `[nix]` table.
 *
 * The nix channel is read from the file's own `[nix]` table: `.replit` is read
 * as a regular file, so a nested path such as `.replit/nix/channel` can never
 * exist alongside it.
 *
 * @param {string} cwd - Project directory.
 * @returns {Array<{type: 'service'|'config', name: string, detail: string, source: string}>}
 *   [] when `.replit` is missing or unreadable.
 */
function discoverReplitFeatures(cwd) {
  const replitPath = join(cwd, '.replit');
  if (!existsSync(replitPath)) return [];
  let content;
  try { content = readFileSync(replitPath, 'utf8'); } catch { return []; }

  const found = [];
  if (/\[deployment\]/i.test(content)) {
    found.push({ type: 'service', name: 'replit-deployment', detail: 'Replit deployment config present', source: '.replit' });
  }
  if (/\[auth\]/i.test(content)) {
    found.push({ type: 'service', name: 'replit-auth', detail: 'Replit auth config present', source: '.replit' });
  }

  const moduleMatch = content.match(/^modules\s*=\s*\[([^\]]+)\]/m);
  if (moduleMatch) {
    const modules = moduleMatch[1].split(',').map(m => m.trim().replace(/['"]/g, '')).filter(Boolean);
    for (const mod of modules) {
      found.push({ type: 'config', name: `replit-module:${mod}`, detail: `Replit module: ${mod}`, source: '.replit' });
    }
  }

  // Minimal line scan: track the current table header and pick up the first
  // `channel = "..."` assignment that appears inside `[nix]`.
  let inNixTable = false;
  for (const rawLine of content.split('\n')) {
    const line = rawLine.trim();
    const header = line.match(/^\[\[?([^\]]*)\]\]?\s*(?:#.*)?$/);
    if (header) {
      inNixTable = header[1].trim() === 'nix';
      continue;
    }
    if (!inNixTable) continue;
    const channel = line.match(/^channel\s*=\s*["']([^"']*)["']/);
    if (channel) {
      found.push({ type: 'config', name: 'nix', detail: `Nix channel: ${channel[1]}`, source: '.replit' });
      break;
    }
  }

  return found;
}
|
|
1168
|
+
|
|
1169
|
+
/**
 * loadLastDiscovery(cwd) — parse the final line of .dual-brain/discoveries.jsonl.
 *
 * @param {string} cwd - Project directory.
 * @returns {*} The parsed last entry, or null when the log is missing, empty,
 *   unreadable, or its last line is not valid JSON.
 */
function loadLastDiscovery(cwd) {
  const logPath = join(cwd, '.dual-brain', 'discoveries.jsonl');
  if (!existsSync(logPath)) return null;

  try {
    const entries = readFileSync(logPath, 'utf8')
      .trim()
      .split('\n')
      .filter(line => line !== '');
    const lastLine = entries[entries.length - 1];
    return lastLine === undefined ? null : JSON.parse(lastLine);
  } catch {
    return null;
  }
}
|
|
1178
|
+
|
|
1179
|
+
/**
 * appendDiscoveryLog(cwd, entry) — append one JSON line to .dual-brain/discoveries.jsonl.
 *
 * Best-effort: any failure (directory creation, serialization, write) is
 * swallowed so that logging never breaks the caller.
 *
 * @param {string} cwd - Directory in which the `.dual-brain` folder lives.
 * @param {*} entry - JSON-serializable value written as a single line.
 * @returns {void}
 */
function appendDiscoveryLog(cwd, entry) {
  const dir = join(cwd, '.dual-brain');
  try {
    if (!existsSync(dir)) {
      // The path is interpolated into a shell command. Inside double quotes a
      // POSIX shell still expands `$`, backticks and `"`, so a directory name
      // containing them would break the command or run arbitrary code. Single
      // quotes are fully literal; an embedded `'` is written as `'\''`.
      // `--` stops option parsing for paths that begin with `-`.
      // (fs.mkdirSync with { recursive: true } would avoid the shell entirely,
      // but it is not among this module's fs imports.)
      const command = process.platform === 'win32'
        ? `mkdir -p "${dir}"`
        : `mkdir -p -- '${dir.replace(/'/g, "'\\''")}'`;
      execSync(command, { timeout: 2000 });
    }
    appendFileSync(join(dir, 'discoveries.jsonl'), JSON.stringify(entry) + '\n', 'utf8');
  } catch { /* graceful degradation */ }
}
|
|
1186
|
+
|
|
1187
|
+
/**
 * discover(cwd) — scan for capabilities not in the standard awareness set.
 *
 * Runs every discovery probe, compares what was found against the capabilities
 * that any earlier scan recorded as new in .dual-brain/discoveries.jsonl, and
 * appends the result to that log via appendDiscoveryLog().
 *
 * A capability is identified by `${type}:${name}`. Comparing against the whole
 * log (not just its last entry) matters: a scan that finds nothing new logs an
 * empty `newCapabilities`, and comparing only against that entry would report
 * every capability as new again on the following scan.
 *
 * @param {string} [cwd=process.cwd()] - Project directory to scan.
 * @returns {{ discoveredAt: string, newCapabilities: object[], knownCapabilities: number, totalFound: number }}
 *   `knownCapabilities` is the number of capabilities found by this scan that
 *   were already tracked (totalFound minus newCapabilities.length).
 */
export function discover(cwd = process.cwd()) {
  const discoveredAt = new Date().toISOString();
  const allFound = [];

  // Each probe is independent — failures don't stop others
  try { allFound.push(...discoverCLITools()); } catch { /* ignore */ }
  try { allFound.push(...discoverMCPCapabilities(cwd)); } catch { /* ignore */ }
  try { allFound.push(...discoverEnvServices()); } catch { /* ignore */ }
  try { allFound.push(...discoverProjectTools(cwd)); } catch { /* ignore */ }
  try { allFound.push(...discoverReplitFeatures(cwd)); } catch { /* ignore */ }

  const seenKeys = loadSeenCapabilityKeys(cwd);
  const newCapabilities = allFound.filter(c => !seenKeys.has(`${c.type}:${c.name}`));

  const result = {
    discoveredAt,
    newCapabilities,
    knownCapabilities: allFound.length - newCapabilities.length,
    totalFound: allFound.length,
  };

  appendDiscoveryLog(cwd, result);
  return result;
}

/**
 * loadSeenCapabilityKeys(cwd) — collect `${type}:${name}` for every capability
 * that any entry of .dual-brain/discoveries.jsonl lists under `newCapabilities`.
 *
 * Unparseable lines are skipped; a missing or unreadable log yields an empty set.
 *
 * @param {string} cwd - Directory that contains the `.dual-brain` folder.
 * @returns {Set<string>} Keys of all capabilities tracked so far.
 */
function loadSeenCapabilityKeys(cwd) {
  const seen = new Set();
  const logPath = join(cwd, '.dual-brain', 'discoveries.jsonl');
  if (!existsSync(logPath)) return seen;

  let text;
  try { text = readFileSync(logPath, 'utf8'); } catch { return seen; }

  for (const line of text.split('\n')) {
    if (!line.trim()) continue;
    let entry;
    try { entry = JSON.parse(line); } catch { continue; }
    const caps = entry && Array.isArray(entry.newCapabilities) ? entry.newCapabilities : [];
    for (const cap of caps) {
      if (cap) seen.add(`${cap.type}:${cap.name}`);
    }
  }
  return seen;
}
|
|
1218
|
+
|
|
1219
|
+
/**
 * getDiscoveryLog(cwd, limit) — read recent discovery entries from .dual-brain/discoveries.jsonl.
 *
 * The last `limit` non-empty lines of the log are parsed; lines that are not
 * valid JSON (or that parse to a falsy value) are dropped, so fewer than
 * `limit` entries may be returned.
 *
 * @param {string} [cwd=process.cwd()] - Directory that contains the `.dual-brain` folder.
 * @param {number} [limit=20] - Maximum number of trailing log lines to read.
 *   A numeric limit of 0 or less yields an empty array.
 * @returns {Array<*>} Parsed entries, oldest first; [] when the log is missing or unreadable.
 */
export function getDiscoveryLog(cwd = process.cwd(), limit = 20) {
  // slice(-0) is slice(0), which would return the WHOLE log for limit 0, and a
  // negative limit would drop the oldest lines instead of limiting the result.
  if (typeof limit === 'number' && limit <= 0) return [];

  const logPath = join(cwd, '.dual-brain', 'discoveries.jsonl');
  if (!existsSync(logPath)) return [];
  try {
    const lines = readFileSync(logPath, 'utf8').trim().split('\n').filter(Boolean);
    const entries = [];
    for (const line of lines.slice(-limit)) {
      try {
        const entry = JSON.parse(line);
        if (entry) entries.push(entry);
      } catch { /* skip corrupt line */ }
    }
    return entries;
  } catch { return []; }
}
|
|
1230
|
+
|
|
1231
|
+
/**
 * getNewSinceLastScan(cwd) — run discover(), return only capabilities not seen in previous scan.
 *
 * @param {string} [cwd=process.cwd()] - Project directory to scan.
 * @returns {object} The discover() result with `newCapabilities` narrowed to
 *   entries whose `${type}:${name}` was not listed as new by the previous log entry.
 */
export function getNewSinceLastScan(cwd = process.cwd()) {
  // Capture the previous entry before scanning: discover() appends a new one.
  const previous = loadLastDiscovery(cwd);
  const previousKeys = new Set();
  if (previous) {
    for (const cap of previous.newCapabilities || []) {
      previousKeys.add(`${cap.type}:${cap.name}`);
    }
  }

  const scan = discover(cwd);

  const unseen = [];
  for (const cap of scan.newCapabilities) {
    if (!previousKeys.has(`${cap.type}:${cap.name}`)) {
      unseen.push(cap);
    }
  }

  return { ...scan, newCapabilities: unseen };
}
|
|
1242
|
+
|
|
1243
|
+
/**
 * formatDiscovery(result) — format discovery result as a human-readable string.
 *
 * Output is a header line with the totals, one line per new capability, a
 * summary line for capabilities that were found but are not new, and a
 * placeholder line when nothing was found at all.
 *
 * @param {{ newCapabilities?: Array<{type: string, name: string, detail: string}>, totalFound?: number }} result
 *   A discovery result; missing fields (or a missing result) are treated as empty.
 * @returns {string} Newline-joined report.
 */
export function formatDiscovery(result) {
  // Tolerate a null/undefined result instead of throwing on destructuring.
  const { newCapabilities = [], totalFound = 0 } = result || {};
  const newCount = newCapabilities.length;
  const lines = [`CAPABILITY DISCOVERY (${totalFound} found, ${newCount} new)`];

  for (const cap of newCapabilities) {
    lines.push(` 🆕 ${cap.type}: ${cap.name} — ${cap.detail}`);
  }

  const alreadyKnown = totalFound - newCount;
  if (alreadyKnown > 0) {
    // Pick the whole word: appending "ies" to "capability" printed "capabilityies".
    const noun = alreadyKnown === 1 ? 'capability' : 'capabilities';
    lines.push(` ── ${alreadyKnown} known ${noun} (already tracked)`);
  }

  if (newCount === 0 && totalFound === 0) {
    lines.push(' (no capabilities detected)');
  }

  return lines.join('\n');
}
|
|
1266
|
+
|
|
552
1267
|
// ─── Health Baseline Comparison ───────────────────────────────────────────────
|
|
553
1268
|
export async function compareHealth(cwd = process.cwd()) {
|
|
554
1269
|
const bpath = join(cwd, '.dualbrain', 'health-baseline.json');
|