dual-brain 7.1.21 → 7.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/dispatch.mjs CHANGED
@@ -14,6 +14,7 @@ import { fileURLToPath } from 'node:url';
14
14
  import { createHash } from 'node:crypto';
15
15
  import { markHot, markDegraded, markHealthy, recordDispatch } from './health.mjs';
16
16
  import { redact } from './redact.mjs';
17
+ import { getFailoverOrder } from './decide.mjs';
17
18
 
18
19
  const __dirname = dirname(fileURLToPath(import.meta.url));
19
20
  const USAGE_DIR = join(__dirname, '..', '.dualbrain', 'usage');
@@ -93,6 +94,44 @@ function medianDuration(provider, model) {
93
94
  // Rate-limit error keywords
94
95
  const RATE_LIMIT_PATTERNS = /rate.?limit|quota|capacity|too many requests|overloaded|throttl/i;
95
96
 
97
+ // ─── Auto-heal failover helpers ───────────────────────────────────────────────
98
+
99
+ const FAILOVER_LOG_DIR = join(__dirname, '..', '.dualbrain', 'audit');
100
+
101
/**
 * Output patterns that mark a failed run as transient and worth retrying on
 * another provider: rate limits, HTTP 429, exceeded quota, capacity/overload
 * errors, and timeouts.
 *
 * `429` must stand alone as a number so that values such as "14295ms" or a
 * token count do not look like a rate-limit status, and the timeout
 * alternative also accepts "timed out" / "time-out" / "time out".
 */
const RETRYABLE_PATTERNS = /rate.?limit|(?<!\d)429(?!\d)|quota.?exceeded|capacity|overloaded|timed?[\s_-]?out/i;

/** Non-retryable patterns: auth failures, bad input, user cancellation. */
const NON_RETRYABLE_PATTERNS = /unauthorized|forbidden|invalid.?api.?key|authentication|bad.?request|cancelled|canceled/i;

/**
 * Decide if a subprocess result is a retryable failure.
 *
 * A result is retryable when the exit code is anything other than 0, the
 * first 1000 characters of `stderr` followed by `stdout` match
 * RETRYABLE_PATTERNS, and the same text does NOT match
 * NON_RETRYABLE_PATTERNS (a non-retryable match always wins).
 *
 * @param {{ exitCode: number, stderr: string, stdout: string }} result
 * @returns {boolean} true when the failure looks transient
 */
function isRetryableFailure({ exitCode, stderr, stdout }) {
  if (exitCode === 0) return false;
  // Missing streams are treated as empty instead of being interpolated as
  // the literal text "undefined"/"null".
  const errText = `${stderr ?? ''} ${stdout ?? ''}`.slice(0, 1000);
  if (NON_RETRYABLE_PATTERNS.test(errText)) return false;
  return RETRYABLE_PATTERNS.test(errText);
}
120
+
121
/**
 * Record one failover hop as a JSON line in .dualbrain/audit/failover.jsonl.
 *
 * The audit directory is created on demand. Each line carries an ISO
 * timestamp (`ts`) followed by the fields of `info`. Logging is best-effort:
 * any error raised while creating the directory or writing the line is
 * discarded so the caller is never interrupted.
 *
 * @param {{ from: string, to: string, reason: string, attempt: number }} info
 */
function logFailover({ from, to, reason, attempt }) {
  try {
    mkdirSync(FAILOVER_LOG_DIR, { recursive: true });
    const logPath = join(FAILOVER_LOG_DIR, 'failover.jsonl');
    const entry = { ts: new Date().toISOString(), from, to, reason, attempt };
    appendFileSync(logPath, `${JSON.stringify(entry)}\n`);
  } catch {
    // Deliberately ignored: audit logging must not break the caller.
  }
}
134
+
96
135
  // ─── Native Claude Code detection ────────────────────────────────────────────
97
136
 
98
137
  /**
@@ -605,9 +644,26 @@ function _prependDispatchMarker(prompt) {
605
644
  return `<!-- dual-brain-dispatch: ${runId} -->\n${prompt}`;
606
645
  }
607
646
 
647
+ // ─── Related session age label ────────────────────────────────────────────────
648
+
649
/**
 * Human-readable age label for a related session date string.
 *
 * Produces "<n>m ago" below one hour, "<n>h ago" below one day, and
 * "<n>d ago" otherwise, always rounding down.
 *
 * @param {string} isoDate  Date string understood by `Date.parse`.
 * @returns {string} The age label, or an empty string when `isoDate` cannot
 *   be parsed (instead of the misleading "NaNd ago").
 */
function _relatedSessionAge(isoDate) {
  const parsed = Date.parse(isoDate);
  if (Number.isNaN(parsed)) return '';

  // Clamp at zero so a timestamp slightly in the future (clock skew) reads
  // as "0m ago" rather than a negative age.
  const diff = Math.max(0, Date.now() - parsed);

  const mins = Math.floor(diff / 60000);
  if (mins < 60) return `${mins}m ago`;
  const hours = Math.floor(mins / 60);
  if (hours < 24) return `${hours}h ago`;
  const days = Math.floor(hours / 24);
  return `${days}d ago`;
}
663
+
608
664
  // ─── Main dispatch ────────────────────────────────────────────────────────────
609
665
  async function dispatch(input = {}) {
610
- const { files = [], cwd = process.cwd(), dryRun = false } = input;
666
+ const { files = [], cwd = process.cwd(), dryRun = false, verbose = false } = input;
611
667
  let decision = input.decision ?? {};
612
668
  let { prompt } = input;
613
669
 
@@ -616,6 +672,32 @@ async function dispatch(input = {}) {
616
672
  // Safety gate: redact secrets before anything reaches a subprocess or log
617
673
  prompt = redact(prompt);
618
674
 
675
+ // ── Related session context injection ────────────────────────────────────────
676
+ // Find past sessions related to this task and prepend a context block.
677
+ // Only injected when confidence is high (score > 5). Fast: index-only, no JSONL parsing.
678
+ if (!input._skipRelatedContext) {
679
+ try {
680
+ const { findRelatedSessions } = await import('./session.mjs');
681
+ const related = findRelatedSessions(prompt, files, cwd);
682
+ const highConfidence = related.filter(r => r.score > 5);
683
+ if (highConfidence.length > 0) {
684
+ const lines = highConfidence.map(r => {
685
+ const dateLabel = r.date ? _relatedSessionAge(r.date) : null;
686
+ const datePart = dateLabel ? `, ${dateLabel}` : '';
687
+ const msgPart = r.messageCount > 0 ? `, ${r.messageCount} messages` : '';
688
+ const fileList = r.matchedFiles.length > 0
689
+ ? `: touched ${r.matchedFiles.map(f => f.split('/').pop()).join(', ')}`
690
+ : '';
691
+ return `- "${r.smartName}"${datePart}${msgPart}${fileList}`;
692
+ });
693
+ const contextBlock = `[Prior context from related sessions:]\n${lines.join('\n')}\n[End prior context]\n\n`;
694
+ prompt = contextBlock + prompt;
695
+ if (verbose) process.stderr.write(`[dual-brain] injected related session context (${highConfidence.length} sessions)\n`);
696
+ }
697
+ } catch { /* non-fatal — never block dispatch */ }
698
+ }
699
+ // ── End related session context ──────────────────────────────────────────────
700
+
619
701
  // Stamp the prompt with the dispatch marker so enforce-tier.mjs can recognise
620
702
  // that this agent call came through the official pipeline.
621
703
  prompt = _prependDispatchMarker(prompt);
@@ -629,7 +711,7 @@ async function dispatch(input = {}) {
629
711
  const specialistPrompt = loadSpecialistPrompt(specialist);
630
712
  if (specialistPrompt) {
631
713
  prompt = `${specialistPrompt}\n\n---\n\n${prompt}`;
632
- process.stderr.write(`[dual-brain] specialist: ${specialist}\n`);
714
+ if (verbose) process.stderr.write(`[dual-brain] specialist: ${specialist}\n`);
633
715
  }
634
716
 
635
717
  // Apply tier_bias from registry if decision didn't already pin a tier
@@ -638,7 +720,7 @@ async function dispatch(input = {}) {
638
720
  const tierBias = registry?.specialists?.[specialist]?.tier_bias;
639
721
  if (tierBias) {
640
722
  decision = { ...decision, tier: tierBias };
641
- process.stderr.write(`[dual-brain] specialist tier_bias applied: ${tierBias}\n`);
723
+ if (verbose) process.stderr.write(`[dual-brain] specialist tier_bias applied: ${tierBias}\n`);
642
724
  }
643
725
  }
644
726
  }
@@ -736,7 +818,39 @@ async function dispatch(input = {}) {
736
818
  _recordDispatchBudget(prompt);
737
819
 
738
820
  const dispatchEnv = { DUAL_BRAIN_DISPATCH: '1' };
739
- const { exitCode, stdout, stderr, durationMs } = await runProcess(command, cwd, timeoutMs, dispatchEnv);
821
+
822
+ // ── Auto-heal failover retry loop (native Claude path) ────────────────
823
+ const MAX_FAILOVER_ATTEMPTS = 2;
824
+ let currentProvider = effectiveProvider;
825
+ let currentModel = effectiveModel;
826
+ let currentDecision = effectiveDecision;
827
+ let currentCommand = command;
828
+ let lastRaw;
829
+
830
+ for (let attempt = 0; attempt <= MAX_FAILOVER_ATTEMPTS; attempt++) {
831
+ lastRaw = await runProcess(currentCommand, cwd, timeoutMs, dispatchEnv);
832
+ if (lastRaw.exitCode === 0 || !isRetryableFailure(lastRaw) || attempt === MAX_FAILOVER_ATTEMPTS) break;
833
+
834
+ const failoverList = getFailoverOrder(
835
+ { provider: currentProvider, model: currentModel, tier },
836
+ input.profile ?? {},
837
+ );
838
+ if (failoverList.length === 0) break;
839
+
840
+ const next = failoverList[0];
841
+ const reason = `${lastRaw.stderr || lastRaw.stdout}`.slice(0, 120);
842
+ logFailover({ from: `${currentProvider}/${currentModel}`, to: `${next.provider}/${next.model}`, reason, attempt: attempt + 1 });
843
+ process.stderr.write(`\x1b[2m[dual-brain] Provider busy, failing over to ${next.label}...\x1b[0m\n`);
844
+
845
+ markHot(currentProvider, currentModel, cwd);
846
+ currentProvider = next.provider;
847
+ currentModel = next.model;
848
+ currentDecision = { ...currentDecision, provider: currentProvider, model: currentModel };
849
+ currentCommand = buildCommand(currentDecision, prompt, files, cwd);
850
+ }
851
+
852
+ const { exitCode, stdout, stderr, durationMs } = lastRaw;
853
+ // ── End failover loop ────────────────────────────────────────────────
740
854
 
741
855
  // Extract token usage from JSON output if available
742
856
  let usage = null;
@@ -753,25 +867,25 @@ async function dispatch(input = {}) {
753
867
 
754
868
  // ── Health tracking ────────────────────────────────────────────────────
755
869
  if (success) {
756
- recordDuration(effectiveProvider, effectiveModel, durationMs);
757
- const median = medianDuration(effectiveProvider, effectiveModel);
870
+ recordDuration(currentProvider, currentModel, durationMs);
871
+ const median = medianDuration(currentProvider, currentModel);
758
872
  if (median !== null && durationMs > median * 3) {
759
- markDegraded(effectiveProvider, effectiveModel, cwd);
873
+ markDegraded(currentProvider, currentModel, cwd);
760
874
  } else {
761
- markHealthy(effectiveProvider, effectiveModel, cwd);
875
+ markHealthy(currentProvider, currentModel, cwd);
762
876
  }
763
877
  const totalTokens = (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0);
764
- recordDispatch(effectiveProvider, effectiveModel, totalTokens, cwd);
878
+ recordDispatch(currentProvider, currentModel, totalTokens, cwd);
765
879
  } else {
766
880
  if (RATE_LIMIT_PATTERNS.test(errorText)) {
767
- markHot(effectiveProvider, effectiveModel, cwd);
881
+ markHot(currentProvider, currentModel, cwd);
768
882
  }
769
883
  }
770
884
  // ── End health tracking ────────────────────────────────────────────────
771
885
 
772
886
  recordUsage({
773
- provider: effectiveProvider,
774
- model: effectiveModel,
887
+ provider: currentProvider,
888
+ model: currentModel,
775
889
  tier,
776
890
  durationMs,
777
891
  inputTokens: usage?.inputTokens ?? null,
@@ -782,10 +896,10 @@ async function dispatch(input = {}) {
782
896
  return {
783
897
  status: success ? 'completed' : 'failed',
784
898
  type: 'native-agent',
785
- provider: effectiveProvider,
786
- model: effectiveModel,
899
+ provider: currentProvider,
900
+ model: currentModel,
787
901
  specialist: specialist ?? 'generic',
788
- command,
902
+ command: currentCommand,
789
903
  nativeDispatch: nativeDescriptor,
790
904
  exitCode,
791
905
  summary,
@@ -804,7 +918,38 @@ async function dispatch(input = {}) {
804
918
  // Record this dispatch against the budget
805
919
  _recordDispatchBudget(prompt);
806
920
 
807
- const { exitCode, stdout, stderr, durationMs } = await runProcess(command, cwd, timeoutMs);
921
+ // ── Auto-heal failover retry loop (subprocess path) ──────────────────────
922
+ const MAX_FAILOVER_ATTEMPTS_SUB = 2;
923
+ let subProvider = effectiveProvider;
924
+ let subModel = effectiveModel;
925
+ let subDecision = effectiveDecision;
926
+ let subCommand = command;
927
+ let subRaw;
928
+
929
+ for (let attempt = 0; attempt <= MAX_FAILOVER_ATTEMPTS_SUB; attempt++) {
930
+ subRaw = await runProcess(subCommand, cwd, timeoutMs);
931
+ if (subRaw.exitCode === 0 || !isRetryableFailure(subRaw) || attempt === MAX_FAILOVER_ATTEMPTS_SUB) break;
932
+
933
+ const failoverList = getFailoverOrder(
934
+ { provider: subProvider, model: subModel, tier },
935
+ input.profile ?? {},
936
+ );
937
+ if (failoverList.length === 0) break;
938
+
939
+ const next = failoverList[0];
940
+ const reason = `${subRaw.stderr || subRaw.stdout}`.slice(0, 120);
941
+ logFailover({ from: `${subProvider}/${subModel}`, to: `${next.provider}/${next.model}`, reason, attempt: attempt + 1 });
942
+ process.stderr.write(`\x1b[2m[dual-brain] Provider busy, failing over to ${next.label}...\x1b[0m\n`);
943
+
944
+ markHot(subProvider, subModel, cwd);
945
+ subProvider = next.provider;
946
+ subModel = next.model;
947
+ subDecision = { ...subDecision, provider: subProvider, model: subModel };
948
+ subCommand = buildCommand(subDecision, prompt, files, cwd);
949
+ }
950
+
951
+ const { exitCode, stdout, stderr, durationMs } = subRaw;
952
+ // ── End failover loop ──────────────────────────────────────────────────────
808
953
 
809
954
  // Extract token usage from JSON output if available
810
955
  let usage = null;
@@ -821,25 +966,25 @@ async function dispatch(input = {}) {
821
966
 
822
967
  // ── Health tracking ──────────────────────────────────────────────────────
823
968
  if (success) {
824
- recordDuration(effectiveProvider, effectiveModel, durationMs);
825
- const median = medianDuration(effectiveProvider, effectiveModel);
969
+ recordDuration(subProvider, subModel, durationMs);
970
+ const median = medianDuration(subProvider, subModel);
826
971
  if (median !== null && durationMs > median * 3) {
827
- markDegraded(effectiveProvider, effectiveModel, cwd);
972
+ markDegraded(subProvider, subModel, cwd);
828
973
  } else {
829
- markHealthy(effectiveProvider, effectiveModel, cwd);
974
+ markHealthy(subProvider, subModel, cwd);
830
975
  }
831
976
  const totalTokens = (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0);
832
- recordDispatch(effectiveProvider, effectiveModel, totalTokens, cwd);
977
+ recordDispatch(subProvider, subModel, totalTokens, cwd);
833
978
  } else {
834
979
  if (RATE_LIMIT_PATTERNS.test(errorText)) {
835
- markHot(effectiveProvider, effectiveModel, cwd);
980
+ markHot(subProvider, subModel, cwd);
836
981
  }
837
982
  }
838
983
  // ── End health tracking ──────────────────────────────────────────────────
839
984
 
840
985
  recordUsage({
841
- provider: effectiveProvider,
842
- model: effectiveModel,
986
+ provider: subProvider,
987
+ model: subModel,
843
988
  tier,
844
989
  durationMs,
845
990
  inputTokens: usage?.inputTokens ?? null,
@@ -849,10 +994,10 @@ async function dispatch(input = {}) {
849
994
 
850
995
  return {
851
996
  status: success ? 'completed' : 'failed',
852
- provider: effectiveProvider,
853
- model: effectiveModel,
997
+ provider: subProvider,
998
+ model: subModel,
854
999
  specialist: specialist ?? 'generic',
855
- command,
1000
+ command: subCommand,
856
1001
  exitCode,
857
1002
  summary,
858
1003
  durationMs,
@@ -863,7 +1008,7 @@ async function dispatch(input = {}) {
863
1008
 
864
1009
  // ─── Dual-brain dispatch (parallel) ───────────────────────────────────────────
865
1010
  async function dispatchDualBrain(input = {}) {
866
- const { decision = {}, files = [], cwd = process.cwd(), dryRun = false } = input;
1011
+ const { decision = {}, files = [], cwd = process.cwd(), dryRun = false, verbose = false } = input;
867
1012
  let { prompt } = input;
868
1013
  if (!prompt) throw new Error('prompt is required');
869
1014
 
@@ -887,10 +1032,10 @@ async function dispatchDualBrain(input = {}) {
887
1032
  const [claudeResult, openaiResult] = await Promise.all([
888
1033
  validatedClaude._error
889
1034
  ? Promise.resolve({ status: 'error', provider: 'claude', model: claudeDecision.model, command: null, exitCode: null, summary: validatedClaude._error, durationMs: 0, usage: null, error: validatedClaude._error })
890
- : dispatch({ decision: validatedClaude, prompt, files, cwd, dryRun }),
1035
+ : dispatch({ decision: validatedClaude, prompt, files, cwd, dryRun, verbose }),
891
1036
  validatedOpenai._error
892
1037
  ? Promise.resolve({ status: 'error', provider: 'openai', model: openaiDecision.model, command: null, exitCode: null, summary: validatedOpenai._error, durationMs: 0, usage: null, error: validatedOpenai._error })
893
- : dispatch({ decision: validatedOpenai, prompt, files, cwd, dryRun }),
1038
+ : dispatch({ decision: validatedOpenai, prompt, files, cwd, dryRun, verbose }),
894
1039
  ]);
895
1040
 
896
1041
  return {