dual-brain 7.1.21 → 7.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/dispatch.mjs CHANGED
@@ -14,6 +14,7 @@ import { fileURLToPath } from 'node:url';
14
14
  import { createHash } from 'node:crypto';
15
15
  import { markHot, markDegraded, markHealthy, recordDispatch } from './health.mjs';
16
16
  import { redact } from './redact.mjs';
17
+ import { getFailoverOrder } from './decide.mjs';
17
18
 
18
19
  const __dirname = dirname(fileURLToPath(import.meta.url));
19
20
  const USAGE_DIR = join(__dirname, '..', '.dualbrain', 'usage');
@@ -93,6 +94,44 @@ function medianDuration(provider, model) {
93
94
  // Rate-limit error keywords
94
95
  const RATE_LIMIT_PATTERNS = /rate.?limit|quota|capacity|too many requests|overloaded|throttl/i;
95
96
 
97
+ // ─── Auto-heal failover helpers ───────────────────────────────────────────────
98
+
99
+ const FAILOVER_LOG_DIR = join(__dirname, '..', '.dualbrain', 'audit');
100
+
101
/**
 * Retryable failure keywords: rate limits, HTTP 429, quota, capacity,
 * overload, throttling and timeouts. Includes the "too many requests" and
 * "throttl" keywords that RATE_LIMIT_PATTERNS also recognises, and matches the
 * "timed out" / "ETIMEDOUT" spellings in addition to "timeout".
 */
const RETRYABLE_PATTERNS = /rate.?limit|429|quota.?exceeded|capacity|overloaded|too many requests|throttl|time(?:d.?)?out/i;

/** Non-retryable patterns: auth failures, bad input, user cancellation */
const NON_RETRYABLE_PATTERNS = /unauthorized|forbidden|invalid.?api.?key|authentication|bad.?request|cancelled|canceled/i;

/**
 * Decide if a subprocess result is a retryable failure.
 * The exit code must be non-zero AND the output must match a retryable keyword
 * AND must NOT match a non-retryable keyword (non-retryable always wins).
 * Only the first 1000 characters of `stderr` followed by `stdout` are inspected.
 * @param {{ exitCode: number, stderr?: string, stdout?: string }} result
 * @returns {boolean} true when the failure looks transient and worth retrying elsewhere
 */
function isRetryableFailure({ exitCode, stderr, stdout }) {
  if (exitCode === 0) return false;
  // Treat missing streams as empty so the literal text "undefined"/"null"
  // never ends up in the inspected window.
  const errText = `${stderr ?? ''} ${stdout ?? ''}`.slice(0, 1000);
  if (NON_RETRYABLE_PATTERNS.test(errText)) return false;
  return RETRYABLE_PATTERNS.test(errText);
}
120
+
121
/**
 * Record one failover event as a JSON line in `failover.jsonl` inside
 * FAILOVER_LOG_DIR, creating the directory first if needed.
 * Each line carries an ISO timestamp (`ts`) plus the supplied fields.
 * Best-effort: any error while creating the directory, serialising or
 * writing is ignored so audit logging can never interrupt a dispatch.
 * @param {{ from: string, to: string, reason: string, attempt: number }} info
 */
function logFailover({ from, to, reason, attempt }) {
  try {
    mkdirSync(FAILOVER_LOG_DIR, { recursive: true });
    const logPath = join(FAILOVER_LOG_DIR, 'failover.jsonl');
    const entry = { ts: new Date().toISOString(), from, to, reason, attempt };
    appendFileSync(logPath, `${JSON.stringify(entry)}\n`);
  } catch {
    // Intentionally ignored: the audit trail is advisory only.
  }
}
134
+
96
135
  // ─── Native Claude Code detection ────────────────────────────────────────────
97
136
 
98
137
  /**
@@ -605,9 +644,26 @@ function _prependDispatchMarker(prompt) {
605
644
  return `<!-- dual-brain-dispatch: ${runId} -->\n${prompt}`;
606
645
  }
607
646
 
647
+ // ─── Related session age label ────────────────────────────────────────────────
648
+
649
/**
 * Human-readable age label for a related session date string.
 *
 * The label is relative to the current time: whole minutes under an hour,
 * whole hours under a day, whole days otherwise (always rounded down).
 *
 * @param {string} isoDate - Date string in a format `Date.parse` understands.
 * @returns {string} `"<n>m ago"`, `"<n>h ago"` or `"<n>d ago"`; an empty string
 *   when `isoDate` cannot be parsed, so callers can omit the label instead of
 *   printing "NaNd ago".
 */
function _relatedSessionAge(isoDate) {
  const parsedMs = Date.parse(isoDate);
  if (Number.isNaN(parsedMs)) return '';

  // Clamp so a timestamp slightly in the future (clock skew) reads "0m ago"
  // rather than a negative age.
  const elapsedMs = Math.max(0, Date.now() - parsedMs);

  const mins = Math.floor(elapsedMs / 60000);
  if (mins < 60) return `${mins}m ago`;

  const hours = Math.floor(mins / 60);
  if (hours < 24) return `${hours}h ago`;

  const days = Math.floor(hours / 24);
  return `${days}d ago`;
}
663
+
608
664
  // ─── Main dispatch ────────────────────────────────────────────────────────────
609
665
  async function dispatch(input = {}) {
610
- const { files = [], cwd = process.cwd(), dryRun = false } = input;
666
+ const { files = [], cwd = process.cwd(), dryRun = false, verbose = false } = input;
611
667
  let decision = input.decision ?? {};
612
668
  let { prompt } = input;
613
669
 
@@ -616,10 +672,47 @@ async function dispatch(input = {}) {
616
672
  // Safety gate: redact secrets before anything reaches a subprocess or log
617
673
  prompt = redact(prompt);
618
674
 
675
+ // ── Related session context injection ────────────────────────────────────────
676
+ // Find past sessions related to this task and prepend a context block.
677
+ // Only injected when confidence is high (score > 5). Fast: index-only, no JSONL parsing.
678
+ if (!input._skipRelatedContext) {
679
+ try {
680
+ const { findRelatedSessions } = await import('./session.mjs');
681
+ const related = findRelatedSessions(prompt, files, cwd);
682
+ const highConfidence = related.filter(r => r.score > 5);
683
+ if (highConfidence.length > 0) {
684
+ const lines = highConfidence.map(r => {
685
+ const dateLabel = r.date ? _relatedSessionAge(r.date) : null;
686
+ const datePart = dateLabel ? `, ${dateLabel}` : '';
687
+ const msgPart = r.messageCount > 0 ? `, ${r.messageCount} messages` : '';
688
+ const fileList = r.matchedFiles.length > 0
689
+ ? `: touched ${r.matchedFiles.map(f => f.split('/').pop()).join(', ')}`
690
+ : '';
691
+ return `- "${r.smartName}"${datePart}${msgPart}${fileList}`;
692
+ });
693
+ const contextBlock = `[Prior context from related sessions:]\n${lines.join('\n')}\n[End prior context]\n\n`;
694
+ prompt = contextBlock + prompt;
695
+ if (verbose) process.stderr.write(`[dual-brain] injected related session context (${highConfidence.length} sessions)\n`);
696
+ }
697
+ } catch { /* non-fatal — never block dispatch */ }
698
+ }
699
+ // ── End related session context ──────────────────────────────────────────────
700
+
619
701
  // Stamp the prompt with the dispatch marker so enforce-tier.mjs can recognise
620
702
  // that this agent call came through the official pipeline.
621
703
  prompt = _prependDispatchMarker(prompt);
622
704
 
705
+ // ── Situation brief injection ────────────────────────────────────────────────
706
+ // Prepend a compact project-state summary when provided by the pipeline.
707
+ // This gives every dispatched agent immediate context about the project reality.
708
+ const situationBrief = typeof input.situationBrief === 'string' && input.situationBrief.trim()
709
+ ? input.situationBrief.trim()
710
+ : null;
711
+ if (situationBrief) {
712
+ prompt = `--- SITUATION BRIEF ---\n${situationBrief}\n--- END BRIEF ---\n\n${prompt}`;
713
+ }
714
+ // ── End situation brief ──────────────────────────────────────────────────────
715
+
623
716
  // ── Specialist prompt injection ──────────────────────────────────────────────
624
717
  const specialist = decision.specialist && decision.specialist !== 'generic'
625
718
  ? decision.specialist
@@ -629,7 +722,7 @@ async function dispatch(input = {}) {
629
722
  const specialistPrompt = loadSpecialistPrompt(specialist);
630
723
  if (specialistPrompt) {
631
724
  prompt = `${specialistPrompt}\n\n---\n\n${prompt}`;
632
- process.stderr.write(`[dual-brain] specialist: ${specialist}\n`);
725
+ if (verbose) process.stderr.write(`[dual-brain] specialist: ${specialist}\n`);
633
726
  }
634
727
 
635
728
  // Apply tier_bias from registry if decision didn't already pin a tier
@@ -638,7 +731,7 @@ async function dispatch(input = {}) {
638
731
  const tierBias = registry?.specialists?.[specialist]?.tier_bias;
639
732
  if (tierBias) {
640
733
  decision = { ...decision, tier: tierBias };
641
- process.stderr.write(`[dual-brain] specialist tier_bias applied: ${tierBias}\n`);
734
+ if (verbose) process.stderr.write(`[dual-brain] specialist tier_bias applied: ${tierBias}\n`);
642
735
  }
643
736
  }
644
737
  }
@@ -736,7 +829,39 @@ async function dispatch(input = {}) {
736
829
  _recordDispatchBudget(prompt);
737
830
 
738
831
  const dispatchEnv = { DUAL_BRAIN_DISPATCH: '1' };
739
- const { exitCode, stdout, stderr, durationMs } = await runProcess(command, cwd, timeoutMs, dispatchEnv);
832
+
833
+ // ── Auto-heal failover retry loop (native Claude path) ────────────────
834
+ const MAX_FAILOVER_ATTEMPTS = 2;
835
+ let currentProvider = effectiveProvider;
836
+ let currentModel = effectiveModel;
837
+ let currentDecision = effectiveDecision;
838
+ let currentCommand = command;
839
+ let lastRaw;
840
+
841
+ for (let attempt = 0; attempt <= MAX_FAILOVER_ATTEMPTS; attempt++) {
842
+ lastRaw = await runProcess(currentCommand, cwd, timeoutMs, dispatchEnv);
843
+ if (lastRaw.exitCode === 0 || !isRetryableFailure(lastRaw) || attempt === MAX_FAILOVER_ATTEMPTS) break;
844
+
845
+ const failoverList = getFailoverOrder(
846
+ { provider: currentProvider, model: currentModel, tier },
847
+ input.profile ?? {},
848
+ );
849
+ if (failoverList.length === 0) break;
850
+
851
+ const next = failoverList[0];
852
+ const reason = `${lastRaw.stderr || lastRaw.stdout}`.slice(0, 120);
853
+ logFailover({ from: `${currentProvider}/${currentModel}`, to: `${next.provider}/${next.model}`, reason, attempt: attempt + 1 });
854
+ process.stderr.write(`\x1b[2m[dual-brain] Provider busy, failing over to ${next.label}...\x1b[0m\n`);
855
+
856
+ markHot(currentProvider, currentModel, cwd);
857
+ currentProvider = next.provider;
858
+ currentModel = next.model;
859
+ currentDecision = { ...currentDecision, provider: currentProvider, model: currentModel };
860
+ currentCommand = buildCommand(currentDecision, prompt, files, cwd);
861
+ }
862
+
863
+ const { exitCode, stdout, stderr, durationMs } = lastRaw;
864
+ // ── End failover loop ────────────────────────────────────────────────
740
865
 
741
866
  // Extract token usage from JSON output if available
742
867
  let usage = null;
@@ -753,25 +878,25 @@ async function dispatch(input = {}) {
753
878
 
754
879
  // ── Health tracking ────────────────────────────────────────────────────
755
880
  if (success) {
756
- recordDuration(effectiveProvider, effectiveModel, durationMs);
757
- const median = medianDuration(effectiveProvider, effectiveModel);
881
+ recordDuration(currentProvider, currentModel, durationMs);
882
+ const median = medianDuration(currentProvider, currentModel);
758
883
  if (median !== null && durationMs > median * 3) {
759
- markDegraded(effectiveProvider, effectiveModel, cwd);
884
+ markDegraded(currentProvider, currentModel, cwd);
760
885
  } else {
761
- markHealthy(effectiveProvider, effectiveModel, cwd);
886
+ markHealthy(currentProvider, currentModel, cwd);
762
887
  }
763
888
  const totalTokens = (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0);
764
- recordDispatch(effectiveProvider, effectiveModel, totalTokens, cwd);
889
+ recordDispatch(currentProvider, currentModel, totalTokens, cwd);
765
890
  } else {
766
891
  if (RATE_LIMIT_PATTERNS.test(errorText)) {
767
- markHot(effectiveProvider, effectiveModel, cwd);
892
+ markHot(currentProvider, currentModel, cwd);
768
893
  }
769
894
  }
770
895
  // ── End health tracking ────────────────────────────────────────────────
771
896
 
772
897
  recordUsage({
773
- provider: effectiveProvider,
774
- model: effectiveModel,
898
+ provider: currentProvider,
899
+ model: currentModel,
775
900
  tier,
776
901
  durationMs,
777
902
  inputTokens: usage?.inputTokens ?? null,
@@ -782,10 +907,10 @@ async function dispatch(input = {}) {
782
907
  return {
783
908
  status: success ? 'completed' : 'failed',
784
909
  type: 'native-agent',
785
- provider: effectiveProvider,
786
- model: effectiveModel,
910
+ provider: currentProvider,
911
+ model: currentModel,
787
912
  specialist: specialist ?? 'generic',
788
- command,
913
+ command: currentCommand,
789
914
  nativeDispatch: nativeDescriptor,
790
915
  exitCode,
791
916
  summary,
@@ -804,7 +929,38 @@ async function dispatch(input = {}) {
804
929
  // Record this dispatch against the budget
805
930
  _recordDispatchBudget(prompt);
806
931
 
807
- const { exitCode, stdout, stderr, durationMs } = await runProcess(command, cwd, timeoutMs);
932
+ // ── Auto-heal failover retry loop (subprocess path) ──────────────────────
933
+ const MAX_FAILOVER_ATTEMPTS_SUB = 2;
934
+ let subProvider = effectiveProvider;
935
+ let subModel = effectiveModel;
936
+ let subDecision = effectiveDecision;
937
+ let subCommand = command;
938
+ let subRaw;
939
+
940
+ for (let attempt = 0; attempt <= MAX_FAILOVER_ATTEMPTS_SUB; attempt++) {
941
+ subRaw = await runProcess(subCommand, cwd, timeoutMs);
942
+ if (subRaw.exitCode === 0 || !isRetryableFailure(subRaw) || attempt === MAX_FAILOVER_ATTEMPTS_SUB) break;
943
+
944
+ const failoverList = getFailoverOrder(
945
+ { provider: subProvider, model: subModel, tier },
946
+ input.profile ?? {},
947
+ );
948
+ if (failoverList.length === 0) break;
949
+
950
+ const next = failoverList[0];
951
+ const reason = `${subRaw.stderr || subRaw.stdout}`.slice(0, 120);
952
+ logFailover({ from: `${subProvider}/${subModel}`, to: `${next.provider}/${next.model}`, reason, attempt: attempt + 1 });
953
+ process.stderr.write(`\x1b[2m[dual-brain] Provider busy, failing over to ${next.label}...\x1b[0m\n`);
954
+
955
+ markHot(subProvider, subModel, cwd);
956
+ subProvider = next.provider;
957
+ subModel = next.model;
958
+ subDecision = { ...subDecision, provider: subProvider, model: subModel };
959
+ subCommand = buildCommand(subDecision, prompt, files, cwd);
960
+ }
961
+
962
+ const { exitCode, stdout, stderr, durationMs } = subRaw;
963
+ // ── End failover loop ──────────────────────────────────────────────────────
808
964
 
809
965
  // Extract token usage from JSON output if available
810
966
  let usage = null;
@@ -821,25 +977,25 @@ async function dispatch(input = {}) {
821
977
 
822
978
  // ── Health tracking ──────────────────────────────────────────────────────
823
979
  if (success) {
824
- recordDuration(effectiveProvider, effectiveModel, durationMs);
825
- const median = medianDuration(effectiveProvider, effectiveModel);
980
+ recordDuration(subProvider, subModel, durationMs);
981
+ const median = medianDuration(subProvider, subModel);
826
982
  if (median !== null && durationMs > median * 3) {
827
- markDegraded(effectiveProvider, effectiveModel, cwd);
983
+ markDegraded(subProvider, subModel, cwd);
828
984
  } else {
829
- markHealthy(effectiveProvider, effectiveModel, cwd);
985
+ markHealthy(subProvider, subModel, cwd);
830
986
  }
831
987
  const totalTokens = (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0);
832
- recordDispatch(effectiveProvider, effectiveModel, totalTokens, cwd);
988
+ recordDispatch(subProvider, subModel, totalTokens, cwd);
833
989
  } else {
834
990
  if (RATE_LIMIT_PATTERNS.test(errorText)) {
835
- markHot(effectiveProvider, effectiveModel, cwd);
991
+ markHot(subProvider, subModel, cwd);
836
992
  }
837
993
  }
838
994
  // ── End health tracking ──────────────────────────────────────────────────
839
995
 
840
996
  recordUsage({
841
- provider: effectiveProvider,
842
- model: effectiveModel,
997
+ provider: subProvider,
998
+ model: subModel,
843
999
  tier,
844
1000
  durationMs,
845
1001
  inputTokens: usage?.inputTokens ?? null,
@@ -849,10 +1005,10 @@ async function dispatch(input = {}) {
849
1005
 
850
1006
  return {
851
1007
  status: success ? 'completed' : 'failed',
852
- provider: effectiveProvider,
853
- model: effectiveModel,
1008
+ provider: subProvider,
1009
+ model: subModel,
854
1010
  specialist: specialist ?? 'generic',
855
- command,
1011
+ command: subCommand,
856
1012
  exitCode,
857
1013
  summary,
858
1014
  durationMs,
@@ -863,7 +1019,7 @@ async function dispatch(input = {}) {
863
1019
 
864
1020
  // ─── Dual-brain dispatch (parallel) ───────────────────────────────────────────
865
1021
  async function dispatchDualBrain(input = {}) {
866
- const { decision = {}, files = [], cwd = process.cwd(), dryRun = false } = input;
1022
+ const { decision = {}, files = [], cwd = process.cwd(), dryRun = false, verbose = false } = input;
867
1023
  let { prompt } = input;
868
1024
  if (!prompt) throw new Error('prompt is required');
869
1025
 
@@ -873,6 +1029,15 @@ async function dispatchDualBrain(input = {}) {
873
1029
  // Stamp with dispatch marker so enforce-tier.mjs allows this Agent call
874
1030
  prompt = _prependDispatchMarker(prompt);
875
1031
 
1032
+ // ── Situation brief injection ────────────────────────────────────────────────
1033
+ const _dualBrainBrief = typeof input.situationBrief === 'string' && input.situationBrief.trim()
1034
+ ? input.situationBrief.trim()
1035
+ : null;
1036
+ if (_dualBrainBrief) {
1037
+ prompt = `--- SITUATION BRIEF ---\n${_dualBrainBrief}\n--- END BRIEF ---\n\n${prompt}`;
1038
+ }
1039
+ // ── End situation brief ──────────────────────────────────────────────────────
1040
+
876
1041
  // Feature 1: Validate both sub-decisions before spawning anything
877
1042
  const rt = await detectRuntime();
878
1043
  const tier = decision.tier ?? 'execute';
@@ -887,10 +1052,10 @@ async function dispatchDualBrain(input = {}) {
887
1052
  const [claudeResult, openaiResult] = await Promise.all([
888
1053
  validatedClaude._error
889
1054
  ? Promise.resolve({ status: 'error', provider: 'claude', model: claudeDecision.model, command: null, exitCode: null, summary: validatedClaude._error, durationMs: 0, usage: null, error: validatedClaude._error })
890
- : dispatch({ decision: validatedClaude, prompt, files, cwd, dryRun }),
1055
+ : dispatch({ decision: validatedClaude, prompt, files, cwd, dryRun, verbose }),
891
1056
  validatedOpenai._error
892
1057
  ? Promise.resolve({ status: 'error', provider: 'openai', model: openaiDecision.model, command: null, exitCode: null, summary: validatedOpenai._error, durationMs: 0, usage: null, error: validatedOpenai._error })
893
- : dispatch({ decision: validatedOpenai, prompt, files, cwd, dryRun }),
1058
+ : dispatch({ decision: validatedOpenai, prompt, files, cwd, dryRun, verbose }),
894
1059
  ]);
895
1060
 
896
1061
  return {