@kinqs/brainrouter-cli 0.3.7 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/changelog/0.2.0.md +15 -0
  2. package/changelog/0.3.0.md +20 -0
  3. package/changelog/0.3.1.md +22 -0
  4. package/changelog/0.3.2.md +15 -0
  5. package/changelog/0.3.3.md +19 -0
  6. package/changelog/0.3.4.md +20 -0
  7. package/changelog/0.3.5.md +9 -0
  8. package/changelog/0.3.6.md +9 -0
  9. package/changelog/0.3.7.md +20 -0
  10. package/changelog/0.3.8.md +30 -0
  11. package/changelog/README.md +41 -0
  12. package/dist/agent/agent.d.ts +22 -0
  13. package/dist/agent/agent.js +259 -82
  14. package/dist/agent/toolCallRecovery.d.ts +57 -0
  15. package/dist/agent/toolCallRecovery.js +130 -0
  16. package/dist/agent/toolSafety.d.ts +17 -0
  17. package/dist/agent/toolSafety.js +102 -0
  18. package/dist/cli/banner.js +2 -2
  19. package/dist/cli/cliPrompt.js +65 -0
  20. package/dist/cli/commands/config.js +1 -1
  21. package/dist/cli/commands/mcp.d.ts +1 -1
  22. package/dist/cli/commands/mcp.js +29 -7
  23. package/dist/cli/commands/mcpInstall.d.ts +20 -0
  24. package/dist/cli/commands/mcpInstall.js +87 -0
  25. package/dist/cli/commands/orchestration.js +33 -0
  26. package/dist/cli/commands/releaseNotes.d.ts +24 -0
  27. package/dist/cli/commands/releaseNotes.js +109 -0
  28. package/dist/cli/commands/schedule.d.ts +18 -0
  29. package/dist/cli/commands/schedule.js +189 -0
  30. package/dist/cli/commands/ui.js +2 -2
  31. package/dist/cli/ink/Picker.d.ts +6 -0
  32. package/dist/cli/ink/Picker.js +41 -6
  33. package/dist/cli/ink/runChat.js +112 -1
  34. package/dist/cli/ink/toolFormat.d.ts +11 -9
  35. package/dist/cli/ink/toolFormat.js +42 -16
  36. package/dist/cli/repl.d.ts +1 -1
  37. package/dist/cli/repl.js +9 -2
  38. package/dist/config/config.d.ts +1 -1
  39. package/dist/index.js +10 -1
  40. package/dist/memory/briefing.js +4 -4
  41. package/dist/orchestration/tools.d.ts +95 -2
  42. package/dist/orchestration/tools.js +119 -4
  43. package/dist/prompt/systemPrompt.js +5 -4
  44. package/dist/runtime/anthropicAdapter.d.ts +100 -0
  45. package/dist/runtime/anthropicAdapter.js +293 -0
  46. package/dist/runtime/cronParser.d.ts +23 -0
  47. package/dist/runtime/cronParser.js +122 -0
  48. package/dist/runtime/mcpClient.js +1 -1
  49. package/dist/runtime/mcpPool.d.ts +8 -0
  50. package/dist/runtime/mcpPool.js +19 -0
  51. package/dist/runtime/mcpUtils.d.ts +14 -0
  52. package/dist/runtime/mcpUtils.js +23 -0
  53. package/dist/runtime/scheduleTicker.d.ts +33 -0
  54. package/dist/runtime/scheduleTicker.js +99 -0
  55. package/dist/runtime/vendorSnippets.d.ts +45 -0
  56. package/dist/runtime/vendorSnippets.js +153 -0
  57. package/dist/state/scheduleStore.d.ts +37 -0
  58. package/dist/state/scheduleStore.js +64 -0
  59. package/package.json +7 -4
@@ -8,7 +8,7 @@ import { askChoice, askYesNo, getActiveReadline, NoTTYError } from '../cli/cliPr
8
8
  import { appendTranscriptEntry } from '../state/sessionStore.js';
9
9
  import { buildSystemPrompt, loadWorkspaceInstructionSummary } from '../prompt/systemPrompt.js';
10
10
  import { formatPlan, readPlan, updatePlan } from '../state/taskStore.js';
11
- import { createSpawnAgentTool, createSpawnAgentsTool, createListAgentsTool, createWaitAgentTool, createWaitAgentsTool, createReadAgentTranscriptTool, createCloseAgentTool, createRouteAgentTool, executeOrchestrationTool, isOrchestrationToolName, } from '../orchestration/tools.js';
11
+ import { createTaskAgentTool, createDelegateAgentTool, createSpawnAgentTool, createSpawnAgentsTool, createListAgentsTool, createWaitAgentTool, createWaitAgentsTool, createReadAgentTranscriptTool, createCloseAgentTool, createRouteAgentTool, executeOrchestrationTool, isOrchestrationToolName, } from '../orchestration/tools.js';
12
12
  import { buildMemoryBriefing, selectCitedRecordIds } from '../memory/briefing.js';
13
13
  import { callMcpTool, extractToolText } from '../runtime/mcpUtils.js';
14
14
  import { acquireLLMSlot } from '../runtime/llmSemaphore.js';
@@ -17,12 +17,16 @@ import { runHooks } from '../state/hooksStore.js';
17
17
  import { resolveSandboxConfig, runShell } from '../runtime/sandbox.js';
18
18
  import { isDangerousCommand, resolveRunCommandApproval } from '../runtime/dangerousCommand.js';
19
19
  import { readPreferences, resolveEffort } from '../state/preferencesStore.js';
20
+ import { shouldUseAnthropicNative, callAnthropic } from '../runtime/anthropicAdapter.js';
20
21
  import { startSpan, traceEvent } from '../runtime/tracing.js';
21
22
  import { buildHookifyContext, evaluateHookify, listHookifyRules } from '../state/hookifyStore.js';
22
23
  import { renderCompactSystemMessage, runCompaction } from '../prompt/compactor.js';
23
24
  import { buildFanOutHint, shouldSuggestFanOut } from '../prompt/breadthHint.js';
25
+ import { isParallelSafe, parallelExecutionEnabled } from './toolSafety.js';
26
+ import { dedupeToolCalls, parseArgumentsOrError, synthesizeOrphanResults, suggestSimilarToolName, } from './toolCallRecovery.js';
24
27
  const execPromise = promisify(exec);
25
28
  const IGNORED_DIRS = new Set(['node_modules', '.git', 'dist', '.DS_Store', '.next']);
29
+ const DEFAULT_CHILD_DRAIN_TIMEOUT_MS = 30_000;
26
30
  function parseJsonObject(text) {
27
31
  try {
28
32
  const parsed = JSON.parse(text);
@@ -49,11 +53,21 @@ function collectChildIds(value) {
49
53
  return [...new Set(ids)];
50
54
  }
51
55
  function trackChildObservation(toolName, args, resultText, spawned, waited) {
52
- if (toolName === 'spawn_agent' || toolName === 'spawn_agents') {
56
+ if (toolName === 'spawn_agent' ||
57
+ toolName === 'spawn_agents' ||
58
+ toolName === 'task_agent' ||
59
+ toolName === 'delegate_agent') {
53
60
  const ids = collectChildIds(parseJsonObject(resultText));
54
61
  for (const id of ids) {
55
62
  spawned.add(id);
56
- if (toolName === 'spawn_agent' && args?.wait)
63
+ // task_agent always blocks internally (wraps spawn with wait: true);
64
+ // spawn_agent({ wait: true }) is the legacy form. Both count as
65
+ // already-observed, so the child-drain guardrail doesn't double-wait.
66
+ // delegate_agent is fire-and-forget — must remain unwaited so the
67
+ // guardrail can force a wait_agents call before the parent answers.
68
+ if (toolName === 'task_agent')
69
+ waited.add(id);
70
+ else if (toolName === 'spawn_agent' && args?.wait)
57
71
  waited.add(id);
58
72
  }
59
73
  return;
@@ -70,6 +84,35 @@ function trackChildObservation(toolName, args, resultText, spawned, waited) {
70
84
  waited.add(id);
71
85
  }
72
86
  }
87
+ function parseChildDrainTimeouts(resultText) {
88
+ const parsed = parseJsonObject(resultText);
89
+ const agents = Array.isArray(parsed?.agents) ? parsed.agents : [];
90
+ return agents
91
+ .filter((entry) => {
92
+ return !!entry && typeof entry === 'object' && entry.status === 'timeout';
93
+ })
94
+ .map((entry) => ({
95
+ id: typeof entry.id === 'string' ? entry.id : '(unknown)',
96
+ role: typeof entry.role === 'string' ? entry.role : undefined,
97
+ status: 'timeout',
98
+ childStatus: typeof entry.childStatus === 'string' ? entry.childStatus : undefined,
99
+ summary: typeof entry.summary === 'string' ? entry.summary : undefined,
100
+ }));
101
+ }
102
+ function formatChildDrainTimeoutAnswer(timeouts) {
103
+ const lines = [
104
+ `Children still running after the bounded wait (${timeouts.length}):`,
105
+ ...timeouts.map((child) => {
106
+ const role = child.role ? ` role=${child.role}` : '';
107
+ const status = child.childStatus ? ` status=${child.childStatus}` : '';
108
+ const summary = child.summary ? ` — ${child.summary}` : '';
109
+ return `- ${child.id}${role}${status}${summary}`;
110
+ }),
111
+ '',
112
+ 'Use `/continue` to drain the pending child output and synthesize the result when it is ready.',
113
+ ];
114
+ return lines.join('\n');
115
+ }
73
116
  export const LOCAL_TOOLS = [
74
117
  {
75
118
  name: 'read_file',
@@ -187,6 +230,8 @@ export const LOCAL_TOOLS = [
187
230
  required: ['patch']
188
231
  }
189
232
  },
233
+ createTaskAgentTool(),
234
+ createDelegateAgentTool(),
190
235
  createSpawnAgentTool(),
191
236
  createSpawnAgentsTool(),
192
237
  createListAgentsTool(),
@@ -524,20 +569,22 @@ export class Agent {
524
569
  }
525
570
  rawMcpToolName(name) {
526
571
  const serverId = this.serverIdFromMcpToolName(name);
527
- return serverId ? name.slice(`mcp__${serverId}__`.length) : name;
572
+ return serverId ? name.slice(`mcp_${serverId}_`.length) : name;
528
573
  }
529
574
  serverIdFromMcpToolName(name) {
530
- if (!name.startsWith('mcp__'))
575
+ // Canonical single-underscore prefix: `mcp_<server>_<tool>`. The pool
576
+ // normalises to this shape at its boundary (0.3.8-R5).
577
+ if (!name.startsWith('mcp_'))
531
578
  return undefined;
532
- const rest = name.slice('mcp__'.length);
579
+ const rest = name.slice('mcp_'.length);
533
580
  if (typeof this.mcpClient.getServerIds === 'function') {
534
581
  const ids = this.mcpClient.getServerIds();
535
582
  for (const id of ids.sort((a, b) => b.length - a.length)) {
536
- if (rest.startsWith(`${id}__`))
583
+ if (rest.startsWith(`${id}_`))
537
584
  return id;
538
585
  }
539
586
  }
540
- const idx = rest.indexOf('__');
587
+ const idx = rest.indexOf('_');
541
588
  return idx >= 0 ? rest.slice(0, idx) : undefined;
542
589
  }
543
590
  allowedToolsForAccess() {
@@ -546,7 +593,7 @@ export class Agent {
546
593
  // a goal cleanly (goal_complete / goal_blocked) or observe state.
547
594
  const readOnly = new Set([
548
595
  'read_file', 'list_dir', 'grep_search', 'glob_files', 'fetch_url', 'web_search', 'update_plan',
549
- 'spawn_agent', 'spawn_agents', 'list_agents', 'wait_agent', 'wait_agents',
596
+ 'task_agent', 'delegate_agent', 'spawn_agent', 'spawn_agents', 'list_agents', 'wait_agent', 'wait_agents',
550
597
  'read_agent_transcript', 'close_agent', 'route_agent',
551
598
  'goal_complete', 'goal_blocked',
552
599
  // ask_user_choice doesn't touch the workspace — it's an interaction
@@ -609,7 +656,7 @@ export class Agent {
609
656
  const filteredLocalTools = LOCAL_TOOLS.filter(t => allowed.has(t.name));
610
657
  // Multi-MCP parity: expose every connected third-party MCP tool and the
611
658
  // model-safe BrainRouter MCP tools in one turn, using the pool's
612
- // `mcp__<serverId>__<tool>` namespaces. BrainRouter's auto-pipeline/admin
659
+ // `mcp_<serverId>_<tool>` namespaces. BrainRouter's auto-pipeline/admin
613
660
  // tools stay hidden because the CLI owns those flows.
614
661
  const visibleMcpTools = mcpTools.filter((t) => this.isModelVisibleMcpTool(t));
615
662
  const allTools = [...filteredLocalTools, ...visibleMcpTools];
@@ -701,7 +748,32 @@ export class Agent {
701
748
  const REPEAT_GUARD_LIMIT = 3;
702
749
  const spawnedChildIdsThisTurn = new Set();
703
750
  const waitedChildIdsThisTurn = new Set();
704
- let spawnWaitGuardInjected = false;
751
+ const buildOrchestrationContext = () => ({
752
+ workspaceRoot: this.workspaceRoot,
753
+ parentSessionKey: this.sessionKey,
754
+ parentAccessMode: this.accessMode,
755
+ // Thread the parent's trace context so child agents nest their
756
+ // per-turn spans under THIS turn instead of starting a fresh
757
+ // trace tree. Lets observability backends reconstruct fan-out.
758
+ parentTraceId: turnSpan.traceId,
759
+ parentSpanId: turnSpan.spanId,
760
+ parentAgentId: this.agentId,
761
+ parentTier: this.tier,
762
+ depth: this.agentDepth,
763
+ mcpClient: this.mcpClient,
764
+ llmConfig: this.llmConfig,
765
+ launchCwd: this.launchCwd,
766
+ recordOffload: (chars) => { this.memoryMetrics.offloadCharsAvoided += chars; },
767
+ onChildToolStart: (event) => {
768
+ callbacks.onChildToolStart?.(event);
769
+ },
770
+ onChildToolEnd: (event) => {
771
+ callbacks.onChildToolEnd?.(event);
772
+ },
773
+ onChildComplete: (event) => {
774
+ callbacks.onChildComplete?.(event);
775
+ },
776
+ });
705
777
  while (loopCount < maxLoops) {
706
778
  loopCount++;
707
779
  callbacks.onStatusUpdate(`Thinking (turn ${loopCount})...`);
@@ -711,7 +783,15 @@ export class Agent {
711
783
  // (which only refreshes the system prompt) also updates the next
712
784
  // request's reasoning_effort slot — no restart needed.
713
785
  const effort = resolveEffort(this.workspaceRoot).effort;
714
- response = await callOpenAI(this.llmConfig, this.chatHistory, allTools, { effort });
786
+ if (shouldUseAnthropicNative(this.llmConfig)) {
787
+ response = await callAnthropic(this.llmConfig, this.chatHistory, allTools, {
788
+ effort,
789
+ onThinking: (text) => callbacks.onStatusUpdate(`Thinking: ${text.slice(0, 200)}`),
790
+ });
791
+ }
792
+ else {
793
+ response = await callOpenAI(this.llmConfig, this.chatHistory, allTools, { effort });
794
+ }
715
795
  }
716
796
  catch (err) {
717
797
  throw new Error(`LLM Execution failed: ${err.message}`);
@@ -721,6 +801,21 @@ export class Agent {
721
801
  this.lastTurnUsage.completionTokens += response.usage.completion_tokens ?? 0;
722
802
  this.lastTurnUsage.calls += 1;
723
803
  }
804
+ // 0.3.8-I4: Strict tool-call recovery. Real-world LLMs (especially
805
+ // smaller / quantised) sometimes emit duplicate tool_call ids in a
806
+ // single response. If we let both through, OpenAI's next request 400s
807
+ // because one of the duplicates has no paired tool_result. Dedupe
808
+ // before pushing the assistant message — last occurrence wins (closest
809
+ // to the model's final intent).
810
+ // Adapted from deer-flow/backend/packages/harness/deerflow/agents/
811
+ // middlewares/dangling_tool_call_middleware.py — same well-formed
812
+ // history invariant, applied per-response instead of pre-request.
813
+ if (response.toolCalls && response.toolCalls.length > 0) {
814
+ const deduped = dedupeToolCalls(response.toolCalls, (id) => {
815
+ callbacks.onStatusUpdate(`Recovery: dropped duplicate tool_call id "${id}" (last occurrence wins).`);
816
+ });
817
+ response.toolCalls = deduped;
818
+ }
724
819
  // Record Assistant message
725
820
  const assistantMsg = { role: 'assistant', content: response.content };
726
821
  if (response.toolCalls) {
@@ -730,50 +825,75 @@ export class Agent {
730
825
  this.recordTranscript(assistantMsg);
731
826
  if (!response.toolCalls || response.toolCalls.length === 0) {
732
827
  const unobservedChildIds = [...spawnedChildIdsThisTurn].filter((id) => !waitedChildIdsThisTurn.has(id));
733
- if (unobservedChildIds.length > 0 && !spawnWaitGuardInjected) {
734
- spawnWaitGuardInjected = true;
735
- const waitTool = unobservedChildIds.length === 1 ? 'wait_agent' : 'wait_agents';
828
+ if (unobservedChildIds.length > 0) {
829
+ const drainTimeoutMs = Math.max(1, Number(process.env.BRAINROUTER_CHILD_DRAIN_TIMEOUT_MS) || DEFAULT_CHILD_DRAIN_TIMEOUT_MS);
830
+ const waitName = 'wait_agents';
831
+ const waitArgs = { ids: unobservedChildIds, timeoutMs: drainTimeoutMs };
832
+ callbacks.onStatusUpdate(`Auto-draining ${unobservedChildIds.length} spawned child agent${unobservedChildIds.length === 1 ? '' : 's'}...`);
833
+ callbacks.onToolStart(waitName, waitArgs);
834
+ this.lastTurnToolCalls += 1;
835
+ let waitResultText = '';
836
+ let waitFailed = false;
837
+ let waitSummary = '';
838
+ try {
839
+ waitResultText = await executeOrchestrationTool(waitName, waitArgs, buildOrchestrationContext());
840
+ waitSummary = getToolSummary(waitName, waitArgs, waitResultText);
841
+ trackChildObservation(waitName, waitArgs, waitResultText, spawnedChildIdsThisTurn, waitedChildIdsThisTurn);
842
+ }
843
+ catch (err) {
844
+ // Wait tool failure: surface the error text to the model so it can
845
+ // report failure rather than silently synthesizing stale output.
846
+ waitFailed = true;
847
+ waitResultText = `Tool execution failed: ${err?.message ?? String(err)}`;
848
+ waitSummary = err?.message ?? String(err);
849
+ }
850
+ callbacks.onToolEnd(waitName, { success: !waitFailed, summary: waitSummary, preview: !waitFailed ? getToolPreview(waitName, waitArgs, waitResultText) : undefined });
851
+ const timeouts = parseChildDrainTimeouts(waitResultText);
852
+ if (timeouts.length > 0) {
853
+ finalAnswer = formatChildDrainTimeoutAnswer(timeouts);
854
+ exitedCleanly = true;
855
+ break;
856
+ }
736
857
  const correction = [
737
- `You spawned ${unobservedChildIds.length} child agent${unobservedChildIds.length === 1 ? '' : 's'} in this turn but have not waited for their outputs yet.`,
738
- `Call \`${waitTool}\` now for: ${unobservedChildIds.join(', ')}.`,
739
- 'Do not tell the user you are waiting in prose; use the tool call, then synthesize the returned child output.',
740
- ].join(' ');
858
+ `Runtime child-drain guardrail auto-called \`${waitName}\` because this turn spawned child agents and the model tried to answer without observing them.`,
859
+ `Child wait result:\n${waitResultText}`,
860
+ 'Now synthesize the child output for the user. Do not say you are waiting unless the wait result timed out.',
861
+ ].join('\n\n');
741
862
  const guardMsg = { role: 'user', content: correction };
742
863
  this.chatHistory.push(guardMsg);
743
864
  this.recordTranscript(guardMsg);
744
- callbacks.onStatusUpdate(`Waiting required for ${unobservedChildIds.length} child agent${unobservedChildIds.length === 1 ? '' : 's'}...`);
745
865
  continue;
746
866
  }
747
867
  finalAnswer = response.content;
748
868
  exitedCleanly = true;
749
869
  break;
750
870
  }
751
- // Execute tool calls chosen by the LLM
752
- for (const tc of response.toolCalls) {
871
+ // Execute tool calls chosen by the LLM.
872
+ //
873
+ // 0.3.8-R4 — Independent read-only tool calls (read_file, list_dir,
874
+ // grep_search, glob_files, fetch_url, web_search, MCP memory reads)
875
+ // are dispatched concurrently when emitted in the same assistant
876
+ // response; consecutive serial tools (writes, shell, orchestration,
877
+ // unknown names) execute one-by-one in their original position to
878
+ // preserve causality. Tool-result messages are still appended to
879
+ // chatHistory in the ORIGINAL call order so the model's next turn
880
+ // sees a deterministic trace even if a later read settled first.
881
+ const candidates = [
882
+ ...LOCAL_TOOLS.map((lt) => lt.name),
883
+ ...mcpTools.map((t) => t.name).filter((n) => typeof n === 'string'),
884
+ ];
885
+ const toolCalls = response.toolCalls ?? [];
886
+ const normalizedNames = toolCalls.map((tc) => normalizeToolName(tc.function.name, candidates));
887
+ const parallelEnabled = parallelExecutionEnabled();
888
+ const safeFlags = toolCalls.map((_tc, idx) => parallelEnabled && isParallelSafe(normalizedNames[idx]));
889
+ const processOneToolCall = async (tc, name) => {
753
890
  this.lastTurnToolCalls += 1;
754
- // Normalize the tool name against both local and MCP candidates so
755
- // common LLM hallucinations like `Read_File` / `read-file` resolve
756
- // to `read_file` instead of falling through to `-32601 Unknown tool`.
757
- const rawName = tc.function.name;
758
- const candidates = [
759
- ...LOCAL_TOOLS.map((lt) => lt.name),
760
- ...mcpTools.map((t) => t.name).filter((n) => typeof n === 'string'),
761
- ];
762
- const name = normalizeToolName(rawName, candidates);
763
- // Parse JSON args. If the LLM produced malformed JSON, surface that
764
- // explicitly via the tool result so it can self-correct on the next
765
- // turn — the old fallback silently set args={} and the LLM had no
766
- // signal that anything was wrong.
767
- let args = {};
768
- let argParseError;
769
- try {
770
- args = typeof tc.function.arguments === 'string'
771
- ? JSON.parse(tc.function.arguments)
772
- : tc.function.arguments;
773
- }
774
- catch (e) {
775
- argParseError = `Tool argument JSON was malformed: ${e.message}. Re-issue the tool call with valid JSON arguments.`;
776
- }
891
+ // 0.3.8-I4: Use the strict-recovery helper so a malformed-arguments
892
+ // tool_call surfaces as a structured tool_result (with the raw
893
+ // arguments echoed back) instead of throwing out of the loop.
894
+ const parsedArgs = parseArgumentsOrError(tc);
895
+ let args = parsedArgs.args;
896
+ const argParseError = parsedArgs.error;
777
897
  const isLocal = LOCAL_TOOLS.some(lt => lt.name === name);
778
898
  callbacks.onToolStart(name, args);
779
899
  let resultText = '';
@@ -788,9 +908,7 @@ export class Agent {
788
908
  callbacks.onToolEnd(name, { success: false, summary });
789
909
  traceEvent('brainrouter.tool', { tool: name, ok: false, local: isLocal, session_key: this.sessionKey, guard: 'bad_args' }, { traceId: turnSpan.traceId, parentSpanId: turnSpan.spanId });
790
910
  const toolMsg = { role: 'tool', tool_call_id: tc.id, name, content: resultText, isError };
791
- this.chatHistory.push(toolMsg);
792
- this.recordTranscript(toolMsg);
793
- continue;
911
+ return { toolMsg, fullResultText: resultText };
794
912
  }
795
913
  // Repeat-loop guard: if the model has already issued this exact
796
914
  // (name, args) call REPEAT_GUARD_LIMIT times in this turn, short-
@@ -813,9 +931,7 @@ export class Agent {
813
931
  callbacks.onToolEnd(name, { success: false, summary });
814
932
  traceEvent('brainrouter.tool', { tool: name, ok: false, local: isLocal, session_key: this.sessionKey, guard: 'repeat' }, { traceId: turnSpan.traceId, parentSpanId: turnSpan.spanId });
815
933
  const toolMsg = { role: 'tool', tool_call_id: tc.id, name, content: resultText, isError };
816
- this.chatHistory.push(toolMsg);
817
- this.recordTranscript(toolMsg);
818
- continue;
934
+ return { toolMsg, fullResultText: resultText };
819
935
  }
820
936
  recentToolSignatures.push(signature);
821
937
  // Keep the window small so the guard only blocks tight loops, not
@@ -853,31 +969,7 @@ export class Agent {
853
969
  throw new Error(`Tool "${name}" is not permitted in access mode "${this.accessMode}".`);
854
970
  }
855
971
  if (isOrchestrationToolName(name)) {
856
- resultText = await executeOrchestrationTool(name, args, {
857
- workspaceRoot: this.workspaceRoot,
858
- parentSessionKey: this.sessionKey,
859
- parentAccessMode: this.accessMode,
860
- // Thread the parent's trace context so child agents nest their
861
- // per-turn spans under THIS turn instead of starting a fresh
862
- // trace tree. Lets observability backends reconstruct fan-out.
863
- parentTraceId: turnSpan.traceId,
864
- parentSpanId: turnSpan.spanId,
865
- parentAgentId: this.agentId,
866
- parentTier: this.tier,
867
- depth: this.agentDepth,
868
- mcpClient: this.mcpClient,
869
- llmConfig: this.llmConfig,
870
- launchCwd: this.launchCwd,
871
- recordOffload: (chars) => { this.memoryMetrics.offloadCharsAvoided += chars; },
872
- onChildToolEvent: (event) => {
873
- // Surface to the REPL via the same onToolStart channel so the
874
- // user sees child activity live, prefixed with the child id.
875
- callbacks.onToolStart(`${event.role}:${event.childId} → ${event.tool}`, { ok: event.ok, summary: event.summary });
876
- },
877
- onChildComplete: (event) => {
878
- callbacks.onChildComplete?.(event);
879
- },
880
- });
972
+ resultText = await executeOrchestrationTool(name, args, buildOrchestrationContext());
881
973
  summary = getToolSummary(name, args, resultText);
882
974
  trackChildObservation(name, args, resultText, spawnedChildIdsThisTurn, waitedChildIdsThisTurn);
883
975
  }
@@ -909,8 +1001,14 @@ export class Agent {
909
1001
  // the next iteration self-corrects instead of retrying garbage.
910
1002
  if (/-32601|Unknown tool|MethodNotFound/i.test(message)) {
911
1003
  const hint = explainUnknownToolName(name);
912
- resultText = `Tool "${name}" does not exist. ${hint}\nUnderlying error: ${message}`;
913
- summary = `unknown tool — ${hint.slice(0, 120)}`;
1004
+ // 0.3.8-I4: surface a "did you mean: X?" suggestion when the
1005
+ // LLM-emitted name normalises to a real registered tool (case,
1006
+ // separator, or alias mismatch). This is cheaper for the model
1007
+ // to recover from than the generic skill-vs-tool explanation.
1008
+ const didYouMean = suggestSimilarToolName(name, candidates, normalizeToolName);
1009
+ const suggestionLine = didYouMean ? `did you mean: ${didYouMean}?\n` : '';
1010
+ resultText = `Tool "${name}" does not exist. ${suggestionLine}${hint}\nUnderlying error: ${message}`;
1011
+ summary = didYouMean ? `unknown tool — did you mean ${didYouMean}?` : `unknown tool — ${hint.slice(0, 120)}`;
914
1012
  }
915
1013
  else {
916
1014
  resultText = `Tool execution failed: ${message}`;
@@ -954,10 +1052,89 @@ export class Agent {
954
1052
  content: clampedContent,
955
1053
  isError
956
1054
  };
957
- this.chatHistory.push(toolMsg);
1055
+ // Return; the caller pushes to chatHistory in original call order
1056
+ // (NOT settle order) and records the FULL untruncated result for
1057
+ // /transcript. Doing the push here would let parallel batches land
1058
+ // in finish order, which the LLM's next turn would see as a
1059
+ // non-deterministic trace.
1060
+ return { toolMsg, fullResultText: resultText };
1061
+ };
1062
+ // Partition the tool_calls into runs of consecutive parallel-safe
1063
+ // calls separated by single serial calls. Each run preserves original
1064
+ // position; safe runs of size ≥ 2 dispatch with Promise.allSettled,
1065
+ // serial runs (and unknown-tool fallbacks) execute one-by-one. The
1066
+ // result array is indexed by original call position so the
1067
+ // chatHistory push at the end is deterministic.
1068
+ const processed = new Array(toolCalls.length);
1069
+ const runSafeBatch = async (startIdx, endIdx) => {
1070
+ // [startIdx, endIdx) — at least 1 entry; size > 1 means concurrent.
1071
+ // Calling `processOneToolCall` synchronously schedules every batch
1072
+ // member's onToolStart + repeat-guard prep BEFORE any await yields,
1073
+ // so the user sees N "in flight" tool rows immediately. Promise.
1074
+ // allSettled then waits for all to settle; any rejection is
1075
+ // translated into a "Tool execution failed" envelope so the LLM's
1076
+ // next turn still sees a tool_result for every original tool_call_id.
1077
+ const slice = toolCalls.slice(startIdx, endIdx);
1078
+ const promises = slice.map((tc, j) => processOneToolCall(tc, normalizedNames[startIdx + j]));
1079
+ const settled = await Promise.allSettled(promises);
1080
+ for (let k = 0; k < settled.length; k++) {
1081
+ const s = settled[k];
1082
+ if (s.status === 'fulfilled') {
1083
+ processed[startIdx + k] = s.value;
1084
+ }
1085
+ else {
1086
+ const tc = slice[k];
1087
+ const name = normalizedNames[startIdx + k];
1088
+ const message = s.reason?.message ?? String(s.reason);
1089
+ const resultText = `Tool execution failed: ${message}`;
1090
+ processed[startIdx + k] = {
1091
+ toolMsg: { role: 'tool', tool_call_id: tc.id, name, content: resultText, isError: true },
1092
+ fullResultText: resultText,
1093
+ };
1094
+ }
1095
+ }
1096
+ };
1097
+ let i = 0;
1098
+ while (i < toolCalls.length) {
1099
+ if (safeFlags[i]) {
1100
+ let j = i + 1;
1101
+ while (j < toolCalls.length && safeFlags[j])
1102
+ j++;
1103
+ await runSafeBatch(i, j);
1104
+ i = j;
1105
+ }
1106
+ else {
1107
+ // Serial slot — run in isolation so any state mutation (write,
1108
+ // spawn_agent, update_plan) completes before the next call starts.
1109
+ processed[i] = await processOneToolCall(toolCalls[i], normalizedNames[i]);
1110
+ i++;
1111
+ }
1112
+ }
1113
+ for (const entry of processed) {
1114
+ if (!entry)
1115
+ continue;
1116
+ this.chatHistory.push(entry.toolMsg);
958
1117
  // Record the FULL untruncated result so /transcript shows everything,
959
1118
  // even when the LLM-facing copy was clamped.
960
- this.recordTranscript({ ...toolMsg, content: resultText });
1119
+ this.recordTranscript({ ...entry.toolMsg, content: entry.fullResultText });
1120
+ }
1121
+ // 0.3.8-I4: orphan safety net. Even after dedupe + the per-call
1122
+ // recovery branches above, a tool_call without a paired tool_result
1123
+ // would 400 the next OpenAI request. Synthesize ERROR envelopes for
1124
+ // any unmatched id so strict tool_call ↔ tool_result pairing is
1125
+ // preserved. Synthetic content is a plain `ERROR: …` string so the
1126
+ // R1 child-drain guardrail's parseJsonObject(resultText) returns
1127
+ // undefined and we don't accidentally claim a child was spawned.
1128
+ // Synthetics do NOT bump lastTurnToolCalls — they aren't real
1129
+ // dispatches, just a well-formed-history fix.
1130
+ // Adapted from deer-flow/backend/packages/harness/deerflow/agents/
1131
+ // middlewares/dangling_tool_call_middleware.py.
1132
+ const producedResults = processed.filter((p) => !!p).map((p) => p.toolMsg);
1133
+ const orphans = synthesizeOrphanResults(toolCalls, producedResults);
1134
+ for (const synthetic of orphans) {
1135
+ this.chatHistory.push(synthetic);
1136
+ this.recordTranscript(synthetic);
1137
+ callbacks.onStatusUpdate(`Recovery: synthesized placeholder for orphan tool_call ${synthetic.tool_call_id}.`);
961
1138
  }
962
1139
  }
963
1140
  // Normalize the final answer FIRST so every exit path (loop limit, empty
@@ -1212,7 +1389,7 @@ export class Agent {
1212
1389
  try {
1213
1390
  const res = await fetch(url, {
1214
1391
  headers: {
1215
- 'User-Agent': 'Mozilla/5.0 (compatible; BrainRouterCLI/0.3.7)'
1392
+ 'User-Agent': 'Mozilla/5.0 (compatible; BrainRouterCLI/0.3.8)'
1216
1393
  }
1217
1394
  });
1218
1395
  if (!res.ok) {
@@ -1820,7 +1997,7 @@ async function runWebSearch(query, maxResults) {
1820
1997
  }
1821
1998
  try {
1822
1999
  const url = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
1823
- const res = await fetch(url, { headers: { 'User-Agent': 'BrainRouterCLI/0.3.7' } });
2000
+ const res = await fetch(url, { headers: { 'User-Agent': 'BrainRouterCLI/0.3.8' } });
1824
2001
  if (!res.ok) {
1825
2002
  return `web_search failed: DuckDuckGo returned ${res.status} ${res.statusText}.`;
1826
2003
  }
@@ -0,0 +1,57 @@
1
+ export interface ToolCallLike {
2
+ id: string;
3
+ type?: string;
4
+ function: {
5
+ name: string;
6
+ arguments: string | object;
7
+ };
8
+ }
9
+ export interface ToolResultMessage {
10
+ role: 'tool';
11
+ tool_call_id: string;
12
+ name: string;
13
+ content: string;
14
+ isError?: boolean;
15
+ }
16
+ /**
17
+ * Drop duplicate tool_call ids inside a single assistant response. Keeps the
18
+ * LAST occurrence (closest to the model's final intent). Calls without a
19
+ * string id are passed through unchanged — the orphan safety net will catch
20
+ * them later.
21
+ *
22
+ * `onDuplicate` is invoked once per dropped duplicate so callers can log a
23
+ * warning without coupling this module to a logger.
24
+ */
25
+ export declare function dedupeToolCalls<T extends ToolCallLike>(calls: T[] | undefined | null, onDuplicate?: (id: string, droppedIndex: number) => void): T[];
26
+ export interface ParsedArguments {
27
+ args: Record<string, any>;
28
+ /** Defined iff the LLM emitted malformed JSON; ready-to-use error string for a tool_result envelope. */
29
+ error?: string;
30
+ rawArguments: string;
31
+ }
32
+ /**
33
+ * Try-parse `tool_call.function.arguments`. On parse failure return a
34
+ * structured error string instead of throwing, so the caller can attach a
35
+ * synthetic tool_result that the next model turn can read.
36
+ */
37
+ export declare function parseArgumentsOrError(call: ToolCallLike): ParsedArguments;
38
+ /**
39
+ * For every tool_call in `calls` that has no matching tool_result in
40
+ * `results`, build a synthetic tool message so the next LLM request stays
41
+ * well-formed (OpenAI strictly requires tool_call ↔ tool_result pairing).
42
+ *
43
+ * IMPORTANT: the synthetic `content` MUST start with `ERROR:` and be a plain
44
+ * string. The agent runtime's R1 child-drain guardrail tracks spawned
45
+ * children by `parseJsonObject(resultText)` on tool results — if the
46
+ * synthetic envelope parses as JSON with an `id` field, the guardrail would
47
+ * incorrectly think a child agent was spawned and try to wait on it.
48
+ */
49
+ export declare function synthesizeOrphanResults<T extends ToolCallLike>(calls: T[] | undefined | null, results: ToolResultMessage[]): ToolResultMessage[];
50
+ /**
51
+ * Use the caller's existing `normalizeToolName` to surface a "did you mean"
52
+ * suggestion when the LLM emits a tool name that doesn't exist as-is but
53
+ * normalizes to a real registered tool. Tolerates the single-underscore
54
+ * `mcp_<server>_<tool>` prefix (R5 convention) since `normalizeToolName`
55
+ * matches by flattened form.
56
+ */
57
+ export declare function suggestSimilarToolName(raw: string, candidates: string[], normalize: (raw: string, candidates: string[]) => string): string | undefined;