npm - @kinqs/brainrouter-cli - Versions diffs - 0.3.6 → 0.3.8 - Mend

@kinqs/brainrouter-cli 0.3.6 → 0.3.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (129)

package/README.md +29 -52
package/agents/architect.json +18 -0
package/agents/explorer.json +18 -0
package/agents/reviewer.json +18 -0
package/agents/verifier.json +18 -0
package/agents/worker.json +18 -0
package/changelog/0.2.0.md +15 -0
package/changelog/0.3.0.md +20 -0
package/changelog/0.3.1.md +22 -0
package/changelog/0.3.2.md +15 -0
package/changelog/0.3.3.md +19 -0
package/changelog/0.3.4.md +20 -0
package/changelog/0.3.5.md +9 -0
package/changelog/0.3.6.md +9 -0
package/changelog/0.3.7.md +20 -0
package/changelog/0.3.8.md +30 -0
package/changelog/README.md +41 -0
package/dist/agent/agent.d.ts +34 -1
package/dist/agent/agent.js +372 -79
package/dist/agent/toolCallRecovery.d.ts +57 -0
package/dist/agent/toolCallRecovery.js +130 -0
package/dist/agent/toolSafety.d.ts +17 -0
package/dist/agent/toolSafety.js +102 -0
package/dist/cli/banner.d.ts +20 -0
package/dist/cli/banner.js +47 -14
package/dist/cli/cliPrompt.d.ts +40 -3
package/dist/cli/cliPrompt.js +117 -25
package/dist/cli/commands/_context.d.ts +3 -1
package/dist/cli/commands/_helpers.d.ts +1 -1
package/dist/cli/commands/config.d.ts +46 -0
package/dist/cli/commands/config.js +1042 -0
package/dist/cli/commands/init.d.ts +20 -0
package/dist/cli/commands/init.js +64 -0
package/dist/cli/commands/login.d.ts +13 -0
package/dist/cli/commands/login.js +179 -0
package/dist/cli/commands/mcp.d.ts +13 -11
package/dist/cli/commands/mcp.js +261 -74
package/dist/cli/commands/mcpInstall.d.ts +20 -0
package/dist/cli/commands/mcpInstall.js +87 -0
package/dist/cli/commands/orchestration.js +51 -0
package/dist/cli/commands/releaseNotes.d.ts +24 -0
package/dist/cli/commands/releaseNotes.js +109 -0
package/dist/cli/commands/schedule.d.ts +18 -0
package/dist/cli/commands/schedule.js +189 -0
package/dist/cli/commands/ui.js +119 -60
package/dist/cli/commands/workflow.d.ts +2 -0
package/dist/cli/commands/workflow.js +54 -8
package/dist/cli/ink/ChatApp.d.ts +206 -0
package/dist/cli/ink/ChatApp.js +493 -0
package/dist/cli/ink/Frame.d.ts +26 -0
package/dist/cli/ink/Frame.js +5 -0
package/dist/cli/ink/Picker.d.ts +71 -0
package/dist/cli/ink/Picker.js +168 -0
package/dist/cli/ink/SlashPalette.d.ts +51 -0
package/dist/cli/ink/SlashPalette.js +136 -0
package/dist/cli/ink/TextField.d.ts +34 -0
package/dist/cli/ink/TextField.js +47 -0
package/dist/cli/ink/WizardApp.d.ts +7 -0
package/dist/cli/ink/WizardApp.js +422 -0
package/dist/cli/ink/ambientChat.d.ts +34 -0
package/dist/cli/ink/ambientChat.js +7 -0
package/dist/cli/ink/consoleCapture.d.ts +11 -0
package/dist/cli/ink/consoleCapture.js +33 -0
package/dist/cli/ink/markdownRender.d.ts +41 -0
package/dist/cli/ink/markdownRender.js +278 -0
package/dist/cli/ink/renderWithResizeClear.d.ts +14 -0
package/dist/cli/ink/renderWithResizeClear.js +33 -0
package/dist/cli/ink/runChat.d.ts +34 -0
package/dist/cli/ink/runChat.js +682 -0
package/dist/cli/ink/runPicker.d.ts +31 -0
package/dist/cli/ink/runPicker.js +139 -0
package/dist/cli/ink/runSlashPalette.d.ts +23 -0
package/dist/cli/ink/runSlashPalette.js +33 -0
package/dist/cli/ink/runWizard.d.ts +22 -0
package/dist/cli/ink/runWizard.js +133 -0
package/dist/cli/ink/stdinHandoff.d.ts +51 -0
package/dist/cli/ink/stdinHandoff.js +78 -0
package/dist/cli/ink/toolFormat.d.ts +75 -0
package/dist/cli/ink/toolFormat.js +206 -0
package/dist/cli/ink/useTerminalSize.d.ts +35 -0
package/dist/cli/ink/useTerminalSize.js +26 -0
package/dist/cli/repl.d.ts +25 -3
package/dist/cli/repl.js +52 -714
package/dist/cli/slashSuggest.d.ts +32 -0
package/dist/cli/slashSuggest.js +146 -0
package/dist/cli/wizard/modelsApi.d.ts +72 -0
package/dist/cli/wizard/modelsApi.js +166 -0
package/dist/cli/wizard/picker.d.ts +202 -0
package/dist/cli/wizard/picker.js +547 -0
package/dist/cli/wizard/providers.d.ts +86 -0
package/dist/cli/wizard/providers.js +190 -0
package/dist/cli/wizard/runner.d.ts +13 -0
package/dist/cli/wizard/runner.js +488 -0
package/dist/cli/wizard/types.d.ts +122 -0
package/dist/cli/wizard/types.js +109 -0
package/dist/config/config.d.ts +13 -1
package/dist/config/config.js +45 -3
package/dist/index.js +157 -206
package/dist/memory/briefing.d.ts +1 -1
package/dist/memory/briefing.js +4 -4
package/dist/memory/consolidation.d.ts +1 -1
package/dist/orchestration/agentRegistry.d.ts +36 -0
package/dist/orchestration/agentRegistry.js +64 -0
package/dist/orchestration/orchestrator.d.ts +7 -0
package/dist/orchestration/orchestrator.js +2 -0
package/dist/orchestration/tools.d.ts +105 -3
package/dist/orchestration/tools.js +167 -8
package/dist/prompt/skillCatalog.d.ts +11 -0
package/dist/prompt/skillCatalog.js +134 -0
package/dist/prompt/skillRunner.d.ts +2 -2
package/dist/prompt/skillRunner.js +2 -31
package/dist/prompt/systemPrompt.js +7 -2
package/dist/runtime/anthropicAdapter.d.ts +100 -0
package/dist/runtime/anthropicAdapter.js +293 -0
package/dist/runtime/cronParser.d.ts +23 -0
package/dist/runtime/cronParser.js +122 -0
package/dist/runtime/mcpClient.js +14 -11
package/dist/runtime/mcpPool.d.ts +170 -0
package/dist/runtime/mcpPool.js +442 -0
package/dist/runtime/mcpUtils.d.ts +17 -1
package/dist/runtime/mcpUtils.js +23 -0
package/dist/runtime/scheduleTicker.d.ts +33 -0
package/dist/runtime/scheduleTicker.js +99 -0
package/dist/runtime/vendorSnippets.d.ts +45 -0
package/dist/runtime/vendorSnippets.js +153 -0
package/dist/state/scheduleStore.d.ts +37 -0
package/dist/state/scheduleStore.js +64 -0
package/package.json +14 -5
package/.env.example +0 -116

package/dist/agent/agent.js CHANGED

@@ -8,7 +8,7 @@ import { askChoice, askYesNo, getActiveReadline, NoTTYError } from '../cli/cliPr
 import { appendTranscriptEntry } from '../state/sessionStore.js';
 import { buildSystemPrompt, loadWorkspaceInstructionSummary } from '../prompt/systemPrompt.js';
 import { formatPlan, readPlan, updatePlan } from '../state/taskStore.js';
-import { createSpawnAgentTool, createSpawnAgentsTool, createListAgentsTool, createWaitAgentTool, createWaitAgentsTool, createReadAgentTranscriptTool, createCloseAgentTool, createRouteAgentTool, executeOrchestrationTool, isOrchestrationToolName, } from '../orchestration/tools.js';
+import { createTaskAgentTool, createDelegateAgentTool, createSpawnAgentTool, createSpawnAgentsTool, createListAgentsTool, createWaitAgentTool, createWaitAgentsTool, createReadAgentTranscriptTool, createCloseAgentTool, createRouteAgentTool, executeOrchestrationTool, isOrchestrationToolName, } from '../orchestration/tools.js';
 import { buildMemoryBriefing, selectCitedRecordIds } from '../memory/briefing.js';
 import { callMcpTool, extractToolText } from '../runtime/mcpUtils.js';
 import { acquireLLMSlot } from '../runtime/llmSemaphore.js';
@@ -17,12 +17,102 @@ import { runHooks } from '../state/hooksStore.js';
 import { resolveSandboxConfig, runShell } from '../runtime/sandbox.js';
 import { isDangerousCommand, resolveRunCommandApproval } from '../runtime/dangerousCommand.js';
 import { readPreferences, resolveEffort } from '../state/preferencesStore.js';
+import { shouldUseAnthropicNative, callAnthropic } from '../runtime/anthropicAdapter.js';
 import { startSpan, traceEvent } from '../runtime/tracing.js';
 import { buildHookifyContext, evaluateHookify, listHookifyRules } from '../state/hookifyStore.js';
 import { renderCompactSystemMessage, runCompaction } from '../prompt/compactor.js';
 import { buildFanOutHint, shouldSuggestFanOut } from '../prompt/breadthHint.js';
+import { isParallelSafe, parallelExecutionEnabled } from './toolSafety.js';
+import { dedupeToolCalls, parseArgumentsOrError, synthesizeOrphanResults, suggestSimilarToolName, } from './toolCallRecovery.js';
 const execPromise = promisify(exec);
 const IGNORED_DIRS = new Set(['node_modules', '.git', 'dist', '.DS_Store', '.next']);
+const DEFAULT_CHILD_DRAIN_TIMEOUT_MS = 30_000;
+function parseJsonObject(text) {
+    try {
+        const parsed = JSON.parse(text);
+        return parsed && typeof parsed === 'object' ? parsed : undefined;
+    }
+    catch {
+        return undefined;
+    }
+}
+function collectChildIds(value) {
+    if (!value || typeof value !== 'object')
+        return [];
+    const ids = [];
+    const maybeRecord = value;
+    if (typeof maybeRecord.id === 'string')
+        ids.push(maybeRecord.id);
+    if (Array.isArray(maybeRecord.agents)) {
+        for (const entry of maybeRecord.agents) {
+            if (entry && typeof entry === 'object' && typeof entry.id === 'string') {
+                ids.push(entry.id);
+            }
+        }
+    }
+    return [...new Set(ids)];
+}
+function trackChildObservation(toolName, args, resultText, spawned, waited) {
+    if (toolName === 'spawn_agent' ||
+        toolName === 'spawn_agents' ||
+        toolName === 'task_agent' ||
+        toolName === 'delegate_agent') {
+        const ids = collectChildIds(parseJsonObject(resultText));
+        for (const id of ids) {
+            spawned.add(id);
+            // task_agent always blocks internally (wraps spawn with wait: true);
+            // spawn_agent({ wait: true }) is the legacy form. Both count as
+            // already-observed, so the child-drain guardrail doesn't double-wait.
+            // delegate_agent is fire-and-forget — must remain unwaited so the
+            // guardrail can force a wait_agents call before the parent answers.
+            if (toolName === 'task_agent')
+                waited.add(id);
+            else if (toolName === 'spawn_agent' && args?.wait)
+                waited.add(id);
+        }
+        return;
+    }
+    if (toolName === 'wait_agent') {
+        const id = typeof args?.id === 'string' ? args.id : undefined;
+        if (id)
+            waited.add(id);
+        return;
+    }
+    if (toolName === 'wait_agents') {
+        const ids = Array.isArray(args?.ids) ? args.ids.filter((id) => typeof id === 'string') : [];
+        for (const id of ids)
+            waited.add(id);
+    }
+}
+function parseChildDrainTimeouts(resultText) {
+    const parsed = parseJsonObject(resultText);
+    const agents = Array.isArray(parsed?.agents) ? parsed.agents : [];
+    return agents
+        .filter((entry) => {
+        return !!entry && typeof entry === 'object' && entry.status === 'timeout';
+    })
+        .map((entry) => ({
+        id: typeof entry.id === 'string' ? entry.id : '(unknown)',
+        role: typeof entry.role === 'string' ? entry.role : undefined,
+        status: 'timeout',
+        childStatus: typeof entry.childStatus === 'string' ? entry.childStatus : undefined,
+        summary: typeof entry.summary === 'string' ? entry.summary : undefined,
+    }));
+}
+function formatChildDrainTimeoutAnswer(timeouts) {
+    const lines = [
+        `Children still running after the bounded wait (${timeouts.length}):`,
+        ...timeouts.map((child) => {
+            const role = child.role ? ` role=${child.role}` : '';
+            const status = child.childStatus ? ` status=${child.childStatus}` : '';
+            const summary = child.summary ? ` — ${child.summary}` : '';
+            return `- ${child.id}${role}${status}${summary}`;
+        }),
+        '',
+        'Use `/continue` to drain the pending child output and synthesize the result when it is ready.',
+    ];
+    return lines.join('\n');
+}
 export const LOCAL_TOOLS = [
     {
         name: 'read_file',
@@ -140,6 +230,8 @@ export const LOCAL_TOOLS = [
             required: ['patch']
         }
     },
+    createTaskAgentTool(),
+    createDelegateAgentTool(),
     createSpawnAgentTool(),
     createSpawnAgentsTool(),
     createListAgentsTool(),
@@ -414,6 +506,10 @@ export class Agent {
     agentId = `agent-${Math.random().toString(36).slice(2, 8)}`;
     /** agent_id of the parent (set by spawn_agent for children). */
     parentAgentId;
+    /** Agent tier — forwarded to OrchestrationContext so grandchildren can inherit hierarchy checks. */
+    tier;
+    /** Spawn-chain depth (0 = direct chat-root child). Forwarded to hierarchy checks. */
+    agentDepth;
     constructor(mcpClient, llmConfig, options) {
         this.mcpClient = mcpClient;
         this.llmConfig = llmConfig;
@@ -437,6 +533,8 @@ export class Agent {
         this.systemPromptOverride = options.systemPromptOverride;
         this.parentTraceId = options.parentTraceId;
         this.parentSpanId = options.parentSpanId;
+        this.tier = options.tier;
+        this.agentDepth = options.agentDepth ?? 0;
     }
     /** Expose for orchestration so spawn_agent can record the parent linkage. */
     getAgentId() {
@@ -446,13 +544,56 @@ export class Agent {
     setParentAgentId(id) {
         this.parentAgentId = id;
     }
+    isModelVisibleMcpTool(tool) {
+        const hiddenBrainrouterTools = new Set([
+            'memory_capture_turn',
+            'memory_mark_cited',
+            'memory_resolve_session',
+            'memory_register_skill_hints',
+            'memory_hook_register',
+            'memory_hook_status',
+        ]);
+        const name = String(tool?.name ?? '');
+        const rawName = String(tool?.__rawName ?? this.rawMcpToolName(name));
+        if (!hiddenBrainrouterTools.has(rawName))
+            return true;
+        const serverId = typeof tool?.__serverId === 'string'
+            ? tool.__serverId
+            : this.serverIdFromMcpToolName(name);
+        const status = serverId && typeof this.mcpClient.getStatus === 'function'
+            ? this.mcpClient.getStatus(serverId)
+            : undefined;
+        // Hide only BrainRouter auto-pipeline/admin tools. Third-party MCP tools
+        // with coincidentally similar names stay visible.
+        return status?.identity !== 'brainrouter';
+    }
+    rawMcpToolName(name) {
+        const serverId = this.serverIdFromMcpToolName(name);
+        return serverId ? name.slice(`mcp_${serverId}_`.length) : name;
+    }
+    serverIdFromMcpToolName(name) {
+        // Canonical single-underscore prefix: `mcp_<server>_<tool>`. The pool
+        // normalises to this shape at its boundary (0.3.8-R5).
+        if (!name.startsWith('mcp_'))
+            return undefined;
+        const rest = name.slice('mcp_'.length);
+        if (typeof this.mcpClient.getServerIds === 'function') {
+            const ids = this.mcpClient.getServerIds();
+            for (const id of ids.sort((a, b) => b.length - a.length)) {
+                if (rest.startsWith(`${id}_`))
+                    return id;
+            }
+        }
+        const idx = rest.indexOf('_');
+        return idx >= 0 ? rest.slice(0, idx) : undefined;
+    }
     allowedToolsForAccess() {
         // Lifecycle / inspection tools are always available regardless of access
         // mode — they don't touch the workspace and the agent needs them to end
         // a goal cleanly (goal_complete / goal_blocked) or observe state.
         const readOnly = new Set([
             'read_file', 'list_dir', 'grep_search', 'glob_files', 'fetch_url', 'web_search', 'update_plan',
-            'spawn_agent', 'spawn_agents', 'list_agents', 'wait_agent', 'wait_agents',
+            'task_agent', 'delegate_agent', 'spawn_agent', 'spawn_agents', 'list_agents', 'wait_agent', 'wait_agents',
             'read_agent_transcript', 'close_agent', 'route_agent',
             'goal_complete', 'goal_blocked',
             // ask_user_choice doesn't touch the workspace — it's an interaction
@@ -504,27 +645,20 @@ export class Agent {
         // whenever the inventory shape changed (online → offline or vice
         // versa) so the next LLM call sees the correct system message.
         const prevTools = this.lastKnownMcpTools?.map((t) => t.name).sort().join(',');
-        this.lastKnownMcpTools = mcpTools.map((t) => ({ name: t.name }));
+        this.lastKnownMcpTools = mcpTools.map((t) => ({
+            name: String(t?.__rawName ?? this.rawMcpToolName(String(t?.name ?? ''))),
+        }));
         const newTools = this.lastKnownMcpTools.map((t) => t.name).sort().join(',');
         if (prevTools !== newTools && this.chatHistory.length > 0 && this.chatHistory[0].role === 'system') {
             this.chatHistory[0] = this.createSystemMessage();
         }
         const allowed = this.allowedToolsForAccess();
         const filteredLocalTools = LOCAL_TOOLS.filter(t => allowed.has(t.name));
-        // Hide MCP tools we already call automatically. Small models otherwise
-        // try to invoke them with the wrong arguments (most commonly
-        // memory_capture_turn — "Required, Required" comes from missing
-        // sessionKey + messages). These tools are still callable; the CLI just
-        // doesn't tell the LLM about them since the auto-pipeline owns them.
-        const HIDDEN_FROM_LLM = new Set([
-            'memory_capture_turn', // called automatically post-turn
-            'memory_mark_cited', // called automatically with real citation IDs
-            'memory_resolve_session', // called automatically at bootstrap
-            'memory_register_skill_hints', // boot-time, not turn-level
-            'memory_hook_register', // managed via /hooks
-            'memory_hook_status',
-        ]);
-        const visibleMcpTools = mcpTools.filter((t) => !HIDDEN_FROM_LLM.has(t.name));
+        // Multi-MCP parity: expose every connected third-party MCP tool and the
+        // model-safe BrainRouter MCP tools in one turn, using the pool's
+        // `mcp_<serverId>_<tool>` namespaces. BrainRouter's auto-pipeline/admin
+        // tools stay hidden because the CLI owns those flows.
+        const visibleMcpTools = mcpTools.filter((t) => this.isModelVisibleMcpTool(t));
         const allTools = [...filteredLocalTools, ...visibleMcpTools];
         callbacks.onStatusUpdate(`Loaded ${filteredLocalTools.length} local tools and ${mcpTools.length} MCP tools.`);
         // Auto-compact: if the chat history has grown past the configured token
@@ -612,6 +746,34 @@ export class Agent {
         // signatures so we can interrupt the loop with corrective feedback.
         const recentToolSignatures = [];
         const REPEAT_GUARD_LIMIT = 3;
+        const spawnedChildIdsThisTurn = new Set();
+        const waitedChildIdsThisTurn = new Set();
+        const buildOrchestrationContext = () => ({
+            workspaceRoot: this.workspaceRoot,
+            parentSessionKey: this.sessionKey,
+            parentAccessMode: this.accessMode,
+            // Thread the parent's trace context so child agents nest their
+            // per-turn spans under THIS turn instead of starting a fresh
+            // trace tree. Lets observability backends reconstruct fan-out.
+            parentTraceId: turnSpan.traceId,
+            parentSpanId: turnSpan.spanId,
+            parentAgentId: this.agentId,
+            parentTier: this.tier,
+            depth: this.agentDepth,
+            mcpClient: this.mcpClient,
+            llmConfig: this.llmConfig,
+            launchCwd: this.launchCwd,
+            recordOffload: (chars) => { this.memoryMetrics.offloadCharsAvoided += chars; },
+            onChildToolStart: (event) => {
+                callbacks.onChildToolStart?.(event);
+            },
+            onChildToolEnd: (event) => {
+                callbacks.onChildToolEnd?.(event);
+            },
+            onChildComplete: (event) => {
+                callbacks.onChildComplete?.(event);
+            },
+        });
         while (loopCount < maxLoops) {
             loopCount++;
             callbacks.onStatusUpdate(`Thinking (turn ${loopCount})...`);
@@ -621,7 +783,15 @@ export class Agent {
                 // (which only refreshes the system prompt) also updates the next
                 // request's reasoning_effort slot — no restart needed.
                 const effort = resolveEffort(this.workspaceRoot).effort;
-                response = await callOpenAI(this.llmConfig, this.chatHistory, allTools, { effort });
+                if (shouldUseAnthropicNative(this.llmConfig)) {
+                    response = await callAnthropic(this.llmConfig, this.chatHistory, allTools, {
+                        effort,
+                        onThinking: (text) => callbacks.onStatusUpdate(`Thinking: ${text.slice(0, 200)}`),
+                    });
+                }
+                else {
+                    response = await callOpenAI(this.llmConfig, this.chatHistory, allTools, { effort });
+                }
             }
             catch (err) {
                 throw new Error(`LLM Execution failed: ${err.message}`);
@@ -631,6 +801,21 @@ export class Agent {
                 this.lastTurnUsage.completionTokens += response.usage.completion_tokens ?? 0;
                 this.lastTurnUsage.calls += 1;
             }
+            // 0.3.8-I4: Strict tool-call recovery. Real-world LLMs (especially
+            // smaller / quantised) sometimes emit duplicate tool_call ids in a
+            // single response. If we let both through, OpenAI's next request 400s
+            // because one of the duplicates has no paired tool_result. Dedupe
+            // before pushing the assistant message — last occurrence wins (closest
+            // to the model's final intent).
+            // Adapted from deer-flow/backend/packages/harness/deerflow/agents/
+            //   middlewares/dangling_tool_call_middleware.py — same well-formed
+            //   history invariant, applied per-response instead of pre-request.
+            if (response.toolCalls && response.toolCalls.length > 0) {
+                const deduped = dedupeToolCalls(response.toolCalls, (id) => {
+                    callbacks.onStatusUpdate(`Recovery: dropped duplicate tool_call id "${id}" (last occurrence wins).`);
+                });
+                response.toolCalls = deduped;
+            }
             // Record Assistant message
             const assistantMsg = { role: 'assistant', content: response.content };
             if (response.toolCalls) {
@@ -639,36 +824,76 @@ export class Agent {
             this.chatHistory.push(assistantMsg);
             this.recordTranscript(assistantMsg);
             if (!response.toolCalls || response.toolCalls.length === 0) {
+                const unobservedChildIds = [...spawnedChildIdsThisTurn].filter((id) => !waitedChildIdsThisTurn.has(id));
+                if (unobservedChildIds.length > 0) {
+                    const drainTimeoutMs = Math.max(1, Number(process.env.BRAINROUTER_CHILD_DRAIN_TIMEOUT_MS) || DEFAULT_CHILD_DRAIN_TIMEOUT_MS);
+                    const waitName = 'wait_agents';
+                    const waitArgs = { ids: unobservedChildIds, timeoutMs: drainTimeoutMs };
+                    callbacks.onStatusUpdate(`Auto-draining ${unobservedChildIds.length} spawned child agent${unobservedChildIds.length === 1 ? '' : 's'}...`);
+                    callbacks.onToolStart(waitName, waitArgs);
+                    this.lastTurnToolCalls += 1;
+                    let waitResultText = '';
+                    let waitFailed = false;
+                    let waitSummary = '';
+                    try {
+                        waitResultText = await executeOrchestrationTool(waitName, waitArgs, buildOrchestrationContext());
+                        waitSummary = getToolSummary(waitName, waitArgs, waitResultText);
+                        trackChildObservation(waitName, waitArgs, waitResultText, spawnedChildIdsThisTurn, waitedChildIdsThisTurn);
+                    }
+                    catch (err) {
+                        // Wait tool failure: surface the error text to the model so it can
+                        // report failure rather than silently synthesizing stale output.
+                        waitFailed = true;
+                        waitResultText = `Tool execution failed: ${err?.message ?? String(err)}`;
+                        waitSummary = err?.message ?? String(err);
+                    }
+                    callbacks.onToolEnd(waitName, { success: !waitFailed, summary: waitSummary, preview: !waitFailed ? getToolPreview(waitName, waitArgs, waitResultText) : undefined });
+                    const timeouts = parseChildDrainTimeouts(waitResultText);
+                    if (timeouts.length > 0) {
+                        finalAnswer = formatChildDrainTimeoutAnswer(timeouts);
+                        exitedCleanly = true;
+                        break;
+                    }
+                    const correction = [
+                        `Runtime child-drain guardrail auto-called \`${waitName}\` because this turn spawned child agents and the model tried to answer without observing them.`,
+                        `Child wait result:\n${waitResultText}`,
+                        'Now synthesize the child output for the user. Do not say you are waiting unless the wait result timed out.',
+                    ].join('\n\n');
+                    const guardMsg = { role: 'user', content: correction };
+                    this.chatHistory.push(guardMsg);
+                    this.recordTranscript(guardMsg);
+                    continue;
+                }
                 finalAnswer = response.content;
                 exitedCleanly = true;
                 break;
             }
-            // Execute tool calls chosen by the LLM
-            for (const tc of response.toolCalls) {
+            // Execute tool calls chosen by the LLM.
+            //
+            // 0.3.8-R4 — Independent read-only tool calls (read_file, list_dir,
+            // grep_search, glob_files, fetch_url, web_search, MCP memory reads)
+            // are dispatched concurrently when emitted in the same assistant
+            // response; consecutive serial tools (writes, shell, orchestration,
+            // unknown names) execute one-by-one in their original position to
+            // preserve causality. Tool-result messages are still appended to
+            // chatHistory in the ORIGINAL call order so the model's next turn
+            // sees a deterministic trace even if a later read settled first.
+            const candidates = [
+                ...LOCAL_TOOLS.map((lt) => lt.name),
+                ...mcpTools.map((t) => t.name).filter((n) => typeof n === 'string'),
+            ];
+            const toolCalls = response.toolCalls ?? [];
+            const normalizedNames = toolCalls.map((tc) => normalizeToolName(tc.function.name, candidates));
+            const parallelEnabled = parallelExecutionEnabled();
+            const safeFlags = toolCalls.map((_tc, idx) => parallelEnabled && isParallelSafe(normalizedNames[idx]));
+            const processOneToolCall = async (tc, name) => {
                 this.lastTurnToolCalls += 1;
-                // Normalize the tool name against both local and MCP candidates so
-                // common LLM hallucinations like `Read_File` / `read-file` resolve
-                // to `read_file` instead of falling through to `-32601 Unknown tool`.
-                const rawName = tc.function.name;
-                const candidates = [
-                    ...LOCAL_TOOLS.map((lt) => lt.name),
-                    ...mcpTools.map((t) => t.name).filter((n) => typeof n === 'string'),
-                ];
-                const name = normalizeToolName(rawName, candidates);
-                // Parse JSON args. If the LLM produced malformed JSON, surface that
-                // explicitly via the tool result so it can self-correct on the next
-                // turn — the old fallback silently set args={} and the LLM had no
-                // signal that anything was wrong.
-                let args = {};
-                let argParseError;
-                try {
-                    args = typeof tc.function.arguments === 'string'
-                        ? JSON.parse(tc.function.arguments)
-                        : tc.function.arguments;
-                }
-                catch (e) {
-                    argParseError = `Tool argument JSON was malformed: ${e.message}. Re-issue the tool call with valid JSON arguments.`;
-                }
+                // 0.3.8-I4: Use the strict-recovery helper so a malformed-arguments
+                // tool_call surfaces as a structured tool_result (with the raw
+                // arguments echoed back) instead of throwing out of the loop.
+                const parsedArgs = parseArgumentsOrError(tc);
+                let args = parsedArgs.args;
+                const argParseError = parsedArgs.error;
                 const isLocal = LOCAL_TOOLS.some(lt => lt.name === name);
                 callbacks.onToolStart(name, args);
                 let resultText = '';
@@ -683,9 +908,7 @@ export class Agent {
                     callbacks.onToolEnd(name, { success: false, summary });
                     traceEvent('brainrouter.tool', { tool: name, ok: false, local: isLocal, session_key: this.sessionKey, guard: 'bad_args' }, { traceId: turnSpan.traceId, parentSpanId: turnSpan.spanId });
                     const toolMsg = { role: 'tool', tool_call_id: tc.id, name, content: resultText, isError };
-                    this.chatHistory.push(toolMsg);
-                    this.recordTranscript(toolMsg);
-                    continue;
+                    return { toolMsg, fullResultText: resultText };
                 }
                 // Repeat-loop guard: if the model has already issued this exact
                 // (name, args) call REPEAT_GUARD_LIMIT times in this turn, short-
@@ -708,9 +931,7 @@ export class Agent {
                     callbacks.onToolEnd(name, { success: false, summary });
                     traceEvent('brainrouter.tool', { tool: name, ok: false, local: isLocal, session_key: this.sessionKey, guard: 'repeat' }, { traceId: turnSpan.traceId, parentSpanId: turnSpan.spanId });
                     const toolMsg = { role: 'tool', tool_call_id: tc.id, name, content: resultText, isError };
-                    this.chatHistory.push(toolMsg);
-                    this.recordTranscript(toolMsg);
-                    continue;
+                    return { toolMsg, fullResultText: resultText };
                 }
                 recentToolSignatures.push(signature);
                 // Keep the window small so the guard only blocks tight loops, not
@@ -748,30 +969,9 @@ export class Agent {
                         throw new Error(`Tool "${name}" is not permitted in access mode "${this.accessMode}".`);
                     }
                     if (isOrchestrationToolName(name)) {
-                        resultText = await executeOrchestrationTool(name, args, {
-                            workspaceRoot: this.workspaceRoot,
-                            parentSessionKey: this.sessionKey,
-                            parentAccessMode: this.accessMode,
-                            // Thread the parent's trace context so child agents nest their
-                            // per-turn spans under THIS turn instead of starting a fresh
-                            // trace tree. Lets observability backends reconstruct fan-out.
-                            parentTraceId: turnSpan.traceId,
-                            parentSpanId: turnSpan.spanId,
-                            parentAgentId: this.agentId,
-                            mcpClient: this.mcpClient,
-                            llmConfig: this.llmConfig,
-                            launchCwd: this.launchCwd,
-                            recordOffload: (chars) => { this.memoryMetrics.offloadCharsAvoided += chars; },
-                            onChildToolEvent: (event) => {
-                                // Surface to the REPL via the same onToolStart channel so the
-                                // user sees child activity live, prefixed with the child id.
-                                callbacks.onToolStart(`${event.role}:${event.childId} → ${event.tool}`, { ok: event.ok, summary: event.summary });
-                            },
-                            onChildComplete: (event) => {
-                                callbacks.onChildComplete?.(event);
-                            },
-                        });
+                        resultText = await executeOrchestrationTool(name, args, buildOrchestrationContext());
                         summary = getToolSummary(name, args, resultText);
+                        trackChildObservation(name, args, resultText, spawnedChildIdsThisTurn, waitedChildIdsThisTurn);
                     }
                     else if (isLocal) {
                         resultText = await this.executeLocalTool(name, args);
@@ -801,8 +1001,14 @@ export class Agent {
                     // the next iteration self-corrects instead of retrying garbage.
                     if (/-32601|Unknown tool|MethodNotFound/i.test(message)) {
                         const hint = explainUnknownToolName(name);
-                        resultText = `Tool "${name}" does not exist. ${hint}\nUnderlying error: ${message}`;
-                        summary = `unknown tool — ${hint.slice(0, 120)}`;
+                        // 0.3.8-I4: surface a "did you mean: X?" suggestion when the
+                        // LLM-emitted name normalises to a real registered tool (case,
+                        // separator, or alias mismatch). This is cheaper for the model
+                        // to recover from than the generic skill-vs-tool explanation.
+                        const didYouMean = suggestSimilarToolName(name, candidates, normalizeToolName);
+                        const suggestionLine = didYouMean ? `did you mean: ${didYouMean}?\n` : '';
+                        resultText = `Tool "${name}" does not exist. ${suggestionLine}${hint}\nUnderlying error: ${message}`;
+                        summary = didYouMean ? `unknown tool — did you mean ${didYouMean}?` : `unknown tool — ${hint.slice(0, 120)}`;
                     }
                     else {
                         resultText = `Tool execution failed: ${message}`;
@@ -846,10 +1052,89 @@ export class Agent {
                     content: clampedContent,
                     isError
                 };
-                this.chatHistory.push(toolMsg);
+                // Return; the caller pushes to chatHistory in original call order
+                // (NOT settle order) and records the FULL untruncated result for
+                // /transcript. Doing the push here would let parallel batches land
+                // in finish order, which the LLM's next turn would see as a
+                // non-deterministic trace.
+                return { toolMsg, fullResultText: resultText };
+            };
+            // Partition the tool_calls into runs of consecutive parallel-safe
+            // calls separated by single serial calls. Each run preserves original
+            // position; safe runs of size ≥ 2 dispatch with Promise.allSettled,
+            // serial runs (and unknown-tool fallbacks) execute one-by-one. The
+            // result array is indexed by original call position so the
+            // chatHistory push at the end is deterministic.
+            const processed = new Array(toolCalls.length);
+            const runSafeBatch = async (startIdx, endIdx) => {
+                // Runs tool calls in the half-open range [startIdx, endIdx) together
+                // (at least 1 entry; size > 1 means concurrent) and stores each outcome
+                // in `processed` at its original call index. Calling `processOneToolCall`
+                // synchronously schedules every member's onToolStart + repeat-guard prep
+                // BEFORE any await yields, so the user sees N "in flight" rows at once.
+                // Promise.allSettled waits for all to settle; a rejection becomes a
+                // "Tool execution failed" envelope so every tool_call_id gets a tool_result.
+                const slice = toolCalls.slice(startIdx, endIdx);
+                const promises = slice.map((tc, j) => processOneToolCall(tc, normalizedNames[startIdx + j])); // start every call before awaiting any
+                const settled = await Promise.allSettled(promises);
+                for (let k = 0; k < settled.length; k++) {
+                    const s = settled[k];
+                    if (s.status === 'fulfilled') {
+                        processed[startIdx + k] = s.value; // slot = original call position, not settle order
+                    }
+                    else {
+                        const tc = slice[k];
+                        const name = normalizedNames[startIdx + k];
+                        const message = s.reason?.message ?? String(s.reason); // reason may not be an Error; fall back to its string form
+                        const resultText = `Tool execution failed: ${message}`;
+                        processed[startIdx + k] = {
+                            toolMsg: { role: 'tool', tool_call_id: tc.id, name, content: resultText, isError: true },
+                            fullResultText: resultText,
+                        };
+                    }
+                }
+            };
+            let i = 0;
+            while (i < toolCalls.length) {
+                if (safeFlags[i]) {
+                    let j = i + 1;
+                    while (j < toolCalls.length && safeFlags[j])
+                        j++;
+                    await runSafeBatch(i, j);
+                    i = j;
+                }
+                else {
+                    // Serial slot — run in isolation so any state mutation (write,
+                    // spawn_agent, update_plan) completes before the next call starts.
+                    processed[i] = await processOneToolCall(toolCalls[i], normalizedNames[i]);
+                    i++;
+                }
+            }
+            for (const entry of processed) {
+                if (!entry)
+                    continue;
+                this.chatHistory.push(entry.toolMsg);
                 // Record the FULL untruncated result so /transcript shows everything,
                 // even when the LLM-facing copy was clamped.
-                this.recordTranscript({ ...toolMsg, content: resultText });
+                this.recordTranscript({ ...entry.toolMsg, content: entry.fullResultText });
+            }
+            // 0.3.8-I4: orphan safety net. Even after dedupe + the per-call
+            // recovery branches above, a tool_call without a paired tool_result
+            // would 400 the next OpenAI request. Synthesize ERROR envelopes for
+            // any unmatched id so strict tool_call ↔ tool_result pairing is
+            // preserved. Synthetic content is a plain `ERROR: …` string so the
+            // R1 child-drain guardrail's parseJsonObject(resultText) returns
+            // undefined and we don't accidentally claim a child was spawned.
+            // Synthetics do NOT bump lastTurnToolCalls — they aren't real
+            // dispatches, just a well-formed-history fix.
+            // Adapted from deer-flow/backend/packages/harness/deerflow/agents/
+            //   middlewares/dangling_tool_call_middleware.py.
+            const producedResults = processed.filter((p) => !!p).map((p) => p.toolMsg);
+            const orphans = synthesizeOrphanResults(toolCalls, producedResults);
+            for (const synthetic of orphans) {
+                this.chatHistory.push(synthetic);
+                this.recordTranscript(synthetic);
+                callbacks.onStatusUpdate(`Recovery: synthesized placeholder for orphan tool_call ${synthetic.tool_call_id}.`);
             }
         }
         // Normalize the final answer FIRST so every exit path (loop limit, empty
@@ -1104,7 +1389,7 @@ export class Agent {
                 try {
                     const res = await fetch(url, {
                         headers: {
-                            'User-Agent': 'Mozilla/5.0 (compatible; BrainRouterCLI/0.3.5)'
+                            'User-Agent': 'Mozilla/5.0 (compatible; BrainRouterCLI/0.3.8)'
                         }
                     });
                     if (!res.ok) {
@@ -1712,7 +1997,7 @@ async function runWebSearch(query, maxResults) {
     }
     try {
         const url = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
-        const res = await fetch(url, { headers: { 'User-Agent': 'BrainRouterCLI/0.3.5' } });
+        const res = await fetch(url, { headers: { 'User-Agent': 'BrainRouterCLI/0.3.8' } });
         if (!res.ok) {
             return `web_search failed: DuckDuckGo returned ${res.status} ${res.statusText}.`;
         }
@@ -2267,7 +2552,15 @@ export function buildChatCompletionPayload(config, messages, tools, options = {}
     return body;
 }
 export async function callOpenAI(config, messages, tools, options = {}) {
-    const endpoint = config.endpoint || 'https://api.openai.com/v1';
+    // Normalize the endpoint to a base URL (everything UP TO `/chat/completions`
+    // exclusive). Earlier callers stored the full chat-completions URL in
+    // `config.endpoint` (e.g. "https://api.openai.com/v1/chat/completions")
+    // because the in-terminal wizard's provider catalog wrote the full path.
+    // We then re-append `/chat/completions` below, producing a duplicate
+    // `/chat/completions/chat/completions` and a 404. Strip the suffix
+    // defensively so both shapes (full URL or base URL) work.
+    const rawEndpoint = config.endpoint || 'https://api.openai.com/v1';
+    const endpoint = rawEndpoint.replace(/\/+$/, '').replace(/\/chat\/completions$/, '');
     let apiKey = config.apiKey || process.env.OPENAI_API_KEY || '';
     const isLocal = endpoint.includes('localhost') || endpoint.includes('127.0.0.1');
     if (!apiKey && !isLocal) {