npm - @kinqs/brainrouter-cli - Versions diffs - 0.3.4 - Mend

@kinqs/brainrouter-cli 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/.env.example +109 -0
package/README.md +185 -0
package/dist/agent/agent.d.ts +765 -0
package/dist/agent/agent.js +1977 -0
package/dist/cli/cliPrompt.d.ts +15 -0
package/dist/cli/cliPrompt.js +62 -0
package/dist/cli/commands/_context.d.ts +53 -0
package/dist/cli/commands/_context.js +14 -0
package/dist/cli/commands/_helpers.d.ts +45 -0
package/dist/cli/commands/_helpers.js +140 -0
package/dist/cli/commands/guard.d.ts +6 -0
package/dist/cli/commands/guard.js +292 -0
package/dist/cli/commands/memory.d.ts +12 -0
package/dist/cli/commands/memory.js +263 -0
package/dist/cli/commands/obs.d.ts +6 -0
package/dist/cli/commands/obs.js +208 -0
package/dist/cli/commands/orchestration.d.ts +6 -0
package/dist/cli/commands/orchestration.js +218 -0
package/dist/cli/commands/session.d.ts +6 -0
package/dist/cli/commands/session.js +191 -0
package/dist/cli/commands/ui.d.ts +6 -0
package/dist/cli/commands/ui.js +477 -0
package/dist/cli/commands/workflow.d.ts +6 -0
package/dist/cli/commands/workflow.js +691 -0
package/dist/cli/repl.d.ts +12 -0
package/dist/cli/repl.js +894 -0
package/dist/config/config.d.ts +22 -0
package/dist/config/config.js +105 -0
package/dist/config/workspace.d.ts +7 -0
package/dist/config/workspace.js +62 -0
package/dist/index.d.ts +2 -0
package/dist/index.js +610 -0
package/dist/memory/briefing.d.ts +46 -0
package/dist/memory/briefing.js +152 -0
package/dist/memory/consolidation.d.ts +60 -0
package/dist/memory/consolidation.js +208 -0
package/dist/memory/formatters.d.ts +38 -0
package/dist/memory/formatters.js +102 -0
package/dist/memory/mentions.d.ts +10 -0
package/dist/memory/mentions.js +72 -0
package/dist/orchestration/orchestrator.d.ts +36 -0
package/dist/orchestration/orchestrator.js +71 -0
package/dist/orchestration/roles.d.ts +11 -0
package/dist/orchestration/roles.js +117 -0
package/dist/orchestration/tools.d.ts +244 -0
package/dist/orchestration/tools.js +528 -0
package/dist/prompt/breadthHint.d.ts +48 -0
package/dist/prompt/breadthHint.js +93 -0
package/dist/prompt/compactor.d.ts +31 -0
package/dist/prompt/compactor.js +112 -0
package/dist/prompt/initAgentMd.d.ts +13 -0
package/dist/prompt/initAgentMd.js +194 -0
package/dist/prompt/skillRunner.d.ts +34 -0
package/dist/prompt/skillRunner.js +146 -0
package/dist/prompt/systemPrompt.d.ts +10 -0
package/dist/prompt/systemPrompt.js +171 -0
package/dist/runtime/clipboard.d.ts +17 -0
package/dist/runtime/clipboard.js +52 -0
package/dist/runtime/llmSemaphore.d.ts +30 -0
package/dist/runtime/llmSemaphore.js +67 -0
package/dist/runtime/loopRunner.d.ts +25 -0
package/dist/runtime/loopRunner.js +79 -0
package/dist/runtime/mcpClient.d.ts +156 -0
package/dist/runtime/mcpClient.js +234 -0
package/dist/runtime/mcpUtils.d.ts +36 -0
package/dist/runtime/mcpUtils.js +64 -0
package/dist/runtime/sandbox.d.ts +48 -0
package/dist/runtime/sandbox.js +156 -0
package/dist/runtime/tracing.d.ts +25 -0
package/dist/runtime/tracing.js +91 -0
package/dist/state/cliState.d.ts +59 -0
package/dist/state/cliState.js +311 -0
package/dist/state/goalStore.d.ts +174 -0
package/dist/state/goalStore.js +410 -0
package/dist/state/hookifyStore.d.ts +80 -0
package/dist/state/hookifyStore.js +237 -0
package/dist/state/hooksStore.d.ts +42 -0
package/dist/state/hooksStore.js +71 -0
package/dist/state/preferencesStore.d.ts +41 -0
package/dist/state/preferencesStore.js +25 -0
package/dist/state/sessionStore.d.ts +42 -0
package/dist/state/sessionStore.js +193 -0
package/dist/state/taskStore.d.ts +23 -0
package/dist/state/taskStore.js +80 -0
package/dist/state/workflowArtifacts.d.ts +33 -0
package/dist/state/workflowArtifacts.js +139 -0
package/package.json +71 -0

package/dist/prompt/systemPrompt.js ADDED Viewed

@@ -0,0 +1,171 @@
+import fs from 'node:fs';
+import path from 'node:path';
+function personalityOverlay(style) {
+    switch (style) {
+        case 'concise':
+            return [
+                '## Communication style: concise',
+                '- Default to ≤ 2 sentences per answer when the task allows it.',
+                '- Skip headers and bullet lists unless they materially add clarity.',
+                '- Skip closing summaries when the diff or tool result is self-explanatory.',
+            ].join('\n');
+        case 'detailed':
+            return [
+                '## Communication style: detailed',
+                '- Walk through your reasoning before tool calls when the task is non-trivial.',
+                '- After completing work, summarize what changed, why, and what was verified.',
+                '- Cite file paths and line numbers when explaining decisions.',
+            ].join('\n');
+        case 'pair-programmer':
+            return [
+                '## Communication style: pair programmer',
+                '- Narrate decisions as you make them — "I\'ll edit X next because Y".',
+                '- Surface tradeoffs you considered, even briefly, before committing to one.',
+                '- Invite the user to redirect when you hit a fork: "I\'m about to do A; let me know if you want B."',
+            ].join('\n');
+        default:
+            return '';
+    }
+}
+/**
+ * Assemble the full system prompt for a BrainRouter CLI turn.
+ *
+ * The prompt is a fixed list of Markdown sections joined with newlines. Only
+ * a few entries are dynamic:
+ *   - `context.workspaceRoot`, `context.launchCwd` and `context.sessionKey`
+ *     are interpolated into the "Runtime Context" section;
+ *   - `context.instructionSummary` (trimmed) fills "Workspace Instructions",
+ *     with a fixed fallback sentence when it is missing or whitespace-only;
+ *   - `context.personality` selects the trailing communication-style overlay
+ *     via `personalityOverlay`.
+ *
+ * @param context Object carrying the fields listed above.
+ * @returns The complete prompt as a single string.
+ */
+export function buildSystemPrompt(context) {
+    // Empty / whitespace-only summaries are treated the same as a missing one.
+    const instructionSummary = context.instructionSummary?.trim()
+        ? context.instructionSummary.trim()
+        : 'No workspace AGENT.md or AGENTS.md instruction file was found.';
+    return [
+        'You are BrainRouter CLI, an autonomous software engineering agent running in a terminal.',
+        'Your edge over generic coding agents is being direct, tool-driven, memory-aware, and workspace-aware — every turn should reflect that.',
+        '',
+        '## Runtime Context',
+        `- Workspace root: ${context.workspaceRoot}`,
+        `- Launch directory: ${context.launchCwd}`,
+        `- BrainRouter sessionKey: ${context.sessionKey}`,
+        '- All relative file paths are resolved from the workspace root, not from the CLI installation directory.',
+        '- If the user asks about "the session", answer with the current BrainRouter sessionKey and workspace root.',
+        '',
+        '## Workspace Instructions',
+        instructionSummary,
+        '',
+        '## Memory-First Workflow (the BrainRouter differentiator — non-negotiable)',
+        'BrainRouter is a cognitive memory engine first and a coding agent second. Treat memory as a primary tool, not an afterthought. The user pays for this routing — you must use it.',
+        '',
+        '### Before doing the work',
+        '- The CLI already injects a "## BrainRouter Memory Briefing" system message with recalled cognitive memories, persona, focus scenes, and recent context. READ it before you reason. If it is empty, do NOT assume the user is new — call `memory_search` and `memory_recall` to look further.',
+        '- For ANY non-trivial request, call `memory_recall` with the current sessionKey AND the user request as the query. Look for `recordId` values you can cite later.',
+        '- If the request mentions a specific file, also call `memory_file_history` with that path — past changes and known issues live there.',
+        '- If the request mentions a domain/feature concept, call `memory_graph_query` with the entity name to find related memories across the knowledge graph (2-hop default).',
+        '- When you don\'t have a sessionKey yet, call `memory_resolve_session` with the workspacePath.',
+        '',
+        '### During the work',
+        '- Surface the record IDs you are relying on. Quote them inline like `[rec_xxx]` so the user sees what you used.',
+        '- For long-running tasks, call `memory_task_state` to check whether this work was started before and `memory_task_update` to record progress (blockers, decisions, next actions).',
+        '- If you produce a payload over ~1,000 tokens (analysis, diff, large summary), call `memory_working_offload` and refer back to it by its ref node id instead of pasting again.',
+        '- The briefing only fires ONCE at turn start with the prompt as the query. **Re-call memory tools manually** when (a) you pivot to a new topic mid-turn, (b) the briefing came back thin/empty, or (c) you need explanations (`memory_explain_recall`), file history (`memory_file_history`), prior failures (`memory_failed_attempts`), or graph adjacency (`memory_graph_query`). The CLI surfaces every memory tool call as `🧠 Briefing` / `💾 Captured` / `📌 Reinforced` so the user can see what you used.',
+        '',
+        '### After the work',
+        '- The CLI auto-runs `memory_mark_cited` with the records you actually used (detected by content match against your final answer) and `memory_capture_turn`. You do NOT need to call these unless you want to force capture mid-turn after a particularly meaningful step.',
+        '',
+        '### Never do',
+        '- Never say "I do not have information about your current projects" if the briefing is non-empty or if you have not first run `memory_search` / `memory_recall` for the question.',
+        '- Never re-discover something that already lives in memory. Recall first, then read files.',
+        '- Never cite a recordId that did not appear in the briefing or in a recall result you ran.',
+        '',
+        '### Anti-hallucination rules when summarizing recall (critical)',
+        '- When recall returns memories, do NOT generalize. Quote the content verbatim or paraphrase to within a few words. Always include the recordId in `[brackets]`.',
+        '- Memory records can be STALE or from a DIFFERENT project. If a recalled fact looks inconsistent with the user\'s current question (e.g. recall says "Vue.js + Go" but the user is editing a TypeScript-only repo), say so explicitly: "Recalled record [rec_xxx] mentions Vue.js + Go — this looks inconsistent with the current workspace. Should I archive it via `memory_update`?"',
+        '- Do not invent project facts that aren\'t in either (a) the briefing, (b) a recall/search result you just ran, or (c) files you actually read. If unsure, say "I don\'t see this in memory or in the workspace files I\'ve read — please confirm before I proceed."',
+        '- When unsure whether a recall result is current, call `memory_verify` to flag it for re-checking, or suggest the user run `/forget <recordId>` to archive obvious garbage.',
+        '',
+        '## Tool Policy',
+        '- You may call local workspace tools and BrainRouter MCP tools yourself.',
+        '- Prefer tool calls over asking the user for information that can be discovered from the workspace or MCP memory.',
+        '- If the user asks about files, project structure, code, tests, or configuration, inspect files with list_dir, glob_files, grep_search, or read_file.',
+        '- **MCP-first for everything cognitive.** Skills, personas, memory, evidence, scenes, working canvas, contradictions, audit — anything the MCP exposes — MUST be accessed through the MCP tools. Do not reimplement them with filesystem reads. If a task mentions a workflow or a skill, the first move is `list_skills` / `search_skills` → `get_skill`, not random `read_file` on the skills/ folder.',
+        '- **Skills are NOT tools.** Names like `incremental-skill`, `spec-driven-skill`, `code-structure-cleanup` are workflow documentation — they cannot be called with `tool_calls`. To use one: call `list_skills` (or `search_skills`) to discover the canonical name, then `get_skill({ name: "<name>" })` to load its instructions, and then follow the steps with regular tools (`read_file`, `write_file`, `run_command`, `spawn_agent`, …).',
+        '- **Never call a tool whose name was not in the tool list returned at turn start.** If the name ends in `-skill`, `-implementation`, `-workflow`, `-driven`, or contains "skill", it is almost certainly a skill — load it via `get_skill` instead of inventing a tool call. Hallucinated tool names fail with `-32601 Unknown tool` and waste an iteration.',
+        '- **No tight loops.** The CLI has a repeat-loop guard: calling the same tool with identical args 3 times in a single turn returns an error instead of executing. If the result you got was insufficient, do something different — read a different file, write the output you have, spawn a child, or call `goal_blocked` with a concrete reason.',
+        '',
+        '## Multi-Agent Orchestration',
+        '- You may delegate bounded, parallelizable work to child agents with `spawn_agent` (one child) or `spawn_agents` (a batch in one tool call).',
+        '- Available roles: explorer (read-only investigation), architect (design alternatives), reviewer (code review), worker (implementation with write access), verifier (runs tests/checks). Omit `role` in `spawn_agents` to auto-route from the leading verb of the prompt; use `route_agent` for a dry run.',
+        '- Use `list_agents` / `read_agent_transcript` to observe, `wait_agent` (single) or `wait_agents` (batch) to drain, and `close_agent` for cleanup.',
+        '- **Fan-out triggers.** ALWAYS prefer `spawn_agents` (≥3 children) when the user prompt says any of: "everything", "all", "in 1 go", "in parallel", "thoroughly", "comprehensive", "as much as", "test more X", "explore all Y", "across the codebase". One tool call + a paragraph asking "what next?" is NOT acceptable for these prompts.',
+        '- **Standard fan-out templates.**',
+        '   • "Test all the MCP tools" → 5 explorers, each focused on a different tool category (memory_*, list_skills/get_skill, governance/*, working/*, hooks/*).',
+        '   • "Explore this codebase" → 3 explorers covering server / client / shared types.',
+        '   • "Design feature X" → 2 architects with different stack constraints + 1 reviewer.',
+        '- Delegate when there are 2+ independent investigations or when you would otherwise produce a large isolated output. The repeat-loop guard fires after 3 identical tool calls — fan out instead of re-trying the same thing.',
+        '- Always synthesize child outputs in your own words — never claim work is done just because a child returned.',
+        '',
+        '## Durable Workflow Artifacts (single source of truth)',
+        '- Every multi-step request (spec, feature plan, review, implementation plan) MUST land as files inside `.brainrouter/cli/workflows/<slug>/`.',
+        '- Required artifacts: `spec.md` (what + why + boundaries), `tasks.md` (ordered task breakdown), `walkthrough.md` (post-implementation summary). Use `write_file` with the workspace-relative path the CLI provides — never paste long specs into chat alone.',
+        '- For free-form prompts that look like spec/plan requests, tell the user to use `/spec <title>` or `/feature-dev <title>` instead of producing a chat-only plan. Those commands set up the directory and pre-fill the meta record for you.',
+        '- Never produce a multi-section plan response in chat without also writing it to the workflow folder. If you cannot write the file, say so explicitly.',
+        '',
+        '## Local Tools',
+        '- read_file: read workspace files with optional line ranges.',
+        '- write_file: create or overwrite files inside the workspace.',
+        '- edit_file: replace exactly one target string in an existing file.',
+        '- list_dir: list a workspace directory.',
+        '- grep_search: search workspace files for a string.',
+        '- glob_files: find workspace files by glob pattern.',
+        '- run_command (alias: bash / shell / sh): run shell commands after explicit terminal confirmation.',
+        '- fetch_url: fetch HTTP(S) text content when needed.',
+        '',
+        '## BrainRouter MCP Tools',
+        '- memory_resolve_session, memory_recall, memory_search, memory_graph_query, memory_contradictions.',
+        '- memory_working_context, memory_working_offload, memory_working_reset.',
+        '- memory_capture_turn, memory_mark_cited, memory_task_state, memory_task_update, memory_file_history, memory_debug_trace_search.',
+        '- list_skills, get_skill, search_skills, get_persona, get_reference, list_template_docs, get_template_doc.',
+        '',
+        '## Autonomy and tool batching (read carefully)',
+        '- **Do not block on unnecessary confirmations.** When the user gives you a clear instruction, execute it. Do not ask "shall I proceed?" between tool calls. Do not stop mid-flow to enumerate what you *could* do — DO it.',
+        '- **Batch your tool calls.** Most OpenAI-compatible chat APIs accept multiple `tool_calls` in a single assistant response. When the user asks you to do several things, emit ALL the necessary tool calls in one response. The CLI executes them in order and feeds the results back to you.',
+        '- **Parallelize independent work.** Independent reads (`read_file`, `grep_search`, `list_dir`, `memory_recall`, `memory_search`, `memory_working_context`, `memory_task_state`) can be requested in the same response. Independent `spawn_agent` calls likewise.',
+        '- When the user says "test all", "every X", "do everything", "run them all", treat it as a single batched request. Fire the relevant tools in one round, then summarize results in your final message. Do not iterate "now I will test X / would you like to proceed".',
+        '- After your tools return, either (a) call more tools that need the previous results, or (b) write the final answer. Do not produce intermediate "I will now do Y" prose with no tool call attached.',
+        '- If sub-agents (spawn_agent) are running, `wait_agent` for them before yielding the turn.',
+        '',
+        '## Persistence on tool failure (CRITICAL — read every turn)',
+        'When a tool call fails or returns an empty/unexpected result, you MUST attempt to recover before yielding the turn. **Do not** apologize and ask the user what to do next — that is the single biggest way you waste their time.',
+        '',
+        '**Standard recovery moves (try at least ONE before giving up):**',
+        '1. **Extension swap.** If `read_file` on `foo/bar.js` fails with "File not found", try `foo/bar.ts`, `foo/bar.tsx`, `foo/bar.mjs`. This codebase is TypeScript — `.js` paths almost always mean `.ts` source.',
+        '2. **Directory listing.** Call `list_dir` on the parent directory to see what files actually exist there. Then re-read the right file.',
+        '3. **Glob search.** Call `glob_files` with a wildcard (`**/engine.*`, `**/<filename>.*`) or `grep_search` for a unique symbol you expect inside the file.',
+        '4. **Memory lookup.** `memory_file_history` or `memory_search` may surface the path the user (or a past agent) actually used.',
+        '5. **Re-read the listing.** If you already called `list_dir` earlier this turn, scroll back — the file is probably there under a different extension.',
+        '',
+        'Only after 2+ recovery attempts that all fail should you tell the user the file genuinely does not exist, and even then propose the closest matching files you DID find. Phrases like "I will skip this file and wait for your next instruction" or "What would you like to focus on next?" are forbidden when you have not exhausted the recovery moves above.',
+        '',
+        '**The same persistence rule applies to every tool failure** — failed greps, failed edits (re-read the file and try a narrower string), failed shell commands (read the stderr and adjust). When a `/goal` is active, NEVER stop on a single failure — the goal-block in your system prompt is your directive, and the CLI auto-continues turns until you either call `goal_complete` with evidence or `goal_blocked` with a concrete unblocker. Burning an iteration to ask "what next?" violates the goal contract.',
+        '',
+        '## Surfacing tool output to the user (read every turn)',
+        'When the user explicitly asks to see something — phrasings like "list dir", "show me X", "what\'s in Y", "print/dump/cat Z", "find files matching Q", "grep for W" — your final assistant message MUST include the actual content the tool returned. Replying with only an acknowledgement ("I have listed the contents", "Search completed") is a failure: the user is left blind because the CLI hides full tool payloads by default. Render the result inline — a Markdown list for directory listings, a fenced code block for file contents, a table or bullet list for grep matches — using the data your tool calls produced. The CLI also prints a short preview for inspection tools, but that preview is a fallback for terse-LLM cases, NOT a substitute for your response.',
+        '',
+        '## Operating Behavior',
+        '- Be concise but not passive. Do the next useful thing with tools.',
+        '- Do not say you lack session context when the Runtime Context contains a sessionKey.',
+        '- Do not ask for a workspace path unless the current workspace root is wrong or inaccessible.',
+        '- Read before editing. Keep edits scoped. Run relevant tests after changes.',
+        '- If the model or endpoint cannot use tools, explain that clearly and continue with the best available direct answer.',
+        '- For multi-step work, keep the durable plan current with update_plan. Use statuses pending, in_progress, and completed, with at most one in_progress item.',
+        '- The CLI persists per-session state under .brainrouter/cli/sessions/<encodedKey>/ (transcript.jsonl, goal.json, tasks.json) for inspection and future orchestration.',
+        '',
+        // Style overlay goes last; it is an empty string when no known
+        // personality is set, which leaves an empty final line in the prompt.
+        personalityOverlay(context.personality),
+    ].join('\n');
+}
+export function loadWorkspaceInstructionSummary(workspaceRoot) {
+    const instructionPath = ['AGENT.md', 'AGENTS.md']
+        .map(file => path.join(workspaceRoot, file))
+        .find(filePath => fs.existsSync(filePath));
+    if (!instructionPath)
+        return undefined;
+    const content = fs.readFileSync(instructionPath, 'utf8');
+    return content
+        .replace(/<!--[\s\S]*?-->/g, '')
+        .split('\n')
+        .slice(0, 120)
+        .join('\n')
+        .trim();
+}

package/dist/runtime/clipboard.d.ts ADDED Viewed

@@ -0,0 +1,17 @@
+/**
+ * Cross-platform clipboard copy. Wraps the OS-native CLI tool so we don't add
+ * a dependency.
+ *
+ *   macOS:    pbcopy
+ *   Linux:    wl-copy (Wayland) → xclip (X11) → xsel as a last resort
+ *   Windows:  clip
+ *
+ * Resolves to an object `{ ok, tool?, error? }`. On success `ok` is true and
+ * `tool` names the command that was used. `ok` is false, with `error` set,
+ * when none of the candidate tools could be run successfully (common on bare
+ * Linux containers); the caller should fall back to printing the text so the
+ * user can select-copy manually.
+ */
+export declare function copyToClipboard(text: string): Promise<{
+    ok: boolean;
+    tool?: string;
+    error?: string;
+}>;

package/dist/runtime/clipboard.js ADDED Viewed

@@ -0,0 +1,52 @@
+import { spawn } from 'node:child_process';
+/**
+ * Cross-platform clipboard copy. Wraps the OS-native CLI tool so we don't add
+ * a dependency.
+ *
+ *   macOS:    pbcopy
+ *   Linux:    wl-copy (Wayland) → xclip (X11) → xsel as a last resort
+ *   Windows:  clip
+ *
+ * Returns a tuple `[ok, error?]`. `ok` is false when no copy tool is available
+ * (common on bare Linux containers); the caller should fall back to printing
+ * the text so the user can select-copy manually.
+ */
+export async function copyToClipboard(text) {
+    const candidates = (() => {
+        if (process.platform === 'darwin')
+            return [['pbcopy', []]];
+        if (process.platform === 'win32')
+            return [['clip', []]];
+        return [
+            ['wl-copy', []],
+            ['xclip', ['-selection', 'clipboard']],
+            ['xsel', ['--clipboard', '--input']],
+        ];
+    })();
+    for (const [cmd, args] of candidates) {
+        const result = await tryCopy(cmd, args, text);
+        if (result.ok)
+            return { ok: true, tool: cmd };
+    }
+    return { ok: false, error: `no clipboard tool found on ${process.platform}` };
+}
+function tryCopy(cmd, args, text) {
+    return new Promise((resolve) => {
+        let child;
+        try {
+            child = spawn(cmd, args, { stdio: ['pipe', 'ignore', 'ignore'] });
+        }
+        catch {
+            resolve({ ok: false });
+            return;
+        }
+        child.on('error', () => resolve({ ok: false }));
+        child.on('close', (code) => resolve({ ok: code === 0 }));
+        try {
+            child.stdin?.end(text);
+        }
+        catch {
+            resolve({ ok: false });
+        }
+    });
+}

package/dist/runtime/llmSemaphore.d.ts ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Process-wide semaphore for CLI-side LLM calls.
+ *
+ * The CLI fires LLM requests from two places:
+ *   1. The user-facing chat in `callOpenAI` (the assistant reply).
+ *   2. Each spawned child agent's own chat loop (parallel fan-out from
+ *      `spawn_agents` runs all children concurrently).
+ *
+ * When all of those plus the MCP child's background extraction/contradiction
+ * /graph workers hit the same local backend (LM Studio with a single GPU,
+ * or any throughput-bounded endpoint), the model thrashes or auto-unloads.
+ * Capping concurrency here prevents the CLI process from overwhelming the
+ * backend. The MCP child has its own matching semaphore (mcp/.../llm-semaphore.ts)
+ * with the same env knob, so the two processes coordinate by setting the
+ * same `BRAINROUTER_LLM_MAX_CONCURRENT` budget.
+ *
+ * Env knob:
+ *   BRAINROUTER_LLM_MAX_CONCURRENT  (default 4; values < 1 disable the cap)
+ *
+ * Cap defaults higher on the CLI side than on MCP (4 vs 2) because the
+ * user-facing chat is latency-sensitive; we'd rather burst chat calls and
+ * queue background extraction.
+ */
+/**
+ * Claim one LLM concurrency slot, waiting in FIFO order when the cap is
+ * already reached. When the cap is disabled this resolves immediately.
+ *
+ * @returns A release callback. Call it when the request has finished so a
+ *   queued caller can proceed; calling it more than once has no further effect.
+ */
+export declare function acquireLLMSlot(): Promise<() => void>;
+/**
+ * Snapshot of the semaphore counters.
+ *
+ * @returns `cap` — the active concurrency limit (`Infinity` when the cap is
+ *   disabled); `inFlight` — slots currently held; `queued` — callers waiting
+ *   for a slot.
+ */
+export declare function getLLMSemaphoreState(): {
+    cap: number;
+    inFlight: number;
+    queued: number;
+};
+/**
+ * Test helper: re-read the cap from `BRAINROUTER_LLM_MAX_CONCURRENT`, zero the
+ * in-flight counter and empty the wait queue. Callers that were queued at the
+ * time are dropped without being resolved.
+ */
+export declare function resetLLMSemaphoreForTests(): void;

package/dist/runtime/llmSemaphore.js ADDED Viewed

@@ -0,0 +1,67 @@
+/**
+ * Process-wide semaphore for CLI-side LLM calls.
+ *
+ * The CLI fires LLM requests from two places:
+ *   1. The user-facing chat in `callOpenAI` (the assistant reply).
+ *   2. Each spawned child agent's own chat loop (parallel fan-out from
+ *      `spawn_agents` runs all children concurrently).
+ *
+ * When all of those plus the MCP child's background extraction/contradiction
+ * /graph workers hit the same local backend (LM Studio with a single GPU,
+ * or any throughput-bounded endpoint), the model thrashes or auto-unloads.
+ * Capping concurrency here prevents the CLI process from overwhelming the
+ * backend. The MCP child has its own matching semaphore (mcp/.../llm-semaphore.ts)
+ * with the same env knob, so the two processes coordinate by setting the
+ * same `BRAINROUTER_LLM_MAX_CONCURRENT` budget.
+ *
+ * Env knob:
+ *   BRAINROUTER_LLM_MAX_CONCURRENT  (default 4; values < 1 disable the cap)
+ *
+ * Cap defaults higher on the CLI side than on MCP (4 vs 2) because the
+ * user-facing chat is latency-sensitive; we'd rather burst chat calls and
+ * queue background extraction.
+ */
+const DEFAULT_CAP = 4;
+function resolveCap() {
+    const raw = process.env.BRAINROUTER_LLM_MAX_CONCURRENT;
+    if (!raw)
+        return DEFAULT_CAP;
+    const parsed = parseInt(raw, 10);
+    if (!Number.isFinite(parsed) || parsed < 1)
+        return Number.POSITIVE_INFINITY;
+    return parsed;
+}
+let cap = resolveCap();
+let inFlight = 0;
+const waiters = [];
+export async function acquireLLMSlot() {
+    if (!Number.isFinite(cap))
+        return () => { };
+    if (inFlight < cap) {
+        inFlight++;
+        return makeRelease();
+    }
+    await new Promise((resolve) => waiters.push(resolve));
+    inFlight++;
+    return makeRelease();
+}
+function makeRelease() {
+    let released = false;
+    return () => {
+        if (released)
+            return;
+        released = true;
+        inFlight = Math.max(0, inFlight - 1);
+        const next = waiters.shift();
+        if (next)
+            next();
+    };
+}
+export function getLLMSemaphoreState() {
+    return { cap, inFlight, queued: waiters.length };
+}
+export function resetLLMSemaphoreForTests() {
+    cap = resolveCap();
+    inFlight = 0;
+    waiters.length = 0;
+}

package/dist/runtime/loopRunner.d.ts ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * Lightweight repeating-prompt runner for `/loop`.
+ *
+ * Only one loop runs at a time per CLI process. Callers register a function
+ * to invoke on each tick; the runner schedules with setTimeout (not
+ * setInterval) so a long-running iteration doesn't pile up. Tick errors are
+ * captured but don't kill the loop; that's the point of a loop.
+ */
+/** Bookkeeping for the currently running `/loop`. */
+export interface LoopState {
+    /** Prompt text the loop was started with. */
+    prompt: string;
+    /** Delay in milliseconds between the end of one tick and the start of the next. */
+    intervalMs: number;
+    /** ISO-8601 timestamp taken when the loop was started. */
+    startedAt: string;
+    /** Number of ticks that have been fired so far. */
+    iterations: number;
+    /** ISO-8601 timestamp of the most recent tick; unset until the first tick fires. */
+    lastFiredAt?: string;
+    /** Message of the error thrown by the most recent tick; cleared when a tick succeeds. */
+    lastError?: string;
+}
+/** True while a loop is registered, i.e. between a successful `startLoop` and `stopLoop`. */
+export declare function isLoopRunning(): boolean;
+/**
+ * State of the running loop, or `null` when none is running. The returned
+ * object is the runner's own live state (not a copy), so it keeps updating as
+ * ticks fire.
+ */
+export declare function getLoopState(): LoopState | null;
+/**
+ * Start the repeating loop.
+ *
+ * @param prompt Prompt recorded in the loop state.
+ * @param intervalMs Delay between ticks; must be a finite number of at least 1000.
+ * @param tick Callback invoked for each iteration with the live loop state.
+ *   The first call happens after one full interval. A rejection is recorded in
+ *   `lastError` and does not stop the loop.
+ * @returns `{ started: true }` on success, otherwise `{ started: false, reason }`
+ *   when a loop is already running or the interval is invalid.
+ */
+export declare function startLoop(prompt: string, intervalMs: number, tick: (state: LoopState) => Promise<void>): {
+    started: boolean;
+    reason?: string;
+};
+/**
+ * Stop the running loop and cancel its pending timer.
+ *
+ * @returns `true` if a loop was running and has been stopped, `false` if there was nothing to stop.
+ */
+export declare function stopLoop(): boolean;
+/**
+ * Parse a duration like "500ms", "5s", "10m", "1.5h" into milliseconds.
+ * Units are case-insensitive; a bare number such as "30" is read as seconds.
+ * Returns the rounded millisecond value, or undefined when the text is not a
+ * valid duration.
+ */
+export declare function parseInterval(raw: string): number | undefined;

package/dist/runtime/loopRunner.js ADDED Viewed

@@ -0,0 +1,79 @@
+/**
+ * Lightweight repeating-prompt runner for `/loop`.
+ *
+ * Only one loop runs at a time per CLI process. Callers register a function
+ * to invoke on each tick; the runner schedules with setTimeout (not
+ * setInterval) so a long-running iteration doesn't pile up. Tick errors are
+ * captured but don't kill the loop; that's the point of a loop.
+ */
+// The single running loop for this process: `{ state, cancel }`, or null when idle.
+let active = null;
+/** True while a loop is registered. */
+export function isLoopRunning() {
+    return active !== null;
+}
+/** Live state object of the running loop (not a copy), or null when idle. */
+export function getLoopState() {
+    return active?.state ?? null;
+}
+/**
+ * Start the repeating loop.
+ *
+ * @param prompt Prompt recorded in the loop state.
+ * @param intervalMs Delay between ticks; must be finite and at least 1000.
+ * @param tick Async callback run on every iteration with the live state object.
+ * @returns `{ started: true }`, or `{ started: false, reason }` when a loop is
+ *   already running or the interval is rejected.
+ */
+export function startLoop(prompt, intervalMs, tick) {
+    if (active) {
+        return { started: false, reason: 'a loop is already running — use /loop stop first' };
+    }
+    if (!Number.isFinite(intervalMs) || intervalMs < 1_000) {
+        return { started: false, reason: 'interval must be at least 1000ms' };
+    }
+    const state = {
+        prompt,
+        intervalMs,
+        startedAt: new Date().toISOString(),
+        iterations: 0,
+    };
+    // `stopped` is checked both before arming the timer and when it fires, so
+    // a tick that is still awaiting when the loop is cancelled does not
+    // schedule a successor.
+    let stopped = false;
+    let timer = null;
+    const scheduleNext = () => {
+        if (stopped)
+            return;
+        timer = setTimeout(async () => {
+            if (stopped)
+                return;
+            state.iterations += 1;
+            state.lastFiredAt = new Date().toISOString();
+            try {
+                await tick(state);
+                state.lastError = undefined;
+            }
+            catch (err) {
+                // A failing tick is recorded, not fatal.
+                state.lastError = err?.message ?? String(err);
+            }
+            // Re-arm only after the tick has settled, so slow ticks never overlap.
+            scheduleNext();
+        }, state.intervalMs);
+    };
+    active = {
+        state,
+        cancel: () => {
+            stopped = true;
+            if (timer)
+                clearTimeout(timer);
+            timer = null;
+            active = null;
+        },
+    };
+    // The first tick fires after one full interval, not immediately.
+    scheduleNext();
+    return { started: true };
+}
+/**
+ * Stop the running loop via its `cancel` callback.
+ *
+ * @returns `true` if a loop was running, `false` if there was nothing to stop.
+ */
+export function stopLoop() {
+    if (!active)
+        return false;
+    active.cancel();
+    return true;
+}
+/** Parse a duration like "5s", "10m", "1h". Returns ms or undefined. */
+export function parseInterval(raw) {
+    const match = /^(\d+(?:\.\d+)?)(ms|s|m|h)?$/i.exec(raw.trim());
+    if (!match)
+        return undefined;
+    const n = Number(match[1]);
+    if (!Number.isFinite(n))
+        return undefined;
+    const unit = (match[2] ?? 's').toLowerCase();
+    const mul = unit === 'ms' ? 1 : unit === 's' ? 1000 : unit === 'm' ? 60_000 : 3_600_000;
+    return Math.round(n * mul);
+}

package/dist/runtime/mcpClient.d.ts ADDED Viewed

@@ -0,0 +1,156 @@
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import type { LLMConfig, ServerConfig } from '../config/config.js';
+/**
+ * Declaration of the CLI's wrapper around the MCP SDK `Client`.
+ *
+ * Exposes connect / listTools / callTool / close plus a connection flag so
+ * callers can tell whether an MCP transport is currently available.
+ */
+export declare class McpClientWrapper {
+    /** The wrapped SDK client instance; publicly accessible. */
+    client: Client;
+    /** Transport handle; its type is not exposed by this declaration. */
+    private transport;
+    /**
+     * True only after a successful `connect()`. Lets the CLI run in a degraded
+     * "offline" mode when the MCP server is unreachable at startup — `listTools`
+     * returns an empty list and `callTool` returns an error envelope instead of
+     * blowing up, which the agent's existing try/catch wrappers already handle.
+     */
+    private connected;
+    /** Takes no arguments. */
+    constructor();
+    /** Whether this wrapper has an active MCP transport. */
+    isConnected(): boolean;
+    /**
+     * Connect using the given server configuration.
+     *
+     * @param serverConfig  Server settings (shape defined in config/config.js).
+     * @param llmConfig     Optional LLM settings; how they are used is not
+     *                      visible from this declaration.
+     * @returns A promise that resolves with no value.
+     */
+    connect(serverConfig: ServerConfig, llmConfig?: LLMConfig): Promise<void>;
+    /**
+     * List the tools available through the client.
+     *
+     * @returns A promise for an object with a `tools` array (each entry has a
+     *          `name` and an object-typed `inputSchema`, plus optional
+     *          description, output schema, annotations, execution hints,
+     *          icons and title), and optional `nextCursor` / `_meta` fields.
+     */
+    listTools(): Promise<{
+        [x: string]: unknown;
+        tools: {
+            inputSchema: {
+                [x: string]: unknown;
+                type: "object";
+                properties?: Record<string, object> | undefined;
+                required?: string[] | undefined;
+            };
+            name: string;
+            description?: string | undefined;
+            outputSchema?: {
+                [x: string]: unknown;
+                type: "object";
+                properties?: Record<string, object> | undefined;
+                required?: string[] | undefined;
+            } | undefined;
+            annotations?: {
+                title?: string | undefined;
+                readOnlyHint?: boolean | undefined;
+                destructiveHint?: boolean | undefined;
+                idempotentHint?: boolean | undefined;
+                openWorldHint?: boolean | undefined;
+            } | undefined;
+            execution?: {
+                taskSupport?: "optional" | "required" | "forbidden" | undefined;
+            } | undefined;
+            _meta?: Record<string, unknown> | undefined;
+            icons?: {
+                src: string;
+                mimeType?: string | undefined;
+                sizes?: string[] | undefined;
+                theme?: "light" | "dark" | undefined;
+            }[] | undefined;
+            title?: string | undefined;
+        }[];
+        _meta?: {
+            [x: string]: unknown;
+            progressToken?: string | number | undefined;
+            "io.modelcontextprotocol/related-task"?: {
+                taskId: string;
+            } | undefined;
+        } | undefined;
+        nextCursor?: string | undefined;
+    }>;
+    /**
+     * Invoke a tool by name.
+     *
+     * @param name  Name of the tool to call.
+     * @param args  Arguments passed to the tool, keyed by parameter name.
+     * @returns A promise for one of two shapes: an object with a `content`
+     *          array (items tagged `text`, `image`, `audio`, `resource` or
+     *          `resource_link`) and optional `structuredContent` / `isError`,
+     *          or an object carrying a `toolResult` value.
+     */
+    callTool(name: string, args: Record<string, any>): Promise<{
+        [x: string]: unknown;
+        content: ({
+            type: "text";
+            text: string;
+            annotations?: {
+                audience?: ("user" | "assistant")[] | undefined;
+                priority?: number | undefined;
+                lastModified?: string | undefined;
+            } | undefined;
+            _meta?: Record<string, unknown> | undefined;
+        } | {
+            type: "image";
+            data: string;
+            mimeType: string;
+            annotations?: {
+                audience?: ("user" | "assistant")[] | undefined;
+                priority?: number | undefined;
+                lastModified?: string | undefined;
+            } | undefined;
+            _meta?: Record<string, unknown> | undefined;
+        } | {
+            type: "audio";
+            data: string;
+            mimeType: string;
+            annotations?: {
+                audience?: ("user" | "assistant")[] | undefined;
+                priority?: number | undefined;
+                lastModified?: string | undefined;
+            } | undefined;
+            _meta?: Record<string, unknown> | undefined;
+        } | {
+            type: "resource";
+            resource: {
+                uri: string;
+                text: string;
+                mimeType?: string | undefined;
+                _meta?: Record<string, unknown> | undefined;
+            } | {
+                uri: string;
+                blob: string;
+                mimeType?: string | undefined;
+                _meta?: Record<string, unknown> | undefined;
+            };
+            annotations?: {
+                audience?: ("user" | "assistant")[] | undefined;
+                priority?: number | undefined;
+                lastModified?: string | undefined;
+            } | undefined;
+            _meta?: Record<string, unknown> | undefined;
+        } | {
+            uri: string;
+            name: string;
+            type: "resource_link";
+            description?: string | undefined;
+            mimeType?: string | undefined;
+            size?: number | undefined;
+            annotations?: {
+                audience?: ("user" | "assistant")[] | undefined;
+                priority?: number | undefined;
+                lastModified?: string | undefined;
+            } | undefined;
+            _meta?: {
+                [x: string]: unknown;
+            } | undefined;
+            icons?: {
+                src: string;
+                mimeType?: string | undefined;
+                sizes?: string[] | undefined;
+                theme?: "light" | "dark" | undefined;
+            }[] | undefined;
+            title?: string | undefined;
+        })[];
+        _meta?: {
+            [x: string]: unknown;
+            progressToken?: string | number | undefined;
+            "io.modelcontextprotocol/related-task"?: {
+                taskId: string;
+            } | undefined;
+        } | undefined;
+        structuredContent?: Record<string, unknown> | undefined;
+        isError?: boolean | undefined;
+    } | {
+        [x: string]: unknown;
+        toolResult: unknown;
+        _meta?: {
+            [x: string]: unknown;
+            progressToken?: string | number | undefined;
+            "io.modelcontextprotocol/related-task"?: {
+                taskId: string;
+            } | undefined;
+        } | undefined;
+    }>;
+    /**
+     * Returns a promise that resolves with no value.
+     * NOTE(review): presumably shuts down the transport/client — the
+     * implementation is not visible here; confirm in mcpClient.js.
+     */
+    close(): Promise<void>;
+}