npm - bare-agent - Versions diffs - 0.13.0 → 0.14.0 - Mend

bare-agent 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/README.md +3 -3
package/bareagent.context.md +17 -3
package/examples/litectx-assemble.mjs +78 -0
package/examples/wake.sh +8 -0
package/package.json +2 -2
package/src/loop.d.ts +10 -1
package/src/loop.js +100 -2
package/src/mcp-bridge.d.ts +5 -2
package/src/mcp-bridge.js +92 -29
package/src/provider-openai.d.ts +1 -4
package/src/provider-openai.js +17 -0

package/README.md CHANGED Viewed

@@ -66,7 +66,7 @@ Every piece works alone — take what you need, ignore the rest.
 | Component | What it does |
 |---|---|
-| **Loop** | Think → act → observe → repeat. Calls any LLM, executes your tools, loops until done. Returns estimated USD cost per run. Governance via `Loop({ policy })` — wire bareguard's `Gate` through `wireGate(gate)` and every tool call (native, MCP, browsing, mobile) traverses one chokepoint with per-caller `ctx` routing. Bareguard owns the audit log, budget caps, and halt decisions; Loop respects the verdict. Context engineering via `Loop({ assemble })` — a per-round `assemble(msgs, ctx)` chokepoint to recall/compress/trim the window sent to the model (the seam litectx plugs into); returns a view, the canonical transcript stays intact, fail-open. The exported `unitAssembler`/`toUnits`/`fromUnits` adapter lets a consumer work over a neutral unit `{id, role, content, kind, pinned, atomic, tokensApprox}` — bareagent owns the grammar (atomic tool-pair bundling, pinned system/task, a pairing seatbelt), the consumer owns content + relevance. `onError` + `loop:error` surface every silent-ish failure (callback throw, Checkpoint timeout) |
+| **Loop** | Think → act → observe → repeat. Calls any LLM, executes your tools, loops until done. Returns estimated USD cost per run. Governance via `Loop({ policy })` — wire bareguard's `Gate` through `wireGate(gate)` and every tool call (native, MCP, browsing, mobile) traverses one chokepoint with per-caller `ctx` routing. Bareguard owns the audit log, budget caps, and halt decisions; Loop respects the verdict. Context engineering via `Loop({ assemble })` — a per-round `assemble(msgs, ctx)` chokepoint to recall/compress/trim the window sent to the model (the seam litectx plugs into); returns a view, the canonical transcript stays intact, fail-open. The exported `unitAssembler`/`toUnits`/`fromUnits` adapter lets a consumer work over a neutral unit `{id, role, content, kind, pinned, atomic, tokensApprox}` — bareagent owns the grammar (atomic tool-pair bundling, pinned system/task, a pairing seatbelt), the consumer owns content + relevance. The CE function reads its inputs from the per-run `ctx` — litectx's budget-fitter uses `ctx.budget` (and `ctx.task`), so you **must** populate it via `run(msgs, tools, { ctx })`: an unset `ctx.budget` means the fitter has no budget, keeps everything, and returns the window unchanged — a silent no-op, not a bug (see `examples/litectx-assemble.mjs`). For summary-window compaction the Loop also lends a provider-bound `ctx.summarize(excerpt) => Promise<string>` (R-C6): the consumer owns when/what to summarize and the splice, bareagent makes the one model call (counted against the budget via `onLlmResult`, tagged `kind:'summarize'`). `onError` + `loop:error` surface every silent-ish failure (callback throw, Checkpoint timeout) |
 | **Planner** | Break a goal into a step DAG via LLM. Built-in caching (`cacheTTL`) |
 | **runPlan** | Execute steps in parallel waves. Dependency-aware, failure propagation, per-step retry |
 | **Retry** | Exponential/linear backoff with jitter. Respects `err.retryable` |
@@ -82,11 +82,11 @@ Every piece works alone — take what you need, ignore the rest.
 | **Browsing** | Web navigation, clicking, typing, reading via `barebrowse` (17 tools). Two modes: library tools (inline snapshots, pass to Loop) or CLI session (disk-based snapshots, token-efficient for multi-step flows). Optional `assess` tool (privacy scan) when `wearehere` is installed |
 | **Mobile** | Android + iOS device control via `baremobile`. Same two modes: library tools (`createMobileTools` — action tools auto-return snapshots) or CLI session (`baremobile` CLI — disk-based snapshots) |
 | **Shell** | Cross-platform `shell_read`, `shell_grep`, `shell_run` (argv, no shell), `shell_exec` (raw shell). Pure Node — no `grep`/`rg`/`findstr` dependency. Injection-proof `shell_run` for policy-gated use |
-| **MCP Bridge** | Auto-discover MCP servers from IDE configs (Claude Code, Cursor, etc.), expose as bareagent tools. Static allow/deny via `.mcp-bridge.json`, `systemContext` for LLM awareness. Runtime policy lives in `Loop({ policy })` — one hook for MCP + native tools alike. Returns both bulk `tools` (one per MCP tool) and `metaTools` (`mcp_discover` + `mcp_invoke` for token-thrifty access to large catalogs). Zero deps |
+| **MCP Bridge** | Auto-discover MCP servers from IDE configs (Claude Code, Cursor, etc.), expose as bareagent tools. Static allow/deny via `.mcp-bridge.json`, `systemContext` for LLM awareness. Runtime policy lives in `Loop({ policy })` — one hook for MCP + native tools alike. Returns both bulk `tools` (one per MCP tool) and `metaTools` (`mcp_discover` + `mcp_invoke` for token-thrifty access to large catalogs). Connecting runs a server's `command` (which may come from a cwd `.mcp.json`): pass `confirmServer` to vet each before it spawns — otherwise the bridge warns naming every command it runs. Every RPC is time-bounded (`timeout` for the handshake, `callTimeout` for `tools/call`), and a server that breaks its stdin pipe fails the connection instead of crashing the host. Zero deps |
 | **Spawn** | Fork a child bareagent process as a specialist agent. LLM-callable form blocks until child exits; library form returns a handle (`wait`, `onLine`, `kill`). One JSONL channel per child — child stderr captured and re-emitted as `child:stderr` events on the parent stream. Threads `BAREGUARD_AUDIT_PATH` / `BAREGUARD_PARENT_RUN_ID` / `BAREGUARD_BUDGET_FILE` / `BAREGUARD_SPAWN_DEPTH` so the family stitches into one audit + budget. `bareguard ^0.2.0` adds `spawn.ratePerMinute` + `limits.maxDepth` per-family caps |
 | **Defer** | Append a `{action, when}` record to a JSONL queue for a separate waker (cron / systemd timer / `examples/wake.sh`) to fire later. Two-phase governance: emit-time `gate.check` on the `defer` action; fire-time `gate.check` on the inner action when the waker re-invokes. `bareguard ^0.2.0` adds `defer.ratePerMinute` family-wide cap |
-**Providers:** OpenAI-compatible (OpenAI, OpenRouter, Groq, vLLM, LM Studio), Anthropic, Ollama, CLIPipe (any CLI tool via stdin/stdout with real-time streaming), Fallback, or bring your own (one method: `generate`). All return the same shape — swap freely.
+**Providers:** OpenAI-compatible (OpenAI, OpenRouter, Groq, vLLM, LM Studio), Anthropic, Ollama, CLIPipe (any CLI tool via stdin/stdout with real-time streaming), Fallback, or bring your own (one method: `generate`). All return the same shape — swap freely. The OpenAI provider warns if it would send your key over plaintext `http://` to a non-loopback host (use `https`, or drop `apiKey` for keyless local endpoints).
 **Tools:** Any function is a tool. REST APIs, MCP servers, CLI commands, shell scripts — if it's a function, it works. Built-in: `barebrowse` for web browsing, `baremobile` for Android + iOS device control (both optional) — library tools for inline results or CLI session mode for token-efficient disk-based snapshots.

package/bareagent.context.md CHANGED Viewed

@@ -1,7 +1,7 @@
 # bareagent — Integration Guide
 > For AI assistants and developers wiring bareagent into a project.
-> v0.13.0 | Node.js >= 18 | one required dep (`bareguard ^0.4.2`) | Apache 2.0
+> v0.14.0 | Node.js >= 18 | one required dep (`bareguard ^0.4.2`) | Apache 2.0
 >
 > Full human guide with composition examples, design philosophy, and recipes: [Usage Guide](docs/02-features/usage-guide.md)
@@ -254,7 +254,19 @@ or `content.denyPatterns` over the serialized action.
 repo) as well as your home/IDE configs. Pass `confirmServer(name, def)
 => boolean` to `createMCPBridge` to approve each server **before its
 command is spawned** (return `false` to skip it; a throw fails closed).
-Default trusts all discovered servers — unchanged behavior.
+Default trusts all discovered servers — unchanged behavior. **When no
+`confirmServer` is set, the bridge prints a one-time warning naming every
+command it is about to spawn** (before the first spawn, discovery included),
+so a cwd `.mcp.json` can't run a command unannounced — `confirmServer` is
+still how you actually *gate* it.
+**RPC timeouts (Unreleased).** Every JSON-RPC round-trip is now bounded, so a
+server that never answers can't hang the bridge or the loop. `opts.timeout`
+(default 15 s) bounds the handshake (`initialize` + `tools/list`);
+`opts.callTimeout` (default 120 s, `0` disables) bounds each `tools/call`. A
+timed-out tool call rejects with a `timed out after Nms` `ToolError` rather
+than blocking forever; a server that breaks its stdin pipe surfaces as a
+failed connection, never an uncaught `EPIPE` crash.
 ## Wiring with bareguard
@@ -294,7 +306,7 @@ if (result.error?.startsWith('halt:')) {
 }
 ```
-**Why four pieces (`policy` + `onLlmResult` + `onToolResult` + `filterTools`).** `policy` runs `gate.check` *before* every tool call. `onLlmResult` fires after every successful `provider.generate` — without it, `budget.maxCostUsd` never sees LLM cost and is silently undercounted for token-heavy / tool-light workloads (every chatbot). `onToolResult` fires after every `tool.execute` and carries the per-run `ctx` opaque blob into `gate.record` so per-principal accounting works. `filterTools` is a `gate.allows` pre-filter — denied tools are dropped from the catalog the LLM ever sees, no `gate.check` round-trip per call.
+**Why four pieces (`policy` + `onLlmResult` + `onToolResult` + `filterTools`).** `policy` runs `gate.check` *before* every tool call. `onLlmResult` fires after every successful `provider.generate` — without it, `budget.maxCostUsd` never sees LLM cost and is silently undercounted for token-heavy / tool-light workloads (every chatbot). It also fires for the out-of-band `ctx.summarize` call (R-C6) tagged `kind:'summarize'`; main-loop rounds carry `kind:'turn'` — so summary-window tokens count against the budget too, and a consumer can tell the two apart. `onToolResult` fires after every `tool.execute` and carries the per-run `ctx` opaque blob into `gate.record` so per-principal accounting works. `filterTools` is a `gate.allows` pre-filter — denied tools are dropped from the catalog the LLM ever sees, no `gate.check` round-trip per call.
 Halt-severity decisions exit the loop cleanly via a typed `HaltError` — full mechanics (sealed `msgs`, `halt:<rule>` error token, `loop:done{halted:true}` event, `throwOnError:true` interaction, `halt:unknown` coalesce) are in the **Halt decisions throw `HaltError`** paragraph below. Short version: check `result.error?.startsWith('halt:')` after the run.
@@ -544,6 +556,8 @@ All return `{ text, toolCalls, usage: { inputTokens, outputTokens } }`. CLIPipe
 **Error body (v0.11.0):** on an HTTP error the OpenAI/Anthropic/Ollama providers throw a `ProviderError` whose `message` carries the upstream error string. The full parsed response is **not** attached to `err.body` by default (so an unexpected field can't leak through logs that dump the error object). Pass `{ exposeErrorBody: true }` to attach it for debugging.
+**Plaintext-key warning (Unreleased):** the OpenAI provider's `baseUrl` accepts `http://` (for local/OpenAI-compatible endpoints), but a `Bearer` key sent over plaintext http to a **non-loopback** host is exposed on the wire. The provider now warns once when that happens. Loopback hosts (`localhost`/`127.0.0.0/8`/`::1` — local proxies, Ollama-style endpoints) stay silent, since that's the legitimate keyless-local case. The header is **not** stripped (some local proxies want a key), so use `https` for any remote endpoint, or drop `apiKey` when the local endpoint needs none.
 **Cost estimation:** Loop automatically estimates USD cost per run based on model and token usage. The `cost` field appears in every `loop.run()` result and in `loop:done` stream events. Pricing covers OpenAI and Anthropic models; unknown models use a default average. To adjust rates, edit `COST_PER_1K` at the top of `src/loop.js`.
 ## Store options

package/examples/litectx-assemble.mjs ADDED Viewed

@@ -0,0 +1,78 @@
+// examples/litectx-assemble.mjs
+//
+// RT-1 — wire litectx's budget-fit `assemble` verb into bareagent's Loop context-assembly seam,
+// and show the ONE footgun: you must populate `ctx.budget`, or the fit is a silent no-op.
+//
+// Run:  node examples/litectx-assemble.mjs
+//       (runs litectx's real verb if installed — `npm install litectx` — otherwise an inline
+//        stand-in with identical budget semantics, so the lesson runs zero-dep.)
+//
+// How the seam works:
+//   - Loop({ assemble }) calls `assemble(msgs, ctx)` before EVERY provider call, sending the
+//     returned view (the canonical transcript is never mutated).
+//   - bareagent's `unitAssembler()` wraps a litectx-shaped `assemble(units, ctx)` into that
+//     msgs-level seam — bareagent owns the grammar (atomic tool-pair bundling, pinned system/task),
+//     litectx owns content + relevance.
+//   - litectx reads its inputs from the per-run `ctx`: `ctx.budget` (token budget) and `ctx.task`
+//     (recall intent). You pass that ctx via `loop.run(msgs, tools, { ctx })`.
+//
+// THE FOOTGUN: an unset `ctx.budget` is NOT a litectx bug. With no budget the fit defaults to
+// Infinity, keeps everything, and returns the window unchanged — so litectx's core verb LOOKS
+// broken when it is really a wiring omission. Always pass `ctx.budget`.
+import { createRequire } from 'node:module';
+const require = createRequire(import.meta.url);
+const { unitAssembler } = require('bare-agent');
+// litectx's real assemble verb if installed; else an inline stand-in with the same budget semantics
+// (best-effort, recency-anchored, never drops `pinned`, returns the { units, dropped, tokens } envelope).
+let assembleVerb;
+try {
+  ({ assemble: assembleVerb } = require('litectx')); // free function on the main entry (litectx 0.11+)
+  console.log("using litectx's real assemble() verb\n");
+} catch {
+  console.log('litectx not installed — using an inline stand-in with identical budget semantics\n');
+  const tok = (u) => (Number.isFinite(u.tokensApprox) ? u.tokensApprox : Math.ceil((u.content?.length ?? 0) / 4));
+  assembleVerb = (units, ctx = {}) => {
+    const budget = Number.isFinite(ctx.budget) ? ctx.budget : Infinity;
+    const keep = new Set();
+    let used = 0;
+    for (const u of units) if (u.pinned) { keep.add(u.id); used += tok(u); } // pinned always kept
+    // newest-first, skip-and-continue greedy over the un-pinned remainder
+    const rest = units.map((u, i) => ({ u, i })).filter(({ u }) => !u.pinned).sort((a, b) => b.i - a.i);
+    for (const { u } of rest) if (used + tok(u) <= budget) { keep.add(u.id); used += tok(u); }
+    const kept = units.filter((u) => keep.has(u.id));
+    const dropped = units.filter((u) => !keep.has(u.id)).map((u) => ({ id: u.id, reason: 'budget' }));
+    return { units: kept, dropped, tokens: used };
+  };
+}
+// the seam bareagent's Loop calls: assemble(msgs, ctx) => msgs
+const assemble = unitAssembler(assembleVerb);
+// a transcript grown past budget: a pinned system prompt + the task, then several tool rounds.
+const msgs = [
+  { role: 'system', content: 'You are a helpful coding agent. '.repeat(20) },
+  { role: 'user', content: 'Find and fix the rate-limiter bug in the auth service.' },
+];
+for (let i = 1; i <= 8; i++) {
+  const id = `call_${i}`;
+  msgs.push({ role: 'assistant', content: `Round ${i}: inspecting.`, tool_calls: [{ id, type: 'function', function: { name: 'read_file', arguments: `{"path":"src/auth/round${i}.js"}` } }] });
+  msgs.push({ role: 'tool', tool_call_id: id, content: `// round ${i} file contents — `.repeat(40) });
+}
+const before = msgs.length;
+// (1) ctx.budget SET — the fit drops the oldest un-pinned rounds to fit the budget.
+const fitted = await assemble(msgs, { budget: 400, task: 'rate-limiter bug' });
+console.log(`with ctx.budget=400  : ${before} msgs -> ${fitted.length} msgs (fit dropped ${before - fitted.length})`);
+// (2) ctx.budget UNSET — the footgun. No budget => Infinity => nothing drops => window unchanged.
+const noop = await assemble(msgs, { task: 'rate-limiter bug' }); // <-- budget missing
+console.log(`with ctx.budget unset: ${before} msgs -> ${noop.length} msgs (fit dropped ${before - noop.length})  <-- silent no-op!`);
+// the pinned system prompt + task always survive the fit (pin, don't hide):
+console.log(`\nsystem prompt survives the tight fit: ${fitted.some((m) => m.role === 'system')}`);
+console.log(`task (first user turn) survives the tight fit: ${fitted.some((m) => m.role === 'user')}`);
+console.log('\nLesson: wire it as  loop.run(msgs, tools, { ctx: { budget, task } }).');
+console.log('An unset ctx.budget is not a litectx bug — the fitter correctly keeps everything when given no budget.');

package/examples/wake.sh CHANGED Viewed

@@ -63,6 +63,14 @@ echo "$PENDING" | while IFS= read -r record; do
   ID=$(echo "$record" | jq -r '.id')
   ACTION=$(echo "$record" | jq -c '.action')
+  # The defer tool generates ids as def_<base36>_<hex>. Anything else means a
+  # hand-edited / untrusted queue line — reject before $ID reaches a file path
+  # below (defence-in-depth against path traversal via a crafted id).
+  #
+  # In a `case` pattern `*` matches ANY string ('/' and '.' included), so the
+  # shape pattern on its own would still accept e.g. def_a_b/../../x. The first
+  # arm therefore rejects every id that contains a character outside
+  # [a-z0-9_]; only ids made purely of those characters reach the shape check.
+  case "$ID" in
+    *[!a-z0-9_]*) echo "[wake $NOW] skipping record with unexpected id: $ID" >&2; continue ;;
+    def_[a-z0-9]*_[a-f0-9]*) ;;
+    *) echo "[wake $NOW] skipping record with unexpected id: $ID" >&2; continue ;;
+  esac
   # Append "fired" status line first (defer queue is append-only).
   printf '{"id":"%s","status":"fired","ts":"%s"}\n' "$ID" "$NOW" >> "$QUEUE"

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bare-agent",
-  "version": "0.13.0",
+  "version": "0.14.0",
   "files": [
     "index.js",
     "index.d.ts",
@@ -99,7 +99,7 @@
   },
   "devDependencies": {
     "@types/node": "^22.19.19",
-    "litectx": "^0.11.0",
+    "litectx": "^0.13.0",
     "typescript": "^5.7.0"
   }
 }

package/src/loop.d.ts CHANGED Viewed

@@ -27,9 +27,18 @@ export type LoopOptions = {
      * thrown HaltError propagates. `ctx` is the per-run opaque blob (`run(msgs, tools, { ctx })`), the
      * same object forwarded to `policy`; litectx reads `ctx.task` (intent) and `ctx.budget`. The
      * neutral-unit signature `assemble(units, ctx)` is provided by bareagent's msgs⇄units adapter
-     * (src/context-units.js), which composes over this msgs-level seam.
+     * (src/context-units.js), which composes over this msgs-level seam. When `ctx` is an object, the
+     * Loop also lends a provider-bound `ctx.summarize(excerpt, opts?) => Promise<string>` (R-C6,
+     * non-enumerable): assemble calls it to roll a summary window — bareagent makes the one model
+     * call, the consumer owns the trigger/N/splice. Its usage is forwarded to `onLlmResult` so the
+     * summary tokens count against the budget.
      */
     assemble?: Function | undefined;
+    /**
+     * - async (event) => void after each LLM call; forwards usage to
+     * gate.record (via wireGate). `event.kind` discriminates the source: `'turn'` for a main-loop round,
+     * `'summarize'` for an out-of-band `ctx.summarize` call (R-C6). Both count against the budget.
+     */
     onLlmResult?: Function | undefined;
     onToolResult?: Function | undefined;
     /**

package/src/loop.js CHANGED Viewed

@@ -32,8 +32,14 @@ const { ToolError, HaltError } = require('./errors');
  *   thrown HaltError propagates. `ctx` is the per-run opaque blob (`run(msgs, tools, { ctx })`), the
  *   same object forwarded to `policy`; litectx reads `ctx.task` (intent) and `ctx.budget`. The
  *   neutral-unit signature `assemble(units, ctx)` is provided by bareagent's msgs⇄units adapter
- *   (src/context-units.js), which composes over this msgs-level seam.
- * @property {Function} [onLlmResult]
+ *   (src/context-units.js), which composes over this msgs-level seam. When `ctx` is an object, the
+ *   Loop also lends a provider-bound `ctx.summarize(excerpt, opts?) => Promise<string>` (R-C6,
+ *   non-enumerable): assemble calls it to roll a summary window — bareagent makes the one model
+ *   call, the consumer owns the trigger/N/splice. Its usage is forwarded to `onLlmResult` so the
+ *   summary tokens count against the budget.
+ * @property {Function} [onLlmResult] - async (event) => void after each LLM call; forwards usage to
+ *   gate.record (via wireGate). `event.kind` discriminates the source: `'turn'` for a main-loop round,
+ *   `'summarize'` for an out-of-band `ctx.summarize` call (R-C6). Both count against the budget.
  * @property {Function} [onToolResult]
  * @property {number} [maxRounds] - Removed in v0.8; presence throws a migration error.
  */
@@ -105,6 +111,38 @@ function estimateCost(model, usage) {
   );
 }
+// R-C6: default instruction for the provider-bound `ctx.summarize` lent to the assemble seam.
+const DEFAULT_SUMMARY_INSTRUCTION =
+  'You are a precise conversation summarizer. Produce a concise, factual summary of the following ' +
+  'conversation excerpt. Preserve concrete facts, decisions, and identifiers (names, ids, file ' +
+  'paths, numbers), and note any open or unresolved threads. Do not invent information. Output ' +
+  'only the summary prose, with no preamble.';
+// Flatten an excerpt (array of OpenAI-format messages, or a raw string) into one prose block for the
+// summarizer's single user turn. Rendering to text — rather than forwarding raw messages — sidesteps
+// tool-call/result pairing entirely: a summary input never needs to be a valid wire transcript.
+/**
+ * @param {Array<any>|string|null|undefined} excerpt
+ * @returns {string}
+ */
+function renderForSummary(excerpt) {
+  if (excerpt == null) return '';
+  if (typeof excerpt === 'string') return excerpt;
+  if (!Array.isArray(excerpt)) return String(excerpt);
+  const parts = [];
+  for (const m of excerpt) {
+    if (m == null) continue;
+    if (typeof m === 'string') { parts.push(m); continue; }
+    const role = m.role || 'message';
+    let text = m.content != null ? String(m.content) : '';
+    if (Array.isArray(m.tool_calls) && m.tool_calls.length) {
+      text += (text ? '\n' : '') + `[tool_calls: ${JSON.stringify(m.tool_calls)}]`;
+    }
+    parts.push(`${role}: ${text}`);
+  }
+  return parts.join('\n\n');
+}
 class Loop {
   /**
    * `policy` is async `(toolName, args, ctx) => true | string`. Recommended wiring: a closure
@@ -250,6 +288,65 @@ class Loop {
     let lastUsage = { inputTokens: 0, outputTokens: 0 };
     let totalCost = 0;
+    // R-C6: lend a provider-bound summarizer to the assemble seam via `ctx.summarize`. litectx owns
+    // the trigger/N/splice (its restorable COMPRESS path keeps summarized turns recoverable by id);
+    // bareagent lends ONLY the single model call. Attached NON-ENUMERABLE so it never shows up in the
+    // caller's ctx via JSON/iteration/deepEqual — preserving the `assemble(units, ctx)` identity
+    // contract (test/loop-assemble.test.js). `summarize(excerpt, opts?) => Promise<string>`:
+    //   excerpt — array of OpenAI-format messages (or a raw string) litectx wants compressed
+    //   opts    — { instruction?, ...generateOpts } (instruction overrides the default; the rest pass
+    //             through to provider.generate; temperature defaults to 0 for determinism)
+    // The summary call's usage is forwarded to onLlmResult so its tokens count against the budget
+    // (BA1 lineage — token-only flows must not be invisible to the gate); a HaltError there is a
+    // governance exit and propagates, matching the main-loop onLlmResult contract.
+    if (ctx && typeof ctx === 'object') {
+      const loop = this;
+      /**
+       * @param {Array<any>|string} excerpt
+       * @param {Record<string, any>} [opts]
+       * @returns {Promise<string>}
+       */
+      const summarize = async (excerpt, opts = {}) => {
+        const { instruction, ...genOpts } = opts || {};
+        const prompt = [
+          { role: 'system', content: instruction || DEFAULT_SUMMARY_INSTRUCTION },
+          { role: 'user', content: renderForSummary(excerpt) },
+        ];
+        const startedAt = Date.now();
+        const result = await loop.provider.generate(prompt, [], { temperature: 0, ...genOpts });
+        const usage = (result && result.usage) || null;
+        const model = loop.provider.model || null;
+        const cost = estimateCost(model, usage);
+        if (cost !== null) totalCost += cost;
+        loop._safeEmit({ type: 'loop:summarize', data: { usage, costUsd: cost, durationMs: Date.now() - startedAt } });
+        if (loop.onLlmResult) {
+          try {
+            await loop.onLlmResult({
+              model,
+              provider: loop.provider.name || null,
+              usage,
+              costUsd: cost,
+              durationMs: Date.now() - startedAt,
+              ctx,
+              kind: 'summarize',
+            });
+          } catch (err) {
+            if (err instanceof HaltError) throw err;
+            loop._reportError('onLlmResult', err, { phase: 'summarize' });
+          }
+        }
+        return (result && result.text) || '';
+      };
+      // Fail-OPEN to match the assemble seam's own contract: a frozen / sealed / non-configurable ctx
+      // must NOT crash the agent. On failure the seam is simply unavailable (consumers already handle
+      // ctx.summarize being absent — it only exists when ctx is an object), reported, never silent.
+      try {
+        Object.defineProperty(ctx, 'summarize', { value: summarize, enumerable: false, configurable: true, writable: true });
+      } catch (err) {
+        this._reportError('summarize-attach', err);
+      }
+    }
     try {
     for (let round = 0; round < HARD_ROUND_LIMIT; round++) {
       if (this._stopped) break;
@@ -301,6 +398,7 @@ class Loop {
             costUsd: roundCost,
             durationMs: Date.now() - llmStartedAt,
             ctx,
+            kind: 'turn',
           });
         } catch (err) {
           if (err instanceof HaltError) throw err;

package/src/mcp-bridge.d.ts CHANGED Viewed

@@ -52,7 +52,7 @@ export type DeniedTool = {
  * JSON-RPC stdio client over a spawned MCP server.
  */
 export type RpcClient = {
-    rpc: (method: string, params?: object) => Promise<any>;
+    rpc: (method: string, params?: object, timeoutMs?: number) => Promise<any>;
     notify: (method: string, params?: object) => void;
     child: import("node:child_process").ChildProcessWithoutNullStreams;
     stderr: string;
@@ -76,7 +76,9 @@ export type RpcClient = {
  * @param {string} [opts.bridgePath] - Path to .mcp-bridge.json. Default: .mcp-bridge.json in cwd.
  * @param {string[]} [opts.configPaths] - IDE config paths for discovery.
  * @param {string[]} [opts.servers] - Limit to these server names.
- * @param {number} [opts.timeout=15000] - Per-server init timeout in ms.
+ * @param {number} [opts.timeout=15000] - Per-server handshake timeout in ms (initialize + tools/list).
+ * @param {number} [opts.callTimeout=120000] - Per-invocation timeout in ms for tools/call. Bounds a
+ *   server that accepts a tool call but never responds. Set 0 to disable (unbounded).
  * @param {boolean} [opts.refresh=false] - Force re-discovery regardless of TTL.
  * @param {(name: string, def: ServerDef) => boolean | Promise<boolean>} [opts.confirmServer]
  *   Vet each discovered server BEFORE its `command` is spawned. Connecting to an
@@ -92,6 +94,7 @@ export function createMCPBridge(opts?: {
     configPaths?: string[] | undefined;
     servers?: string[] | undefined;
     timeout?: number | undefined;
+    callTimeout?: number | undefined;
     refresh?: boolean | undefined;
     confirmServer?: ((name: string, def: ServerDef) => boolean | Promise<boolean>) | undefined;
 }): Promise<{

package/src/mcp-bridge.js CHANGED Viewed

@@ -54,7 +54,7 @@ const { ToolError } = require('./errors');
 /**
  * JSON-RPC stdio client over a spawned MCP server.
  * @typedef {object} RpcClient
- * @property {(method: string, params?: object) => Promise<any>} rpc
+ * @property {(method: string, params?: object, timeoutMs?: number) => Promise<any>} rpc
  * @property {(method: string, params?: object) => void} notify
  * @property {import('node:child_process').ChildProcessWithoutNullStreams} child
  * @property {string} stderr
@@ -215,11 +215,25 @@ function createRpcClient(name, def) {
     ...(cwd && { cwd }),
   });
-  /** @type {Map<number, {resolve: (v: any) => void, reject: (e: any) => void}>} */
+  /** @type {Map<number, {resolve: (v: any) => void, reject: (e: any) => void, timer: NodeJS.Timeout | null}>} */
   const pending = new Map();
   let nextId = 1;
   let buffer = '';
+  // Settle a pending request exactly once, clearing its timeout timer. Returns
+  // false if the id was already settled (response/close/timeout raced) so callers
+  // can avoid double-settling. Every settle path (response, close, write error,
+  // timeout) funnels through here.
+  /** @param {number} id @param {boolean} ok @param {any} payload @returns {boolean} */
+  function settle(id, ok, payload) {
+    const p = pending.get(id);
+    if (!p) return false;
+    pending.delete(id);
+    if (p.timer) clearTimeout(p.timer);
+    if (ok) p.resolve(payload); else p.reject(payload);
+    return true;
+  }
   child.stdout.setEncoding('utf8');
   child.stdout.on('data', (chunk) => {
     buffer += chunk;
@@ -231,15 +245,12 @@ function createRpcClient(name, def) {
       let msg;
       try { msg = JSON.parse(line); } catch { continue; }
       if (!msg.id) continue;
-      const p = pending.get(msg.id);
-      if (!p) continue;
-      pending.delete(msg.id);
       if (msg.error) {
-        p.reject(new ToolError(`MCP server "${name}": ${msg.error.message}`, {
+        settle(msg.id, false, new ToolError(`MCP server "${name}": ${msg.error.message}`, {
           context: { code: msg.error.code },
         }));
       } else {
-        p.resolve(msg.result);
+        settle(msg.id, true, msg.result);
       }
     }
   });
@@ -248,24 +259,49 @@ function createRpcClient(name, def) {
   child.stderr?.setEncoding('utf8');
   child.stderr?.on('data', (chunk) => { stderrBuf += chunk; });
+  // A child can exit (crash, fast-exit before init, killed) at any moment.
+  // Writing to its stdin then emits an 'error' on the pipe; with NO listener,
+  // Node re-throws it as an uncaught exception and takes down the HOST process.
+  // Swallow it here — pending rpc()s are rejected by the 'close' handler below,
+  // and rpc()/notify() guard writability before writing.
+  child.stdin.on('error', () => { /* child gone; surfaced via close + write guards */ });
   child.on('close', (code) => {
-    for (const [id, { reject }] of pending) {
-      reject(new ToolError(`MCP server "${name}" exited (code ${code}). stderr: ${stderrBuf.slice(-500)}`));
+    for (const id of [...pending.keys()]) {
+      settle(id, false, new ToolError(`MCP server "${name}" exited (code ${code}). stderr: ${stderrBuf.slice(-500)}`));
     }
-    pending.clear();
   });
   /**
+   * Send a JSON-RPC request and await its response. Optionally bounded by
+   * `timeoutMs`: a server that accepts the write but never answers (or answers
+   * a different id) would otherwise hang the caller forever — only `initialize`
+   * used to be bounded, leaving `tools/list` and `tools/call` open-ended.
+   * Pass 0 (the default) to disable the timeout.
    * @param {string} method
    * @param {object} [params]
+   * @param {number} [timeoutMs=0] - Reject if no response arrives within this many ms (0 = no limit).
    * @returns {Promise<any>}
    */
-  function rpc(method, params = {}) {
+  function rpc(method, params = {}, timeoutMs = 0) {
     const id = nextId++;
     return new Promise((resolve, reject) => {
-      pending.set(id, { resolve, reject });
+      if (!child.stdin.writable) {
+        return reject(new ToolError(`MCP server "${name}" stdin is not writable (process exited or pipe closed). stderr: ${stderrBuf.slice(-500)}`));
+      }
+      /** @type {NodeJS.Timeout | null} */
+      let timer = null;
+      if (timeoutMs > 0) {
+        timer = setTimeout(() => {
+          settle(id, false, new ToolError(`MCP server "${name}" "${method}" timed out after ${timeoutMs}ms. stderr: ${stderrBuf.slice(-500)}`));
+        }, timeoutMs);
+        timer.unref?.();
+      }
+      pending.set(id, { resolve, reject, timer });
       const msg = JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n';
-      child.stdin.write(msg);
+      child.stdin.write(msg, (err) => {
+        // settle() no-ops if 'close'/timeout already settled this id.
+        if (err) settle(id, false, new ToolError(`MCP server "${name}" write failed: ${err.message}. stderr: ${stderrBuf.slice(-500)}`));
+      });
     });
   }
@@ -274,8 +310,9 @@ function createRpcClient(name, def) {
    * @param {object} [params]
    */
   function notify(method, params = {}) {
+    if (!child.stdin.writable) return;
     const msg = JSON.stringify({ jsonrpc: '2.0', method, params }) + '\n';
-    child.stdin.write(msg);
+    child.stdin.write(msg, () => { /* write errors swallowed via stdin 'error' handler */ });
   }
   return { rpc, notify, child, get stderr() { return stderrBuf; } };
@@ -301,16 +338,17 @@ function unwrapContent(content) {
 /**
  * @param {string} serverName
  * @param {McpTool[]} mcpTools
- * @param {(method: string, params?: object) => Promise<any>} rpc
+ * @param {(method: string, params?: object, timeoutMs?: number) => Promise<any>} rpc
+ * @param {number} [callTimeout=0] - Per-invocation timeout (ms) for tools/call; 0 = no limit.
  * @returns {ToolDef[]}
  */
-function wrapTools(serverName, mcpTools, rpc) {
+function wrapTools(serverName, mcpTools, rpc, callTimeout = 0) {
   return mcpTools.map(t => ({
     name: `${serverName}_${t.name}`,
     description: t.description || '',
     parameters: t.inputSchema || { type: 'object', properties: {} },
     execute: async (args) => {
-      const result = await rpc('tools/call', { name: t.name, arguments: args });
+      const result = await rpc('tools/call', { name: t.name, arguments: args }, callTimeout);
       if (result.isError) {
         throw new ToolError(unwrapContent(result.content) || 'MCP tool error', {
           context: { server: serverName, tool: t.name },
@@ -374,21 +412,17 @@ async function connectAndListTools(name, def, timeout = 15000) {
   const client = createRpcClient(name, def);
   try {
-    const init = client.rpc('initialize', {
+    // Both handshake round-trips are bounded by `timeout`. tools/list used to be
+    // unbounded — a server that answered initialize but never replied to
+    // tools/list would hang discovery (and the whole bridge) indefinitely.
+    await client.rpc('initialize', {
       protocolVersion: '2024-11-05',
       capabilities: {},
       clientInfo: { name: 'bare-agent', version: '0.5.0' },
-    });
-    let timerId;
-    const timer = new Promise((_, reject) => {
-      timerId = setTimeout(() => reject(new ToolError(`MCP server "${name}" init timed out after ${timeout}ms`)), timeout);
-    });
-    try { await Promise.race([init, timer]); } finally { clearTimeout(timerId); }
+    }, timeout);
     client.notify('notifications/initialized');
-    const { tools: mcpTools } = await client.rpc('tools/list');
+    const { tools: mcpTools } = await client.rpc('tools/list', {}, timeout);
     return { mcpTools, client };
   } catch (err) {
@@ -562,7 +596,9 @@ function buildMetaTools(tools, discoveredAt) {
  * @param {string} [opts.bridgePath] - Path to .mcp-bridge.json. Default: .mcp-bridge.json in cwd.
  * @param {string[]} [opts.configPaths] - IDE config paths for discovery.
  * @param {string[]} [opts.servers] - Limit to these server names.
- * @param {number} [opts.timeout=15000] - Per-server init timeout in ms.
+ * @param {number} [opts.timeout=15000] - Per-server handshake timeout in ms (initialize + tools/list).
+ * @param {number} [opts.callTimeout=120000] - Per-invocation timeout in ms for tools/call. Bounds a
+ *   server that accepts a tool call but never responds. Set 0 to disable (unbounded).
  * @param {boolean} [opts.refresh=false] - Force re-discovery regardless of TTL.
  * @param {(name: string, def: ServerDef) => boolean | Promise<boolean>} [opts.confirmServer]
  *   Vet each discovered server BEFORE its `command` is spawned. Connecting to an
@@ -583,6 +619,8 @@ async function createMCPBridge(opts = {}) {
   }
   const bridgePath = opts.bridgePath || DEFAULT_BRIDGE_PATH();
   const timeout = opts.timeout || 15000;
+  // 0 is a valid explicit "unbounded"; only undefined/null falls back to the default.
+  const callTimeout = opts.callTimeout ?? 120000;
   // Vet a server before spawning its command. An undefined hook trusts all
   // (unchanged behavior); a hook that throws fails closed and denies the server.
@@ -594,6 +632,24 @@ async function createMCPBridge(opts = {}) {
     catch { return false; }
   };
+  // Connecting to a server EXECUTES its `command`, which can originate from a
+  // cwd-relative .mcp.json in an untrusted repo (discoverServers reads project
+  // configs). With no confirmServer hook, every discovered command runs unvetted.
+  // Warn ONCE per call, BEFORE the first spawn — and the first spawn is the
+  // discovery phase on a cold/refresh run, not the main-connect phase below.
+  let warnedUnvetted = false;
+  /**
+   * Print a single console warning listing the server commands about to be
+   * spawned. Does nothing when `confirmServer` is truthy, when the warning was
+   * already printed, or when `specs` is empty.
+   * @param {Array<{name: string, command: string, args?: string[]}>} specs
+   */
+  const warnUnvettedSpawn = (specs) => {
+    if (confirmServer || warnedUnvetted || specs.length === 0) return;
+    // Set the flag before printing so every later call is a no-op.
+    warnedUnvetted = true;
+    // One "name → command args" entry per server; trim() drops the trailing
+    // space left behind when a server has no args.
+    const cmds = specs.map(s => `${s.name} → ${s.command} ${(s.args || []).join(' ')}`.trim());
+    console.warn(
+      `[MCP Bridge] spawning ${specs.length} server command(s) without a confirmServer hook:\n  ` +
+      cmds.join('\n  ') +
+      `\n  Pass { confirmServer } to vet each command before it runs.`,
+    );
+  };
   let config = readBridgeConfig(bridgePath);
   const needsRefresh = opts.refresh || !config || isExpired(config);
@@ -621,6 +677,8 @@ async function createMCPBridge(opts = {}) {
         ? [...discovered.entries()].filter(([n]) => reqServers.includes(n))
         : [...discovered.entries()];
+      warnUnvettedSpawn(toDiscover.map(([name, def]) => ({ name, command: def.command, args: def.args })));
       await Promise.all(toDiscover.map(async ([name, def]) => {
         try {
           // Denied by confirmServer: skip silently — this is the caller's own
@@ -674,6 +732,11 @@ async function createMCPBridge(opts = {}) {
     return { tools: [], servers: [], systemContext: '', denied: [], close: async () => {} };
   }
+  // Warn before the main-connect spawn too. On a warm run (config exists, no
+  // refresh) this is the first and only spawn; on a cold run the discovery phase
+  // already warned, so the once-flag makes this a no-op.
+  warnUnvettedSpawn(serverNames.map(n => ({ name: n, command: cfg.servers[n].command, args: cfg.servers[n].args })));
   // Connect to servers and wrap only allowed tools
   /** @type {ToolDef[]} */
   const tools = [];
@@ -702,7 +765,7 @@ async function createMCPBridge(opts = {}) {
       // Only wrap tools that are allowed in config
       const allowed = mcpTools.filter(t => allowedToolNames.includes(t.name));
-      const wrapped = wrapTools(name, allowed, client.rpc);
+      const wrapped = wrapTools(name, allowed, client.rpc, callTimeout);
       tools.push(...wrapped);
       children.push(client.child);

package/src/provider-openai.d.ts CHANGED Viewed

@@ -15,10 +15,6 @@ export type OpenAIOptions = {
      */
     exposeErrorBody?: boolean | undefined;
 };
-/** @typedef {import('../types').Message} Message */
-/** @typedef {import('../types').ToolDef} ToolDef */
-/** @typedef {import('../types').ToolCall} ToolCall */
-/** @typedef {import('../types').GenerateResult} GenerateResult */
 /**
  * @typedef {object} OpenAIOptions
  * @property {string} [apiKey]
@@ -54,4 +50,5 @@ export class OpenAIProvider {
      * @returns {Promise<any>}
      */
     _request(path: string, body: Record<string, any>): Promise<any>;
+    _warnedInsecure: boolean | undefined;
 }

package/src/provider-openai.js CHANGED Viewed

@@ -9,6 +9,12 @@ const { ProviderError } = require('./errors');
 /** @typedef {import('../types').ToolCall} ToolCall */
 /** @typedef {import('../types').GenerateResult} GenerateResult */
+/**
+ * Report whether `hostname` is treated as a loopback host: `localhost`, `::1`
+ * (with or without surrounding brackets), or any string beginning with `127.`.
+ *
+ * NOTE(review): `startsWith('127.')` is a plain string-prefix test, so it also
+ * accepts DNS names such as `127.example.com`, and it makes the explicit
+ * `127.0.0.1` comparison redundant. Consider matching a full dotted-quad
+ * (four numeric octets) instead — confirm intended behavior.
+ * @param {string} hostname - Host to classify, e.g. `url.hostname`.
+ * @returns {boolean} true when the host is treated as loopback.
+ */
+function isLoopbackHost(hostname) {
+  const h = hostname.replace(/^\[|\]$/g, ''); // strip IPv6 brackets
+  return h === 'localhost' || h === '127.0.0.1' || h === '::1' || h.startsWith('127.');
+}
 /**
  * @typedef {object} OpenAIOptions
  * @property {string} [apiKey]
@@ -84,6 +90,17 @@ class OpenAIProvider {
       const transport = url.protocol === 'https:' ? https : http;
       const payload = JSON.stringify(body);
+      // Sending a Bearer key over plaintext http to a non-loopback host exposes
+      // it to anyone on-path. Loopback (local proxies / Ollama-style endpoints)
+      // is the legitimate keyless case, so only warn for remote http. Warn once.
+      if (this.apiKey && url.protocol === 'http:' && !isLoopbackHost(url.hostname) && !this._warnedInsecure) {
+        this._warnedInsecure = true;
+        console.warn(
+          `[OpenAIProvider] sending Authorization key over PLAINTEXT http to ${url.hostname} — ` +
+          `the key is exposed on the wire. Use https, or drop the apiKey for keyless local endpoints.`,
+        );
+      }
       const req = transport.request(url, {
         method: 'POST',
         headers: {