npm - bare-agent - Versions diffs - 0.10.2 → 0.10.4 - Mend

bare-agent 0.10.2 → 0.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md +15 -0
package/bin/cli.js +14 -6
package/package.json +1 -1
package/src/bareguard-adapter.js +36 -12
package/src/errors.js +2 -1
package/src/loop.js +43 -7

package/README.md CHANGED Viewed

@@ -169,6 +169,21 @@ Halts also fire `loop:error` on the stream (`source: 'halt'`) and the `onError`
 ---
+## Examples
+Runnable scripts in [`examples/`](examples/) — each is self-contained and the file's top docstring documents flags and required env vars.
+| File | What it shows |
+|---|---|
+| [`with-bareguard.mjs`](examples/with-bareguard.mjs) | End-to-end Loop + bareguard wiring: budget cap, fs scope, bash allowlist, audit log, humanChannel. The canonical governed-loop reference. |
+| [`mcp-bridge-poc.js`](examples/mcp-bridge-poc.js) | Auto-discover MCP servers from your IDE configs and expose them as bareagent tools. First run writes `.mcp-bridge.json` (edit to deny tools). |
+| [`mcp-bridge-concurrent.js`](examples/mcp-bridge-concurrent.js) | Soak test: fan out concurrent `barebrowse_browse` calls against real domains (Amazon, Wikipedia, GitHub, a dead host) and verify resilience. |
+| [`orchestrator/`](examples/orchestrator/) | Multi-agent dispatch via `spawn`. Three configs, one system prompt — no orchestrator class, no role types. Roles are JSON files. |
+| [`wake.sh`](examples/wake.sh) + [`wake.md`](examples/wake.md) | Reference cron + jq script for firing deferred actions. The runtime half of `createDeferTool` — bareagent emits, `wake.sh` fires. |
+| [`replay-job.js`](examples/replay-job.js) | Supervised replay POC: record a browser task once with the LLM driving, then replay against fresh snapshots with the LLM as locator-only. Falls back to full reasoning when the locator misses, and patches the trace. |
+---
 ## Cross-language usage
 Not using Node.js? Spawn bare-agent as a subprocess from any language. Ready-made wrappers in [`contrib/`](contrib/README.md) for Python, Go, Rust, Ruby, and Java — copy one file, no package registry needed.

package/bin/cli.js CHANGED Viewed

@@ -63,9 +63,13 @@ async function runConfigMode(cfgPath) {
   // Tools — registry resolved by name from a curated set of built-ins.
   const tools = await resolveTools(cfg.tools || [], { stream });
-  // Bareguard Gate (optional but strongly recommended for spawn children)
+  // Bareguard Gate (optional but strongly recommended for spawn children).
+  // Fail-closed: if the config asks for a gate but wiring fails, exit non-zero
+  // rather than run an ungoverned child agent.
   let policy = null;
-  let wrapToolsFn = (t) => t;
+  let onLlmResult = null;
+  let onToolResult = null;
+  let gatedTools = tools;
   if (cfg.gate) {
     try {
       const { Gate } = require('bareguard');
@@ -95,9 +99,12 @@ async function runConfigMode(cfgPath) {
       await gate.init();
       const wired = wireGate(gate);
       policy = wired.policy;
-      wrapToolsFn = wired.wrapTools;
+      onLlmResult = wired.onLlmResult;
+      onToolResult = wired.onToolResult;
+      gatedTools = await wired.filterTools(tools);
     } catch (err) {
-      process.stderr.write(`[cli] failed to wire bareguard: ${err.message}. Continuing without policy gate.\n`);
+      process.stderr.write(`[cli] failed to wire bareguard: ${err.message}. Refusing to run ungoverned (cfg.gate set).\n`);
+      process.exit(1);
     }
   }
@@ -111,13 +118,14 @@ async function runConfigMode(cfgPath) {
     system: cfg.systemPrompt || null,
     stream,
     policy,
+    onLlmResult,
+    onToolResult,
     onError: (err, meta) => {
       process.stderr.write(`[loop:error ${meta.source}] ${err.message}\n`);
     },
   });
-  const wrapped = wrapToolsFn(tools);
-  await loop.run([initialMessage], wrapped);
+  await loop.run([initialMessage], gatedTools);
   // Stream's loop:done event has already been emitted; exit clean.
   process.exit(0);
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bare-agent",
-  "version": "0.10.2",
+  "version": "0.10.4",
   "files": [
     "index.js",
     "src/",

package/src/bareguard-adapter.js CHANGED Viewed

@@ -2,6 +2,24 @@
 const { HaltError } = require('./errors');
+// Safe-stringify for tool results: JSON.stringify throws on circular structures
+// and bigints, and returns undefined for a bare function / undefined. Falling
+// back to String() keeps the gate.record call inside onToolResult from throwing
+// (which would surface as a loop:error{source:'onToolResult'} for what is
+// really a serialization quirk).
+function safeStringify(value) {
+  if (typeof value === 'string') return value;
+  try {
+    const json = JSON.stringify(value);
+    return json === undefined ? String(value) : json;
+  } catch {
+    return String(value);
+  }
+}
+// Module-scope so a process that spawns many child agents (each with its own
+// wireGate call) only prints the wrapTool deprecation warning once.
+let warnedWrap = false;
 /**
  * Wire a bareguard Gate into bareagent's Loop.
  *
@@ -14,7 +32,12 @@ const { HaltError } = require('./errors');
  *   - `onToolResult`  — callback for `new Loop({ onToolResult })`. Forwards every
  *                       tool.execute result to gate.record with ctx in scope.
  *   - `filterTools`   — async (tools) => filtered. Drops tools denied by gate.allows
- *                       so the LLM never sees them. No audit, no record.
+ *                       so the LLM never sees them. No audit, no record. Bulk-only:
+ *                       when MCP tools are exposed via `mcp_discover`+`mcp_invoke`
+ *                       meta-tools, filterTools cannot drop the inner names (they
+ *                       are not in the tool list). Gate those via bareguard's
+ *                       `tools.denyArgPatterns: { mcp_invoke: [/"name":"…"/] }`
+ *                       — see src/mcp-bridge.js (Gov shape).
  *   - `wrapTool` / `wrapTools` — DEPRECATED. Pre-BA1 shim that wraps execute() to
  *                       call gate.record post-hoc. Loses _ctx and never sees LLM cost.
  *                       Prefer `onToolResult` (and `onLlmResult` for budget correctness).
@@ -25,9 +48,11 @@ const { HaltError } = require('./errors');
  *
  * @param {object} gate - A bareguard Gate instance (must have .check, .record, .allows).
  * @param {object} [options]
- * @param {Function} [options.formatDeny] - (decision) => string. Transforms the deny
- *   string fed to the LLM. Default: "[deny: <rule>] <reason>". Halt bypasses this
- *   (HaltError doesn't reach the LLM).
+ * @param {Function} [options.formatDeny] - (decision, toolName) => string. Transforms
+ *   the deny string fed to the LLM. The second arg is the bareagent tool name (handy
+ *   for tool-specific deny copy). Default: "[deny: <rule>] <reason>" or
+ *   "[deny: <rule>] <toolName> denied" when bareguard omits a reason. Halt bypasses
+ *   this (HaltError doesn't reach the LLM).
  * @param {Function} [options.actionTranslator] - (toolName, args, ctx) => action.
  *   Builds the action object passed to `gate.check` and `gate.record`. Default:
  *   `{ type: toolName, args, _ctx: ctx }`. Override when bareguard's primitives
@@ -102,7 +127,7 @@ function wireGate(gate, options = {}) {
       });
     } else {
       await gate.record(action, {
-        result: typeof result === 'string' ? result : JSON.stringify(result),
+        result: safeStringify(result),
         durationMs: durationMs ?? null,
       });
     }
@@ -115,14 +140,13 @@ function wireGate(gate, options = {}) {
     if (typeof gate.allows !== 'function') {
       throw new Error('[wireGate.filterTools] gate must have .allows (bareguard >= 0.2)');
     }
-    const out = [];
-    for (const t of tools) {
-      if (await gate.allows(t.name)) out.push(t);
-    }
-    return out;
+    // Parallel: gate.allows is config-driven and pure, so concurrent calls are
+    // safe. Matters for large MCP catalogs (50+ tools) where sequential awaits
+    // were noticeable overhead on every startup.
+    const verdicts = await Promise.all(tools.map(t => gate.allows(t.name)));
+    return tools.filter((_, i) => verdicts[i]);
   };
-  let warnedWrap = false;
   function wrapTool(tool) {
     if (!warnedWrap) {
       warnedWrap = true;
@@ -143,7 +167,7 @@ function wireGate(gate, options = {}) {
         try {
           const result = await original(args);
           await gate.record(action, {
-            result: typeof result === 'string' ? result : JSON.stringify(result),
+            result: safeStringify(result),
             durationMs: Date.now() - startedAt,
           });
           return result;

package/src/errors.js CHANGED Viewed

@@ -52,8 +52,9 @@ class HaltError extends BareAgentError {
     super(message || `[HALT: ${rule || 'unknown'}]`, {
       code: 'HALT',
       retryable: false,
-      context: { ...context, rule, decision },
+      context,
     });
+    // Public, stable surface — read `err.rule` / `err.decision` (not `err.context`).
     this.rule = rule || null;
     this.decision = decision || null;
   }

package/src/loop.js CHANGED Viewed

@@ -3,7 +3,7 @@
 const { ToolError, HaltError } = require('./errors');
 // Average pricing per 1K tokens (USD). Adjust these to match your provider's rates.
-// Last updated: 2026-03-18. Source: public provider pricing pages.
+// Last updated: 2026-05-18. Source: public provider pricing pages.
 const COST_PER_1K = {
   // OpenAI
   'gpt-4o': { in: 0.0025, out: 0.01 },
@@ -12,9 +12,13 @@ const COST_PER_1K = {
   'gpt-4.1-mini': { in: 0.0004, out: 0.0016 },
   'gpt-4.1-nano': { in: 0.0001, out: 0.0004 },
   'o3-mini': { in: 0.0011, out: 0.0044 },
-  // Anthropic
-  'claude-sonnet-4-20250514': { in: 0.003, out: 0.015 },
+  // Anthropic — Claude 4.x current generation (2026-05)
+  'claude-opus-4-7': { in: 0.015, out: 0.075 },
+  'claude-sonnet-4-6': { in: 0.003, out: 0.015 },
   'claude-haiku-4-5-20251001': { in: 0.0008, out: 0.004 },
+  'claude-haiku-4-5': { in: 0.0008, out: 0.004 },
+  // Anthropic — earlier 4.x snapshots
+  'claude-sonnet-4-20250514': { in: 0.003, out: 0.015 },
   'claude-opus-4-20250514': { in: 0.015, out: 0.075 },
   // Fallback average across popular models (~$0.002 in, ~$0.008 out per 1K)
   '_default': { in: 0.002, out: 0.008 },
@@ -25,6 +29,27 @@ const COST_PER_1K = {
 // no governance and the LLM loop is unbounded by design — wire bareguard.
 const HARD_ROUND_LIMIT = 100;
+// Find the most recent assistant message that carries tool_calls and append a
+// synthetic `role:'tool'` reply (`[halted:<rule>]`) for every tool_call_id that
+// has no matching reply after it. Halt-path only — keeps msgs a valid OpenAI
+// transcript when the loop exits between pushing assistant.tool_calls and
+// finishing the per-tool loop.
+function sealDanglingToolCalls(msgs, rule) {
+  for (let i = msgs.length - 1; i >= 0; i--) {
+    const m = msgs[i];
+    if (m.role !== 'assistant' || !Array.isArray(m.tool_calls)) continue;
+    const seen = new Set();
+    for (let j = i + 1; j < msgs.length; j++) {
+      if (msgs[j].role === 'tool' && msgs[j].tool_call_id) seen.add(msgs[j].tool_call_id);
+    }
+    for (const tc of m.tool_calls) {
+      if (!seen.has(tc.id)) {
+        msgs.push({ role: 'tool', tool_call_id: tc.id, content: `[halted:${rule}]` });
+      }
+    }
+    return;
+  }
+}
 function estimateCost(model, usage) {
   if (!usage || !model) return null;
   const rates = COST_PER_1K[model] || COST_PER_1K['_default'];
@@ -127,7 +152,12 @@ class Loop {
    * @param {Array<object>} messages - Conversation messages in OpenAI format.
    * @param {Array<object>} [tools=[]] - Tool definitions with name, execute, description, parameters.
    * @param {object} [options={}] - Per-run overrides (system, temperature, ctx, etc.).
-   * @returns {Promise<{text: string, toolCalls: Array, usage: object, error: string|null}>}
+   * @returns {Promise<{text: string, toolCalls: Array, usage: object, cost: number, error: string|null, msgs: Array<object>}>}
+   *   On halt the returned `error` is `halt:<rule>` (or `halt:unknown` if the
+   *   thrown HaltError carried no `rule`), and `msgs` is sanitized so any
+   *   dangling assistant `tool_calls` from the halted round are paired with
+   *   synthetic `[halted:<rule>]` tool replies — safe to feed back into another
+   *   provider call without violating OpenAI's tool-call/tool-result pairing.
    * @throws {Error} `[Loop] Tool is missing a name` — when a tool has no name or a non-string name.
    * @throws {Error} `[Loop] Tool "X" is missing an execute() function` — when execute is not a function.
    * @throws {Error} `[Loop] Tool "X" has invalid parameters` — when parameters is not an object.
@@ -323,9 +353,15 @@ class Loop {
       // BA2: HaltError is a clean governance exit, not a runtime failure.
       // No throw even when throwOnError:true — the gate halted us deliberately.
       if (err instanceof HaltError) {
-        this._reportError('halt', err, { rule: err.rule, reason: err.decision?.reason ?? null });
-        this._safeEmit({ type: 'loop:done', data: { text: '', halted: true, rule: err.rule, cost: totalCost } });
-        return { text: '', toolCalls: [], usage: lastUsage, cost: totalCost, error: `halt:${err.rule}`, msgs };
+        const rule = err.rule || 'unknown';
+        // Pair any dangling assistant.tool_calls (from the halted round) with
+        // synthetic `[halted:<rule>]` replies so the returned msgs is a valid
+        // OpenAI-shaped transcript — consumers can feed it back into another
+        // provider call without tripping the tool-call/tool-result pairing.
+        sealDanglingToolCalls(msgs, rule);
+        this._reportError('halt', err, { rule, reason: err.decision?.reason ?? null });
+        this._safeEmit({ type: 'loop:done', data: { text: '', halted: true, rule, cost: totalCost } });
+        return { text: '', toolCalls: [], usage: lastUsage, cost: totalCost, error: `halt:${rule}`, msgs };
       }
       throw err;
     }