bare-agent 0.10.2 → 0.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -169,6 +169,21 @@ Halts also fire `loop:error` on the stream (`source: 'halt'`) and the `onError`
169
169
 
170
170
  ---
171
171
 
172
+ ## Examples
173
+
174
+ Runnable scripts in [`examples/`](examples/) — each is self-contained and the file's top docstring documents flags and required env vars.
175
+
176
+ | File | What it shows |
177
+ |---|---|
178
+ | [`with-bareguard.mjs`](examples/with-bareguard.mjs) | End-to-end Loop + bareguard wiring: budget cap, fs scope, bash allowlist, audit log, humanChannel. The canonical governed-loop reference. |
179
+ | [`mcp-bridge-poc.js`](examples/mcp-bridge-poc.js) | Auto-discover MCP servers from your IDE configs and expose them as bareagent tools. First run writes `.mcp-bridge.json` (edit to deny tools). |
180
+ | [`mcp-bridge-concurrent.js`](examples/mcp-bridge-concurrent.js) | Soak test: fan out concurrent `barebrowse_browse` calls against real domains (Amazon, Wikipedia, GitHub, a dead host) and verify resilience. |
181
+ | [`orchestrator/`](examples/orchestrator/) | Multi-agent dispatch via `spawn`. Three configs, one system prompt — no orchestrator class, no role types. Roles are JSON files. |
182
+ | [`wake.sh`](examples/wake.sh) + [`wake.md`](examples/wake.md) | Reference cron + jq script for firing deferred actions. The runtime half of `createDeferTool` — bareagent emits, `wake.sh` fires. |
183
+ | [`replay-job.js`](examples/replay-job.js) | Supervised replay POC: record a browser task once with the LLM driving, then replay against fresh snapshots with the LLM as locator-only. Falls back to full reasoning when the locator misses, and patches the trace. |
184
+
185
+ ---
186
+
172
187
  ## Cross-language usage
173
188
 
174
189
  Not using Node.js? Spawn bare-agent as a subprocess from any language. Ready-made wrappers in [`contrib/`](contrib/README.md) for Python, Go, Rust, Ruby, and Java — copy one file, no package registry needed.
package/bin/cli.js CHANGED
@@ -63,9 +63,13 @@ async function runConfigMode(cfgPath) {
63
63
  // Tools — registry resolved by name from a curated set of built-ins.
64
64
  const tools = await resolveTools(cfg.tools || [], { stream });
65
65
 
66
- // Bareguard Gate (optional but strongly recommended for spawn children)
66
+ // Bareguard Gate (optional but strongly recommended for spawn children).
67
+ // Fail-closed: if the config asks for a gate but wiring fails, exit non-zero
68
+ // rather than run an ungoverned child agent.
67
69
  let policy = null;
68
- let wrapToolsFn = (t) => t;
70
+ let onLlmResult = null;
71
+ let onToolResult = null;
72
+ let gatedTools = tools;
69
73
  if (cfg.gate) {
70
74
  try {
71
75
  const { Gate } = require('bareguard');
@@ -95,9 +99,12 @@ async function runConfigMode(cfgPath) {
95
99
  await gate.init();
96
100
  const wired = wireGate(gate);
97
101
  policy = wired.policy;
98
- wrapToolsFn = wired.wrapTools;
102
+ onLlmResult = wired.onLlmResult;
103
+ onToolResult = wired.onToolResult;
104
+ gatedTools = await wired.filterTools(tools);
99
105
  } catch (err) {
100
- process.stderr.write(`[cli] failed to wire bareguard: ${err.message}. Continuing without policy gate.\n`);
106
+ process.stderr.write(`[cli] failed to wire bareguard: ${err.message}. Refusing to run ungoverned (cfg.gate set).\n`);
107
+ process.exit(1);
101
108
  }
102
109
  }
103
110
 
@@ -111,13 +118,14 @@ async function runConfigMode(cfgPath) {
111
118
  system: cfg.systemPrompt || null,
112
119
  stream,
113
120
  policy,
121
+ onLlmResult,
122
+ onToolResult,
114
123
  onError: (err, meta) => {
115
124
  process.stderr.write(`[loop:error ${meta.source}] ${err.message}\n`);
116
125
  },
117
126
  });
118
127
 
119
- const wrapped = wrapToolsFn(tools);
120
- await loop.run([initialMessage], wrapped);
128
+ await loop.run([initialMessage], gatedTools);
121
129
  // Stream's loop:done event has already been emitted; exit clean.
122
130
  process.exit(0);
123
131
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bare-agent",
3
- "version": "0.10.2",
3
+ "version": "0.10.4",
4
4
  "files": [
5
5
  "index.js",
6
6
  "src/",
@@ -2,6 +2,24 @@
2
2
 
3
3
  const { HaltError } = require('./errors');
4
4
 
5
/**
 * Convert a tool result into a string without ever throwing.
 *
 * Tools can return circular structures, BigInts, functions, `undefined`, or
 * exotic objects. A throw here would surface from the caller's gate.record
 * path as a loop error for what is really a serialization quirk, so every
 * conversion step is guarded and the function always returns a string.
 *
 * Fallback order:
 *   1. strings are returned untouched;
 *   2. `JSON.stringify(value)` when it yields a string;
 *   3. `String(value)`;
 *   4. `Object.prototype.toString.call(value)`;
 *   5. the literal `'[unserializable]'`.
 *
 * @param {*} value - Any value returned by a tool.
 * @returns {string} A best-effort string rendering of `value`.
 */
function safeStringify(value) {
  if (typeof value === 'string') return value;

  try {
    const json = JSON.stringify(value);
    // JSON.stringify returns undefined (not a string) for undefined,
    // functions and symbols — fall through to String() for those.
    if (json !== undefined) return json;
  } catch {
    // Circular structure, BigInt, or a throwing toJSON/getter: fall through.
  }

  try {
    return String(value);
  } catch {
    // String() throws when the value cannot be converted to a primitive,
    // e.g. a null-prototype object or an object whose toString throws.
  }

  try {
    return Object.prototype.toString.call(value);
  } catch {
    // Last resort (e.g. a revoked Proxy): still hand back a string.
    return '[unserializable]';
  }
}
18
+
19
+ // Module-scope so a process that spawns many child agents (each with its own
20
+ // wireGate call) only prints the wrapTool deprecation warning once.
21
+ let warnedWrap = false;
22
+
5
23
  /**
6
24
  * Wire a bareguard Gate into bareagent's Loop.
7
25
  *
@@ -14,7 +32,12 @@ const { HaltError } = require('./errors');
14
32
  * - `onToolResult` — callback for `new Loop({ onToolResult })`. Forwards every
15
33
  * tool.execute result to gate.record with ctx in scope.
16
34
  * - `filterTools` — async (tools) => filtered. Drops tools denied by gate.allows
17
- * so the LLM never sees them. No audit, no record.
35
+ * so the LLM never sees them. No audit, no record. Bulk-only:
36
+ * when MCP tools are exposed via `mcp_discover`+`mcp_invoke`
37
+ * meta-tools, filterTools cannot drop the inner names (they
38
+ * are not in the tool list). Gate those via bareguard's
39
+ * `tools.denyArgPatterns: { mcp_invoke: [/"name":"…"/] }`
40
+ * — see src/mcp-bridge.js (Gov shape).
18
41
  * - `wrapTool` / `wrapTools` — DEPRECATED. Pre-BA1 shim that wraps execute() to
19
42
  * call gate.record post-hoc. Loses _ctx and never sees LLM cost.
20
43
  * Prefer `onToolResult` (and `onLlmResult` for budget correctness).
@@ -25,9 +48,11 @@ const { HaltError } = require('./errors');
25
48
  *
26
49
  * @param {object} gate - A bareguard Gate instance (must have .check, .record, .allows).
27
50
  * @param {object} [options]
28
- * @param {Function} [options.formatDeny] - (decision) => string. Transforms the deny
29
- * string fed to the LLM. Default: "[deny: <rule>] <reason>". Halt bypasses this
30
- * (HaltError doesn't reach the LLM).
51
+ * @param {Function} [options.formatDeny] - (decision, toolName) => string. Transforms
52
+ * the deny string fed to the LLM. The second arg is the bareagent tool name (handy
53
+ * for tool-specific deny copy). Default: "[deny: <rule>] <reason>" or
54
+ * "[deny: <rule>] <toolName> denied" when bareguard omits a reason. Halt bypasses
55
+ * this (HaltError doesn't reach the LLM).
31
56
  * @param {Function} [options.actionTranslator] - (toolName, args, ctx) => action.
32
57
  * Builds the action object passed to `gate.check` and `gate.record`. Default:
33
58
  * `{ type: toolName, args, _ctx: ctx }`. Override when bareguard's primitives
@@ -102,7 +127,7 @@ function wireGate(gate, options = {}) {
102
127
  });
103
128
  } else {
104
129
  await gate.record(action, {
105
- result: typeof result === 'string' ? result : JSON.stringify(result),
130
+ result: safeStringify(result),
106
131
  durationMs: durationMs ?? null,
107
132
  });
108
133
  }
@@ -115,14 +140,13 @@ function wireGate(gate, options = {}) {
115
140
  if (typeof gate.allows !== 'function') {
116
141
  throw new Error('[wireGate.filterTools] gate must have .allows (bareguard >= 0.2)');
117
142
  }
118
- const out = [];
119
- for (const t of tools) {
120
- if (await gate.allows(t.name)) out.push(t);
121
- }
122
- return out;
143
+ // Parallel: gate.allows is config-driven and pure, so concurrent calls are
144
+ // safe. Matters for large MCP catalogs (50+ tools) where sequential awaits
145
+ // were noticeable overhead on every startup.
146
+ const verdicts = await Promise.all(tools.map(t => gate.allows(t.name)));
147
+ return tools.filter((_, i) => verdicts[i]);
123
148
  };
124
149
 
125
- let warnedWrap = false;
126
150
  function wrapTool(tool) {
127
151
  if (!warnedWrap) {
128
152
  warnedWrap = true;
@@ -143,7 +167,7 @@ function wireGate(gate, options = {}) {
143
167
  try {
144
168
  const result = await original(args);
145
169
  await gate.record(action, {
146
- result: typeof result === 'string' ? result : JSON.stringify(result),
170
+ result: safeStringify(result),
147
171
  durationMs: Date.now() - startedAt,
148
172
  });
149
173
  return result;
package/src/errors.js CHANGED
@@ -52,8 +52,9 @@ class HaltError extends BareAgentError {
52
52
  super(message || `[HALT: ${rule || 'unknown'}]`, {
53
53
  code: 'HALT',
54
54
  retryable: false,
55
- context: { ...context, rule, decision },
55
+ context,
56
56
  });
57
+ // Public, stable surface — read `err.rule` / `err.decision` (not `err.context`).
57
58
  this.rule = rule || null;
58
59
  this.decision = decision || null;
59
60
  }
package/src/loop.js CHANGED
@@ -3,7 +3,7 @@
3
3
  const { ToolError, HaltError } = require('./errors');
4
4
 
5
5
  // Average pricing per 1K tokens (USD). Adjust these to match your provider's rates.
6
- // Last updated: 2026-03-18. Source: public provider pricing pages.
6
+ // Last updated: 2026-05-18. Source: public provider pricing pages.
7
7
  const COST_PER_1K = {
8
8
  // OpenAI
9
9
  'gpt-4o': { in: 0.0025, out: 0.01 },
@@ -12,9 +12,13 @@ const COST_PER_1K = {
12
12
  'gpt-4.1-mini': { in: 0.0004, out: 0.0016 },
13
13
  'gpt-4.1-nano': { in: 0.0001, out: 0.0004 },
14
14
  'o3-mini': { in: 0.0011, out: 0.0044 },
15
- // Anthropic
16
- 'claude-sonnet-4-20250514': { in: 0.003, out: 0.015 },
15
+ // Anthropic — Claude 4.x current generation (2026-05)
16
+ 'claude-opus-4-7': { in: 0.015, out: 0.075 },
17
+ 'claude-sonnet-4-6': { in: 0.003, out: 0.015 },
17
18
  'claude-haiku-4-5-20251001': { in: 0.0008, out: 0.004 },
19
+ 'claude-haiku-4-5': { in: 0.0008, out: 0.004 },
20
+ // Anthropic — earlier 4.x snapshots
21
+ 'claude-sonnet-4-20250514': { in: 0.003, out: 0.015 },
18
22
  'claude-opus-4-20250514': { in: 0.015, out: 0.075 },
19
23
  // Fallback average across popular models (~$0.002 in, ~$0.008 out per 1K)
20
24
  '_default': { in: 0.002, out: 0.008 },
@@ -25,6 +29,27 @@ const COST_PER_1K = {
25
29
  // no governance and the LLM loop is unbounded by design — wire bareguard.
26
30
  const HARD_ROUND_LIMIT = 100;
27
31
 
32
/**
 * Pair unanswered assistant tool calls with synthetic tool replies.
 *
 * Scans `msgs` from the end for the most recent assistant message that
 * carries a `tool_calls` array. For every call in it whose `id` has no
 * `role:'tool'` message with a matching `tool_call_id` later in the
 * transcript, a reply `{ role: 'tool', tool_call_id, content: '[halted:<rule>]' }`
 * is appended to `msgs`. Only that one assistant message is examined.
 *
 * Intended for the halt path: it keeps the transcript well-formed when the
 * loop exits after pushing assistant.tool_calls but before every tool reply
 * has been recorded.
 *
 * @param {Array<object>} msgs - Conversation transcript; mutated in place.
 *   Non-array input and null/non-object entries are ignored.
 * @param {string} [rule] - Halt rule name embedded in the synthetic reply.
 *   Falls back to 'unknown' when missing or empty.
 * @returns {void}
 */
function sealDanglingToolCalls(msgs, rule) {
  if (!Array.isArray(msgs)) return;
  const label = rule || 'unknown';

  for (let i = msgs.length - 1; i >= 0; i--) {
    const candidate = msgs[i];
    if (!candidate || candidate.role !== 'assistant' || !Array.isArray(candidate.tool_calls)) {
      continue;
    }

    // Collect the ids that already have a tool reply after this message.
    const answered = new Set();
    for (let j = i + 1; j < msgs.length; j++) {
      const reply = msgs[j];
      if (reply && reply.role === 'tool' && reply.tool_call_id) {
        answered.add(reply.tool_call_id);
      }
    }

    for (const call of candidate.tool_calls) {
      if (call && !answered.has(call.id)) {
        msgs.push({ role: 'tool', tool_call_id: call.id, content: `[halted:${label}]` });
      }
    }

    // Only the most recent assistant message with tool_calls is sealed.
    return;
  }
}
52
+
28
53
  function estimateCost(model, usage) {
29
54
  if (!usage || !model) return null;
30
55
  const rates = COST_PER_1K[model] || COST_PER_1K['_default'];
@@ -127,7 +152,12 @@ class Loop {
127
152
  * @param {Array<object>} messages - Conversation messages in OpenAI format.
128
153
  * @param {Array<object>} [tools=[]] - Tool definitions with name, execute, description, parameters.
129
154
  * @param {object} [options={}] - Per-run overrides (system, temperature, ctx, etc.).
130
- * @returns {Promise<{text: string, toolCalls: Array, usage: object, error: string|null}>}
155
+ * @returns {Promise<{text: string, toolCalls: Array, usage: object, cost: number, error: string|null, msgs: Array<object>}>}
156
+ * On halt the returned `error` is `halt:<rule>` (or `halt:unknown` if the
157
+ * thrown HaltError carried no `rule`), and `msgs` is sanitized so any
158
+ * dangling assistant `tool_calls` from the halted round are paired with
159
+ * synthetic `[halted:<rule>]` tool replies — safe to feed back into another
160
+ * provider call without violating OpenAI's tool-call/tool-result pairing.
131
161
  * @throws {Error} `[Loop] Tool is missing a name` — when a tool has no name or a non-string name.
132
162
  * @throws {Error} `[Loop] Tool "X" is missing an execute() function` — when execute is not a function.
133
163
  * @throws {Error} `[Loop] Tool "X" has invalid parameters` — when parameters is not an object.
@@ -323,9 +353,15 @@ class Loop {
323
353
  // BA2: HaltError is a clean governance exit, not a runtime failure.
324
354
  // No throw even when throwOnError:true — the gate halted us deliberately.
325
355
  if (err instanceof HaltError) {
326
- this._reportError('halt', err, { rule: err.rule, reason: err.decision?.reason ?? null });
327
- this._safeEmit({ type: 'loop:done', data: { text: '', halted: true, rule: err.rule, cost: totalCost } });
328
- return { text: '', toolCalls: [], usage: lastUsage, cost: totalCost, error: `halt:${err.rule}`, msgs };
356
+ const rule = err.rule || 'unknown';
357
+ // Pair any dangling assistant.tool_calls (from the halted round) with
358
+ // synthetic `[halted:<rule>]` replies so the returned msgs is a valid
359
+ // OpenAI-shaped transcript — consumers can feed it back into another
360
+ // provider call without tripping the tool-call/tool-result pairing.
361
+ sealDanglingToolCalls(msgs, rule);
362
+ this._reportError('halt', err, { rule, reason: err.decision?.reason ?? null });
363
+ this._safeEmit({ type: 'loop:done', data: { text: '', halted: true, rule, cost: totalCost } });
364
+ return { text: '', toolCalls: [], usage: lastUsage, cost: totalCost, error: `halt:${rule}`, msgs };
329
365
  }
330
366
  throw err;
331
367
  }