bare-agent 0.12.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/bareagent.context.md +29 -4
- package/bin/cli.js +54 -2
- package/examples/README.md +15 -0
- package/examples/litectx-as-store.mjs +78 -0
- package/examples/litectx-mcp-child.mjs +57 -0
- package/examples/mcp-bridge-concurrent.js +106 -0
- package/examples/mcp-bridge-poc.js +77 -0
- package/examples/orchestrator/README.md +53 -0
- package/examples/orchestrator/orchestrator.json +14 -0
- package/examples/orchestrator/specialists/researcher.json +12 -0
- package/examples/orchestrator/specialists/summarizer.json +11 -0
- package/examples/replay-job.js +213 -0
- package/examples/wake.md +99 -0
- package/examples/wake.sh +84 -0
- package/examples/with-bareguard.mjs +65 -0
- package/index.d.ts +4 -1
- package/index.js +4 -0
- package/package.json +4 -2
- package/src/context-units.d.ts +44 -0
- package/src/context-units.js +225 -0
- package/src/loop.d.ts +11 -0
- package/src/loop.js +31 -1
- package/src/tools.d.ts +2 -1
- package/src/tools.js +2 -0
- package/tools/litectx-mcp.d.ts +28 -0
- package/tools/litectx-mcp.js +65 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Replay-job POC — supervised replay of a recorded browser task.
|
|
6
|
+
*
|
|
7
|
+
* The idea: record once with the LLM driving (full reasoning), then on every
|
|
8
|
+
* subsequent run replay the recorded *intents* against a fresh snapshot, with
|
|
9
|
+
* the LLM acting as a locator/supervisor — not a planner. If the locator
|
|
10
|
+
* can't find a match for a step, fall back to full Loop reasoning from that
|
|
11
|
+
* point and overwrite the trace. That's the self-healing path.
|
|
12
|
+
*
|
|
13
|
+
* Why this isn't a barebrowse feature: barebrowse stays dumb (URL → snapshot,
|
|
14
|
+
* ref → action). The "job" concept (trace storage, replay supervisor, scheduler
|
|
15
|
+
* hookup) is composed from bareagent primitives.
|
|
16
|
+
*
|
|
17
|
+
* Usage:
|
|
18
|
+
* # First run — record:
|
|
19
|
+
* OPENAI_API_KEY=sk-... node examples/replay-job.js --record demo-job \
|
|
20
|
+
* "Go to example.com and click the 'More information' link"
|
|
21
|
+
*
|
|
22
|
+
* # Subsequent runs — replay:
|
|
23
|
+
* OPENAI_API_KEY=sk-... node examples/replay-job.js --replay demo-job
|
|
24
|
+
*
|
|
25
|
+
* # Cron'd:
|
|
26
|
+
* *\/15 * * * * node /path/to/replay-job.js --replay demo-job
|
|
27
|
+
*
|
|
28
|
+
* What's deliberately NOT in this POC (next steps, in order):
|
|
29
|
+
* 1. Fingerprint fast-path: hash selector+role+text per step, try direct
|
|
30
|
+
* match before calling the locator LLM. Brings per-step cost to ~0 on
|
|
31
|
+
* stable UIs (Gmail, IG).
|
|
32
|
+
* 2. PostState assertion: after each step, ask the LLM "does this snapshot
|
|
33
|
+
* reflect the expected outcome?" Without this, replay can silently drift
|
|
34
|
+
* on subtly-changed UIs.
|
|
35
|
+
* 3. Trace confidence: rolling success rate per step; below a threshold,
|
|
36
|
+
* re-derive the whole trace instead of patching one entry.
|
|
37
|
+
* 4. Scheduler integration: wrap as a Scheduler trigger so cron is a config
|
|
38
|
+
* line instead of an OS cron entry.
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
const fs = require('node:fs');
|
|
42
|
+
const path = require('node:path');
|
|
43
|
+
const { Loop } = require('../src/loop');
|
|
44
|
+
|
|
45
|
+
const JOBS_DIR = path.join(__dirname, '..', '.jobs');
|
|
46
|
+
|
|
47
|
+
/**
 * Parse the CLI arguments of the replay-job example.
 *
 * Recognised shapes:
 *   --record <name> <goal words...>
 *   --replay <name>
 *
 * If both flags are present, `--record` wins (it is checked first).
 *
 * @param {string[]} argv Arguments after the script path (process.argv.slice(2)).
 * @returns {{ mode: 'record' | 'replay' | null, name: string | null, goal: string }}
 *   `mode` is null when neither flag is present; `name` is null when the flag
 *   has no following token; `goal` is every token after the name joined by
 *   single spaces ('' when there are none).
 */
function parseArgs(argv) {
  const mode = argv.includes('--record') ? 'record'
    : argv.includes('--replay') ? 'replay'
    : null;

  // Without a mode flag there is no anchor for name/goal. Previously this
  // path searched for the literal "--null", got index -1, and reported
  // argv.slice(1) as the goal.
  if (!mode) return { mode: null, name: null, goal: '' };

  const flagIdx = argv.indexOf(`--${mode}`);
  // Normalise "flag was the last token" to null rather than undefined.
  const name = argv[flagIdx + 1] ?? null;
  const goal = argv.slice(flagIdx + 2).join(' ');
  return { mode, name, goal };
}
|
|
56
|
+
|
|
57
|
+
/** Location of the JSON trace file for job `name` inside JOBS_DIR. */
function jobPath(name) {
  const fileName = `${name}.json`;
  return path.join(JOBS_DIR, fileName);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Build the OpenAI provider used by both record and replay.
 * Exits the process with status 1 when OPENAI_API_KEY is not set.
 */
function loadProvider() {
  const { OPENAI_API_KEY: apiKey } = process.env;
  if (!apiKey) {
    console.error('Set OPENAI_API_KEY');
    process.exit(1);
  }
  // Required lazily, only after the key check has passed.
  const { OpenAIProvider } = require('../src/provider-openai');
  return new OpenAIProvider({ apiKey, model: 'gpt-4.1-mini' });
}
|
|
65
|
+
|
|
66
|
+
/**
 * Dynamically import barebrowse's bareagent adapter and create its tool set
 * with default options. Resolves to whatever `createBrowseTools({})` returns.
 */
async function loadBrowseTools() {
  return import('barebrowse/bareagent').then((browse) => browse.createBrowseTools({}));
}
|
|
70
|
+
|
|
71
|
+
// --- RECORD ----------------------------------------------------------------
|
|
72
|
+
// Run Loop normally. The onToolCall hook captures each step as it happens.
|
|
73
|
+
// The trace stores the LLM's free-text "intent" (the assistant message
|
|
74
|
+
// immediately preceding the tool call) alongside the tool name and args.
|
|
75
|
+
// On replay, intent is what the locator LLM matches against — not args.
|
|
76
|
+
/**
 * Record mode: let the Loop pursue `goal` with the browse tools and persist
 * every tool call (with the assistant text that preceded it) as the job trace.
 *
 * @param {{ name: string, goal: string }} opts Job name (file stem) and the goal prompt.
 * @returns {Promise<void>}
 */
async function record({ name, goal }) {
  const provider = loadProvider();
  const { tools: browseTools, close: closeBrowser } = await loadBrowseTools();

  // Steps accumulate here in the order the Loop issues tool calls.
  const steps = [];
  // Latest assistant text; the next tool call consumes it as its "intent".
  let lastText = '';

  const rememberText = (text) => { lastText = text; };
  const captureStep = (toolName, args) => {
    const intent = lastText.trim().slice(0, 500);
    steps.push({ intent, tool: toolName, args });
    lastText = '';
  };

  const loop = new Loop({
    provider,
    maxRounds: 15,
    onText: rememberText,
    onToolCall: captureStep,
  });

  try {
    const result = await loop.run([{ role: 'user', content: goal }], browseTools);
    fs.mkdirSync(JOBS_DIR, { recursive: true });
    const payload = { goal, recordedAt: new Date().toISOString(), trace: steps };
    fs.writeFileSync(jobPath(name), JSON.stringify(payload, null, 2));
    console.log(`[record] saved ${steps.length} steps to ${jobPath(name)}`);
    console.log(`[record] cost: $${(result.cost ?? 0).toFixed(4)}`);
  } finally {
    // Always release the browser, even when the Loop throws.
    await closeBrowser();
  }
}
|
|
103
|
+
|
|
104
|
+
// --- REPLAY ----------------------------------------------------------------
|
|
105
|
+
// For each recorded step:
|
|
106
|
+
// 1. Take a fresh snapshot.
|
|
107
|
+
// 2. Ask the LLM (no tools, JSON-mode) to map the recorded intent to a ref
|
|
108
|
+
// in the current snapshot — OR return null if no match.
|
|
109
|
+
// 3. On match: execute the tool with the resolved ref. The recorded args
|
|
110
|
+
// that aren't refs (e.g. type's `text`, goto's `url`) carry over verbatim.
|
|
111
|
+
// 4. On miss: fall back to driving Loop from the remaining goal, capture
|
|
112
|
+
// the new sub-trace, and splice it into the saved trace.
|
|
113
|
+
/**
 * Replay mode: walk the saved trace step by step against the live page.
 *
 * Steps whose args carry no `ref` are executed verbatim. Ref-bearing steps
 * take a fresh snapshot and ask the locator LLM for the matching ref. On a
 * locator miss the rest of the task is handed to a full Loop, the remainder
 * of the trace is replaced by what that Loop did, and the job file is
 * rewritten.
 *
 * @param {{ name: string }} opts Job name (file stem under JOBS_DIR).
 * @returns {Promise<void>}
 * @throws {Error} If the job file has no trace array, a step names an unknown
 *   tool, or a ref-bearing step is hit but no `snapshot` tool is available.
 */
async function replay({ name }) {
  const job = JSON.parse(fs.readFileSync(jobPath(name), 'utf8'));
  // Fail with a readable message instead of a TypeError on a hand-edited or
  // truncated job file.
  if (!job || !Array.isArray(job.trace)) {
    throw new Error(`job file ${jobPath(name)} has no trace array`);
  }
  const provider = loadProvider();
  const { tools, close } = await loadBrowseTools();
  const toolByName = Object.fromEntries(tools.map((t) => [t.name, t]));

  let mutated = false;

  try {
    for (let i = 0; i < job.trace.length; i++) {
      const step = job.trace[i];
      const tool = toolByName[step.tool];
      if (!tool) throw new Error(`unknown tool in trace: ${step.tool}`);

      // Steps whose args have no ref (goto, browse, back, scroll) replay verbatim.
      if (!('ref' in (step.args || {}))) {
        console.log(`[replay ${i + 1}/${job.trace.length}] ${step.tool}(${JSON.stringify(step.args)})`);
        await tool.execute(step.args);
        continue;
      }

      // Ref-bearing steps: snapshot, then ask the LLM to locate.
      const snapshotTool = toolByName.snapshot;
      if (!snapshotTool) throw new Error('browse tools expose no `snapshot` tool; cannot resolve refs');
      const snapshot = await snapshotTool.execute({});
      const snapshotText = typeof snapshot === 'string' ? snapshot : (snapshot?.snapshot ?? JSON.stringify(snapshot));
      // A trace step may lack an intent (e.g. hand-edited file); treat it as empty.
      const intent = typeof step.intent === 'string' ? step.intent : '';
      const located = await locate({ provider, intent, tool: step.tool, snapshotText });
      // Tolerate a null / non-object locator result: anything without a ref is a miss.
      const ref = located?.ref;

      if (ref) {
        // Recorded non-ref args (text, url, ...) carry over; only the ref is re-resolved.
        const args = { ...step.args, ref };
        console.log(`[replay ${i + 1}/${job.trace.length}] ${step.tool}(ref=${ref}) — intent="${intent.slice(0, 60)}"`);
        await tool.execute(args);
      } else {
        console.warn(`[replay ${i + 1}/${job.trace.length}] locator miss — falling back to full Loop`);
        const remaining = `Continue this task from the current page. Original goal: ${job.goal}. We have completed ${i} of ${job.trace.length} recorded steps; the next intended step was: "${intent}".`;
        const subTrace = await driveFallback({ provider, tools, goal: remaining });
        // Replace the failed step and everything after it with the fallback's steps.
        job.trace.splice(i, job.trace.length - i, ...subTrace);
        mutated = true;
        break;
      }
    }

    if (mutated) {
      job.recordedAt = new Date().toISOString();
      fs.writeFileSync(jobPath(name), JSON.stringify(job, null, 2));
      console.log(`[replay] trace patched after locator miss; saved ${job.trace.length} steps`);
    } else {
      console.log(`[replay] completed ${job.trace.length} steps without falling back`);
    }
  } finally {
    // Always release the browser, even when a step or the fallback throws.
    await close();
  }
}
|
|
164
|
+
|
|
165
|
+
// Locator: structured-output call, no tools, no loop. One LLM call per ref-bearing step.
|
|
166
|
+
async function locate({ provider, intent, tool, snapshotText }) {
|
|
167
|
+
const system = 'You map a recorded intent to a ref in a current ARIA snapshot. Reply with strict JSON: {"ref": "<ref>"} if confident, or {"ref": null, "reason": "<why>"} if no element matches. Never invent refs not present in the snapshot.';
|
|
168
|
+
const user = `Recorded intent: ${intent}\nTool: ${tool}\n\nCurrent snapshot:\n${snapshotText}\n\nReturn JSON only.`;
|
|
169
|
+
const { text } = await provider.generate({
|
|
170
|
+
messages: [{ role: 'system', content: system }, { role: 'user', content: user }],
|
|
171
|
+
});
|
|
172
|
+
try {
|
|
173
|
+
const match = text.match(/\{[\s\S]*\}/);
|
|
174
|
+
return JSON.parse(match ? match[0] : text);
|
|
175
|
+
} catch {
|
|
176
|
+
return { ref: null, reason: 'unparseable locator response' };
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// Fallback driver: same shape as record(), just used mid-replay when the
|
|
181
|
+
// locator can't resolve a step. Returns the new sub-trace to splice in.
|
|
182
|
+
/**
 * Run a full Loop toward `goal` and collect every tool call it makes, paired
 * with the assistant text that preceded it. Used mid-replay after a locator miss.
 *
 * @param {{ provider: object, tools: Array<object>, goal: string }} opts
 * @returns {Promise<Array<{ intent: string, tool: string, args: any }>>} The sub-trace, in call order.
 */
async function driveFallback({ provider, tools, goal }) {
  const recorded = [];
  // Latest assistant text, consumed by the next tool call as its intent.
  let lastText = '';

  const loop = new Loop({
    provider,
    maxRounds: 10,
    onText(text) {
      lastText = text;
    },
    onToolCall(toolName, args) {
      const intent = lastText.trim().slice(0, 500);
      recorded.push({ intent, tool: toolName, args });
      lastText = '';
    },
  });

  const conversation = [{ role: 'user', content: goal }];
  await loop.run(conversation, tools);
  return recorded;
}
|
|
197
|
+
|
|
198
|
+
/**
 * CLI entry point: validate the arguments, then dispatch to record or replay.
 * Prints a message and exits with status 1 on a usage error or a missing job file.
 */
async function main() {
  const cli = parseArgs(process.argv.slice(2));
  const { mode, name, goal } = cli;

  if (!(mode && name)) {
    console.error('Usage:\n  --record <name> "<goal>"\n  --replay <name>');
    process.exit(1);
  }

  if (mode !== 'record') {
    // Replay needs a previously recorded job file.
    if (!fs.existsSync(jobPath(name))) {
      console.error(`no job at ${jobPath(name)}`);
      process.exit(1);
    }
    await replay({ name });
    return;
  }

  if (!goal) {
    console.error('record mode requires a goal string');
    process.exit(1);
  }
  await record({ name, goal });
}
|
|
212
|
+
|
|
213
|
+
main().catch((err) => { console.error(err); process.exit(1); });
|
package/examples/wake.md
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# wake.sh — defer queue runner
|
|
2
|
+
|
|
3
|
+
`examples/wake.sh` is the reference scheduler that fires bareagent's
|
|
4
|
+
deferred actions. It's not a library primitive — it's a small bash script
|
|
5
|
+
you copy into your project and adapt. Bareagent emits JSONL records via
|
|
6
|
+
the `defer` tool; wake.sh reads the queue and re-invokes bareagent with
|
|
7
|
+
the fired action.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
cp examples/wake.sh /usr/local/bin/bareagent-wake
|
|
13
|
+
chmod +x /usr/local/bin/bareagent-wake
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Cron entry (every minute)
|
|
17
|
+
|
|
18
|
+
```cron
|
|
19
|
+
* * * * * /usr/local/bin/bareagent-wake
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
For project-scoped use, run from the project directory:
|
|
23
|
+
|
|
24
|
+
```cron
|
|
25
|
+
* * * * * cd /path/to/your/project && /usr/local/bin/bareagent-wake
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Environment overrides
|
|
29
|
+
|
|
30
|
+
| Variable | Default | What it does |
|
|
31
|
+
|---|---|---|
|
|
32
|
+
| `BAREAGENT_DEFER_QUEUE` | `./bareagent-defers.jsonl` | Path to the JSONL defer queue (must match what the `defer` tool writes). |
|
|
33
|
+
| `ORCHESTRATOR_CONFIG` | `./orchestrator.json` | Bareagent config file the wake script invokes for fired actions. |
|
|
34
|
+
| `LOCKFILE` | `/tmp/bareagent-wake.lock` | Single-instance lock via `flock`. |
|
|
35
|
+
| `BAREAGENT_WAKE_LOG_DIR` | `/tmp/bareagent-wake` | Per-fired-action log directory. |
|
|
36
|
+
|
|
37
|
+
## Dependencies
|
|
38
|
+
|
|
39
|
+
- `jq` — JSONL fold + filter
|
|
40
|
+
- `flock` (Linux util-linux) — single-instance lock
|
|
41
|
+
- `bare-agent` on `$PATH` — `npm install -g bare-agent` or use the full path
|
|
42
|
+
|
|
43
|
+
## Behaviour
|
|
44
|
+
|
|
45
|
+
1. **Folds** the queue: `{id, status, ...}` records are append-only; the
|
|
46
|
+
live status of each id is the *latest* line. jq does the fold.
|
|
47
|
+
2. **Filters** to `status === 'pending' AND when <= now()`.
|
|
48
|
+
3. For each due record: appends `{id, status: 'fired', ts}` (atomic JSONL
|
|
49
|
+
append on POSIX), then invokes
|
|
50
|
+
`bare-agent --config $ORCHESTRATOR_CONFIG` with the inner action as
|
|
51
|
+
stdin input. Bareagent runs the action through bareguard's gate as a
|
|
52
|
+
fresh action — full pipeline against the inner action, separate audit
|
|
53
|
+
line.
|
|
54
|
+
4. After the fired invocation completes: appends `{id, status: 'done|failed', ts, exit_code?}`.
|
|
55
|
+
|
|
56
|
+
## Why bash and not Node
|
|
57
|
+
|
|
58
|
+
The wake script is OS-level glue — cron + filesystem + subprocess. Keeping
|
|
59
|
+
it as a shell script makes the dependency on bareagent (and only bareagent)
|
|
60
|
+
obvious, and avoids users thinking the script is a library to import.
|
|
61
|
+
|
|
62
|
+
## Customisation points
|
|
63
|
+
|
|
64
|
+
- **Different queue path:** set `BAREAGENT_DEFER_QUEUE` and pass the same
|
|
65
|
+
to your `defer` tool config (or `BAREAGENT_DEFER_QUEUE` env on the
|
|
66
|
+
bareagent process that emits).
|
|
67
|
+
- **Different orchestrator per action type:** parse `record.action.type`
|
|
68
|
+
and pick a config file accordingly. ~5 lines added inside the per-record
|
|
69
|
+
loop.
|
|
70
|
+
- **Different fire-time semantics:** instead of invoking bareagent CLI,
|
|
71
|
+
shell out to a Node script that wires Loop differently. The defer queue
|
|
72
|
+
schema doesn't constrain you.
|
|
73
|
+
|
|
74
|
+
## Log rotation
|
|
75
|
+
|
|
76
|
+
`logrotate(8)` is the standard answer. Example
|
|
77
|
+
`/etc/logrotate.d/bareagent-wake`:
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
/tmp/bareagent-wake/*.log {
|
|
81
|
+
daily
|
|
82
|
+
rotate 7
|
|
83
|
+
compress
|
|
84
|
+
missingok
|
|
85
|
+
notifempty
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/path/to/your/project/bareagent-defers.jsonl {
|
|
89
|
+
weekly
|
|
90
|
+
rotate 4
|
|
91
|
+
compress
|
|
92
|
+
missingok
|
|
93
|
+
notifempty
|
|
94
|
+
copytruncate
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
`copytruncate` matters for the queue: it preserves the file inode (which
|
|
99
|
+
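the defer tool's `appendFile` depends on for atomic POSIX appends). Note that truncating the queue also discards any records that are still `pending`, so rotate it only once the queue has drained.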
|
package/examples/wake.sh
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# examples/wake.sh — reference scheduler for bareagent's defer queue.
#
# This is a *reference*, not a primitive. Copy into your project and modify.
# See examples/wake.md for the cron entry and customization points.
#
# What this script does:
#   1. Reads the JSONL defer queue file.
#   2. Folds status-update lines per id (latest wins) using jq.
#   3. For each pending record whose `when` <= now: appends a "fired" status
#      line to the queue (atomic JSONL append), then invokes
#      `bare-agent --config <orchestrator>` with the inner action as stdin.
#   4. Appends a "done" or "failed" status line once that invocation exits.
#   5. Uses flock(1) to prevent overlapping wake invocations.
#
# The fired action goes through bareguard's gate AGAIN at fire time — full
# pipeline against the inner action, separate from the emit-time check.
# (Two gate.check calls, two distinct audit lines, reconstructable via
# parent_run_id.)

set -euo pipefail

QUEUE="${BAREAGENT_DEFER_QUEUE:-./bareagent-defers.jsonl}"
ORCHESTRATOR_CONFIG="${ORCHESTRATOR_CONFIG:-./orchestrator.json}"
LOCKFILE="${LOCKFILE:-/tmp/bareagent-wake.lock}"
LOG_DIR="${BAREAGENT_WAKE_LOG_DIR:-/tmp/bareagent-wake}"

mkdir -p "$LOG_DIR"

NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# Single-instance: bail if another wake is running.
exec 9>"$LOCKFILE"
if ! flock -n 9; then
  echo "[wake $NOW] another instance running, exiting" >&2
  exit 0
fi

# No queue file = nothing to do.
if [ ! -f "$QUEUE" ]; then
  exit 0
fi

# append_status ID STATUS TS [EXIT_CODE]
# Append one status line to the queue. jq builds the JSON so an id containing
# quotes or backslashes cannot produce a malformed line (one bad line would
# make every later fold of the queue fail).
append_status() {
  local id=$1 status=$2 ts=$3 exit_code=${4:-}
  if [ -n "$exit_code" ]; then
    jq -n -c --arg id "$id" --arg status "$status" --arg ts "$ts" \
      --argjson exit_code "$exit_code" \
      '{id: $id, status: $status, ts: $ts, exit_code: $exit_code}' >> "$QUEUE"
  else
    jq -n -c --arg id "$id" --arg status "$status" --arg ts "$ts" \
      '{id: $id, status: $status, ts: $ts}' >> "$QUEUE"
  fi
}

# Reconstruct status by folding all lines per id (latest wins) and filter
# to records whose `when` <= now AND status == "pending". One JSON object
# per output line.
# NOTE(review): `when` is compared as a string, which presumably relies on the
# defer tool writing UTC ISO-8601 timestamps — confirm against the emitter.
PENDING=$(jq -n -c --arg now "$NOW" '
  reduce inputs as $r ({};
    .[$r.id] |= (. // {}) + $r
  )
  | to_entries
  | map(.value)
  | map(select(.status == "pending" and .when <= $now))
  | .[]
' < "$QUEUE")

if [ -z "$PENDING" ]; then
  exit 0
fi

# Feed the loop from a here-string rather than `echo ... | while`: a pipeline
# runs the loop in a subshell, so the background jobs would not be children of
# this shell and the final `wait` would return immediately.
while IFS= read -r record; do
  [ -z "$record" ] && continue

  ID=$(jq -r '.id' <<< "$record")
  ACTION=$(jq -c '.action' <<< "$record")

  # Append "fired" status line first (defer queue is append-only).
  append_status "$ID" "fired" "$NOW"

  # Invoke bareagent with the deferred action as stdin input.
  # Each action runs in the background so due actions proceed concurrently;
  # the `wait` below keeps this script (and its lock) alive until all finish.
  # NOTE(review): $ID is used verbatim in the log file name — assumes ids
  # contain no path separators; confirm against the defer tool.
  (
    # `|| rc=$?` is required: under `set -e` + `pipefail` a bare failing
    # pipeline would abort this subshell before the "failed" line is written.
    rc=0
    printf '%s\n' "$ACTION" | bare-agent --config "$ORCHESTRATOR_CONFIG" \
      >> "$LOG_DIR/fired-$ID.log" 2>&1 || rc=$?
    if [ "$rc" -ne 0 ]; then
      append_status "$ID" "failed" "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" "$rc"
    else
      append_status "$ID" "done" "$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
    fi
  ) &
done <<< "$PENDING"

wait
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// examples/with-bareguard.mjs
|
|
2
|
+
//
|
|
3
|
+
// End-to-end: bareagent Loop + bareguard Gate.
|
|
4
|
+
// Runs a small LLM loop with budget cap, fs scope, audit log, and humanChannel.
|
|
5
|
+
//
|
|
6
|
+
// Run: OPENAI_API_KEY=... node examples/with-bareguard.mjs
|
|
7
|
+
//
|
|
8
|
+
// What this demonstrates:
|
|
9
|
+
// - Single-gate governance: every tool call traverses gate.check; every
|
|
10
|
+
// result reaches gate.record (via wrapTools).
|
|
11
|
+
// - Budget halt: if accumulated cost exceeds maxCostUsd, gate halts the loop.
|
|
12
|
+
// - Audit log: one JSONL line per gated event at ./bareagent-audit.jsonl.
|
|
13
|
+
// - humanChannel: required by bareguard. Here we auto-deny asks; in real use
|
|
14
|
+
// wire it to a chat platform, terminal prompt, etc.
|
|
15
|
+
|
|
16
|
+
import { Gate } from 'bareguard';
|
|
17
|
+
import { createRequire } from 'node:module';
|
|
18
|
+
const require = createRequire(import.meta.url);
|
|
19
|
+
const { Loop, wireGate } = require('bare-agent');
|
|
20
|
+
const { OpenAI } = require('bare-agent/providers');
|
|
21
|
+
const { createShellTools } = require('bare-agent/tools');
|
|
22
|
+
|
|
23
|
+
// 0. Fail fast when the API key is missing, before the gate is initialised,
//    instead of surfacing a provider error part-way through the run.
if (!process.env.OPENAI_API_KEY) {
  console.error('Set OPENAI_API_KEY');
  process.exit(1);
}

// 1. Build the gate. Every primitive is optional with sensible defaults.
const gate = new Gate({
  budget: { maxCostUsd: 0.10 },                    // hard USD cap
  limits: { maxTurns: 20 },                        // safety net on think/act cycles
  fs: { readScope: ['/tmp', '~/'] },               // shell_read / shell_grep allowed roots
  bash: { allow: ['ls', 'cat', 'echo', 'pwd'] },   // argv[0] allowlist for shell_run
  audit: { path: './bareagent-audit.jsonl' },
  // Required by bareguard: any ask/halt event flows through here.
  // Auto-deny is the safest default for headless use; in real apps, wire to
  // a Telegram/Slack/terminal prompt and return { decision: 'allow' | 'deny' }.
  humanChannel: async (event) => {
    console.warn(`[humanChannel] ${event.kind}: ${event.rule} — auto-denying`);
    return { decision: 'deny' };
  },
});
await gate.init();

// 2. Wire the gate into Loop's policy slot and wrap tools so gate.record fires.
const { policy, wrapTools } = wireGate(gate);

// 3. Standard bareagent setup.
const provider = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
  model: 'gpt-4o-mini',
});
const { tools } = createShellTools();

const loop = new Loop({
  provider,
  policy,
  onError: (err, meta) => console.error(`[onError ${meta.source}]`, err.message),
});

// 4. Run. The tools are passed through wrapTools so every result reaches the gate.
const result = await loop.run(
  [{ role: 'user', content: 'List the contents of /tmp using shell_run with argv ["ls", "/tmp"].' }],
  wrapTools(tools),
);

console.log('---');
console.log('text:', result.text);
// cost may be absent on the result; print a placeholder rather than throwing.
console.log('cost:', result.cost?.toFixed(6) ?? 'n/a');
console.log('audit log → ./bareagent-audit.jsonl');
|
package/index.d.ts
CHANGED
|
@@ -10,6 +10,9 @@ import { runPlan } from "./src/run-plan";
|
|
|
10
10
|
import { CircuitBreaker } from "./src/circuit-breaker";
|
|
11
11
|
import { wireGate } from "./src/bareguard-adapter";
|
|
12
12
|
import { defaultActionTranslator } from "./src/bareguard-adapter";
|
|
13
|
+
import { toUnits } from "./src/context-units";
|
|
14
|
+
import { fromUnits } from "./src/context-units";
|
|
15
|
+
import { unitAssembler } from "./src/context-units";
|
|
13
16
|
import { BareAgentError } from "./src/errors";
|
|
14
17
|
import { ProviderError } from "./src/errors";
|
|
15
18
|
import { ToolError } from "./src/errors";
|
|
@@ -17,4 +20,4 @@ import { TimeoutError } from "./src/errors";
|
|
|
17
20
|
import { ValidationError } from "./src/errors";
|
|
18
21
|
import { CircuitOpenError } from "./src/errors";
|
|
19
22
|
import { HaltError } from "./src/errors";
|
|
20
|
-
export { Loop, Planner, StateMachine, Scheduler, Checkpoint, Memory, Stream, Retry, runPlan, CircuitBreaker, wireGate, defaultActionTranslator, BareAgentError, ProviderError, ToolError, TimeoutError, ValidationError, CircuitOpenError, HaltError };
|
|
23
|
+
export { Loop, Planner, StateMachine, Scheduler, Checkpoint, Memory, Stream, Retry, runPlan, CircuitBreaker, wireGate, defaultActionTranslator, toUnits, fromUnits, unitAssembler, BareAgentError, ProviderError, ToolError, TimeoutError, ValidationError, CircuitOpenError, HaltError };
|
package/index.js
CHANGED
|
@@ -11,6 +11,7 @@ const { Retry } = require('./src/retry');
|
|
|
11
11
|
const { runPlan } = require('./src/run-plan');
|
|
12
12
|
const { CircuitBreaker } = require('./src/circuit-breaker');
|
|
13
13
|
const { wireGate, defaultActionTranslator } = require('./src/bareguard-adapter');
|
|
14
|
+
const { toUnits, fromUnits, unitAssembler } = require('./src/context-units');
|
|
14
15
|
const {
|
|
15
16
|
BareAgentError,
|
|
16
17
|
ProviderError,
|
|
@@ -34,6 +35,9 @@ module.exports = {
|
|
|
34
35
|
CircuitBreaker,
|
|
35
36
|
wireGate,
|
|
36
37
|
defaultActionTranslator,
|
|
38
|
+
toUnits,
|
|
39
|
+
fromUnits,
|
|
40
|
+
unitAssembler,
|
|
37
41
|
BareAgentError,
|
|
38
42
|
ProviderError,
|
|
39
43
|
ToolError,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bare-agent",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.13.0",
|
|
4
4
|
"files": [
|
|
5
5
|
"index.js",
|
|
6
6
|
"index.d.ts",
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
"bin/",
|
|
10
10
|
"tools/",
|
|
11
11
|
"types/",
|
|
12
|
+
"examples/",
|
|
12
13
|
"LICENSE",
|
|
13
14
|
"NOTICE"
|
|
14
15
|
],
|
|
@@ -90,7 +91,7 @@
|
|
|
90
91
|
}
|
|
91
92
|
},
|
|
92
93
|
"scripts": {
|
|
93
|
-
"test": "node --test
|
|
94
|
+
"test": "node --test test/**/*.test.js",
|
|
94
95
|
"typecheck": "tsc --noEmit",
|
|
95
96
|
"prebuild:types": "node scripts/clean-types.js",
|
|
96
97
|
"build:types": "tsc",
|
|
@@ -98,6 +99,7 @@
|
|
|
98
99
|
},
|
|
99
100
|
"devDependencies": {
|
|
100
101
|
"@types/node": "^22.19.19",
|
|
102
|
+
"litectx": "^0.11.0",
|
|
101
103
|
"typescript": "^5.7.0"
|
|
102
104
|
}
|
|
103
105
|
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* msgs → neutral units. Bundles each assistant-tool-call message with the contiguous tool result(s)
|
|
3
|
+
* that answer its ids into ONE atomic unit (so pairing can never be split). system + first user turn
|
|
4
|
+
* are pinned.
|
|
5
|
+
* @param {Array<Record<string, any>>} msgs
|
|
6
|
+
* @returns {Array<Record<string, any>>}
|
|
7
|
+
*/
|
|
8
|
+
export function toUnits(msgs: Array<Record<string, any>>): Array<Record<string, any>>;
|
|
9
|
+
/**
|
|
10
|
+
* units → msgs. Honors drop (absent units), reorder (order of the returned array), recall-inject
|
|
11
|
+
* (units with no backing → one synthesised message), and COMPRESS (a unit whose `content` was rewritten
|
|
12
|
+
* is reconstructed from the new content). Atomic units keep their assistant tool-call message verbatim
|
|
13
|
+
* so pairing holds; a content rewrite lands on the tool RESULT. A multi-result atomic bundle whose
|
|
14
|
+
* content was rewritten is kept VERBATIM — a flat string can't be faithfully split back into N
|
|
15
|
+
* results, and splitting is grammar (bareagent's), not litectx's to attempt. This isn't a special
|
|
16
|
+
* case: litectx's compress() is a pure text→text render that returns verbatim when handed no single
|
|
17
|
+
* parseable format (compress.js — "never returns less than the body losslessly"), so a flattened
|
|
18
|
+
* multi-result unit round-trips unchanged on both sides. RATIFIED by litectx (2026-06-12). The pairing
|
|
19
|
+
* seatbelt is the final guard.
|
|
20
|
+
* @param {Array<Record<string, any>>} units
|
|
21
|
+
* @returns {Array<Record<string, any>>}
|
|
22
|
+
*/
|
|
23
|
+
export function fromUnits(units: Array<Record<string, any>>): Array<Record<string, any>>;
|
|
24
|
+
/**
|
|
25
|
+
* Wrap litectx's `assemble(units, ctx)` verb into the Loop's msgs-level `assemble(msgs, ctx)` seam.
|
|
26
|
+
* litectx ships the **`AssembleResult` envelope** `{ units, dropped, tokens }` (CE-PRD §8.2: `dropped[]`
|
|
27
|
+
* is load-bearing — it ships in the same slice, never silently truncated). This wrapper accepts that
|
|
28
|
+
* envelope (uses `.units`) OR a bare `units` array (a simpler consumer). `dropped`/`tokens` are litectx's
|
|
29
|
+
* accounting; the Loop's seam is msgs-in/msgs-out, so they're not threaded onward here (the canonical
|
|
30
|
+
* transcript already holds every dropped unit by id — restorable on demand).
|
|
31
|
+
* Fail-OPEN at this layer too: any other return shape → the original msgs are sent unchanged. A thrown
|
|
32
|
+
* error (incl. HaltError) is left to the Loop's own fail-open / HaltError handling — not swallowed here.
|
|
33
|
+
* @param {(units: Array<Record<string, any>>, ctx: any) => (any | Promise<any>)} assembleUnits
|
|
34
|
+
* @returns {(msgs: Array<Record<string, any>>, ctx: any) => Promise<Array<Record<string, any>>>}
|
|
35
|
+
*/
|
|
36
|
+
export function unitAssembler(assembleUnits: (units: Array<Record<string, any>>, ctx: any) => (any | Promise<any>)): (msgs: Array<Record<string, any>>, ctx: any) => Promise<Array<Record<string, any>>>;
|
|
37
|
+
/** chars/4 token estimate over a list of messages (matches poc2 / the Loop's own heuristic). */
|
|
38
|
+
export function approxTokens(msgs: any): number;
|
|
39
|
+
/**
|
|
40
|
+
* Drop any tool-result whose tool_call_id has no open assistant tool-call before it, and any assistant
|
|
41
|
+
* tool-call message left with zero surviving results. The final grammar guard: even if litectx hands
|
|
42
|
+
* back something that would orphan a pair, the wire is always valid. Returns a fresh array.
|
|
43
|
+
*/
|
|
44
|
+
export function pairingSeatbelt(msgs: any): any[];
|