@robzilla1738/agentswarm 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +142 -0
- package/bin/swarm.js +10 -0
- package/dist/agent.js +211 -0
- package/dist/cli.js +667 -0
- package/dist/config.js +289 -0
- package/dist/control.js +96 -0
- package/dist/deepseek.js +321 -0
- package/dist/executor.js +988 -0
- package/dist/hub.js +553 -0
- package/dist/journal.js +152 -0
- package/dist/prompts.js +232 -0
- package/dist/providers.js +151 -0
- package/dist/run.js +309 -0
- package/dist/sandbox.js +505 -0
- package/dist/state.js +230 -0
- package/dist/terminal.js +298 -0
- package/dist/tools.js +491 -0
- package/dist/types.js +26 -0
- package/dist/util.js +209 -0
- package/dist/webtools.js +205 -0
- package/package.json +63 -0
- package/ui/out/404/index.html +1 -0
- package/ui/out/404.html +1 -0
- package/ui/out/_next/static/chunks/255-2aa030c9ba2867e3.js +1 -0
- package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +1 -0
- package/ui/out/_next/static/chunks/4bd1b696-c023c6e3521b1417.js +1 -0
- package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +1 -0
- package/ui/out/_next/static/chunks/677-b37981ba0eca75b2.js +1 -0
- package/ui/out/_next/static/chunks/app/_not-found/page-2d0982e372f7be41.js +1 -0
- package/ui/out/_next/static/chunks/app/layout-37ad32c5fdb26f29.js +1 -0
- package/ui/out/_next/static/chunks/app/page-0c9f35bd4aa8e370.js +1 -0
- package/ui/out/_next/static/chunks/app/run/page-13dc41a57e34da71.js +1 -0
- package/ui/out/_next/static/chunks/app/settings/page-a1763be7f6de888c.js +1 -0
- package/ui/out/_next/static/chunks/framework-2c534e0e662575a2.js +1 -0
- package/ui/out/_next/static/chunks/main-app-889ed884f8bc78e3.js +1 -0
- package/ui/out/_next/static/chunks/main-eb90ae3b35d2fd16.js +1 -0
- package/ui/out/_next/static/chunks/pages/_app-7d307437aca18ad4.js +1 -0
- package/ui/out/_next/static/chunks/pages/_error-cb2a52f75f2162e2.js +1 -0
- package/ui/out/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- package/ui/out/_next/static/chunks/webpack-38639c05c96dbeca.js +1 -0
- package/ui/out/_next/static/css/82edaa7a5942f894.css +3 -0
- package/ui/out/_next/static/eiQeDU9uBHNsBj0CFkp8M/_buildManifest.js +1 -0
- package/ui/out/_next/static/eiQeDU9uBHNsBj0CFkp8M/_ssgManifest.js +1 -0
- package/ui/out/_next/static/media/0aa834ed78bf6d07-s.woff2 +0 -0
- package/ui/out/_next/static/media/438aa629764e75f3-s.woff2 +0 -0
- package/ui/out/_next/static/media/4c9affa5bc8f420e-s.p.woff2 +0 -0
- package/ui/out/_next/static/media/51251f8b9793cdb3-s.woff2 +0 -0
- package/ui/out/_next/static/media/67957d42bae0796d-s.woff2 +0 -0
- package/ui/out/_next/static/media/875ae681bfde4580-s.woff2 +0 -0
- package/ui/out/_next/static/media/886030b0b59bc5a7-s.woff2 +0 -0
- package/ui/out/_next/static/media/939c4f875ee75fbb-s.woff2 +0 -0
- package/ui/out/_next/static/media/bb3ef058b751a6ad-s.p.woff2 +0 -0
- package/ui/out/_next/static/media/cc978ac5ee68c2b6-s.woff2 +0 -0
- package/ui/out/_next/static/media/e857b654a2caa584-s.woff2 +0 -0
- package/ui/out/_next/static/media/f911b923c6adde36-s.woff2 +0 -0
- package/ui/out/icon.png +0 -0
- package/ui/out/index.html +1 -0
- package/ui/out/index.txt +22 -0
- package/ui/out/run/index.html +1 -0
- package/ui/out/run/index.txt +22 -0
- package/ui/out/settings/index.html +1 -0
- package/ui/out/settings/index.txt +22 -0
- package/ui/out/swarm-mark.png +0 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Robert Courson
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# agentswarm
|
|
2
|
+
|
|
3
|
+
A local agent-swarm orchestrator with a terminal dashboard and a localhost web UI. Works with DeepSeek, OpenAI, Anthropic, xAI, MiniMax, OpenRouter, Ollama, LM Studio, or any OpenAI-compatible endpoint.
|
|
4
|
+
|
|
5
|
+
You give it a mission. A conductor model breaks the mission into tasks and hands them to worker agents that run in parallel, share findings on a blackboard, and get checked by an adversarial verifier. The run ends with a synthesized report plus whatever files the agents produced. Everything runs on your machine with your own API key, or fully offline against a local model.
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
┌─────────────┐
|
|
9
|
+
│ Conductor │ decomposes the mission, schedules waves,
|
|
10
|
+
└──────┬──────┘ reacts to results, steers toward the goal
|
|
11
|
+
┌───────────┼───────────┐
|
|
12
|
+
┌───▼───┐ ┌───▼───┐ ┌───▼───┐ parallel worker agents
|
|
13
|
+
│ T1 │ │ T2 │ │ T3 │ (shell · files · web · notes)
|
|
14
|
+
└───┬───┘ └───┬───┘ └───┬───┘
|
|
15
|
+
└─────┬─────┘ │ dependencies + shared blackboard
|
|
16
|
+
┌────▼─────┐ ┌────▼────┐
|
|
17
|
+
│ T4 dep │◀─────│ verify │ adversarial verification
|
|
18
|
+
└────┬─────┘ └─────────┘
|
|
19
|
+
┌────▼─────┐
|
|
20
|
+
│Synthesize│ → final-report.md + artifacts
|
|
21
|
+
└──────────┘
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## What it does
|
|
25
|
+
|
|
26
|
+
- Independent tasks run at the same time, up to a parallelism cap you set. Dependent tasks start the moment their inputs are ready.
|
|
27
|
+
- Runs are built to go long. Each agent compacts its own context when it grows too big, and the conductor's history is bounded the same way. A run-wide token budget is enforced mid-task; when it's hit, agents wrap up and report instead of dying mid-thought. Failed verifications retry with feedback. Every event lands in an append-only journal that survives crashes.
|
|
28
|
+
- Interrupted runs resume. `swarm resume <id>`, or a button in the UI, keeps completed work, re-runs whatever was in flight, and carries the token spend over.
|
|
29
|
+
- Runs execute in an isolated per-run workspace on your machine by default. Nothing extra to install, no daemon to start. Want stronger isolation? Run in a Docker container or an E2B/Modal/Vercel cloud sandbox, per run (`--sandbox docker`) or as your default (`swarm config set sandboxRuntime auto` picks the strongest one you've configured). `swarm sandbox test` boots whichever is active and tells you whether it works.
|
|
30
|
+
- Tasks flagged `verify` get a second agent whose whole job is to prove the first one wrong. Failures bounce back for a retry with the verifier's feedback attached.
|
|
31
|
+
- You can steer a live run. `swarm note <id> "skip the pricing section"` and the conductor re-plans on its next tick.
|
|
32
|
+
- Workers get real tools: shell, file read/write/patch, web search and fetch, the blackboard, and an artifacts folder that lands on your disk. Search uses [SearchKit](https://github.com/robzilla1738/script-search) if it's installed (local, returns quotable passages; agents can pass `deep=true` when they need grounded sources), TinyFish if you have a key, DuckDuckGo otherwise.
|
|
33
|
+
- The web UI streams every tool call live and renders the final report. Each task gets a deterministic name and pixel avatar so you can tell agents apart at a glance.
|
|
34
|
+
- Provider keys are stored per provider, so switching between DeepSeek, OpenAI, Anthropic, Grok, MiniMax, OpenRouter, Ollama, and LM Studio never loses a key. Reasoning effort maps to whatever each API actually supports.
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
Requires Node 20 or newer.
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
npm install -g @robzilla1738/agentswarm
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
That gives you the `swarm` command with the web UI prebuilt; there is nothing else to do. The E2B/Modal/Vercel SDKs install as optional dependencies; add `--omit=optional` to the install command if you'll never use a cloud sandbox.
|
|
45
|
+
|
|
46
|
+
Or from source:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
git clone https://github.com/robzilla1738/agentswarm.git && cd agentswarm
|
|
50
|
+
npm run setup # installs deps + builds the engine and the web UI
|
|
51
|
+
npm link # optional: puts `swarm` on your PATH
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Without `npm link`, replace `swarm` below with `node bin/swarm.js`.
|
|
55
|
+
|
|
56
|
+
## First run
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
swarm config set apiKey sk-... # key for the active provider (default: DeepSeek)
|
|
60
|
+
swarm config set provider ollama # or: openai | anthropic | xai | minimax | openrouter | lmstudio | custom
|
|
61
|
+
pip install searchkit # optional: local, citable web search for agents
|
|
62
|
+
swarm serve --open # opens the web UI (http://localhost:7777)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Type a mission, hit Launch swarm, and watch it work. Or stay in the terminal:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
swarm run "Research the best open-source vector DBs in 2026 and write a recommendation"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## CLI
|
|
72
|
+
|
|
73
|
+
| Command | What it does |
|
|
74
|
+
|---|---|
|
|
75
|
+
| `swarm run "<mission>"` | Decompose and execute a mission (live terminal dashboard). Ctrl-C detaches; the run keeps going. |
|
|
76
|
+
| `swarm serve [--port 7777] [--open]` | Start the web UI + REST API. |
|
|
77
|
+
| `swarm watch <id>` | Re-attach a live dashboard to any run. |
|
|
78
|
+
| `swarm resume <id>` | Resume an interrupted run. Done tasks keep their results, in-flight tasks re-run. |
|
|
79
|
+
| `swarm sandbox [test\|<runtime>]` | Show the resolved shell runtime, or boot and smoke-test one (host, docker, e2b, modal, vercel). |
|
|
80
|
+
| `swarm ls` | List runs (status, tasks, tokens, cost). |
|
|
81
|
+
| `swarm report <id> [--open]` | Print or open a run's final report. |
|
|
82
|
+
| `swarm note <id> "<text>"` | Steer a live run. The conductor reads it. |
|
|
83
|
+
| `swarm cancel <id>` | Stop a run. It still synthesizes a report from completed work. |
|
|
84
|
+
| `swarm config [list\|get\|set …]` | Manage `~/.agentswarm/config.json`. |
|
|
85
|
+
| `swarm models` | List models from the active provider. |
|
|
86
|
+
| `swarm demo` | Run a self-contained demo mission in an isolated workspace. |
|
|
87
|
+
|
|
88
|
+
Run options (also on the UI launch form under Options): `--workers N` (parallelism), `--tasks N`, `--steps N` (tool steps per task), `--budget N` (token cap), `--model`, `--conductor`, `--verify off|normal|strict`, `--effort low|medium|high|max`, `--no-thinking`, `--sandbox host|docker|e2b|modal|vercel|auto` (shell runtime for this run), `--cwd <path>` (run against a real directory instead of an isolated workspace), `--fg` (foreground in this process).
|
|
89
|
+
|
|
90
|
+
## How it works
|
|
91
|
+
|
|
92
|
+
The conductor is a model with three tools: `spawn_tasks`, `wait`, and `finish`. It reads the mission, spawns self-contained tasks (each with an objective, success criteria, a role, optional dependencies, and an optional `verify` flag), then reacts as reports come back.
|
|
93
|
+
|
|
94
|
+
Each task becomes an autonomous agent with a tool budget. It works in small steps, posts durable findings to the blackboard, saves artifacts, and ends by reporting back. The report is the only thing the conductor sees, which keeps reports specific.
|
|
95
|
+
|
|
96
|
+
The scheduler starts a task as soon as its dependencies are done, up to the parallelism cap. Tasks whose dependencies failed are blocked and surfaced to the conductor for re-planning.
|
|
97
|
+
|
|
98
|
+
When the conductor finishes (or the budget forces it), a synthesizer composes `final-report.md` from every task report.
|
|
99
|
+
|
|
100
|
+
The journal is the source of truth. Every run is an append-only `events.jsonl`; the terminal dashboard, the web UI, and `swarm ls` all reduce the same file. That's why runs survive crashes and can be resumed or replayed. Runs live under `~/.agentswarm/runs/<id>/`.
|
|
101
|
+
|
|
102
|
+
If the engine process dies without recording a final status (kill -9, reboot), the hub notices the missing process and shows the run as interrupted instead of leaving it "running" forever.
|
|
103
|
+
|
|
104
|
+
## Architecture
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
src/ TypeScript engine (zero runtime deps)
|
|
108
|
+
deepseek.ts streaming chat client (OpenAI-compatible; thinking mode, tool calls, retries)
|
|
109
|
+
providers.ts provider registry (DeepSeek/OpenAI/Anthropic/xAI/MiniMax/OpenRouter/Ollama/LM Studio)
|
|
110
|
+
sandbox.ts sandbox runtimes: host, docker, E2B, Modal, Vercel
|
|
111
|
+
agent.ts the agent loop: stream → tool calls → results → repeat, with compaction
|
|
112
|
+
executor.ts the orchestrator: conductor loop, parallel scheduler, verify, synth, budget
|
|
113
|
+
tools.ts worker toolbelt (shell, files, web, blackboard, artifacts) + safety
|
|
114
|
+
webtools.ts web search/fetch: SearchKit → TinyFish → DuckDuckGo fallback chain
|
|
115
|
+
journal.ts append-only crash-safe event log (single source of truth)
|
|
116
|
+
state.ts pure reducer: events → live run state
|
|
117
|
+
hub.ts localhost HTTP API + SSE + static UI server
|
|
118
|
+
terminal.ts live TTY dashboard
|
|
119
|
+
cli.ts command-line interface
|
|
120
|
+
ui/ Next.js 15 + Tailwind 4 web app (static-exported, served by the hub)
|
|
121
|
+
test/ end-to-end test with a scripted mock model (no API key needed)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Testing
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
node test/e2e.js
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Boots a mock model server and drives real missions through the engine, offline, no API key needed. The happy path covers parallel execution, dependency order, tool calls, verification, and synthesis. The rest covers what goes wrong: bad keys fail loudly instead of producing a phantom run, interrupted runs resume without losing work, a tiny token budget still ends with a report, a failed verification retries with feedback and then passes, a live run can be steered with a note and cancelled, and agents compact their context when it grows too big. There's also a hub API phase and, when a docker daemon is reachable, a full run inside a container.
|
|
131
|
+
|
|
132
|
+
## Safety notes
|
|
133
|
+
|
|
134
|
+
- Safe mode is on by default. It blocks obviously destructive shell commands and confines writes to the working directory. `--no-safe` turns it off for a run; only do that when you trust the mission.
|
|
135
|
+
- Runs default to an isolated per-run workspace on this machine. That's a private directory, not a container. Agents still execute with your user's permissions; the engine strips API keys and sandbox credentials from their environment, and safe mode constrains commands and writes. For untrusted or risky missions, use `--sandbox docker` or a cloud runtime.
|
|
136
|
+
- Use `--cwd <path>` (or Workspace → "A directory on disk" in the UI) to let agents touch a real project. Those runs always execute on the host, since touching your real files is the point.
|
|
137
|
+
- Costs are estimates based on list prices and the token counts the API reports. Models without pricing data show $0. Set a `--budget` either way.
|
|
138
|
+
- Keys are stored in `~/.agentswarm/config.json` (chmod 600) and are only sent to the APIs you configured.
|
|
139
|
+
|
|
140
|
+
## License
|
|
141
|
+
|
|
142
|
+
MIT
|
package/bin/swarm.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Launcher for the compiled CLI. When the build output cannot be found, print
// a setup hint and exit with status 1; any other failure is rethrown as-is.
const BUILD_HINT = "agentswarm isn't built yet. From the repo root run:\n\n npm run setup\n\nthen try again.";

// True when `err` is a MODULE_NOT_FOUND error whose message mentions dist/cli.
function looksLikeMissingBuild(err) {
    if (!err || err.code !== "MODULE_NOT_FOUND") {
        return false;
    }
    return /dist[\/\\]cli/.test(String(err.message));
}

try {
    require("../dist/cli.js").main();
}
catch (err) {
    if (!looksLikeMissingBuild(err)) {
        throw err;
    }
    console.error(BUILD_HINT);
    process.exit(1);
}
|
package/dist/agent.js
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runAgent = runAgent;
|
|
4
|
+
exports.estimateMessages = estimateMessages;
|
|
5
|
+
const deepseek_1 = require("./deepseek");
|
|
6
|
+
const prompts_1 = require("./prompts");
|
|
7
|
+
const types_1 = require("./types");
|
|
8
|
+
const util_1 = require("./util");
|
|
9
|
+
/**
 * The agent loop: stream a completion, execute tool calls, feed results back,
 * until a terminal tool is called or the step budget runs out. Context is
 * compacted in place when it grows past the configured limit.
 *
 * Fields of `p` read here: `cfg`, `hooks`, `system`, `kickoff`, `terminal`
 * (schemas of the tools that end the run), `tools` (name -> `{ schema, run }`),
 * `ctx`, `model`, `thinking`, `reasoningEffort`, `maxTokensOut`, `maxSteps`,
 * `signal`, `agentId`, and the optional `stop()` callback.
 *
 * @param {object} p Run parameters (see above).
 * @returns {Promise<{terminal: ({name: string, args: *}|null), finalText: string, steps: number, usage: object}>}
 *   `terminal` is null when no terminal call was obtained, even from the
 *   forced final request; `finalText` is the last non-empty prose reply.
 * @throws {Error} "cancelled" if `p.signal` is aborted once a tool has run.
 *   Errors from model calls inside the main loop are not caught here.
 */
async function runAgent(p) {
    const { cfg, hooks } = p;
    let messages = [
        { role: "system", content: p.system },
        { role: "user", content: p.kickoff },
    ];
    const terminalNames = new Set(p.terminal.map((t) => t.name));
    const allSchemas = [
        ...Object.values(p.tools).map((t) => t.schema),
        ...p.terminal,
    ];
    let usage = { ...types_1.ZERO_USAGE };
    let lastText = "";
    let steps = 0;
    hooks.onTranscript?.(messages);
    // One model request over the current `messages`. With `opts.only`, the
    // tool list is narrowed to that single tool and it is passed as toolChoice.
    const callModel = (opts) => (0, deepseek_1.chat)(cfg, {
        model: p.model,
        messages,
        tools: opts?.only
            ? allSchemas.filter((s) => s.name === opts.only)
            : allSchemas,
        toolChoice: opts?.only,
        thinking: p.thinking,
        reasoningEffort: p.thinking ? p.reasoningEffort : undefined,
        maxTokens: p.maxTokensOut,
        signal: p.signal,
        onDelta: (d) => {
            if (d.think)
                hooks.onDelta?.("think", d.think);
            if (d.text)
                hooks.onDelta?.("text", d.text);
        },
    });
    let stopReason = null;
    while (steps < p.maxSteps) {
        // An external stop request ends the loop before another step is spent.
        stopReason = p.stop?.() ?? null;
        if (stopReason)
            break;
        steps++;
        const res = await callModel();
        hooks.onUsage?.(p.model, res.usage);
        usage = (0, types_1.addUsage)(usage, res.usage);
        if (res.toolCalls.length === 0) {
            // The model replied with prose. Record it and nudge it back to tools.
            messages.push({ role: "assistant", content: res.content, reasoning_content: res.reasoning });
            if (res.content) {
                lastText = res.content;
                hooks.onMessage?.(res.content);
            }
            messages.push({ role: "user", content: prompts_1.NUDGE_USE_TOOLS });
            hooks.onTranscript?.(messages);
            continue;
        }
        messages.push({
            role: "assistant",
            content: res.content || null,
            reasoning_content: res.reasoning,
            tool_calls: res.toolCalls,
        });
        if (res.content) {
            lastText = res.content;
            hooks.onMessage?.(res.content);
        }
        for (const call of res.toolCalls) {
            const name = call.function.name;
            const parsed = (0, util_1.safeJson)(call.function.arguments);
            const args = parsed ?? {};
            // Non-empty argument text that did not parse counts as malformed;
            // an empty argument string is accepted and treated as `{}`.
            const malformed = parsed === undefined && Boolean(call.function.arguments.trim());
            if (terminalNames.has(name)) {
                if (malformed) {
                    // Unparseable terminal args — tell the model and let it retry.
                    messages.push({
                        role: "tool",
                        tool_call_id: call.id,
                        content: "ERROR: arguments were not valid JSON. Call the tool again with valid JSON.",
                    });
                    hooks.onTranscript?.(messages);
                    continue;
                }
                hooks.onToolCall?.(call.id, name, redact(args));
                hooks.onTranscript?.(messages);
                return { terminal: { name, args }, finalText: lastText, steps, usage };
            }
            const tool = p.tools[name];
            hooks.onToolCall?.(call.id, name, redact(args));
            let result;
            let ok = true;
            if (!tool) {
                ok = false;
                result = `ERROR: unknown tool "${name}". Available: ${allSchemas.map((s) => s.name).join(", ")}`;
            }
            else if (malformed) {
                ok = false;
                result = "ERROR: arguments were not valid JSON.";
            }
            else {
                try {
                    result = await tool.run(args, p.ctx);
                }
                catch (e) {
                    // A failing tool is reported back to the model, not thrown.
                    ok = false;
                    result = `ERROR: ${(0, util_1.errMsg)(e)}`;
                }
            }
            if (p.signal.aborted)
                throw new Error("cancelled");
            result = (0, util_1.truncateMiddle)(result, cfg.maxToolResultChars, "chars");
            // Hooks get a whitespace-collapsed 200-char preview; the model gets the full (truncated) result.
            hooks.onToolResult?.(call.id, name, ok, (0, util_1.clip)(result.replace(/\s+/g, " "), 200));
            messages.push({ role: "tool", tool_call_id: call.id, content: result });
        }
        hooks.onTranscript?.(messages);
        if (estimateMessages(messages) > cfg.contextTokenLimit) {
            const compacted = await compact(p, messages);
            // compact() returns the very same array when there was nothing it
            // could fold away; only announce a compaction that really happened.
            if (compacted !== messages) {
                messages = compacted;
                hooks.onTranscript?.(messages);
                hooks.onLog?.("info", `${p.agentId}: context compacted`);
            }
        }
    }
    // Step budget exhausted (or stopped early) — force one final terminal call.
    messages.push({ role: "user", content: stopReason ? (0, prompts_1.forcedFinal)(stopReason) : prompts_1.STEP_LIMIT_FINAL });
    // Keep transcript observers in sync, as after every other push in this function.
    hooks.onTranscript?.(messages);
    let forced = null;
    try {
        const res = await callModel({ only: p.terminal[0].name });
        hooks.onUsage?.(p.model, res.usage);
        usage = (0, types_1.addUsage)(usage, res.usage);
        const call = res.toolCalls.find((c) => terminalNames.has(c.function.name));
        if (call) {
            forced = {
                id: call.id,
                name: call.function.name,
                args: (0, util_1.safeJson)(call.function.arguments) ?? {},
            };
        }
        else if (res.content) {
            lastText = res.content;
        }
    }
    catch (e) {
        // The forced call is best-effort: log and fall through to a null terminal.
        hooks.onLog?.("warn", `${p.agentId}: forced final call failed: ${(0, util_1.errMsg)(e)}`);
    }
    if (forced) {
        // Report the terminal call to hooks just like the in-loop terminal path.
        // Done outside the try so a throwing hook is not mislabelled as a failed model call.
        hooks.onToolCall?.(forced.id, forced.name, redact(forced.args));
        return { terminal: { name: forced.name, args: forced.args }, finalText: lastText, steps, usage };
    }
    return { terminal: null, finalText: lastText, steps, usage };
}
|
|
149
|
+
/**
 * Build a shallow copy of tool-call arguments for hook consumers, passing
 * every string value longer than 600 characters through `clip(value, 600)`.
 * All other values are copied through unchanged.
 *
 * @param {object} [args] Parsed tool-call arguments; nullish is treated as `{}`.
 * @returns {object} A new plain object with the same own enumerable keys.
 */
function redact(args) {
    const MAX_STRING = 600;
    const entries = Object.entries(args ?? {}).map(([key, value]) => [
        key,
        typeof value === "string" && value.length > MAX_STRING ? (0, util_1.clip)(value, MAX_STRING) : value,
    ]);
    // Object.fromEntries defines own data properties, so a model-supplied
    // "__proto__" key is copied as plain data instead of replacing the
    // prototype of the copy (which `out[key] = value` would do).
    return Object.fromEntries(entries);
}
|
|
156
|
+
/**
 * Rough size estimate for a message list, used to compare against the
 * configured context limit. Sums the lengths of `content`,
 * `reasoning_content` and every tool call's argument string (plus 40 per
 * call), divides by 3.5 rounding up, and adds 6 per message.
 *
 * @param {Array<object>} messages Chat messages.
 * @returns {number} The estimate.
 */
function estimateMessages(messages) {
    const CHARS_PER_UNIT = 3.5;
    const PER_MESSAGE = 6;
    const PER_TOOL_CALL = 40;
    const lengthOf = (value) => value?.length ?? 0;
    const totalChars = messages.reduce((sum, msg) => {
        const callChars = (msg.tool_calls || []).reduce((acc, call) => acc + call.function.arguments.length + PER_TOOL_CALL, 0);
        return sum + lengthOf(msg.content) + lengthOf(msg.reasoning_content) + callChars;
    }, 0);
    return Math.ceil(totalChars / CHARS_PER_UNIT) + messages.length * PER_MESSAGE;
}
|
|
168
|
+
/**
 * Shrink a transcript by replacing its middle with a model-written summary.
 * The first two messages and roughly the last eight are kept verbatim; the
 * span in between is serialized, summarized with one extra chat call, and
 * substituted by a single user message carrying that summary.
 *
 * Returns the input array itself (same reference) when the transcript is too
 * short or when no safe cut point exists. A failed summary call is not fatal:
 * the middle is still dropped and the failure is noted in the summary text.
 *
 * @param {object} p Agent parameters; reads `cfg`, `model`, `signal`, `hooks`.
 * @param {Array<object>} messages Current transcript.
 * @returns {Promise<Array<object>>} The compacted transcript, or `messages` unchanged.
 */
async function compact(p, messages) {
    const HEAD = 2; // the first two messages are always preserved
    const TAIL = 8;
    if (messages.length <= HEAD + TAIL + 2) {
        return messages;
    }
    // Walk the cut point backwards so the kept tail never begins with a tool
    // result whose assistant turn was dropped.
    let boundary = messages.length - TAIL;
    for (; boundary > HEAD; boundary--) {
        if (messages[boundary].role !== "tool") {
            break;
        }
    }
    if (boundary <= HEAD) {
        return messages;
    }
    const lines = [];
    for (const msg of messages.slice(HEAD, boundary)) {
        let calls = "";
        if (msg.tool_calls != null) {
            calls = msg.tool_calls
                .map((c) => ` [${c.function.name}(${(0, util_1.clip)(c.function.arguments, 300)})]`)
                .join("");
        }
        // Tool output gets a tighter per-message cap than model/user text.
        const limit = msg.role === "tool" ? 900 : 1500;
        const text = (0, util_1.clip)(msg.content ?? "", limit);
        lines.push(`${msg.role.toUpperCase()}:${calls} ${text}`);
    }
    const serialized = lines.join("\n");
    let summary;
    try {
        const prompt = (0, prompts_1.compactorPrompt)((0, util_1.truncateMiddle)(serialized, 300_000, "chars"));
        const res = await (0, deepseek_1.chat)(p.cfg, {
            model: p.model,
            messages: [{ role: "user", content: prompt }],
            thinking: false,
            maxTokens: 2048,
            signal: p.signal,
        });
        p.hooks.onUsage?.(p.model, res.usage);
        summary = res.content || "(compaction produced no summary)";
    }
    catch (e) {
        // Compaction is best-effort; fall back to hard truncation.
        summary = "(compaction failed: " + (0, util_1.errMsg)(e) + ") Earlier steps were dropped.";
    }
    const [first, second] = messages;
    const recap = {
        role: "user",
        content: `[Context was compacted to save space. Faithful summary of your earlier work:]\n${summary}\n[Continue from here. The most recent steps follow.]`,
    };
    return [first, second, recap].concat(messages.slice(boundary));
}
|