@robzilla1738/agentswarm 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +142 -0
  3. package/bin/swarm.js +10 -0
  4. package/dist/agent.js +211 -0
  5. package/dist/cli.js +667 -0
  6. package/dist/config.js +289 -0
  7. package/dist/control.js +96 -0
  8. package/dist/deepseek.js +321 -0
  9. package/dist/executor.js +988 -0
  10. package/dist/hub.js +553 -0
  11. package/dist/journal.js +152 -0
  12. package/dist/prompts.js +232 -0
  13. package/dist/providers.js +151 -0
  14. package/dist/run.js +309 -0
  15. package/dist/sandbox.js +505 -0
  16. package/dist/state.js +230 -0
  17. package/dist/terminal.js +298 -0
  18. package/dist/tools.js +491 -0
  19. package/dist/types.js +26 -0
  20. package/dist/util.js +209 -0
  21. package/dist/webtools.js +205 -0
  22. package/package.json +63 -0
  23. package/ui/out/404/index.html +1 -0
  24. package/ui/out/404.html +1 -0
  25. package/ui/out/_next/static/chunks/255-2aa030c9ba2867e3.js +1 -0
  26. package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +1 -0
  27. package/ui/out/_next/static/chunks/4bd1b696-c023c6e3521b1417.js +1 -0
  28. package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +1 -0
  29. package/ui/out/_next/static/chunks/677-b37981ba0eca75b2.js +1 -0
  30. package/ui/out/_next/static/chunks/app/_not-found/page-2d0982e372f7be41.js +1 -0
  31. package/ui/out/_next/static/chunks/app/layout-37ad32c5fdb26f29.js +1 -0
  32. package/ui/out/_next/static/chunks/app/page-0c9f35bd4aa8e370.js +1 -0
  33. package/ui/out/_next/static/chunks/app/run/page-13dc41a57e34da71.js +1 -0
  34. package/ui/out/_next/static/chunks/app/settings/page-a1763be7f6de888c.js +1 -0
  35. package/ui/out/_next/static/chunks/framework-2c534e0e662575a2.js +1 -0
  36. package/ui/out/_next/static/chunks/main-app-889ed884f8bc78e3.js +1 -0
  37. package/ui/out/_next/static/chunks/main-eb90ae3b35d2fd16.js +1 -0
  38. package/ui/out/_next/static/chunks/pages/_app-7d307437aca18ad4.js +1 -0
  39. package/ui/out/_next/static/chunks/pages/_error-cb2a52f75f2162e2.js +1 -0
  40. package/ui/out/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  41. package/ui/out/_next/static/chunks/webpack-38639c05c96dbeca.js +1 -0
  42. package/ui/out/_next/static/css/82edaa7a5942f894.css +3 -0
  43. package/ui/out/_next/static/eiQeDU9uBHNsBj0CFkp8M/_buildManifest.js +1 -0
  44. package/ui/out/_next/static/eiQeDU9uBHNsBj0CFkp8M/_ssgManifest.js +1 -0
  45. package/ui/out/_next/static/media/0aa834ed78bf6d07-s.woff2 +0 -0
  46. package/ui/out/_next/static/media/438aa629764e75f3-s.woff2 +0 -0
  47. package/ui/out/_next/static/media/4c9affa5bc8f420e-s.p.woff2 +0 -0
  48. package/ui/out/_next/static/media/51251f8b9793cdb3-s.woff2 +0 -0
  49. package/ui/out/_next/static/media/67957d42bae0796d-s.woff2 +0 -0
  50. package/ui/out/_next/static/media/875ae681bfde4580-s.woff2 +0 -0
  51. package/ui/out/_next/static/media/886030b0b59bc5a7-s.woff2 +0 -0
  52. package/ui/out/_next/static/media/939c4f875ee75fbb-s.woff2 +0 -0
  53. package/ui/out/_next/static/media/bb3ef058b751a6ad-s.p.woff2 +0 -0
  54. package/ui/out/_next/static/media/cc978ac5ee68c2b6-s.woff2 +0 -0
  55. package/ui/out/_next/static/media/e857b654a2caa584-s.woff2 +0 -0
  56. package/ui/out/_next/static/media/f911b923c6adde36-s.woff2 +0 -0
  57. package/ui/out/icon.png +0 -0
  58. package/ui/out/index.html +1 -0
  59. package/ui/out/index.txt +22 -0
  60. package/ui/out/run/index.html +1 -0
  61. package/ui/out/run/index.txt +22 -0
  62. package/ui/out/settings/index.html +1 -0
  63. package/ui/out/settings/index.txt +22 -0
  64. package/ui/out/swarm-mark.png +0 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Robert Courson
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,142 @@
1
+ # agentswarm
2
+
3
+ A local agent-swarm orchestrator with a terminal dashboard and a localhost web UI. Works with DeepSeek, OpenAI, Anthropic, xAI, MiniMax, OpenRouter, Ollama, LM Studio, or any OpenAI-compatible endpoint.
4
+
5
+ You give it a mission. A conductor model breaks the mission into tasks and hands them to worker agents that run in parallel, share findings on a blackboard, and get checked by an adversarial verifier. The run ends with a synthesized report plus whatever files the agents produced. Everything runs on your machine with your own API key, or fully offline against a local model.
6
+
7
+ ```
8
+              ┌─────────────┐
9
+              │  Conductor  │   decomposes the mission, schedules waves,
10
+              └──────┬──────┘   reacts to results, steers toward the goal
11
+         ┌───────────┼───────────┐
12
+     ┌───▼───┐   ┌───▼───┐   ┌───▼───┐    parallel worker agents
13
+     │  T1   │   │  T2   │   │  T3   │    (shell · files · web · notes)
14
+     └───┬───┘   └───┬───┘   └───┬───┘
15
+         └─────┬─────┘           │        dependencies + shared blackboard
16
+          ┌────▼─────┐      ┌────▼────┐
17
+          │  T4 dep  │◀─────│ verify  │   adversarial verification
18
+          └────┬─────┘      └─────────┘
19
+          ┌────▼─────┐
20
+          │Synthesize│  → final-report.md + artifacts
21
+          └──────────┘
22
+ ```
23
+
24
+ ## What it does
25
+
26
+ - Independent tasks run at the same time, up to a parallelism cap you set. Dependent tasks start the moment their inputs are ready.
27
+ - Runs are built to go long. Each agent compacts its own context when it grows too big, and the conductor's history is bounded the same way. A run-wide token budget is enforced mid-task; when it's hit, agents wrap up and report instead of dying mid-thought. Failed verifications retry with feedback. Every event lands in an append-only journal that survives crashes.
28
+ - Interrupted runs resume. `swarm resume <id>`, or a button in the UI, keeps completed work, re-runs whatever was in flight, and carries the token spend over.
29
+ - Runs execute in an isolated per-run workspace on your machine by default. Nothing extra to install, no daemon to start. Want stronger isolation? Run in a Docker container or an E2B/Modal/Vercel cloud sandbox, per run (`--sandbox docker`) or as your default (`swarm config set sandboxRuntime auto` picks the strongest one you've configured). `swarm sandbox test` boots whichever is active and tells you whether it works.
30
+ - Tasks flagged `verify` get a second agent whose whole job is to prove the first one wrong. Failures bounce back for a retry with the verifier's feedback attached.
31
+ - You can steer a live run. `swarm note <id> "skip the pricing section"` and the conductor re-plans on its next tick.
32
+ - Workers get real tools: shell, file read/write/patch, web search and fetch, the blackboard, and an artifacts folder that lands on your disk. Search uses [SearchKit](https://github.com/robzilla1738/script-search) if it's installed (local, returns quotable passages; agents can pass `deep=true` when they need grounded sources), TinyFish if you have a key, DuckDuckGo otherwise.
33
+ - The web UI streams every tool call live and renders the final report. Each task gets a deterministic name and pixel avatar so you can tell agents apart at a glance.
34
+ - Provider keys are stored per provider, so switching between DeepSeek, OpenAI, Anthropic, Grok, MiniMax, OpenRouter, Ollama, and LM Studio never loses a key. Reasoning effort maps to whatever each API actually supports.
35
+
36
+ ## Install
37
+
38
+ Requires Node 20 or newer.
39
+
40
+ ```bash
41
+ npm install -g @robzilla1738/agentswarm
42
+ ```
43
+
44
+ That gives you the `swarm` command with the web UI prebuilt, nothing else to do. The E2B/Modal/Vercel SDKs install as optional dependencies; add `--omit=optional` if you'll never use a cloud sandbox.
45
+
46
+ Or from source:
47
+
48
+ ```bash
49
+ git clone https://github.com/robzilla1738/agentswarm.git && cd agentswarm
50
+ npm run setup # installs deps + builds the engine and the web UI
51
+ npm link # optional: puts `swarm` on your PATH
52
+ ```
53
+
54
+ Without `npm link`, replace `swarm` below with `node bin/swarm.js`.
55
+
56
+ ## First run
57
+
58
+ ```bash
59
+ swarm config set apiKey sk-... # key for the active provider (default: DeepSeek)
60
+ swarm config set provider ollama # or: openai | anthropic | xai | minimax | openrouter | lmstudio | custom
61
+ pip install searchkit # optional: local, citable web search for agents
62
+ swarm serve --open # opens the web UI (http://localhost:7777)
63
+ ```
64
+
65
+ Type a mission, hit Launch swarm, and watch it work. Or stay in the terminal:
66
+
67
+ ```bash
68
+ swarm run "Research the best open-source vector DBs in 2026 and write a recommendation"
69
+ ```
70
+
71
+ ## CLI
72
+
73
+ | Command | What it does |
74
+ |---|---|
75
+ | `swarm run "<mission>"` | Decompose and execute a mission (live terminal dashboard). Ctrl-C detaches; the run keeps going. |
76
+ | `swarm serve [--port 7777] [--open]` | Start the web UI + REST API. |
77
+ | `swarm watch <id>` | Re-attach a live dashboard to any run. |
78
+ | `swarm resume <id>` | Resume an interrupted run. Done tasks keep their results, in-flight tasks re-run. |
79
+ | `swarm sandbox [test\|<runtime>]` | Show the resolved shell runtime, or boot and smoke-test one (host, docker, e2b, modal, vercel). |
80
+ | `swarm ls` | List runs (status, tasks, tokens, cost). |
81
+ | `swarm report <id> [--open]` | Print or open a run's final report. |
82
+ | `swarm note <id> "<text>"` | Steer a live run. The conductor reads it. |
83
+ | `swarm cancel <id>` | Stop a run. It still synthesizes a report from completed work. |
84
+ | `swarm config [list\|get\|set …]` | Manage `~/.agentswarm/config.json`. |
85
+ | `swarm models` | List models from the active provider. |
86
+ | `swarm demo` | Run a self-contained demo mission in an isolated workspace. |
87
+
88
+ Run options (also on the UI launch form under Options): `--workers N` (parallelism), `--tasks N`, `--steps N` (tool steps per task), `--budget N` (token cap), `--model`, `--conductor`, `--verify off|normal|strict`, `--effort low|medium|high|max`, `--no-thinking`, `--sandbox host|docker|e2b|modal|vercel|auto` (shell runtime for this run), `--cwd <path>` (run against a real directory instead of an isolated workspace), `--fg` (foreground in this process).
89
+
90
+ ## How it works
91
+
92
+ The conductor is a model with three tools: `spawn_tasks`, `wait`, and `finish`. It reads the mission, spawns self-contained tasks (each with an objective, success criteria, a role, optional dependencies, and an optional `verify` flag), then reacts as reports come back.
93
+
94
+ Each task becomes an autonomous agent with a tool budget. It works in small steps, posts durable findings to the blackboard, saves artifacts, and ends by reporting back. The report is the only thing the conductor sees, which keeps reports specific.
95
+
96
+ The scheduler starts a task as soon as its dependencies are done, up to the parallelism cap. Tasks whose dependencies failed are blocked and surfaced to the conductor for re-planning.
97
+
98
+ When the conductor finishes (or the budget forces it), a synthesizer composes `final-report.md` from every task report.
99
+
100
+ The journal is the source of truth. Every run is an append-only `events.jsonl`; the terminal dashboard, the web UI, and `swarm ls` all reduce the same file. That's why runs survive crashes and can be resumed or replayed. Runs live under `~/.agentswarm/runs/<id>/`.
101
+
102
+ If the engine process dies without writing a terminal status (kill -9, reboot), the hub notices the missing process and shows the run as interrupted instead of leaving it "running" forever.
103
+
104
+ ## Architecture
105
+
106
+ ```
107
+ src/              TypeScript engine (zero runtime deps)
108
+   deepseek.ts     streaming chat client (OpenAI-compatible; thinking mode, tool calls, retries)
109
+   providers.ts    provider registry (DeepSeek/OpenAI/Anthropic/xAI/MiniMax/OpenRouter/Ollama/LM Studio)
110
+   sandbox.ts      sandbox runtimes: host, docker, E2B, Modal, Vercel
111
+   agent.ts        the agent loop: stream → tool calls → results → repeat, with compaction
112
+   executor.ts     the orchestrator: conductor loop, parallel scheduler, verify, synth, budget
113
+   tools.ts        worker toolbelt (shell, files, web, blackboard, artifacts) + safety
114
+   webtools.ts     web search/fetch: SearchKit → TinyFish → DuckDuckGo fallback chain
115
+   journal.ts      append-only crash-safe event log (single source of truth)
116
+   state.ts        pure reducer: events → live run state
117
+   hub.ts          localhost HTTP API + SSE + static UI server
118
+   terminal.ts     live TTY dashboard
119
+   cli.ts          command-line interface
120
+ ui/               Next.js 15 + Tailwind 4 web app (static-exported, served by the hub)
121
+ test/             end-to-end test with a scripted mock model (no API key needed)
122
+ ```
123
+
124
+ ## Testing
125
+
126
+ ```bash
127
+ node test/e2e.js
128
+ ```
129
+
130
+ Boots a mock model server and drives real missions through the engine, offline, no API key needed. The happy path covers parallel execution, dependency order, tool calls, verification, and synthesis. The rest covers what goes wrong: bad keys fail loudly instead of producing a phantom run, interrupted runs resume without losing work, a tiny token budget still ends with a report, a failed verification retries with feedback and then passes, a live run can be steered with a note and cancelled, and agents compact their context when it grows too big. There's also a hub API phase and, when a docker daemon is reachable, a full run inside a container.
131
+
132
+ ## Safety notes
133
+
134
+ - Safe mode is on by default. It blocks obviously destructive shell commands and confines writes to the working directory. `--no-safe` turns it off for a run; only do that when you trust the mission.
135
+ - Runs default to an isolated per-run workspace on this machine. That's a private directory, not a container. Agents still execute with your user's permissions; the engine strips API keys and sandbox credentials from their environment, and safe mode constrains commands and writes. For untrusted or risky missions, use `--sandbox docker` or a cloud runtime.
136
+ - Use `--cwd <path>` (or Workspace → "A directory on disk" in the UI) to let agents touch a real project. Those runs always execute on the host, since touching your real files is the point.
137
+ - Costs are estimates based on list prices and the token counts the API reports. Models without pricing data show $0. Set a `--budget` either way.
138
+ - Keys are stored in `~/.agentswarm/config.json` (chmod 600) and are only sent to the APIs you configured.
139
+
140
+ ## License
141
+
142
+ MIT
package/bin/swarm.js ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env node
// Launcher for the `swarm` command: hands control to the compiled CLI entry
// point in dist/ and prints a build hint when that entry point does not exist.
try {
    require("../dist/cli.js").main();
} catch (e) {
    // Only the FIRST line of a MODULE_NOT_FOUND message names the module that
    // could not be resolved. The lines after it are the "Require stack", which
    // lists dist/cli.js whenever the failing require() happened inside the
    // compiled CLI. Testing the whole message would report a missing
    // dependency of an already-built CLI as "isn't built yet".
    const unresolved = e && e.code === "MODULE_NOT_FOUND"
        ? String(e.message).split("\n", 1)[0]
        : "";
    if (/dist[\/\\]cli/.test(unresolved)) {
        console.error("agentswarm isn't built yet. From the repo root run:\n\n npm run setup\n\nthen try again.");
        process.exit(1);
    }
    // Anything else is a real failure: let Node report it.
    throw e;
}
package/dist/agent.js ADDED
@@ -0,0 +1,211 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.runAgent = runAgent;
4
+ exports.estimateMessages = estimateMessages;
5
+ const deepseek_1 = require("./deepseek");
6
+ const prompts_1 = require("./prompts");
7
+ const types_1 = require("./types");
8
+ const util_1 = require("./util");
9
/**
 * The agent loop: stream a completion, execute tool calls, feed results back,
 * until a terminal tool is called or the step budget runs out. Context is
 * compacted in place when it grows past the configured limit.
 *
 * Fields read from `p`: `cfg`, `hooks`, `system`, `kickoff`, `model`, `tools`
 * (name → `{ schema, run }`), `terminal` (schemas of the tools that end the
 * run), `thinking`, `reasoningEffort`, `maxTokensOut`, `maxSteps`, `signal`,
 * `stop` (optional; a non-null return value ends the loop early), `ctx`
 * (handed to every tool) and `agentId` (used in log lines).
 *
 * Note: tokens spent by context compaction are reported through
 * `hooks.onUsage` inside `compact`, but are not added to the `usage` total
 * returned from this function.
 *
 * @param {object} p Run parameters (see above).
 * @returns {Promise<{terminal: {name: string, args: object} | null, finalText: string, steps: number, usage: object}>}
 *   `terminal` is null when the model never produced a terminal tool call,
 *   even after the forced final request.
 * @throws {Error} "cancelled" if `p.signal` is aborted by the time a tool
 *   finishes. Errors from model calls inside the main loop propagate; a
 *   failure of the forced final call is only logged.
 */
async function runAgent(p) {
    const { cfg, hooks } = p;
    let messages = [
        { role: "system", content: p.system },
        { role: "user", content: p.kickoff },
    ];
    const terminalNames = new Set(p.terminal.map((t) => t.name));
    const allSchemas = [
        ...Object.values(p.tools).map((t) => t.schema),
        ...p.terminal,
    ];
    let usage = { ...types_1.ZERO_USAGE };
    let lastText = "";
    let steps = 0;
    hooks.onTranscript?.(messages);
    // One model request against the current `messages`. With `opts.only` set,
    // the tool list is narrowed to that single tool and it is passed as the
    // tool choice.
    const callModel = (opts) => (0, deepseek_1.chat)(cfg, {
        model: p.model,
        messages,
        tools: opts?.only
            ? allSchemas.filter((s) => s.name === opts.only)
            : allSchemas,
        toolChoice: opts?.only,
        thinking: p.thinking,
        reasoningEffort: p.thinking ? p.reasoningEffort : undefined,
        maxTokens: p.maxTokensOut,
        signal: p.signal,
        onDelta: (d) => {
            if (d.think)
                hooks.onDelta?.("think", d.think);
            if (d.text)
                hooks.onDelta?.("text", d.text);
        },
    });
    let stopReason = null;
    while (steps < p.maxSteps) {
        stopReason = p.stop?.() ?? null;
        if (stopReason)
            break;
        steps++;
        const res = await callModel();
        hooks.onUsage?.(p.model, res.usage);
        usage = (0, types_1.addUsage)(usage, res.usage);
        if (res.toolCalls.length === 0) {
            // The model replied with prose. Record it and nudge it back to tools.
            messages.push({ role: "assistant", content: res.content, reasoning_content: res.reasoning });
            if (res.content) {
                lastText = res.content;
                hooks.onMessage?.(res.content);
            }
            messages.push({ role: "user", content: prompts_1.NUDGE_USE_TOOLS });
            hooks.onTranscript?.(messages);
            continue;
        }
        messages.push({
            role: "assistant",
            content: res.content || null,
            reasoning_content: res.reasoning,
            tool_calls: res.toolCalls,
        });
        if (res.content) {
            lastText = res.content;
            hooks.onMessage?.(res.content);
        }
        for (const call of res.toolCalls) {
            const name = call.function.name;
            const parsed = (0, util_1.safeJson)(call.function.arguments);
            const args = parsed ?? {};
            // An empty argument string is accepted as "no arguments"; only a
            // non-empty string that failed to parse counts as invalid.
            const badJson = parsed === undefined && Boolean(call.function.arguments.trim());
            if (terminalNames.has(name)) {
                if (badJson) {
                    // Unparseable terminal args — tell the model and let it retry.
                    messages.push({
                        role: "tool",
                        tool_call_id: call.id,
                        content: "ERROR: arguments were not valid JSON. Call the tool again with valid JSON.",
                    });
                    hooks.onTranscript?.(messages);
                    continue;
                }
                hooks.onToolCall?.(call.id, name, redact(args));
                hooks.onTranscript?.(messages);
                return { terminal: { name, args }, finalText: lastText, steps, usage };
            }
            const tool = p.tools[name];
            hooks.onToolCall?.(call.id, name, redact(args));
            let result;
            let ok = true;
            if (!tool) {
                ok = false;
                result = `ERROR: unknown tool "${name}". Available: ${allSchemas.map((s) => s.name).join(", ")}`;
            }
            else if (badJson) {
                ok = false;
                result = "ERROR: arguments were not valid JSON.";
            }
            else {
                try {
                    result = await tool.run(args, p.ctx);
                }
                catch (e) {
                    // Tool failures are fed back to the model rather than ending the run.
                    ok = false;
                    result = `ERROR: ${(0, util_1.errMsg)(e)}`;
                }
            }
            // `signal` is passed through as-is everywhere else, so tolerate
            // its absence here instead of throwing a TypeError.
            if (p.signal?.aborted)
                throw new Error("cancelled");
            result = (0, util_1.truncateMiddle)(result, cfg.maxToolResultChars, "chars");
            hooks.onToolResult?.(call.id, name, ok, (0, util_1.clip)(result.replace(/\s+/g, " "), 200));
            messages.push({ role: "tool", tool_call_id: call.id, content: result });
        }
        hooks.onTranscript?.(messages);
        if (estimateMessages(messages) > cfg.contextTokenLimit) {
            messages = await compact(p, messages);
            hooks.onTranscript?.(messages);
            hooks.onLog?.("info", `${p.agentId}: context compacted`);
        }
    }
    // Step budget exhausted (or stopped early) — force one final terminal call.
    messages.push({ role: "user", content: stopReason ? (0, prompts_1.forcedFinal)(stopReason) : prompts_1.STEP_LIMIT_FINAL });
    // Publish the final prompt so transcript observers see it, as they do for
    // every other message appended in the main loop.
    hooks.onTranscript?.(messages);
    let forced = null;
    try {
        const res = await callModel({ only: p.terminal[0].name });
        hooks.onUsage?.(p.model, res.usage);
        usage = (0, types_1.addUsage)(usage, res.usage);
        const call = res.toolCalls.find((c) => terminalNames.has(c.function.name));
        if (call) {
            forced = {
                id: call.id,
                name: call.function.name,
                args: (0, util_1.safeJson)(call.function.arguments) ?? {},
            };
        }
        else if (res.content) {
            lastText = res.content;
        }
    }
    catch (e) {
        hooks.onLog?.("warn", `${p.agentId}: forced final call failed: ${(0, util_1.errMsg)(e)}`);
    }
    if (forced) {
        // Report the terminal call exactly like the main loop does. This runs
        // outside the try above so a throwing hook cannot be mistaken for a
        // failed model call and discard the terminal result.
        hooks.onToolCall?.(forced.id, forced.name, redact(forced.args));
        return { terminal: { name: forced.name, args: forced.args }, finalText: lastText, steps, usage };
    }
    return { terminal: null, finalText: lastText, steps, usage };
}
149
/**
 * Build the copy of a tool call's arguments that is handed to
 * `hooks.onToolCall`: top-level string values longer than 600 characters are
 * shortened with `clip`, every other value is passed through as-is. The input
 * object is not mutated.
 *
 * @param {object | null | undefined} args Parsed tool-call arguments.
 * @returns {object} A new plain object with the same own enumerable keys.
 */
function redact(args) {
    const MAX_STRING = 600;
    const entries = Object.entries(args ?? {}).map(([key, value]) => [
        key,
        typeof value === "string" && value.length > MAX_STRING
            ? (0, util_1.clip)(value, MAX_STRING)
            : value,
    ]);
    // Object.fromEntries defines every key as an own data property. Plain
    // assignment (`out[key] = value`) would hit the `__proto__` setter when
    // JSON-parsed arguments carry an own "__proto__" key, changing the copy's
    // prototype and dropping that key from the copy.
    return Object.fromEntries(entries);
}
156
/**
 * Rough token estimate for a chat transcript, computed from character counts.
 *
 * Counts the length of each message's `content` and `reasoning_content`, plus
 * the length of every tool call's argument string and a fixed 40-character
 * allowance per tool call. The character total is divided by `charsPerToken`
 * and a flat per-message overhead is added.
 *
 * @param {Array<object>} messages Chat messages.
 * @param {number} [charsPerToken=3.5] Characters assumed per token; must be > 0.
 * @param {number} [perMessageOverhead=6] Tokens added for each message.
 * @returns {number} Estimated token count.
 * @throws {RangeError} If `charsPerToken` is not a positive number.
 */
function estimateMessages(messages, charsPerToken = 3.5, perMessageOverhead = 6) {
    if (!(charsPerToken > 0)) {
        throw new RangeError("charsPerToken must be a positive number");
    }
    const TOOL_CALL_OVERHEAD_CHARS = 40;
    let chars = 0;
    for (const m of messages) {
        chars += m.content?.length ?? 0;
        chars += m.reasoning_content?.length ?? 0;
        for (const c of m.tool_calls ?? []) {
            // A tool call without an argument string still costs the fixed
            // allowance; it must not make the whole estimate throw.
            chars += (c.function?.arguments?.length ?? 0) + TOOL_CALL_OVERHEAD_CHARS;
        }
    }
    return Math.ceil(chars / charsPerToken) + messages.length * perMessageOverhead;
}
168
/**
 * Shrink a transcript by replacing its middle with a model-written summary.
 *
 * The first two messages and roughly the last `KEEP_TAIL` messages are kept
 * verbatim. Everything in between is serialized (each body clipped) and sent
 * to the model with `compactorPrompt`; the reply becomes a single user
 * message placed between head and tail. If that request fails, a short
 * placeholder is used instead of a summary, so the middle is still dropped.
 *
 * @param {object} p Agent run parameters; reads `cfg`, `model`, `signal` and `hooks.onUsage`.
 * @param {Array<object>} messages Current transcript.
 * @returns {Promise<Array<object>>} The same array when it is too short to
 *   compact (or no safe cut point exists), otherwise a new, shorter array.
 * @throws {Error} "cancelled" when the summary request fails while `p.signal` is aborted.
 */
async function compact(p, messages) {
    const KEEP_TAIL = 8;
    if (messages.length <= 2 + KEEP_TAIL + 2)
        return messages;
    let cut = messages.length - KEEP_TAIL;
    // Never start the tail on a tool result whose assistant turn was dropped.
    while (cut > 2 && messages[cut].role === "tool")
        cut--;
    if (cut <= 2)
        return messages;
    const middle = messages.slice(2, cut);
    const serialized = middle
        .map((m) => {
            const tools = m.tool_calls?.map((c) => ` [${c.function.name}(${(0, util_1.clip)(c.function.arguments, 300)})]`).join("") ?? "";
            const body = (0, util_1.clip)(m.content ?? "", m.role === "tool" ? 900 : 1500);
            return `${m.role.toUpperCase()}:${tools} ${body}`;
        })
        .join("\n");
    let summary;
    try {
        const res = await (0, deepseek_1.chat)(p.cfg, {
            model: p.model,
            messages: [{ role: "user", content: (0, prompts_1.compactorPrompt)((0, util_1.truncateMiddle)(serialized, 300_000, "chars")) }],
            thinking: false,
            maxTokens: 2048,
            signal: p.signal,
        });
        p.hooks.onUsage?.(p.model, res.usage);
        summary = res.content || "(compaction produced no summary)";
    }
    catch (e) {
        // A cancelled run must stop here. Treating the abort as an ordinary
        // compaction failure would discard the middle of the transcript and
        // hand the shortened history back as if compaction had succeeded.
        if (p.signal?.aborted)
            throw new Error("cancelled", { cause: e });
        // Compaction is best-effort; fall back to hard truncation.
        summary = "(compaction failed: " + (0, util_1.errMsg)(e) + ") Earlier steps were dropped.";
    }
    return [
        messages[0],
        messages[1],
        {
            role: "user",
            content: `[Context was compacted to save space. Faithful summary of your earlier work:]\n${summary}\n[Continue from here. The most recent steps follow.]`,
        },
        ...messages.slice(cut),
    ];
}