zubo 0.1.19 → 0.1.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/site/docs/cli.html +7 -2
- package/site/docs/config.html +92 -0
- package/site/docs/index.html +4 -2
- package/site/docs/webhooks.html +17 -0
- package/src/agent/compaction.ts +20 -4
- package/src/agent/history.ts +7 -2
- package/src/agent/loop.ts +50 -18
- package/src/agent/prompts.ts +2 -0
- package/src/agent/session.ts +69 -2
- package/src/agent/summarizer.ts +223 -0
- package/src/channels/dashboard.html.ts +14 -4
- package/src/channels/telegram.ts +10 -1
- package/src/channels/webchat.ts +40 -8
- package/src/llm/codex.ts +2 -1
- package/src/llm/factory.ts +81 -2
- package/src/llm/failover.ts +59 -4
- package/src/llm/smart-router.ts +14 -6
- package/src/memory/knowledge-graph.ts +1 -1
- package/src/memory/vector-index.ts +1 -1
- package/src/scheduler/visual-workflows.ts +1 -1
- package/src/setup-web.html.ts +1371 -0
- package/src/setup-web.ts +165 -0
- package/src/setup.ts +266 -15
- package/src/start.ts +12 -2
- package/src/tools/builtin/config-update.ts +18 -1
- package/src/tools/mcp-registry.ts +11 -5
- package/src/tools/permissions.ts +2 -2
package/README.md
CHANGED
|
@@ -56,7 +56,7 @@ npm i -g zubo # also works
|
|
|
56
56
|
Then:
|
|
57
57
|
|
|
58
58
|
```bash
|
|
59
|
-
zubo setup # interactive config wizard
|
|
59
|
+
zubo setup # interactive config wizard (terminal or browser)
|
|
60
60
|
zubo start # launch the agent
|
|
61
61
|
```
|
|
62
62
|
|
|
@@ -118,7 +118,7 @@ Set secrets through natural conversation: *"Set my github_token to ghp_..."*
|
|
|
118
118
|
## CLI
|
|
119
119
|
|
|
120
120
|
```
|
|
121
|
-
zubo setup Interactive configuration wizard
|
|
121
|
+
zubo setup Interactive configuration wizard (terminal or browser)
|
|
122
122
|
zubo start [--daemon] Start the agent
|
|
123
123
|
zubo stop Stop the background daemon
|
|
124
124
|
zubo status Show runtime status
|
package/package.json
CHANGED
package/site/docs/cli.html
CHANGED
|
@@ -88,9 +88,14 @@
|
|
|
88
88
|
<h2>Core Commands</h2>
|
|
89
89
|
|
|
90
90
|
<h3>zubo setup</h3>
|
|
91
|
-
<p>Interactive 4-step wizard for first-time configuration.
|
|
91
|
+
<p>Interactive 4-step wizard for first-time configuration. When you run it, you'll be asked to choose between two modes:</p>
|
|
92
|
+
<ul>
|
|
93
|
+
<li><strong>Terminal</strong> — Classic step-by-step prompts right in your terminal.</li>
|
|
94
|
+
<li><strong>Dashboard</strong> — Opens a beautiful browser-based wizard at <code>http://localhost:&lt;port&gt;</code> with the same steps, auto-detection of local providers (Ollama, LM Studio), and connection testing. Great for non-technical users or if you prefer a visual UI.</li>
|
|
95
|
+
</ul>
|
|
96
|
+
<p>Both modes walk you through 4 steps:</p>
|
|
92
97
|
<ol>
|
|
93
|
-
<li><strong>LLM Provider</strong> — Choose from
|
|
98
|
+
<li><strong>LLM Provider</strong> — Choose from 15 supported providers (Anthropic, OpenAI, Ollama, Groq, Together, OpenRouter, DeepSeek, xAI, MiniMax, Fireworks, Cerebras, LM Studio, Claude Code, Codex, or any OpenAI-compatible endpoint). Enter your API key and select a model. Optionally add a fallback provider for automatic failover.</li>
|
|
94
99
|
<li><strong>Channels</strong> — Enable any of 6 messaging channels: Telegram, Discord, Slack, WhatsApp, Signal, and Email. WebChat is always enabled by default. Each channel prompts for its required credentials (bot tokens, webhook URLs, etc.).</li>
|
|
95
100
|
<li><strong>Personalization</strong> — Name your agent and optionally describe its personality. The name and personality are used in the system prompt and across all channels.</li>
|
|
96
101
|
<li><strong>Smart Routing</strong> — Optionally set up a fast provider for simple queries (e.g., greetings, factual lookups). Smart routing automatically directs simple messages to a cheaper, faster model while using the primary model for complex tasks — saving 50–80% on costs.</li>
|
package/site/docs/config.html
CHANGED
|
@@ -251,6 +251,98 @@ zubo config get rateLimit # Show all rate limit settings</code></pre>
|
|
|
251
251
|
</tbody>
|
|
252
252
|
</table>
|
|
253
253
|
|
|
254
|
+
<!-- ================================================================ -->
|
|
255
|
+
<h3 id="local-models">Local Models (Ollama & LM Studio)</h3>
|
|
256
|
+
|
|
257
|
+
<p>
|
|
258
|
+
Local models run entirely on your machine — no API keys, no usage fees, and your data never leaves your computer. They're ideal as a primary provider for privacy-focused setups, or as a failover when your internet is down.
|
|
259
|
+
</p>
|
|
260
|
+
|
|
261
|
+
<h4>Ollama</h4>
|
|
262
|
+
|
|
263
|
+
<p><a href="https://ollama.com" target="_blank">Ollama</a> is a lightweight runtime for running open-source models locally. It provides an OpenAI-compatible API out of the box.</p>
|
|
264
|
+
|
|
265
|
+
<p><strong>Installation:</strong></p>
|
|
266
|
+
<pre><code># macOS (Homebrew)
|
|
267
|
+
brew install ollama
|
|
268
|
+
|
|
269
|
+
# Linux
|
|
270
|
+
curl -fsSL https://ollama.com/install.sh | sh
|
|
271
|
+
|
|
272
|
+
# Windows — download from https://ollama.com/download</code></pre>
|
|
273
|
+
|
|
274
|
+
<p><strong>Getting started:</strong></p>
|
|
275
|
+
<pre><code># Start the Ollama server (runs on port 11434)
|
|
276
|
+
ollama serve
|
|
277
|
+
|
|
278
|
+
# Pull a model
|
|
279
|
+
ollama pull llama3.3 # Meta Llama 3.3 (good general-purpose)
|
|
280
|
+
ollama pull mistral # Mistral 7B (fast, lightweight)
|
|
281
|
+
ollama pull qwen2.5 # Qwen 2.5 (strong multilingual)
|
|
282
|
+
ollama pull deepseek-r1 # DeepSeek R1 (strong reasoning)
|
|
283
|
+
ollama pull gemma2 # Google Gemma 2
|
|
284
|
+
|
|
285
|
+
# List downloaded models
|
|
286
|
+
ollama list</code></pre>
|
|
287
|
+
|
|
288
|
+
<p><strong>Zubo configuration:</strong></p>
|
|
289
|
+
<pre><code>"ollama": {
|
|
290
|
+
"baseUrl": "http://localhost:11434/v1",
|
|
291
|
+
"model": "llama3.3",
|
|
292
|
+
"apiKey": "ollama"
|
|
293
|
+
}</code></pre>
|
|
294
|
+
<p>The <code>apiKey</code> field is required by the OpenAI-compatible client but Ollama ignores it — any value works.</p>
|
|
295
|
+
|
|
296
|
+
<h4>LM Studio</h4>
|
|
297
|
+
|
|
298
|
+
<p><a href="https://lmstudio.ai" target="_blank">LM Studio</a> provides a graphical interface for downloading, managing, and running local models. It includes a built-in server that exposes an OpenAI-compatible API.</p>
|
|
299
|
+
|
|
300
|
+
<p><strong>Installation:</strong></p>
|
|
301
|
+
<ol>
|
|
302
|
+
<li>Download LM Studio from <a href="https://lmstudio.ai" target="_blank">lmstudio.ai</a> (macOS, Windows, Linux)</li>
|
|
303
|
+
<li>Open LM Studio and browse the model library to download a model</li>
|
|
304
|
+
<li>Go to the <strong>Local Server</strong> tab in the left sidebar</li>
|
|
305
|
+
<li>Select your model and click <strong>Start Server</strong> — it runs on port 1234 by default</li>
|
|
306
|
+
</ol>
|
|
307
|
+
|
|
308
|
+
<p><strong>Zubo configuration:</strong></p>
|
|
309
|
+
<pre><code>"lmstudio": {
|
|
310
|
+
"baseUrl": "http://localhost:1234/v1",
|
|
311
|
+
"model": "your-model-name",
|
|
312
|
+
"apiKey": "lm-studio"
|
|
313
|
+
}</code></pre>
|
|
314
|
+
|
|
315
|
+
<p><strong>Tip:</strong> Local models work great as a failover. Set a cloud provider as primary and Ollama/LM Studio as the fallback — if your API key runs out or the network drops, Zubo seamlessly falls back to your local model:</p>
|
|
316
|
+
<pre><code>"activeProvider": "anthropic",
|
|
317
|
+
"failover": ["ollama"]</code></pre>
|
|
318
|
+
|
|
319
|
+
<h4>CLI Providers (Claude Code & OpenAI Codex)</h4>
|
|
320
|
+
|
|
321
|
+
<p>These providers use locally installed CLI tools that handle their own authentication — no API key configuration needed in Zubo.</p>
|
|
322
|
+
|
|
323
|
+
<table>
|
|
324
|
+
<thead>
|
|
325
|
+
<tr><th>Provider</th><th>CLI Tool</th><th>Install</th><th>Auth</th></tr>
|
|
326
|
+
</thead>
|
|
327
|
+
<tbody>
|
|
328
|
+
<tr>
|
|
329
|
+
<td><code>claude-code</code></td>
|
|
330
|
+
<td><code>claude</code></td>
|
|
331
|
+
<td><code>npm install -g @anthropic-ai/claude-code</code></td>
|
|
332
|
+
<td>Run <code>claude</code> once to authenticate via browser</td>
|
|
333
|
+
</tr>
|
|
334
|
+
<tr>
|
|
335
|
+
<td><code>codex</code></td>
|
|
336
|
+
<td><code>codex</code></td>
|
|
337
|
+
<td><code>npm install -g @openai/codex</code></td>
|
|
338
|
+
<td>Run <code>codex auth login</code> to authenticate</td>
|
|
339
|
+
</tr>
|
|
340
|
+
</tbody>
|
|
341
|
+
</table>
|
|
342
|
+
|
|
343
|
+
<pre><code>"claude-code": { "model": "claude-sonnet-4-5-20250929" }
|
|
344
|
+
"codex": { "model": "o4-mini" }</code></pre>
|
|
345
|
+
|
|
254
346
|
<h3>Multi-Provider Example with Failover</h3>
|
|
255
347
|
|
|
256
348
|
<pre><code>{
|
package/site/docs/index.html
CHANGED
|
@@ -296,9 +296,11 @@ docker compose up -d</code></pre>
|
|
|
296
296
|
|
|
297
297
|
<pre><code>zubo setup</code></pre>
|
|
298
298
|
|
|
299
|
-
<p>
|
|
299
|
+
<p>You'll be asked to choose between <strong>Terminal</strong> (classic prompts) or <strong>Dashboard</strong> (a browser-based wizard with visual provider cards, auto-detection, and connection testing). Both paths configure the same thing — pick whichever you prefer.</p>
|
|
300
|
+
|
|
301
|
+
<p>The wizard walks you through 4 steps:</p>
|
|
300
302
|
<ul>
|
|
301
|
-
<li><strong>Step 1: LLM Provider</strong> — choose from
|
|
303
|
+
<li><strong>Step 1: LLM Provider</strong> — choose from 15 supported providers (Anthropic, OpenAI, Ollama, Groq, Together, OpenRouter, DeepSeek, xAI, MiniMax, Fireworks, Cerebras, LM Studio, Claude Code, Codex, or any OpenAI-compatible endpoint) and enter your API key. Optionally configure a fallback provider for automatic failover.</li>
|
|
302
304
|
<li><strong>Step 2: Channels</strong> — enable any combination of Telegram, Discord, Slack, WhatsApp, Signal, and Email. Enter the required tokens and credentials for each. Web Chat is always on and requires no configuration.</li>
|
|
303
305
|
<li><strong>Step 3: Personalization</strong> — set your agent's name and describe its personality. This shapes how Zubo talks to you across all channels.</li>
|
|
304
306
|
<li><strong>Step 4: Smart Routing</strong> — optionally configure a fast model (e.g., Groq) for simple queries. Smart routing automatically detects low-complexity messages and routes them to the cheaper, faster model, saving 50–80% on costs without sacrificing quality for complex tasks.</li>
|
package/site/docs/webhooks.html
CHANGED
|
@@ -97,6 +97,23 @@
|
|
|
97
97
|
Webhooks let external services push events to Zubo in real time. When a webhook receives an event, Zubo processes the payload through a configurable prompt template and takes action automatically. This turns Zubo into a reactive automation hub — connect GitHub for push notifications, Stripe for payment events, CI/CD pipelines for build results, or any service that can send HTTP POST requests.
|
|
98
98
|
</p>
|
|
99
99
|
|
|
100
|
+
<!-- ================================================================ -->
|
|
101
|
+
<h2 id="network-requirements">Network Requirements</h2>
|
|
102
|
+
|
|
103
|
+
<div style="background: rgba(245,166,35,0.08); border-left: 3px solid #f5a623; padding: 14px 18px; border-radius: 6px; margin: 16px 0; font-size: 14px; line-height: 1.6;">
|
|
104
|
+
<strong style="color: #f5a623;">Localhost won't work for external services.</strong> Webhook URLs like <code>http://localhost:61939/api/webhook/...</code> are only reachable from your own machine. External services (GitHub, Stripe, etc.) cannot send events to localhost.
|
|
105
|
+
</div>
|
|
106
|
+
|
|
107
|
+
<p>To receive webhooks from external services, you need a publicly reachable URL. Options:</p>
|
|
108
|
+
|
|
109
|
+
<ul>
|
|
110
|
+
<li><strong>ngrok</strong> — Run <code>ngrok http 61939</code> to get a public HTTPS URL that tunnels to your local Zubo instance. Free tier available.</li>
|
|
111
|
+
<li><strong>Cloudflare Tunnel</strong> — Run <code>cloudflared tunnel --url http://localhost:61939</code> for a free, fast tunnel.</li>
|
|
112
|
+
<li><strong>Deploy Zubo</strong> — Run Zubo on a VPS or cloud server with a public IP and domain name.</li>
|
|
113
|
+
</ul>
|
|
114
|
+
|
|
115
|
+
<p>Once you have a public URL, configure the external service to send events to <code>https://your-public-url/api/webhook/&lt;webhook-id&gt;</code>.</p>
|
|
116
|
+
|
|
100
117
|
<!-- ================================================================ -->
|
|
101
118
|
<h2 id="creating-webhooks">Creating Webhooks</h2>
|
|
102
119
|
|
package/src/agent/compaction.ts
CHANGED
|
@@ -35,19 +35,35 @@ export function compactMessages(
|
|
|
35
35
|
contextWindow: maxTokens,
|
|
36
36
|
});
|
|
37
37
|
|
|
38
|
+
// Check if first message is a summary (preserve it during truncation)
|
|
39
|
+
const hasSummary =
|
|
40
|
+
messages.length > 0 &&
|
|
41
|
+
messages[0].role === "user" &&
|
|
42
|
+
typeof messages[0].content !== "string" &&
|
|
43
|
+
Array.isArray(messages[0].content) &&
|
|
44
|
+
messages[0].content.some(
|
|
45
|
+
(b: any) =>
|
|
46
|
+
b.type === "text" &&
|
|
47
|
+
typeof b.text === "string" &&
|
|
48
|
+
b.text.includes("Previous conversation summary:")
|
|
49
|
+
);
|
|
50
|
+
|
|
38
51
|
// Find the start index where cumulative remaining tokens fit under target
|
|
39
|
-
|
|
52
|
+
// Skip index 0 if it's a summary message — we want to keep it
|
|
53
|
+
let startIdx = hasSummary ? 1 : 0;
|
|
40
54
|
while (startIdx < messages.length - 2 && tokens > target) {
|
|
41
55
|
tokens -= costs[startIdx];
|
|
42
56
|
startIdx++;
|
|
43
57
|
}
|
|
44
58
|
|
|
45
|
-
// Ensure first message is from user (Claude API requirement)
|
|
59
|
+
// Ensure first kept message (after summary) is from user (Claude API requirement)
|
|
46
60
|
while (startIdx < messages.length && messages[startIdx].role !== "user") {
|
|
47
61
|
startIdx++;
|
|
48
62
|
}
|
|
49
63
|
|
|
50
|
-
const compacted =
|
|
51
|
-
|
|
64
|
+
const compacted = hasSummary
|
|
65
|
+
? [messages[0], ...messages.slice(startIdx)]
|
|
66
|
+
: messages.slice(startIdx);
|
|
67
|
+
logger.info("Compaction done", { remainingMessages: compacted.length, preservedSummary: hasSummary });
|
|
52
68
|
return compacted;
|
|
53
69
|
}
|
package/src/agent/history.ts
CHANGED
|
@@ -14,13 +14,18 @@ export function recordMessage(
|
|
|
14
14
|
): void {
|
|
15
15
|
try {
|
|
16
16
|
const db = getDb();
|
|
17
|
+
// Ensure thread exists before inserting message
|
|
18
|
+
db.run(
|
|
19
|
+
"INSERT OR IGNORE INTO threads (id, title, channel, message_count, created_at, updated_at) VALUES (?, ?, ?, 0, datetime('now'), datetime('now'))",
|
|
20
|
+
[threadId, threadId, channel ?? "webchat"]
|
|
21
|
+
);
|
|
17
22
|
db.run(
|
|
18
23
|
"INSERT INTO conversation_messages (thread_id, role, content, channel, timestamp) VALUES (?, ?, ?, ?, datetime('now'))",
|
|
19
24
|
[threadId, role, content, channel ?? null]
|
|
20
25
|
);
|
|
21
26
|
db.run(
|
|
22
|
-
"UPDATE threads SET message_count = message_count + 1, updated_at = datetime('now') WHERE id = ?",
|
|
23
|
-
[threadId]
|
|
27
|
+
"UPDATE threads SET message_count = message_count + 1, updated_at = datetime('now'), channel = COALESCE(channel, ?) WHERE id = ?",
|
|
28
|
+
[channel ?? "webchat", threadId]
|
|
24
29
|
);
|
|
25
30
|
db.run(
|
|
26
31
|
"INSERT INTO conversation_search (content, thread_id, role) VALUES (?, ?, ?)",
|
package/src/agent/loop.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import type { LlmProvider, LlmMessage, LlmContentBlock, LlmResponse } from "../llm/provider";
|
|
2
2
|
import { getAllToolDefs } from "../tools/registry";
|
|
3
3
|
import { executeTool } from "../tools/executor";
|
|
4
|
-
import { appendMessage } from "./session";
|
|
4
|
+
import { appendMessage, loadSession } from "./session";
|
|
5
5
|
import { assembleContext } from "./context";
|
|
6
6
|
import { compactMessages } from "./compaction";
|
|
7
|
+
import { maybeCompactSession } from "./summarizer";
|
|
7
8
|
import { getDb } from "../db/connection";
|
|
8
9
|
import { logger } from "../util/logger";
|
|
9
10
|
|
|
@@ -38,6 +39,14 @@ function resolveOptions(memoriesOrOptions: string | AgentLoopOptions): AgentLoop
|
|
|
38
39
|
: memoriesOrOptions;
|
|
39
40
|
}
|
|
40
41
|
|
|
42
|
+
/** Detect simple greetings/chat that don't need tool definitions in context. */
|
|
43
|
+
function looksConversational(text: string): boolean {
|
|
44
|
+
const t = text.trim().toLowerCase();
|
|
45
|
+
if (t.split(/\s+/).length > 8) return false; // longer messages likely need tools
|
|
46
|
+
const greetings = /^(h(ello|i|ey|owdy|ola)|yo|sup|good\s*(morning|afternoon|evening|night)|what'?s\s*up|gm|thanks|thank\s*you|ok(ay)?|bye|see\s*ya|cool|nice|wow|lol|haha)\b/;
|
|
47
|
+
return greetings.test(t);
|
|
48
|
+
}
|
|
49
|
+
|
|
41
50
|
async function prepareLoop(
|
|
42
51
|
llm: LlmProvider,
|
|
43
52
|
sessionId: string,
|
|
@@ -67,23 +76,21 @@ async function prepareLoop(
|
|
|
67
76
|
? (memories ? `${memories}\n\nKnown context:\n${kgContext}` : `Known context:\n${kgContext}`)
|
|
68
77
|
: memories;
|
|
69
78
|
|
|
70
|
-
// Assemble context
|
|
79
|
+
// Assemble context (uses static import — no dynamic import overhead)
|
|
71
80
|
const ctx = options.systemPromptOverride
|
|
72
|
-
? { system: options.systemPromptOverride, messages:
|
|
81
|
+
? { system: options.systemPromptOverride, messages: loadSession(sessionId, 50) }
|
|
73
82
|
: assembleContext(sessionId, 50, fullMemories);
|
|
74
83
|
|
|
75
|
-
if (options.systemPromptOverride) {
|
|
76
|
-
const { loadSession } = await import("./session");
|
|
77
|
-
ctx.messages = loadSession(sessionId, 50);
|
|
78
|
-
}
|
|
79
|
-
|
|
80
84
|
const messages = compactMessages(ctx.messages, llm.contextWindow);
|
|
81
85
|
|
|
82
|
-
// Filter tools
|
|
86
|
+
// Filter tools — skip for simple conversational messages to reduce context
|
|
87
|
+
// for small models. Tools are still available on subsequent rounds.
|
|
83
88
|
let tools = getAllToolDefs();
|
|
84
89
|
if (options.allowedTools) {
|
|
85
90
|
const allowed = new Set(options.allowedTools);
|
|
86
91
|
tools = tools.filter((t) => allowed.has(t.name));
|
|
92
|
+
} else if (looksConversational(userMessage)) {
|
|
93
|
+
tools = [];
|
|
87
94
|
}
|
|
88
95
|
|
|
89
96
|
return { system: ctx.system, messages, tools };
|
|
@@ -121,21 +128,25 @@ async function executeToolBlocks(
|
|
|
121
128
|
onToolStart?: (name: string, id: string) => void,
|
|
122
129
|
onToolEnd?: (name: string, id: string) => void
|
|
123
130
|
): Promise<{ results: LlmContentBlock[]; count: number }> {
|
|
124
|
-
|
|
125
|
-
let count = 0;
|
|
131
|
+
// Signal all tool starts immediately
|
|
126
132
|
for (const block of blocks) {
|
|
127
|
-
count++;
|
|
128
133
|
onToolStart?.(block.name, block.id);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Execute all tools in parallel
|
|
137
|
+
const resultPromises = blocks.map(async (block) => {
|
|
129
138
|
const result = await executeTool(block.name, block.id, block.input, allowedTools);
|
|
130
|
-
|
|
131
|
-
|
|
139
|
+
onToolEnd?.(block.name, block.id);
|
|
140
|
+
return {
|
|
141
|
+
type: "tool_result" as const,
|
|
132
142
|
tool_use_id: result.tool_use_id,
|
|
133
143
|
content: result.content,
|
|
134
144
|
is_error: result.is_error,
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
145
|
+
};
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
const results: LlmContentBlock[] = await Promise.all(resultPromises);
|
|
149
|
+
return { results, count: blocks.length };
|
|
139
150
|
}
|
|
140
151
|
|
|
141
152
|
function persistToolRound(
|
|
@@ -169,6 +180,23 @@ function finishLoop(sessionId: string, reply: string): void {
|
|
|
169
180
|
|
|
170
181
|
const MAX_ROUNDS_FALLBACK = "I've completed several tool operations. Let me know if you need anything else.";
|
|
171
182
|
|
|
183
|
+
// --- Post-loop summarization ---
|
|
184
|
+
|
|
185
|
+
const compactionInProgress = new Set<string>();
|
|
186
|
+
|
|
187
|
+
function triggerPostLoopCompaction(llm: LlmProvider, sessionId: string): void {
|
|
188
|
+
if (compactionInProgress.has(sessionId)) return;
|
|
189
|
+
compactionInProgress.add(sessionId);
|
|
190
|
+
|
|
191
|
+
maybeCompactSession(llm, sessionId)
|
|
192
|
+
.catch((err) => {
|
|
193
|
+
logger.error("Post-loop compaction failed", { sessionId, error: String(err) });
|
|
194
|
+
})
|
|
195
|
+
.finally(() => {
|
|
196
|
+
compactionInProgress.delete(sessionId);
|
|
197
|
+
});
|
|
198
|
+
}
|
|
199
|
+
|
|
172
200
|
// --- Public API ---
|
|
173
201
|
|
|
174
202
|
export async function agentLoop(
|
|
@@ -217,6 +245,7 @@ export async function agentLoop(
|
|
|
217
245
|
.map((b) => b.text ?? "")
|
|
218
246
|
.join("\n") || "";
|
|
219
247
|
finishLoop(sessionId, reply);
|
|
248
|
+
triggerPostLoopCompaction(llm, sessionId);
|
|
220
249
|
return { reply, toolCalls: totalToolCalls };
|
|
221
250
|
}
|
|
222
251
|
|
|
@@ -227,6 +256,7 @@ export async function agentLoop(
|
|
|
227
256
|
}
|
|
228
257
|
|
|
229
258
|
finishLoop(sessionId, MAX_ROUNDS_FALLBACK);
|
|
259
|
+
triggerPostLoopCompaction(llm, sessionId);
|
|
230
260
|
return { reply: MAX_ROUNDS_FALLBACK, toolCalls: totalToolCalls };
|
|
231
261
|
}
|
|
232
262
|
|
|
@@ -309,6 +339,7 @@ export async function agentLoopStream(
|
|
|
309
339
|
.join("\n") || roundText;
|
|
310
340
|
fullReply += reply;
|
|
311
341
|
finishLoop(sessionId, fullReply);
|
|
342
|
+
triggerPostLoopCompaction(llm, sessionId);
|
|
312
343
|
callbacks.onDone({ reply: fullReply, toolCalls: totalToolCalls });
|
|
313
344
|
return;
|
|
314
345
|
}
|
|
@@ -325,6 +356,7 @@ export async function agentLoopStream(
|
|
|
325
356
|
}
|
|
326
357
|
|
|
327
358
|
finishLoop(sessionId, MAX_ROUNDS_FALLBACK);
|
|
359
|
+
triggerPostLoopCompaction(llm, sessionId);
|
|
328
360
|
callbacks.onDone({ reply: MAX_ROUNDS_FALLBACK, toolCalls: totalToolCalls });
|
|
329
361
|
} catch (err: any) {
|
|
330
362
|
callbacks.onError(err);
|
package/src/agent/prompts.ts
CHANGED
|
@@ -5,6 +5,8 @@ const DEFAULT_PERSONALITY = `You are Zubo, a personal AI agent. You are friendly
|
|
|
5
5
|
|
|
6
6
|
## How you behave
|
|
7
7
|
|
|
8
|
+
**Be natural.** You are a real conversational partner. When the user greets you, greet them back warmly. When they chat casually, chat back. Not everything requires a tool call or an action — sometimes the right response is just a friendly reply.
|
|
9
|
+
|
|
8
10
|
**Act first.** When the user asks you to do something, do it immediately. Don't describe what you could do — use your tools and make it happen. Don't ask for permission to do what the user just asked you to do (e.g. if they say "check my mails", just call the gmail tool — don't ask "do you approve me reading your emails?"). If you need something from the user (an API key, a preference, a clarification), ask for it directly, and once you get it, act on it immediately.
|
|
9
11
|
|
|
10
12
|
**Be concise.** Answer in the fewest words that fully address the question. No filler, no preamble. Long explanations only when explicitly asked.
|
package/src/agent/session.ts
CHANGED
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
import { join } from "path";
|
|
2
|
+
import { randomBytes } from "crypto";
|
|
2
3
|
import { paths } from "../config/paths";
|
|
3
|
-
import { existsSync, appendFileSync, readFileSync, statSync, openSync, readSync, closeSync } from "fs";
|
|
4
|
+
import { existsSync, appendFileSync, readFileSync, writeFileSync, statSync, openSync, readSync, closeSync, renameSync } from "fs";
|
|
5
|
+
import { tmpdir } from "os";
|
|
4
6
|
import type { LlmMessage } from "../llm/provider";
|
|
5
7
|
|
|
6
8
|
export interface SessionMessage {
|
|
7
9
|
role: "user" | "assistant";
|
|
8
10
|
content: any;
|
|
9
11
|
timestamp: string;
|
|
12
|
+
__summary?: true;
|
|
13
|
+
__summarizedCount?: number;
|
|
10
14
|
}
|
|
11
15
|
|
|
12
16
|
function sessionPath(sessionId: string): string {
|
|
@@ -90,13 +94,76 @@ export function loadSession(
|
|
|
90
94
|
if (!existsSync(path)) return [];
|
|
91
95
|
|
|
92
96
|
const recent = readTailLines(path, maxTurns);
|
|
97
|
+
if (recent.length === 0) return [];
|
|
93
98
|
|
|
94
|
-
|
|
99
|
+
const messages = recent.map((line) => {
|
|
95
100
|
const msg: SessionMessage = JSON.parse(line);
|
|
96
101
|
return { role: msg.role, content: msg.content };
|
|
97
102
|
});
|
|
103
|
+
|
|
104
|
+
// If the tail-read missed a summary at line 0, prepend it.
|
|
105
|
+
// After summarization the file starts with a summary message — we must
|
|
106
|
+
// always include it or the whole point of summarization is lost.
|
|
107
|
+
const firstReturned = recent[0];
|
|
108
|
+
if (!firstReturned.includes('"__summary":true')) {
|
|
109
|
+
// We might have tail-read past the summary. Check line 0.
|
|
110
|
+
try {
|
|
111
|
+
const fd = openSync(path, "r");
|
|
112
|
+
try {
|
|
113
|
+
const buf = Buffer.alloc(4096);
|
|
114
|
+
const bytesRead = readSync(fd, buf, 0, 4096, 0);
|
|
115
|
+
const firstLine = buf.toString("utf-8", 0, bytesRead).split("\n")[0];
|
|
116
|
+
if (firstLine && firstLine.includes('"__summary":true')) {
|
|
117
|
+
const summaryMsg: SessionMessage = JSON.parse(firstLine);
|
|
118
|
+
messages.unshift({ role: summaryMsg.role, content: summaryMsg.content });
|
|
119
|
+
}
|
|
120
|
+
} finally {
|
|
121
|
+
closeSync(fd);
|
|
122
|
+
}
|
|
123
|
+
} catch {
|
|
124
|
+
// If reading line 0 fails, proceed without it
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return messages;
|
|
98
129
|
}
|
|
99
130
|
|
|
100
131
|
export function sessionExists(sessionId: string): boolean {
|
|
101
132
|
return existsSync(sessionPath(sessionId));
|
|
102
133
|
}
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Read the entire session file (not tail-limited).
|
|
137
|
+
* Used by the summarizer to make decisions about compaction.
|
|
138
|
+
*/
|
|
139
|
+
export function loadSessionFull(sessionId: string): SessionMessage[] {
|
|
140
|
+
const path = sessionPath(sessionId);
|
|
141
|
+
if (!existsSync(path)) return [];
|
|
142
|
+
|
|
143
|
+
const raw = readFileSync(path, "utf-8").trim();
|
|
144
|
+
if (!raw) return [];
|
|
145
|
+
|
|
146
|
+
const messages: SessionMessage[] = [];
|
|
147
|
+
for (const line of raw.split("\n")) {
|
|
148
|
+
if (!line) continue;
|
|
149
|
+
try {
|
|
150
|
+
messages.push(JSON.parse(line) as SessionMessage);
|
|
151
|
+
} catch {
|
|
152
|
+
// Skip corrupted lines — don't crash summarization
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
return messages;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
/**
|
|
159
|
+
* Atomically rewrite a session file with new messages.
|
|
160
|
+
* Writes to a temp file first, then renames (atomic on POSIX).
|
|
161
|
+
*/
|
|
162
|
+
export function rewriteSession(sessionId: string, messages: SessionMessage[]): void {
|
|
163
|
+
const path = sessionPath(sessionId);
|
|
164
|
+
const tmpPath = join(tmpdir(), `zubo-session-${sessionId}-${Date.now()}-${randomBytes(4).toString("hex")}.tmp`);
|
|
165
|
+
|
|
166
|
+
const data = messages.map((m) => JSON.stringify(m)).join("\n") + "\n";
|
|
167
|
+
writeFileSync(tmpPath, data);
|
|
168
|
+
renameSync(tmpPath, path);
|
|
169
|
+
}
|