zubo 0.1.19 → 0.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/site/docs/agents.html +2 -2
- package/site/docs/api.html +2 -2
- package/site/docs/cli.html +7 -2
- package/site/docs/config.html +92 -0
- package/site/docs/index.html +8 -6
- package/site/docs/integrations.html +3 -3
- package/site/docs/marketplace.html +9 -9
- package/site/docs/security.html +4 -4
- package/site/docs/skills.html +1 -1
- package/site/docs/webhooks.html +17 -0
- package/site/index.html +4 -4
- package/site/install.sh +11 -5
- package/src/agent/compaction.ts +20 -4
- package/src/agent/history.ts +7 -2
- package/src/agent/loop.ts +50 -18
- package/src/agent/prompts.ts +2 -0
- package/src/agent/session.ts +69 -2
- package/src/agent/summarizer.ts +223 -0
- package/src/channels/dashboard.html.ts +98 -56
- package/src/channels/telegram.ts +10 -1
- package/src/channels/webchat.ts +40 -8
- package/src/llm/claude-code.ts +1 -2
- package/src/llm/codex.ts +3 -3
- package/src/llm/factory.ts +81 -2
- package/src/llm/failover.ts +59 -4
- package/src/llm/smart-router.ts +14 -6
- package/src/memory/knowledge-graph.ts +1 -1
- package/src/memory/vector-index.ts +1 -1
- package/src/scheduler/visual-workflows.ts +1 -1
- package/src/setup-web.html.ts +1371 -0
- package/src/setup-web.ts +165 -0
- package/src/setup.ts +266 -15
- package/src/start.ts +12 -2
- package/src/tools/builtin/config-update.ts +18 -1
- package/src/tools/executor.ts +2 -2
- package/src/tools/mcp-registry.ts +12 -6
- package/src/tools/permissions.ts +2 -2
package/src/agent/loop.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import type { LlmProvider, LlmMessage, LlmContentBlock, LlmResponse } from "../llm/provider";
|
|
2
2
|
import { getAllToolDefs } from "../tools/registry";
|
|
3
3
|
import { executeTool } from "../tools/executor";
|
|
4
|
-
import { appendMessage } from "./session";
|
|
4
|
+
import { appendMessage, loadSession } from "./session";
|
|
5
5
|
import { assembleContext } from "./context";
|
|
6
6
|
import { compactMessages } from "./compaction";
|
|
7
|
+
import { maybeCompactSession } from "./summarizer";
|
|
7
8
|
import { getDb } from "../db/connection";
|
|
8
9
|
import { logger } from "../util/logger";
|
|
9
10
|
|
|
@@ -38,6 +39,14 @@ function resolveOptions(memoriesOrOptions: string | AgentLoopOptions): AgentLoop
|
|
|
38
39
|
: memoriesOrOptions;
|
|
39
40
|
}
|
|
40
41
|
|
|
42
|
+
/** Greeting / acknowledgement phrases that count as pure small talk. */
const CONVERSATIONAL_PHRASES =
  "h(?:ello|i|ey|owdy|ola)|yo|sup|good\\s*(?:morning|afternoon|evening|night)|what'?s\\s*up|gm|thanks|thank\\s*you|ok(?:ay)?|bye|see\\s*ya|cool|nice|wow|lol|haha";

/** Harmless words that may follow a greeting without turning it into a request. */
const CONVERSATIONAL_FILLERS = "there|all|everyone|again|zubo|so\\s+much|a\\s+lot|very\\s+much";

/**
 * Matches a message made up ONLY of greeting phrases (plus fillers and
 * punctuation). Anchored at both ends so that "hi, can you check my email"
 * is not mistaken for small talk.
 */
const CONVERSATIONAL_ONLY = new RegExp(
  `^(?:${CONVERSATIONAL_PHRASES})(?:[\\s,.!?]+(?:${CONVERSATIONAL_PHRASES}|${CONVERSATIONAL_FILLERS}))*[\\s,.!?]*$`
);

/**
 * Detect simple greetings/chat that don't need tool definitions in context.
 *
 * The caller drops every tool definition when this returns true, so a false
 * positive leaves the model unable to act on a real request. The check is
 * therefore deliberately strict: the entire message must be small talk, not
 * merely start with a greeting word.
 *
 * @param text Raw user message.
 * @returns true only when the whole message is a greeting/acknowledgement.
 */
function looksConversational(text: string): boolean {
  const t = text.trim().toLowerCase();
  if (t.split(/\s+/).length > 8) return false; // longer messages likely need tools
  return CONVERSATIONAL_ONLY.test(t);
}
|
|
49
|
+
|
|
41
50
|
async function prepareLoop(
|
|
42
51
|
llm: LlmProvider,
|
|
43
52
|
sessionId: string,
|
|
@@ -67,23 +76,21 @@ async function prepareLoop(
|
|
|
67
76
|
? (memories ? `${memories}\n\nKnown context:\n${kgContext}` : `Known context:\n${kgContext}`)
|
|
68
77
|
: memories;
|
|
69
78
|
|
|
70
|
-
// Assemble context
|
|
79
|
+
// Assemble context (uses static import — no dynamic import overhead)
|
|
71
80
|
const ctx = options.systemPromptOverride
|
|
72
|
-
? { system: options.systemPromptOverride, messages:
|
|
81
|
+
? { system: options.systemPromptOverride, messages: loadSession(sessionId, 50) }
|
|
73
82
|
: assembleContext(sessionId, 50, fullMemories);
|
|
74
83
|
|
|
75
|
-
if (options.systemPromptOverride) {
|
|
76
|
-
const { loadSession } = await import("./session");
|
|
77
|
-
ctx.messages = loadSession(sessionId, 50);
|
|
78
|
-
}
|
|
79
|
-
|
|
80
84
|
const messages = compactMessages(ctx.messages, llm.contextWindow);
|
|
81
85
|
|
|
82
|
-
// Filter tools
|
|
86
|
+
// Filter tools — skip for simple conversational messages to reduce context
|
|
87
|
+
// for small models. Tools are still available on subsequent rounds.
|
|
83
88
|
let tools = getAllToolDefs();
|
|
84
89
|
if (options.allowedTools) {
|
|
85
90
|
const allowed = new Set(options.allowedTools);
|
|
86
91
|
tools = tools.filter((t) => allowed.has(t.name));
|
|
92
|
+
} else if (looksConversational(userMessage)) {
|
|
93
|
+
tools = [];
|
|
87
94
|
}
|
|
88
95
|
|
|
89
96
|
return { system: ctx.system, messages, tools };
|
|
@@ -121,21 +128,25 @@ async function executeToolBlocks(
|
|
|
121
128
|
onToolStart?: (name: string, id: string) => void,
|
|
122
129
|
onToolEnd?: (name: string, id: string) => void
|
|
123
130
|
): Promise<{ results: LlmContentBlock[]; count: number }> {
|
|
124
|
-
|
|
125
|
-
let count = 0;
|
|
131
|
+
// Signal all tool starts immediately
|
|
126
132
|
for (const block of blocks) {
|
|
127
|
-
count++;
|
|
128
133
|
onToolStart?.(block.name, block.id);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Execute all tools in parallel
|
|
137
|
+
const resultPromises = blocks.map(async (block) => {
|
|
129
138
|
const result = await executeTool(block.name, block.id, block.input, allowedTools);
|
|
130
|
-
|
|
131
|
-
|
|
139
|
+
onToolEnd?.(block.name, block.id);
|
|
140
|
+
return {
|
|
141
|
+
type: "tool_result" as const,
|
|
132
142
|
tool_use_id: result.tool_use_id,
|
|
133
143
|
content: result.content,
|
|
134
144
|
is_error: result.is_error,
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
145
|
+
};
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
const results: LlmContentBlock[] = await Promise.all(resultPromises);
|
|
149
|
+
return { results, count: blocks.length };
|
|
139
150
|
}
|
|
140
151
|
|
|
141
152
|
function persistToolRound(
|
|
@@ -169,6 +180,23 @@ function finishLoop(sessionId: string, reply: string): void {
|
|
|
169
180
|
|
|
170
181
|
const MAX_ROUNDS_FALLBACK = "I've completed several tool operations. Let me know if you need anything else.";
|
|
171
182
|
|
|
183
|
+
// --- Post-loop summarization ---
|
|
184
|
+
|
|
185
|
+
/** Ids of sessions that currently have a background compaction running. */
const compactionInProgress = new Set<string>();

/**
 * Start session compaction in the background without awaiting it.
 *
 * At most one compaction runs per session id: a call made while one is still
 * pending for the same session returns immediately. Failures are logged and
 * never propagate to the caller.
 */
function triggerPostLoopCompaction(llm: LlmProvider, sessionId: string): void {
  const alreadyRunning = compactionInProgress.has(sessionId);
  if (alreadyRunning) return;

  compactionInProgress.add(sessionId);

  const reportFailure = (err: unknown): void => {
    logger.error("Post-loop compaction failed", { sessionId, error: String(err) });
  };
  // Always release the slot, whether the compaction succeeded or failed.
  const releaseSlot = (): void => {
    compactionInProgress.delete(sessionId);
  };

  void maybeCompactSession(llm, sessionId).catch(reportFailure).finally(releaseSlot);
}
|
|
199
|
+
|
|
172
200
|
// --- Public API ---
|
|
173
201
|
|
|
174
202
|
export async function agentLoop(
|
|
@@ -217,6 +245,7 @@ export async function agentLoop(
|
|
|
217
245
|
.map((b) => b.text ?? "")
|
|
218
246
|
.join("\n") || "";
|
|
219
247
|
finishLoop(sessionId, reply);
|
|
248
|
+
triggerPostLoopCompaction(llm, sessionId);
|
|
220
249
|
return { reply, toolCalls: totalToolCalls };
|
|
221
250
|
}
|
|
222
251
|
|
|
@@ -227,6 +256,7 @@ export async function agentLoop(
|
|
|
227
256
|
}
|
|
228
257
|
|
|
229
258
|
finishLoop(sessionId, MAX_ROUNDS_FALLBACK);
|
|
259
|
+
triggerPostLoopCompaction(llm, sessionId);
|
|
230
260
|
return { reply: MAX_ROUNDS_FALLBACK, toolCalls: totalToolCalls };
|
|
231
261
|
}
|
|
232
262
|
|
|
@@ -309,6 +339,7 @@ export async function agentLoopStream(
|
|
|
309
339
|
.join("\n") || roundText;
|
|
310
340
|
fullReply += reply;
|
|
311
341
|
finishLoop(sessionId, fullReply);
|
|
342
|
+
triggerPostLoopCompaction(llm, sessionId);
|
|
312
343
|
callbacks.onDone({ reply: fullReply, toolCalls: totalToolCalls });
|
|
313
344
|
return;
|
|
314
345
|
}
|
|
@@ -325,6 +356,7 @@ export async function agentLoopStream(
|
|
|
325
356
|
}
|
|
326
357
|
|
|
327
358
|
finishLoop(sessionId, MAX_ROUNDS_FALLBACK);
|
|
359
|
+
triggerPostLoopCompaction(llm, sessionId);
|
|
328
360
|
callbacks.onDone({ reply: MAX_ROUNDS_FALLBACK, toolCalls: totalToolCalls });
|
|
329
361
|
} catch (err: any) {
|
|
330
362
|
callbacks.onError(err);
|
package/src/agent/prompts.ts
CHANGED
|
@@ -5,6 +5,8 @@ const DEFAULT_PERSONALITY = `You are Zubo, a personal AI agent. You are friendly
|
|
|
5
5
|
|
|
6
6
|
## How you behave
|
|
7
7
|
|
|
8
|
+
**Be natural.** You are a real conversational partner. When the user greets you, greet them back warmly. When they chat casually, chat back. Not everything requires a tool call or an action — sometimes the right response is just a friendly reply.
|
|
9
|
+
|
|
8
10
|
**Act first.** When the user asks you to do something, do it immediately. Don't describe what you could do — use your tools and make it happen. Don't ask for permission to do what the user just asked you to do (e.g. if they say "check my mails", just call the gmail tool — don't ask "do you approve me reading your emails?"). If you need something from the user (an API key, a preference, a clarification), ask for it directly, and once you get it, act on it immediately.
|
|
9
11
|
|
|
10
12
|
**Be concise.** Answer in the fewest words that fully address the question. No filler, no preamble. Long explanations only when explicitly asked.
|
package/src/agent/session.ts
CHANGED
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
import { join } from "path";
|
|
2
|
+
import { randomBytes } from "crypto";
|
|
2
3
|
import { paths } from "../config/paths";
|
|
3
|
-
import { existsSync, appendFileSync, readFileSync, statSync, openSync, readSync, closeSync } from "fs";
|
|
4
|
+
import { existsSync, appendFileSync, readFileSync, writeFileSync, statSync, openSync, readSync, closeSync, renameSync } from "fs";
|
|
5
|
+
import { tmpdir } from "os";
|
|
4
6
|
import type { LlmMessage } from "../llm/provider";
|
|
5
7
|
|
|
6
8
|
/** One persisted line of a session file. */
export interface SessionMessage {
  /** Speaker of the message. */
  role: "user" | "assistant";
  /** Message payload: either a plain string or an array of content blocks. */
  content: any;
  /** Creation time; the summarizer writes ISO-8601 strings here. */
  timestamp: string;
  /** Present (and true) only on a synthetic summary message written by the summarizer. */
  __summary?: true;
  /** Number of messages the summary replaced in that summarization pass. */
  __summarizedCount?: number;
}
|
|
11
15
|
|
|
12
16
|
function sessionPath(sessionId: string): string {
|
|
@@ -90,13 +94,76 @@ export function loadSession(
|
|
|
90
94
|
if (!existsSync(path)) return [];
|
|
91
95
|
|
|
92
96
|
const recent = readTailLines(path, maxTurns);
|
|
97
|
+
if (recent.length === 0) return [];
|
|
93
98
|
|
|
94
|
-
|
|
99
|
+
const messages = recent.map((line) => {
|
|
95
100
|
const msg: SessionMessage = JSON.parse(line);
|
|
96
101
|
return { role: msg.role, content: msg.content };
|
|
97
102
|
});
|
|
103
|
+
|
|
104
|
+
// If the tail-read missed a summary at line 0, prepend it.
|
|
105
|
+
// After summarization the file starts with a summary message — we must
|
|
106
|
+
// always include it or the whole point of summarization is lost.
|
|
107
|
+
const firstReturned = recent[0];
|
|
108
|
+
if (!firstReturned.includes('"__summary":true')) {
|
|
109
|
+
// We might have tail-read past the summary. Check line 0.
|
|
110
|
+
try {
|
|
111
|
+
const fd = openSync(path, "r");
|
|
112
|
+
try {
|
|
113
|
+
const buf = Buffer.alloc(4096);
|
|
114
|
+
const bytesRead = readSync(fd, buf, 0, 4096, 0);
|
|
115
|
+
const firstLine = buf.toString("utf-8", 0, bytesRead).split("\n")[0];
|
|
116
|
+
if (firstLine && firstLine.includes('"__summary":true')) {
|
|
117
|
+
const summaryMsg: SessionMessage = JSON.parse(firstLine);
|
|
118
|
+
messages.unshift({ role: summaryMsg.role, content: summaryMsg.content });
|
|
119
|
+
}
|
|
120
|
+
} finally {
|
|
121
|
+
closeSync(fd);
|
|
122
|
+
}
|
|
123
|
+
} catch {
|
|
124
|
+
// If reading line 0 fails, proceed without it
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return messages;
|
|
98
129
|
}
|
|
99
130
|
|
|
100
131
|
/** Report whether a session file is present on disk for the given id. */
export function sessionExists(sessionId: string): boolean {
  const file = sessionPath(sessionId);
  return existsSync(file);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Load every message stored for a session, with no tail limit.
 *
 * The summarizer uses this to decide whether (and what) to compact.
 * Lines that are empty or fail to parse as JSON are silently dropped so a
 * single corrupted entry cannot abort summarization.
 *
 * @returns The parsed messages in file order, or [] when the file is missing or blank.
 */
export function loadSessionFull(sessionId: string): SessionMessage[] {
  const file = sessionPath(sessionId);
  if (!existsSync(file)) return [];

  const body = readFileSync(file, "utf-8").trim();
  if (body.length === 0) return [];

  return body.split("\n").reduce<SessionMessage[]>((parsed, line) => {
    if (line.length > 0) {
      try {
        parsed.push(JSON.parse(line) as SessionMessage);
      } catch {
        // Corrupted line: ignore it and keep going.
      }
    }
    return parsed;
  }, []);
}
|
|
157
|
+
|
|
158
|
+
/**
 * Atomically rewrite a session file with new messages.
 *
 * The data is first written to a temporary file and then renamed over the
 * session file. The temporary file is created NEXT TO the destination rather
 * than in the OS temp directory: a rename is only atomic (and only succeeds
 * at all, instead of failing with EXDEV) when source and target live on the
 * same filesystem, and the OS temp directory is frequently a separate mount.
 *
 * NOTE(review): a `<session file>.<stamp>.tmp` sibling exists briefly in the
 * sessions directory during the rewrite — confirm nothing enumerates that
 * directory and treats every entry as a session.
 *
 * @param sessionId Session whose file is replaced.
 * @param messages Full new contents, one JSON object per line.
 * @throws Whatever `writeFileSync` / `renameSync` throw on I/O failure.
 */
export function rewriteSession(sessionId: string, messages: SessionMessage[]): void {
  const path = sessionPath(sessionId);
  // Timestamp + random suffix keeps concurrent rewrites from sharing a temp file.
  const uniqueSuffix = `${Date.now()}-${randomBytes(4).toString("hex")}`;
  const tmpPath = `${path}.${uniqueSuffix}.tmp`;

  const data = messages.map((m) => JSON.stringify(m)).join("\n") + "\n";
  writeFileSync(tmpPath, data);
  renameSync(tmpPath, path);
}
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import type { LlmProvider, LlmMessage, LlmContentBlock } from "../llm/provider";
|
|
2
|
+
import type { SessionMessage } from "./session";
|
|
3
|
+
import { loadSessionFull, rewriteSession } from "./session";
|
|
4
|
+
import { estimateTokens } from "../util/tokens";
|
|
5
|
+
import { logger } from "../util/logger";
|
|
6
|
+
|
|
7
|
+
/** Flat token cost added to every message on top of its estimated content tokens. */
const MSG_OVERHEAD = 4;

/** Sessions with fewer messages than this are never summarized. */
const MIN_MESSAGES_FOR_SUMMARY = 16;

/** Number of most recent messages kept verbatim when a session is summarized. */
const KEEP_RECENT = 10;

/** Fraction of the context window the session must exceed before summarization triggers. */
const THRESHOLD_RATIO = 0.6;

/** Prefix placed at the start of the synthetic summary message's text. */
const SUMMARY_MARKER = "Previous conversation summary:";
|
|
13
|
+
|
|
14
|
+
// --- Token estimation ---
|
|
15
|
+
|
|
16
|
+
/**
 * Estimate the token cost of one message: its content (serialized to JSON
 * when it is not already a string) plus the fixed per-message overhead.
 */
function messageTokens(m: LlmMessage | SessionMessage): number {
  let payload: unknown = "";
  if ("content" in m) {
    payload = m.content;
  }
  const asText = typeof payload === "string" ? payload : JSON.stringify(payload);
  return estimateTokens(asText) + MSG_OVERHEAD;
}
|
|
21
|
+
|
|
22
|
+
/** Sum the estimated token cost of every message in the list (0 for an empty list). */
function totalTokens(messages: SessionMessage[]): number {
  return messages.reduce((running, message) => running + messageTokens(message), 0);
}
|
|
27
|
+
|
|
28
|
+
// --- Public helpers ---
|
|
29
|
+
|
|
30
|
+
/**
 * Decide whether a session is large enough to be worth summarizing.
 *
 * @param messages Full session history.
 * @param contextWindow Context window size of the active model, in tokens.
 * @returns true when the session has at least the minimum message count AND
 *   its estimated tokens exceed the configured fraction of the context window.
 */
export function needsSummarization(
  messages: SessionMessage[],
  contextWindow: number
): boolean {
  const hasEnoughMessages = messages.length >= MIN_MESSAGES_FOR_SUMMARY;
  if (!hasEnoughMessages) return false;

  const tokenLimit = contextWindow * THRESHOLD_RATIO;
  return totalTokens(messages) > tokenLimit;
}
|
|
38
|
+
|
|
39
|
+
/** True when the message is a user turn carrying at least one `tool_result` block. */
function carriesToolResult(message: SessionMessage | undefined): boolean {
  if (!message || message.role !== "user" || !Array.isArray(message.content)) {
    return false;
  }
  return message.content.some(
    (block: { type?: unknown } | null | undefined) => block?.type === "tool_result"
  );
}

/**
 * Split a session into the older part to summarize and the recent part to keep.
 *
 * The split normally falls `keepRecent` messages from the end. If that would
 * make the kept part start with a user message holding `tool_result` blocks,
 * the split is moved earlier so the preceding message (the one that issued the
 * tool call) is kept too. Otherwise the tool call would be folded into the
 * summary and the kept history would begin with an orphaned tool result.
 *
 * @param messages Full session history, oldest first.
 * @param keepRecent How many trailing messages to keep verbatim (defaults to KEEP_RECENT).
 * @returns `toSummarize` (possibly empty) and `toKeep`; concatenated they equal `messages`.
 */
export function partitionMessages(
  messages: SessionMessage[],
  keepRecent: number = KEEP_RECENT
): { toSummarize: SessionMessage[]; toKeep: SessionMessage[] } {
  if (messages.length <= keepRecent) {
    return { toSummarize: [], toKeep: messages };
  }

  let splitAt = messages.length - keepRecent;
  // Never separate a tool result from the tool call that produced it.
  while (splitAt > 0 && splitAt < messages.length && carriesToolResult(messages[splitAt])) {
    splitAt--;
  }

  return {
    toSummarize: messages.slice(0, splitAt),
    toKeep: messages.slice(splitAt),
  };
}
|
|
51
|
+
|
|
52
|
+
/**
 * Render a single content block as summary-prompt text.
 * Returns null when the block contributes nothing (blank text, empty
 * non-error tool result, or an unrecognized block type).
 */
function describeContentBlock(block: LlmContentBlock): string | null {
  if (block.type === "text") {
    return block.text?.trim() ? block.text : null;
  }

  if (block.type === "tool_use") {
    return `[Used tool: ${block.name ?? "unknown"}]`;
  }

  if (block.type === "tool_result") {
    // Only string payloads are rendered; anything else becomes empty text.
    const payload = typeof block.content === "string" ? block.content : "";
    const clipped = payload.length > 200 ? payload.slice(0, 200) + "…" : payload;
    if (block.is_error) return `[Tool error: ${clipped}]`;
    return clipped ? `[Tool result: ${clipped}]` : null;
  }

  return null;
}

/**
 * Flatten session messages into plain `{ role, text }` pairs for the
 * summarization prompt.
 *
 * String content is passed through unchanged (blank strings are dropped).
 * Block-array content is reduced to its text blocks plus short placeholders
 * for tool calls and tool results (results clipped to 200 characters).
 * Messages with any other content shape, or with nothing left after
 * filtering, are omitted.
 */
export function toTextOnlyMessages(
  messages: SessionMessage[]
): { role: string; text: string }[] {
  const flattened: { role: string; text: string }[] = [];

  for (const { role, content } of messages) {
    if (typeof content === "string") {
      if (content.trim()) flattened.push({ role, text: content });
    } else if (Array.isArray(content)) {
      const pieces: string[] = [];
      for (const block of content as LlmContentBlock[]) {
        const rendered = describeContentBlock(block);
        if (rendered !== null) pieces.push(rendered);
      }
      if (pieces.length > 0) {
        flattened.push({ role, text: pieces.join("\n") });
      }
    }
  }

  return flattened;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Pick the token budget for a generated summary from the model's context
 * window: 400 up to 16k, 800 up to 64k, 1200 beyond that.
 */
function summaryBudget(contextWindow: number): number {
  // [inclusive context-window ceiling, summary budget], smallest ceiling first.
  const tiers: ReadonlyArray<readonly [number, number]> = [
    [16_000, 400],
    [64_000, 800],
  ];

  for (const [ceiling, budget] of tiers) {
    if (contextWindow <= ceiling) return budget;
  }
  return 1200;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Call the LLM to generate a summary of the conversation.
 *
 * The messages are first flattened to text (tool traffic becomes short
 * placeholders), then sent as a single user prompt with a token budget
 * derived from the model's context window.
 *
 * @param llm Provider used for the summarization call.
 * @param messages Messages to condense.
 * @returns The trimmed summary text, or null when there is nothing to
 *   summarize, the model returned no text, or the call failed (the failure
 *   is logged, never thrown).
 */
export async function generateSummary(
  llm: LlmProvider,
  messages: SessionMessage[]
): Promise<string | null> {
  const textMessages = toTextOnlyMessages(messages);
  if (textMessages.length === 0) return null;

  const conversationText = textMessages
    .map((m) => `${m.role}: ${m.text}`)
    .join("\n\n");

  const budget = summaryBudget(llm.contextWindow);

  try {
    const response = await llm.chat({
      system:
        "You are a conversation summarizer. Produce a concise summary of the conversation below. " +
        "Preserve key facts, decisions, user preferences, and any important context the assistant would need " +
        "to continue the conversation naturally. Do NOT include greetings or filler. Be factual and dense.",
      messages: [
        {
          role: "user",
          content: `Summarize this conversation in under ${budget} tokens:\n\n${conversationText}`,
        },
      ],
      maxTokens: budget,
    });

    const summary = response.content
      .filter((b) => b.type === "text")
      .map((b) => b.text ?? "")
      .join("\n")
      .trim();

    return summary || null;
  } catch (err: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a `.message`.
    const reason = err instanceof Error ? err.message : String(err);
    logger.error("Summary generation failed", { error: reason });
    return null;
  }
}
|
|
154
|
+
|
|
155
|
+
/**
 * Orchestrator: check if session needs summarization, generate summary, rewrite session.
 *
 * Steps:
 *  1. Load the full session and bail out unless it exceeds the summarization threshold.
 *  2. Split it into an older part (summarized by the LLM) and a recent part (kept verbatim).
 *  3. Re-read the session after the LLM call so messages appended meanwhile are preserved.
 *  4. Rewrite the file as: summary message, optional assistant bridge, kept messages,
 *     newly appended messages.
 *
 * The rewrite is skipped when the session file no longer extends the snapshot
 * taken in step 1 (for example it was cleared or replaced while the LLM call
 * was in flight); rewriting then would restore stale history.
 *
 * @returns true if summarization was performed and the session file rewritten.
 */
export async function maybeCompactSession(
  llm: LlmProvider,
  sessionId: string
): Promise<boolean> {
  const messages = loadSessionFull(sessionId);
  if (messages.length === 0) return false;

  if (!needsSummarization(messages, llm.contextWindow)) {
    return false;
  }

  logger.info("Session needs summarization", {
    sessionId,
    messageCount: messages.length,
    estimatedTokens: totalTokens(messages),
    contextWindow: llm.contextWindow,
  });

  const { toSummarize, toKeep } = partitionMessages(messages);
  if (toSummarize.length === 0) return false;

  const summary = await generateSummary(llm, toSummarize);
  if (!summary) {
    logger.warn("Summarization produced no output, skipping rewrite", {
      sessionId,
    });
    return false;
  }

  const summaryMessage: SessionMessage = {
    role: "user",
    content: [{ type: "text", text: `${SUMMARY_MARKER}\n${summary}` }],
    timestamp: new Date().toISOString(),
    __summary: true,
    __summarizedCount: toSummarize.length,
  };

  // Ensure no consecutive user messages (breaks Claude API).
  // If first kept message is also user, insert a minimal assistant bridge.
  let kept = toKeep;
  if (kept.length > 0 && kept[0].role === "user") {
    const bridge: SessionMessage = {
      role: "assistant",
      content: [{ type: "text", text: "(Continuing from summary.)" }],
      timestamp: new Date().toISOString(),
    };
    kept = [bridge, ...kept];
  }

  // Re-read session to capture any messages appended during summarization.
  // Everything from here to the rewrite is synchronous, so nothing in this
  // process can interleave between the read and the write.
  const freshMessages = loadSessionFull(sessionId);

  // The snapshot must still be a prefix of the file. A shorter file, or a
  // different message in the snapshot's last slot, means the session was
  // reset or replaced while the LLM call was running.
  const lastSnapshot = messages[messages.length - 1];
  const sameSlotNow = freshMessages[messages.length - 1];
  if (
    freshMessages.length < messages.length ||
    sameSlotNow?.timestamp !== lastSnapshot?.timestamp
  ) {
    logger.warn("Session changed during summarization, skipping rewrite", {
      sessionId,
    });
    return false;
  }

  const newMessagesSince = freshMessages.slice(messages.length);

  const newMessages = [summaryMessage, ...kept, ...newMessagesSince];
  rewriteSession(sessionId, newMessages);

  logger.info("Session summarized successfully", {
    sessionId,
    summarizedMessages: toSummarize.length,
    keptMessages: toKeep.length,
    summaryLength: summary.length,
  });

  return true;
}
|