dipclaw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.example.json +26 -0
- package/dist/agent/agent.d.ts +85 -0
- package/dist/agent/agent.js +725 -0
- package/dist/agent/agent.js.map +1 -0
- package/dist/agent/memory.d.ts +17 -0
- package/dist/agent/memory.js +92 -0
- package/dist/agent/memory.js.map +1 -0
- package/dist/agent/scheduler.d.ts +35 -0
- package/dist/agent/scheduler.js +154 -0
- package/dist/agent/scheduler.js.map +1 -0
- package/dist/agent/skill-generator.d.ts +37 -0
- package/dist/agent/skill-generator.js +263 -0
- package/dist/agent/skill-generator.js.map +1 -0
- package/dist/agent/task-runner.d.ts +31 -0
- package/dist/agent/task-runner.js +242 -0
- package/dist/agent/task-runner.js.map +1 -0
- package/dist/browser/actions.d.ts +28 -0
- package/dist/browser/actions.js +212 -0
- package/dist/browser/actions.js.map +1 -0
- package/dist/browser/manager.d.ts +17 -0
- package/dist/browser/manager.js +249 -0
- package/dist/browser/manager.js.map +1 -0
- package/dist/browser/script-runner.d.ts +49 -0
- package/dist/browser/script-runner.js +137 -0
- package/dist/browser/script-runner.js.map +1 -0
- package/dist/browser/snapshot.d.ts +15 -0
- package/dist/browser/snapshot.js +38 -0
- package/dist/browser/snapshot.js.map +1 -0
- package/dist/config/types.d.ts +62 -0
- package/dist/config/types.js +47 -0
- package/dist/config/types.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +219 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/client.d.ts +3 -0
- package/dist/llm/client.js +503 -0
- package/dist/llm/client.js.map +1 -0
- package/dist/llm/tools.d.ts +5 -0
- package/dist/llm/tools.js +94 -0
- package/dist/llm/tools.js.map +1 -0
- package/dist/llm/types.d.ts +49 -0
- package/dist/llm/types.js +2 -0
- package/dist/llm/types.js.map +1 -0
- package/dist/logging/logger.d.ts +17 -0
- package/dist/logging/logger.js +46 -0
- package/dist/logging/logger.js.map +1 -0
- package/dist/telegram/bot.d.ts +15 -0
- package/dist/telegram/bot.js +279 -0
- package/dist/telegram/bot.js.map +1 -0
- package/dist/tui/tui.d.ts +12 -0
- package/dist/tui/tui.js +176 -0
- package/dist/tui/tui.js.map +1 -0
- package/package.json +53 -0
|
@@ -0,0 +1,725 @@
|
|
|
1
|
+
import { createLlmClient } from "../llm/client.js";
|
|
2
|
+
import { getAllTools } from "../llm/tools.js";
|
|
3
|
+
import { BrowserManager } from "../browser/manager.js";
|
|
4
|
+
import { TaskRunner } from "./task-runner.js";
|
|
5
|
+
import { Scheduler } from "./scheduler.js";
|
|
6
|
+
import { MemoryManager } from "./memory.js";
|
|
7
|
+
import { SkillGenerator } from "./skill-generator.js";
|
|
8
|
+
import { TUI } from "../tui/tui.js";
|
|
9
|
+
import { TelegramBot } from "../telegram/bot.js";
|
|
10
|
+
import { ExecutionLogger } from "../logging/logger.js";
|
|
11
|
+
import path from "node:path";
|
|
12
|
+
/**
 * Top-level orchestrator: owns the LLM client, browser, scheduler, memory and
 * skill generator, and exposes the chat / task API used by the TUI and the
 * Telegram bot. Every operation that may drive the browser is serialized
 * through `acquireBrowserLock()`.
 */
export class Agent {
    /** Message injected when the tool-calling loop hits `config.maxIterations`. */
    static ITERATION_LIMIT_PROMPT = "You've reached the maximum number of tool-calling iterations allowed. Please provide a final response summarizing what you've found and accomplished so far, without calling any more tools.";
    /** Notice appended to the final answer when the iteration limit was hit. */
    static ITERATION_LIMIT_NOTICE = "\n\n⚠️ Reached the maximum number of steps. Send a message to continue where I left off.";
    config;
    llmClient;
    browser;
    taskRunner;
    scheduler;
    memory;
    skillGenerator;
    tui = null;
    telegramBot = null;
    chatHistory = [];
    conversationSummary = "";
    // Number of upcoming compressHistory() passes that skip LLM summarization
    // after a summarization failure.
    summarizationCooldown = 0;
    // Tail of the promise chain used as a FIFO lock for browser access.
    browserLock = Promise.resolve();
    /**
     * Build all collaborators from `config` and register the scheduler handler.
     * @param config Agent configuration (reads `workspace`, `name`,
     *   `maxIterations`, `telegram`, `tui`).
     */
    constructor(config) {
        this.config = config;
        this.llmClient = createLlmClient(config);
        this.browser = new BrowserManager(config);
        this.memory = new MemoryManager(config.workspace);
        this.taskRunner = new TaskRunner(config, this.llmClient, this.browser, this.memory);
        this.scheduler = new Scheduler(config.workspace);
        this.skillGenerator = new SkillGenerator(config.workspace, this.llmClient);
        // Give TaskRunner access to scheduler for auto-linking skills
        this.taskRunner.setScheduler(this.scheduler);
        // Wire up scheduler to task runner (serialized via browser lock)
        this.scheduler.setHandler(async (task) => {
            const lock = this.acquireBrowserLock();
            await lock.promise;
            try {
                console.log(`🔒 Lock acquired for task: ${task.name}`);
                const result = await this.taskRunner.runTask(task);
                console.log(` Task result: ${result.slice(0, 200)}`);
                // Notify via Telegram if available
                if (this.telegramBot) {
                    try {
                        await this.telegramBot.notify(`✅ Task "${task.name}" completed:\n${result.slice(0, 3800)}`);
                    }
                    catch (err) {
                        // The task itself succeeded; a delivery problem must not
                        // surface to the scheduler as a task failure.
                        console.error(`⚠️ Failed to send Telegram notification for task "${task.name}":`, err);
                    }
                }
            }
            finally {
                lock.release();
            }
        });
    }
    /** Launch the browser, start the scheduler, then the optional Telegram bot and TUI. */
    async start() {
        // Launch browser immediately so Chrome lives with this process
        await this.browser.launch();
        // Start scheduler
        this.scheduler.start();
        // Start Telegram bot if configured
        if (this.config.telegram) {
            this.telegramBot = new TelegramBot(this.config.telegram.botToken, this, this.config.telegram.allowedUsers);
            await this.telegramBot.start();
        }
        // Start TUI only if enabled
        if (this.config.tui) {
            this.tui = new TUI(this);
            this.tui.start();
        }
    }
    /** Stop scheduler, Telegram bot and TUI, and always close the browser. */
    async stop() {
        try {
            this.scheduler.stop();
            this.telegramBot?.stop();
            this.tui?.stop();
        }
        finally {
            // Close the browser even if one of the stop calls above threw.
            await this.browser.close();
        }
    }
    // --- Public API for TUI/Telegram ---
    /** @returns the configured agent name. */
    getName() {
        return this.config.name;
    }
    /** @returns the configured workspace path. */
    getWorkspace() {
        return this.config.workspace;
    }
    /** Clear chat history and start a new session */
    clearChat() {
        this.chatHistory = [];
        this.conversationSummary = "";
    }
    /**
     * Chat with the agent (LLM-driven, with tool use).
     * Waits for the browser lock, then delegates to `chatInner`.
     * @param message User message text.
     * @returns the final assistant text.
     */
    async chat(message) {
        const lock = this.acquireBrowserLock();
        await lock.promise;
        try {
            return await this.chatInner(message);
        }
        finally {
            lock.release();
        }
    }
    /**
     * Non-streaming chat turn: appends the user message, compresses history,
     * then alternates LLM calls and tool execution until the LLM stops asking
     * for tools or `config.maxIterations` is reached.
     * @param message User message text.
     * @returns the final assistant text (also appended to `chatHistory`).
     */
    async chatInner(message) {
        this.chatHistory.push({ role: "user", content: message });
        await this.compressHistory();
        const memoryIndex = this.memory.loadIndex();
        const systemPrompt = this.buildChatSystemPrompt(memoryIndex);
        const tools = getAllTools();
        const maxIter = this.config.maxIterations;
        let response = await this.llmClient.chat(this.buildMessagesForLlm(), tools, systemPrompt);
        // Handle tool calls in a loop
        let iterations = 0;
        let browserActionCount = 0;
        while (response.toolCalls.length > 0 && iterations < maxIter) {
            iterations++;
            // Add assistant message with tool calls
            this.chatHistory.push({
                role: "assistant",
                content: response.content,
                toolCalls: response.toolCalls,
            });
            browserActionCount += await this.runToolCalls(response.toolCalls);
            // Get next response
            response = await this.llmClient.chat(this.buildMessagesForLlm(), tools, systemPrompt);
        }
        // Reached iteration limit — ask LLM for a final summary without tools
        if (iterations >= maxIter && response.toolCalls.length > 0) {
            // Push the last assistant response (with unexecuted tool calls) as text only
            if (response.content) {
                this.chatHistory.push({ role: "assistant", content: response.content });
            }
            this.chatHistory.push({ role: "user", content: Agent.ITERATION_LIMIT_PROMPT });
            const summary = await this.llmClient.chat(this.buildMessagesForLlm(), undefined, // no tools — force text-only response
            systemPrompt);
            const limitedContent = (summary.content ?? "") + Agent.ITERATION_LIMIT_NOTICE;
            this.chatHistory.push({ role: "assistant", content: limitedContent });
            return limitedContent;
        }
        // A missing content field is treated as an empty answer rather than crashing.
        let finalContent = response.content ?? "";
        // Nudge: after complex browser interactions, append a reminder
        if (browserActionCount >= 5 && !finalContent.includes("memory")) {
            finalContent += "\n\n💡 _This was a multi-step workflow. I've saved relevant discoveries to memory for future reference._";
            // Auto-save a memory about the workflow
            this.autoSaveWorkflowMemory(message, browserActionCount);
        }
        // Add final assistant message
        this.chatHistory.push({
            role: "assistant",
            content: finalContent,
        });
        return finalContent;
    }
    /**
     * Execute each tool call in order and append one `tool` message per call to
     * `chatHistory`. A throwing tool is recorded as an `Error: ...` result so the
     * LLM can react to it instead of aborting the turn.
     * @param toolCalls Tool calls requested by the LLM (`{ id, name, arguments }`).
     * @returns how many of the calls targeted the `browser` tool.
     */
    async runToolCalls(toolCalls) {
        let browserCalls = 0;
        for (const tc of toolCalls) {
            if (tc.name === "browser") {
                browserCalls++;
            }
            let result;
            try {
                result = await this.executeToolCall(tc);
            }
            catch (err) {
                result = `Error: ${err instanceof Error ? err.message : String(err)}`;
            }
            this.chatHistory.push({
                role: "tool",
                content: result,
                toolCallId: tc.id,
            });
        }
        return browserCalls;
    }
    /**
     * Auto-save a brief memory about a complex chat workflow.
     * Best-effort: a failing save is logged and never interrupts the chat.
     * @param userMessage The user request that started the workflow.
     * @param actionCount Number of browser tool calls made for it.
     */
    autoSaveWorkflowMemory(userMessage, actionCount) {
        const key = `workflow-${Date.now()}`;
        const title = userMessage.slice(0, 60).replace(/[^a-zA-Z0-9\u4e00-\u9fff\s_-]/g, "");
        const content = `User request: ${userMessage.slice(0, 200)}\nComplexity: ${actionCount} browser actions\nTimestamp: ${new Date().toISOString()}`;
        try {
            this.memory.save(key, title, content);
        }
        catch (err) {
            // Best-effort, don't block chat
            console.error("⚠️ Failed to auto-save workflow memory:", err);
        }
    }
    /**
     * Chat with streaming — yields text deltas, handles tool calls internally.
     * The browser lock is held until the generator finishes or is closed.
     * @param message User message text.
     */
    async *chatStream(message) {
        const lock = this.acquireBrowserLock();
        await lock.promise;
        try {
            yield* this.chatStreamInner(message);
        }
        finally {
            lock.release();
        }
    }
    /**
     * Streaming chat turn. Yields text deltas as they arrive; after each batch
     * of tool calls yields "\n" and asks the LLM again, up to
     * `config.maxIterations` rounds, then requests a tool-free summary.
     * @param message User message text.
     */
    async *chatStreamInner(message) {
        this.chatHistory.push({ role: "user", content: message });
        await this.compressHistory();
        const memoryIndex = this.memory.loadIndex();
        const systemPrompt = this.buildChatSystemPrompt(memoryIndex);
        const tools = getAllTools();
        const maxIter = this.config.maxIterations;
        let iterations = 0;
        let browserActionCount = 0;
        while (iterations < maxIter) {
            iterations++;
            let fullContent = "";
            const toolCalls = [];
            for await (const event of this.llmClient.chatStream(this.buildMessagesForLlm(), tools, systemPrompt)) {
                if (event.type === "text_delta" && event.text) {
                    fullContent += event.text;
                    yield event.text;
                }
                else if (event.type === "tool_call_end" && event.toolCall) {
                    toolCalls.push({
                        id: event.toolCall.id,
                        name: event.toolCall.name,
                        arguments: event.toolCall.arguments || {},
                    });
                }
            }
            if (toolCalls.length === 0) {
                // Nudge: after complex browser interactions, auto-save workflow memory
                if (browserActionCount >= 5) {
                    this.autoSaveWorkflowMemory(message, browserActionCount);
                    yield "\n\n💡 _Multi-step workflow completed. Key discoveries saved to memory._";
                }
                this.chatHistory.push({ role: "assistant", content: fullContent });
                return;
            }
            // Process tool calls, then loop for next LLM response
            this.chatHistory.push({
                role: "assistant",
                content: fullContent,
                toolCalls,
            });
            browserActionCount += await this.runToolCalls(toolCalls);
            // yield a visual separator so user knows tools ran
            yield "\n";
        }
        // Reached iteration limit — ask LLM for a final summary without tools
        this.chatHistory.push({ role: "user", content: Agent.ITERATION_LIMIT_PROMPT });
        let summaryContent = "";
        for await (const event of this.llmClient.chatStream(this.buildMessagesForLlm(), undefined, // no tools — force text-only response
        systemPrompt)) {
            if (event.type === "text_delta" && event.text) {
                summaryContent += event.text;
                yield event.text;
            }
        }
        this.chatHistory.push({ role: "assistant", content: summaryContent + Agent.ITERATION_LIMIT_NOTICE });
        yield Agent.ITERATION_LIMIT_NOTICE;
    }
    /** @returns the scheduler's formatted task list. */
    listTasks() {
        return this.scheduler.formatTaskList();
    }
    /**
     * Add a scheduled task.
     * @returns a confirmation line, or `Error: ...` if the scheduler rejected it.
     */
    addTask(name, cron, prompt) {
        try {
            const task = this.scheduler.addTask(name, cron, prompt);
            return `✓ Task added: [${task.id}] ${task.name} (${task.cron})`;
        }
        catch (err) {
            return `Error: ${err instanceof Error ? err.message : String(err)}`;
        }
    }
    /** Remove a task by id and report the outcome as text. */
    removeTask(id) {
        return this.scheduler.removeTask(id)
            ? `✓ Task ${id} removed.`
            : `Task ${id} not found.`;
    }
    /** Enable or disable a task by id and report the outcome as text. */
    toggleTask(id, enabled) {
        const task = this.scheduler.toggleTask(id, enabled);
        return task
            ? `✓ Task ${id} ${enabled ? "enabled" : "disabled"}.`
            : `Task ${id} not found.`;
    }
    /**
     * Run a task immediately under the browser lock and record its status.
     * @returns the task result, or an explanatory `Error: ...` / not-found text.
     */
    async runTaskById(id) {
        const task = this.scheduler.getTask(id);
        if (!task) {
            return `Task ${id} not found.`;
        }
        const lock = this.acquireBrowserLock();
        await lock.promise;
        try {
            const result = await this.taskRunner.runTask(task);
            this.scheduler.updateTaskStatus(id, "ok");
            return result;
        }
        catch (err) {
            this.scheduler.updateTaskStatus(id, "error");
            return `Error: ${err instanceof Error ? err.message : String(err)}`;
        }
        finally {
            lock.release();
        }
    }
    /** @returns whatever the skill generator reports as the skill list. */
    listSkills() {
        return this.skillGenerator.listSkills();
    }
    /**
     * Generate a skill from the first log under `<workspace>/logs` whose name
     * contains `logFileName`.
     * @returns the generator's result, or a not-found message.
     */
    async generateSkill(logFileName) {
        const logsDir = path.join(this.config.workspace, "logs");
        const logs = ExecutionLogger.listLogs(logsDir);
        const match = logs.find((l) => l.includes(logFileName));
        if (!match) {
            return `Log file not found: ${logFileName}`;
        }
        const logPath = path.join(logsDir, match);
        return await this.skillGenerator.generate(logPath);
    }
    /** @returns the memory manager's listing. */
    listMemories() {
        return this.memory.list();
    }
    /**
     * Estimate the size of one chat message in characters: its content (JSON
     * encoded when not a string) plus its JSON-encoded tool calls, if any.
     * `JSON.stringify(undefined)` yields `undefined`, so a message without
     * content counts as zero instead of throwing.
     */
    estimateMessageChars(msg) {
        const serialized = typeof msg.content === "string"
            ? msg.content
            : JSON.stringify(msg.content);
        let total = (serialized ?? "").length;
        if (msg.toolCalls) {
            total += JSON.stringify(msg.toolCalls).length;
        }
        return total;
    }
    /**
     * Estimate character count of the chat history (rough proxy for tokens).
     */
    estimateHistoryChars() {
        let total = 0;
        for (const msg of this.chatHistory) {
            total += this.estimateMessageChars(msg);
        }
        return total;
    }
    /**
     * Compress chat history: try LLM summarization first, fallback to simple trim.
     * Nothing happens below the soft limit. A failed summarization starts a
     * cooldown during which only the hard-limit trim is applied.
     */
    async compressHistory() {
        const SOFT_LIMIT = 20_000;
        const HARD_LIMIT = 30_000;
        const TARGET = 15_000;
        const currentChars = this.estimateHistoryChars();
        if (currentChars <= SOFT_LIMIT) {
            return;
        }
        // Try LLM summarization if not on cooldown
        if (this.summarizationCooldown <= 0) {
            try {
                if (await this.performSummarization(currentChars, TARGET)) {
                    return;
                }
                // No safe cut point existed; fall through to the trim check.
            }
            catch (err) {
                console.error("⚠️ Summarization failed, falling back to trim:", err);
                this.summarizationCooldown = 3;
            }
        }
        if (this.summarizationCooldown > 0) {
            this.summarizationCooldown--;
        }
        // Fallback: simple trim at hard limit
        if (this.estimateHistoryChars() > HARD_LIMIT) {
            this.simpleTrim(HARD_LIMIT);
        }
    }
    /**
     * Summarize old messages via LLM and replace them with a compressed summary.
     * The cut is made at the first user-message boundary that removes at least
     * `currentChars - target` characters (or the last boundary if none does).
     * State is only modified after a usable summary was obtained.
     * @returns true if history was summarized, false if there was no cut point.
     * @throws if the LLM call fails or returns an empty summary.
     */
    async performSummarization(currentChars, target) {
        const excess = currentChars - target;
        const history = this.chatHistory;
        // Walk once, keeping the size of history[0..i) as a running prefix sum.
        let cutIndex = 0;
        let prefixChars = 0;
        for (let i = 0; i < history.length; i++) {
            // Index 0 is never a cut: cutting there would remove nothing.
            if (i > 0 && history[i].role === "user") {
                cutIndex = i;
                if (prefixChars >= excess) {
                    break;
                }
            }
            prefixChars += this.estimateMessageChars(history[i]);
        }
        if (cutIndex <= 0) {
            return false;
        }
        const messagesToSummarize = history.slice(0, cutIndex);
        console.log(`📝 Summarizing ${messagesToSummarize.length} messages (${currentChars} chars → ~${target} target)...`);
        const summary = await this.summarizeMessages(messagesToSummarize);
        // Validate before committing: dropping messages in exchange for an empty
        // summary would silently lose the conversation.
        if (typeof summary !== "string" || summary.trim().length === 0) {
            throw new Error("Summarizer returned an empty summary");
        }
        this.conversationSummary = summary;
        this.chatHistory = history.slice(cutIndex);
        console.log(`📝 Summary complete (${summary.length} chars). History now: ${this.estimateHistoryChars()} chars.`);
        return true;
    }
    /**
     * Format messages and call LLM to produce a conversation summary.
     * Includes the previous summary, if any, so the LLM can merge them.
     * @returns the `content` of the LLM response.
     */
    async summarizeMessages(messages) {
        const formatted = this.formatMessagesForSummary(messages);
        let prompt = `Summarize the conversation between a user and a browser automation agent.
Your summary will replace the original messages to save context space.

`;
        if (this.conversationSummary) {
            prompt += `## Previous Summary
${this.conversationSummary}

`;
        }
        prompt += `## Messages to Summarize
${formatted}

## Instructions
Produce a structured summary (under 3000 characters) capturing:
1. **User Goals**: What the user wants to accomplish
2. **Actions Taken**: URLs visited, forms filled, data extracted
3. **Results**: Outcomes, findings, extracted data
4. **Errors & Recovery**: Problems encountered and resolutions
5. **Decisions**: Choices made by user or agent
6. **Current State**: Where things stand now
7. **Pending**: Anything not yet completed

Merge with previous summary if present. Omit empty sections.
Focus on facts, not conversational niceties.`;
        const response = await this.llmClient.chat([{ role: "user", content: prompt }], undefined, "You are a conversation summarizer. Be concise and precise.");
        return response.content;
    }
    /**
     * Format chat messages into readable text for the summarizer.
     * Skips snapshot content (ephemeral page state, too large for summaries)
     * and truncates other tool results to 500 characters.
     */
    formatMessagesForSummary(messages) {
        // Collect snapshot toolCallIds to skip their results
        const snapshotToolCallIds = new Set();
        for (const msg of messages) {
            if (msg.role === "assistant" && msg.toolCalls) {
                for (const tc of msg.toolCalls) {
                    if (tc.name === "browser" && tc.arguments?.action === "snapshot") {
                        snapshotToolCallIds.add(tc.id);
                    }
                }
            }
        }
        // Flatten string / block-array content to text; anything else becomes "".
        const toText = (content) => {
            if (typeof content === "string") {
                return content;
            }
            if (Array.isArray(content)) {
                return content
                    .map((b) => (b.type === "image" ? "[image]" : b.text || ""))
                    .join("");
            }
            return "";
        };
        return messages
            .map((msg) => {
            const content = toText(msg.content);
            if (msg.role === "user") {
                return `User: ${content}`;
            }
            if (msg.role === "assistant" && msg.toolCalls?.length) {
                const toolsSummary = msg.toolCalls
                    .map((tc) => {
                    const args = tc.arguments ?? {};
                    return `${tc.name}(${args.action || JSON.stringify(args).slice(0, 100)})`;
                })
                    .join(", ");
                const textPart = content ? `${content}\n` : "";
                return `Agent: ${textPart}[Called: ${toolsSummary}]`;
            }
            if (msg.role === "tool") {
                // Skip snapshot results entirely
                if (msg.toolCallId && snapshotToolCallIds.has(msg.toolCallId)) {
                    return `Tool result: [browser snapshot omitted]`;
                }
                const truncated = content.length > 500
                    ? content.slice(0, 500) + "...(truncated)"
                    : content;
                return `Tool result: ${truncated}`;
            }
            return `Agent: ${content}`;
        })
            .join("\n\n");
    }
    /**
     * Simple trim: drop old messages at user-message boundaries to fit budget.
     * Fallback when summarization is unavailable. Keeps the longest suffix that
     * starts at a user message and fits in `maxChars`; if none fits, keeps the
     * last user message and everything after it.
     * @param maxChars Character budget for the retained history.
     */
    simpleTrim(maxChars) {
        const history = this.chatHistory;
        if (history.length === 0 || this.estimateHistoryChars() <= maxChars) {
            return;
        }
        // suffixChars[i] = estimated size of history.slice(i)
        const suffixChars = new Array(history.length + 1).fill(0);
        for (let i = history.length - 1; i >= 0; i--) {
            suffixChars[i] = suffixChars[i + 1] + this.estimateMessageChars(history[i]);
        }
        let lastUserIdx = history[0].role === "user" ? 0 : -1;
        // Scan oldest to newest so the first fitting cut keeps the most context.
        for (let i = 1; i < history.length; i++) {
            if (history[i].role !== "user") {
                continue;
            }
            lastUserIdx = i;
            if (suffixChars[i] <= maxChars) {
                this.chatHistory = history.slice(i);
                return;
            }
        }
        // Fallback: keep only the last user message and following messages
        this.chatHistory = lastUserIdx >= 0
            ? history.slice(lastUserIdx)
            : [history[history.length - 1]];
    }
    /**
     * Build a copy of chatHistory with snapshot results managed:
     * - If the most recent assistant tool-call message after the latest user
     *   message uses the browser tool, keep only the most recent snapshot.
     * - Otherwise, omit all snapshots.
     * Returns `chatHistory` itself when it contains no snapshot calls.
     */
    buildMessagesForLlm() {
        // 1. Collect all snapshot toolCallIds
        const snapshotToolCallIds = new Set();
        for (const msg of this.chatHistory) {
            if (msg.role === "assistant" && msg.toolCalls) {
                for (const tc of msg.toolCalls) {
                    if (tc.name === "browser" && tc.arguments?.action === "snapshot") {
                        snapshotToolCallIds.add(tc.id);
                    }
                }
            }
        }
        if (snapshotToolCallIds.size === 0) {
            return this.chatHistory;
        }
        // 2. Check if the most recent turn involves browser tool calls
        let lastTurnUsesBrowser = false;
        for (let i = this.chatHistory.length - 1; i >= 0; i--) {
            const msg = this.chatHistory[i];
            if (msg.role === "user") {
                // The latest user message — stop here, browser context depends on what follows
                break;
            }
            if (msg.role === "assistant" && msg.toolCalls) {
                lastTurnUsesBrowser = msg.toolCalls.some((tc) => tc.name === "browser");
                break;
            }
        }
        // 3. Find the last snapshot id (only needed if browser is active)
        let lastSnapshotId;
        if (lastTurnUsesBrowser) {
            for (let i = this.chatHistory.length - 1; i >= 0; i--) {
                const msg = this.chatHistory[i];
                if (msg.role === "tool" && msg.toolCallId && snapshotToolCallIds.has(msg.toolCallId)) {
                    lastSnapshotId = msg.toolCallId;
                    break;
                }
            }
        }
        // 4. Build messages: keep only the latest snapshot if browser active, omit all otherwise
        return this.chatHistory.map((msg) => {
            const isStaleSnapshot = msg.role === "tool" &&
                msg.toolCallId &&
                snapshotToolCallIds.has(msg.toolCallId) &&
                msg.toolCallId !== lastSnapshotId;
            return isStaleSnapshot ? { ...msg, content: "(snapshot omitted)" } : msg;
        });
    }
    // --- Private helpers ---
    /**
     * Dispatch one LLM tool call to the matching tool implementation.
     * @param tc Tool call `{ name, arguments }`; missing arguments are treated as `{}`.
     * @returns the tool's result, or an `Unknown tool: ...` message.
     */
    async executeToolCall(tc) {
        const args = tc.arguments ?? {};
        const action = args.action;
        switch (tc.name) {
            case "browser":
                return await this.executeBrowserAction(action, args);
            case "memory":
                return this.executeMemoryAction(action, args);
            case "scheduler":
                return this.executeSchedulerAction(action, args);
            default:
                return `Unknown tool: ${tc.name}`;
        }
    }
    /**
     * Run one browser action through `BrowserActions` (loaded on demand).
     * @returns the action's result, or an `Unknown browser action: ...` message.
     */
    async executeBrowserAction(action, args) {
        const { BrowserActions } = await import("../browser/actions.js");
        const actions = new BrowserActions(this.browser);
        switch (action) {
            case "navigate":
                return await actions.navigate(args.url);
            case "snapshot":
                return await actions.snapshot();
            case "click":
                return await actions.click(args.ref);
            case "type":
                return await actions.type(args.ref, args.text, args.clear);
            case "scroll":
                return await actions.scroll(args.direction, args.amount, args.ref);
            case "screenshot": {
                // Only the textual part of the screenshot result is returned to the LLM.
                const ss = await actions.screenshot(args.fullPage);
                return ss.text;
            }
            case "evaluate":
                return await actions.evaluate(args.script);
            case "wait":
                return await actions.wait(args);
            case "press":
                return await actions.pressKey(args.key);
            case "select":
                return await actions.selectOption(args.ref, args.value);
            case "tabs":
                return await actions.listTabs();
            case "close_tab":
                return await actions.closeTab(args.index);
            case "focus_tab":
                return await actions.focusTab(args.index);
            default:
                return `Unknown browser action: ${action}`;
        }
    }
    /** Run a `memory` tool action (`save`, `read`, `list`). */
    executeMemoryAction(action, args) {
        switch (action) {
            case "save":
                return this.memory.save(args.key, args.title, args.content);
            case "read":
                return this.memory.read(args.key);
            case "list":
                return this.memory.list();
            default:
                return `Unknown memory action: ${action}`;
        }
    }
    /** Run a `scheduler` tool action (`list`, `add`, `remove`, `toggle`). */
    executeSchedulerAction(action, args) {
        switch (action) {
            case "list":
                return this.listTasks();
            case "add":
                return this.addTask(args.name, args.cron, args.prompt);
            case "remove":
                return this.removeTask(args.id);
            case "toggle":
                return this.toggleTask(args.id, args.enabled);
            default:
                return `Unknown scheduler action: ${action}`;
        }
    }
    /**
     * Build the chat system prompt: fixed instructions, then the conversation
     * summary (if any), then the memory index when it has more than 20
     * non-whitespace-trimmed characters.
     * @param memoryIndex Text of the saved-memory index.
     */
    buildChatSystemPrompt(memoryIndex) {
        let prompt = `You are "${this.config.name}", a browser automation assistant. You help users by controlling a web browser and managing scheduled tasks.

## Capabilities
- Browse the web: navigate, click, type, scroll, take screenshots, extract data
- Manage scheduled tasks: add, remove, enable/disable cron-based tasks
- Remember things: save and recall important information across sessions
- Generate skills: create reusable automation scripts from execution logs

## How to use browser tool
1. Call browser with action "navigate" to go to a URL
2. Call browser with action "snapshot" to see the page (you'll get element refs like [e1], [e2])
3. Call browser with action "click", "type", etc. using element references
4. After actions, snapshot again to verify results

## Guidelines
- Always snapshot before interacting with page elements
- Be concise in responses
- Manage scheduled tasks with the scheduler tool

## Learning & Memory (IMPORTANT)
You have a persistent memory system. Proactively save knowledge that would be valuable in future sessions:
- **Website patterns**: Login flows, page structures, selectors that work, CAPTCHAs, rate limits
- **User preferences**: How the user likes data formatted, which sites they frequent, credentials notes (never passwords)
- **Task insights**: Workarounds discovered, timing that works best, error recovery strategies
- **Domain knowledge**: Business rules, data relationships, naming conventions found during browsing

When you complete a multi-step browser task, ask yourself: "Would this workflow be useful again?" If yes, suggest the user create a scheduled task — the system will automatically learn and optimize it over time.

Do NOT ask permission every time — just save important discoveries. Use descriptive keys like "site-twitter-login-flow" or "workaround-cloudflare-challenge".`;
        if (this.conversationSummary) {
            prompt += `\n\n## Earlier Conversation Summary\n${this.conversationSummary}`;
        }
        if ((memoryIndex ?? "").trim().length > 20) {
            prompt += `\n\n## Saved Memories\n${memoryIndex}`;
        }
        return prompt;
    }
    /**
     * Acquire a serialized lock for browser access.
     * All operations that may use the browser (chat, scheduled tasks) must go through this
     * to prevent concurrent browser manipulation.
     * @returns `{ promise, release }`: await `promise` to obtain the lock, then
     *   call `release()` (in a `finally`) to let the next waiter proceed.
     */
    acquireBrowserLock() {
        let release;
        const next = new Promise((resolve) => {
            release = resolve;
        });
        // The caller waits on the current tail; the new tail settles only after
        // the current one does AND this caller has released.
        const promise = this.browserLock;
        this.browserLock = this.browserLock.then(() => next);
        return { promise, release };
    }
}
|
|
725
|
+
//# sourceMappingURL=agent.js.map
|