dipclaw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/config.example.json +26 -0
  2. package/dist/agent/agent.d.ts +85 -0
  3. package/dist/agent/agent.js +725 -0
  4. package/dist/agent/agent.js.map +1 -0
  5. package/dist/agent/memory.d.ts +17 -0
  6. package/dist/agent/memory.js +92 -0
  7. package/dist/agent/memory.js.map +1 -0
  8. package/dist/agent/scheduler.d.ts +35 -0
  9. package/dist/agent/scheduler.js +154 -0
  10. package/dist/agent/scheduler.js.map +1 -0
  11. package/dist/agent/skill-generator.d.ts +37 -0
  12. package/dist/agent/skill-generator.js +263 -0
  13. package/dist/agent/skill-generator.js.map +1 -0
  14. package/dist/agent/task-runner.d.ts +31 -0
  15. package/dist/agent/task-runner.js +242 -0
  16. package/dist/agent/task-runner.js.map +1 -0
  17. package/dist/browser/actions.d.ts +28 -0
  18. package/dist/browser/actions.js +212 -0
  19. package/dist/browser/actions.js.map +1 -0
  20. package/dist/browser/manager.d.ts +17 -0
  21. package/dist/browser/manager.js +249 -0
  22. package/dist/browser/manager.js.map +1 -0
  23. package/dist/browser/script-runner.d.ts +49 -0
  24. package/dist/browser/script-runner.js +137 -0
  25. package/dist/browser/script-runner.js.map +1 -0
  26. package/dist/browser/snapshot.d.ts +15 -0
  27. package/dist/browser/snapshot.js +38 -0
  28. package/dist/browser/snapshot.js.map +1 -0
  29. package/dist/config/types.d.ts +62 -0
  30. package/dist/config/types.js +47 -0
  31. package/dist/config/types.js.map +1 -0
  32. package/dist/index.d.ts +2 -0
  33. package/dist/index.js +219 -0
  34. package/dist/index.js.map +1 -0
  35. package/dist/llm/client.d.ts +3 -0
  36. package/dist/llm/client.js +503 -0
  37. package/dist/llm/client.js.map +1 -0
  38. package/dist/llm/tools.d.ts +5 -0
  39. package/dist/llm/tools.js +94 -0
  40. package/dist/llm/tools.js.map +1 -0
  41. package/dist/llm/types.d.ts +49 -0
  42. package/dist/llm/types.js +2 -0
  43. package/dist/llm/types.js.map +1 -0
  44. package/dist/logging/logger.d.ts +17 -0
  45. package/dist/logging/logger.js +46 -0
  46. package/dist/logging/logger.js.map +1 -0
  47. package/dist/telegram/bot.d.ts +15 -0
  48. package/dist/telegram/bot.js +279 -0
  49. package/dist/telegram/bot.js.map +1 -0
  50. package/dist/tui/tui.d.ts +12 -0
  51. package/dist/tui/tui.js +176 -0
  52. package/dist/tui/tui.js.map +1 -0
  53. package/package.json +53 -0
@@ -0,0 +1,725 @@
1
+ import { createLlmClient } from "../llm/client.js";
2
+ import { getAllTools } from "../llm/tools.js";
3
+ import { BrowserManager } from "../browser/manager.js";
4
+ import { TaskRunner } from "./task-runner.js";
5
+ import { Scheduler } from "./scheduler.js";
6
+ import { MemoryManager } from "./memory.js";
7
+ import { SkillGenerator } from "./skill-generator.js";
8
+ import { TUI } from "../tui/tui.js";
9
+ import { TelegramBot } from "../telegram/bot.js";
10
+ import { ExecutionLogger } from "../logging/logger.js";
11
+ import path from "node:path";
12
/**
 * Central coordinator that wires the LLM client, the browser manager, the
 * task runner, the scheduler, the memory store and the optional TUI/Telegram
 * front-ends together.
 *
 * Every operation that may drive the browser (chat, streaming chat, scheduled
 * or manually triggered tasks) is serialized through `acquireBrowserLock()`.
 */
export class Agent {
    config;
    llmClient;
    browser;
    taskRunner;
    scheduler;
    memory;
    skillGenerator;
    tui = null;
    telegramBot = null;
    /** Running conversation: user / assistant / tool messages in order. */
    chatHistory = [];
    /** LLM-produced summary of messages already dropped from `chatHistory`. */
    conversationSummary = "";
    /** Number of upcoming compression passes that skip LLM summarization. */
    summarizationCooldown = 0;
    /** Tail of the promise chain used to serialize browser access. */
    browserLock = Promise.resolve();
    /**
     * @param config Agent configuration. This class reads `workspace`, `name`,
     *   `maxIterations`, `telegram` and `tui` from it; the whole object is also
     *   passed to the LLM client, browser manager and task runner.
     */
    constructor(config) {
        this.config = config;
        this.llmClient = createLlmClient(config);
        this.browser = new BrowserManager(config);
        this.memory = new MemoryManager(config.workspace);
        this.taskRunner = new TaskRunner(config, this.llmClient, this.browser, this.memory);
        this.scheduler = new Scheduler(config.workspace);
        this.skillGenerator = new SkillGenerator(config.workspace, this.llmClient);
        // Give TaskRunner access to scheduler for auto-linking skills
        this.taskRunner.setScheduler(this.scheduler);
        // Wire up scheduler to task runner (serialized via browser lock)
        this.scheduler.setHandler(async (task) => {
            const lock = this.acquireBrowserLock();
            await lock.promise;
            try {
                console.log(`🔒 Lock acquired for task: ${task.name}`);
                const result = await this.taskRunner.runTask(task);
                console.log(` Task result: ${result.slice(0, 200)}`);
                // Notify via Telegram if available
                if (this.telegramBot) {
                    await this.telegramBot.notify(`✅ Task "${task.name}" completed:\n${result.slice(0, 3800)}`);
                }
            }
            finally {
                // Always hand the browser to the next waiter, even on failure.
                lock.release();
            }
        });
    }
    /** Launch the browser, start the scheduler and any configured front-ends. */
    async start() {
        // Launch browser immediately so Chrome lives with this process
        await this.browser.launch();
        // Start scheduler
        this.scheduler.start();
        // Start Telegram bot if configured
        if (this.config.telegram) {
            this.telegramBot = new TelegramBot(this.config.telegram.botToken, this, this.config.telegram.allowedUsers);
            await this.telegramBot.start();
        }
        // Start TUI only if enabled
        if (this.config.tui) {
            this.tui = new TUI(this);
            this.tui.start();
        }
    }
    /** Stop the scheduler and front-ends, then close the browser. */
    async stop() {
        this.scheduler.stop();
        this.telegramBot?.stop();
        this.tui?.stop();
        await this.browser.close();
    }
    // --- Public API for TUI/Telegram ---
    /** @returns the configured agent name. */
    getName() {
        return this.config.name;
    }
    /** @returns the configured workspace path. */
    getWorkspace() {
        return this.config.workspace;
    }
    /** Clear chat history and start a new session */
    clearChat() {
        this.chatHistory = [];
        this.conversationSummary = "";
    }
    /**
     * Chat with the agent (LLM-driven, with tool use).
     * Waits for the browser lock before doing any work.
     * @param message User message text.
     * @returns the agent's final text reply.
     */
    async chat(message) {
        const lock = this.acquireBrowserLock();
        await lock.promise;
        try {
            return await this.chatInner(message);
        }
        finally {
            lock.release();
        }
    }
    /**
     * Body of `chat()`: runs the LLM/tool loop up to `config.maxIterations`
     * rounds and records every message in `chatHistory`.
     * @param message User message text.
     * @returns the final assistant text (empty string when the LLM returned none).
     */
    async chatInner(message) {
        this.chatHistory.push({ role: "user", content: message });
        await this.compressHistory();
        const memoryIndex = this.memory.loadIndex();
        const systemPrompt = this.buildChatSystemPrompt(memoryIndex);
        const tools = getAllTools();
        let response = await this.llmClient.chat(this.buildMessagesForLlm(), tools, systemPrompt);
        // Handle tool calls in a loop
        let iterations = 0;
        let browserActionCount = 0;
        const maxIter = this.config.maxIterations;
        while (response.toolCalls.length > 0 && iterations < maxIter) {
            iterations++;
            // Add assistant message with tool calls
            this.chatHistory.push({
                role: "assistant",
                content: response.content,
                toolCalls: response.toolCalls,
            });
            // Execute tools
            for (const tc of response.toolCalls) {
                if (tc.name === "browser") {
                    browserActionCount++;
                }
                let result;
                try {
                    result = await this.executeToolCall(tc);
                }
                catch (err) {
                    // Report the failure to the LLM as the tool result instead of aborting the chat.
                    result = `Error: ${err instanceof Error ? err.message : String(err)}`;
                }
                this.chatHistory.push({
                    role: "tool",
                    content: result,
                    toolCallId: tc.id,
                });
            }
            // Get next response
            response = await this.llmClient.chat(this.buildMessagesForLlm(), tools, systemPrompt);
        }
        // Reached iteration limit — ask LLM for a final summary without tools
        if (iterations >= maxIter && response.toolCalls.length > 0) {
            // Push the last assistant response (with unexecuted tool calls) as text only
            if (response.content) {
                this.chatHistory.push({ role: "assistant", content: response.content });
            }
            this.chatHistory.push({
                role: "user",
                content: "You've reached the maximum number of tool-calling iterations allowed. Please provide a final response summarizing what you've found and accomplished so far, without calling any more tools.",
            });
            const summary = await this.llmClient.chat(this.buildMessagesForLlm(), undefined, // no tools — force text-only response
            systemPrompt);
            // Guard against a missing summary so the reply never starts with "undefined"/"null".
            const limitReply = (summary.content ?? "") + "\n\n⚠️ Reached the maximum number of steps. Send a message to continue where I left off.";
            this.chatHistory.push({ role: "assistant", content: limitReply });
            return limitReply;
        }
        // The LLM may return no text at all; normalize so the string operations below are safe.
        let finalContent = response.content ?? "";
        // Nudge: after complex browser interactions, append a reminder
        if (browserActionCount >= 5 && !finalContent.includes("memory")) {
            finalContent += "\n\n💡 _This was a multi-step workflow. I've saved relevant discoveries to memory for future reference._";
            // Auto-save a memory about the workflow
            this.autoSaveWorkflowMemory(message, browserActionCount);
        }
        // Add final assistant message
        this.chatHistory.push({
            role: "assistant",
            content: finalContent,
        });
        return finalContent;
    }
    /**
     * Auto-save a brief memory about a complex chat workflow.
     * Best-effort: a failing save is ignored so it never interrupts the chat.
     * @param userMessage The user request that triggered the workflow.
     * @param actionCount Number of browser tool calls made for it.
     */
    autoSaveWorkflowMemory(userMessage, actionCount) {
        const key = `workflow-${Date.now()}`;
        // Title keeps only ASCII alphanumerics, CJK ideographs, whitespace, "_" and "-".
        const title = userMessage.slice(0, 60).replace(/[^a-zA-Z0-9\u4e00-\u9fff\s_-]/g, "");
        const content = `User request: ${userMessage.slice(0, 200)}\nComplexity: ${actionCount} browser actions\nTimestamp: ${new Date().toISOString()}`;
        try {
            this.memory.save(key, title, content);
        }
        catch {
            // Best-effort, don't block chat
        }
    }
    /**
     * Chat with streaming — yields text deltas, handles tool calls internally.
     * Holds the browser lock for as long as the generator is being consumed.
     * @param message User message text.
     */
    async *chatStream(message) {
        const lock = this.acquireBrowserLock();
        await lock.promise;
        try {
            yield* this.chatStreamInner(message);
        }
        finally {
            lock.release();
        }
    }
    /**
     * Body of `chatStream()`: streams each LLM round, executes requested tools
     * between rounds, and finishes with a tool-free summary when
     * `config.maxIterations` rounds have been used.
     * @param message User message text.
     */
    async *chatStreamInner(message) {
        this.chatHistory.push({ role: "user", content: message });
        await this.compressHistory();
        const memoryIndex = this.memory.loadIndex();
        const systemPrompt = this.buildChatSystemPrompt(memoryIndex);
        const tools = getAllTools();
        let iterations = 0;
        let browserActionCount = 0;
        const maxIter = this.config.maxIterations;
        while (iterations < maxIter) {
            iterations++;
            let fullContent = "";
            const toolCalls = [];
            let hadToolCalls = false;
            for await (const event of this.llmClient.chatStream(this.buildMessagesForLlm(), tools, systemPrompt)) {
                if (event.type === "text_delta" && event.text) {
                    fullContent += event.text;
                    yield event.text;
                }
                else if (event.type === "tool_call_end" && event.toolCall) {
                    hadToolCalls = true;
                    toolCalls.push({
                        id: event.toolCall.id,
                        name: event.toolCall.name,
                        arguments: event.toolCall.arguments || {},
                    });
                }
            }
            if (!hadToolCalls) {
                // Nudge: after complex browser interactions, auto-save workflow memory
                if (browserActionCount >= 5) {
                    this.autoSaveWorkflowMemory(message, browserActionCount);
                    yield "\n\n💡 _Multi-step workflow completed. Key discoveries saved to memory._";
                }
                this.chatHistory.push({ role: "assistant", content: fullContent });
                return;
            }
            // Process tool calls, then loop for next LLM response
            this.chatHistory.push({
                role: "assistant",
                content: fullContent,
                toolCalls,
            });
            for (const tc of toolCalls) {
                if (tc.name === "browser") {
                    browserActionCount++;
                }
                let result;
                try {
                    result = await this.executeToolCall(tc);
                }
                catch (err) {
                    result = `Error: ${err instanceof Error ? err.message : String(err)}`;
                }
                this.chatHistory.push({
                    role: "tool",
                    content: result,
                    toolCallId: tc.id,
                });
            }
            // yield a visual separator so user knows tools ran
            yield "\n";
        }
        // Reached iteration limit — ask LLM for a final summary without tools
        this.chatHistory.push({
            role: "user",
            content: "You've reached the maximum number of tool-calling iterations allowed. Please provide a final response summarizing what you've found and accomplished so far, without calling any more tools.",
        });
        let summaryContent = "";
        for await (const event of this.llmClient.chatStream(this.buildMessagesForLlm(), undefined, // no tools — force text-only response
        systemPrompt)) {
            if (event.type === "text_delta" && event.text) {
                summaryContent += event.text;
                yield event.text;
            }
        }
        const suffix = "\n\n⚠️ Reached the maximum number of steps. Send a message to continue where I left off.";
        this.chatHistory.push({ role: "assistant", content: summaryContent + suffix });
        yield suffix;
    }
    /** @returns the scheduler's formatted task list. */
    listTasks() {
        return this.scheduler.formatTaskList();
    }
    /**
     * Register a scheduled task.
     * @returns a confirmation line, or an `Error: …` line if the scheduler throws.
     */
    addTask(name, cron, prompt) {
        try {
            const task = this.scheduler.addTask(name, cron, prompt);
            return `✓ Task added: [${task.id}] ${task.name} (${task.cron})`;
        }
        catch (err) {
            return `Error: ${err instanceof Error ? err.message : String(err)}`;
        }
    }
    /** Remove a scheduled task and report the outcome as text. */
    removeTask(id) {
        return this.scheduler.removeTask(id)
            ? `✓ Task ${id} removed.`
            : `Task ${id} not found.`;
    }
    /** Enable or disable a scheduled task and report the outcome as text. */
    toggleTask(id, enabled) {
        const task = this.scheduler.toggleTask(id, enabled);
        return task
            ? `✓ Task ${id} ${enabled ? "enabled" : "disabled"}.`
            : `Task ${id} not found.`;
    }
    /**
     * Run a scheduled task immediately (under the browser lock) and record its
     * status as "ok" or "error" on the scheduler.
     * @returns the task result, or a not-found / `Error: …` message.
     */
    async runTaskById(id) {
        const task = this.scheduler.getTask(id);
        if (!task) {
            return `Task ${id} not found.`;
        }
        const lock = this.acquireBrowserLock();
        await lock.promise;
        try {
            const result = await this.taskRunner.runTask(task);
            this.scheduler.updateTaskStatus(id, "ok");
            return result;
        }
        catch (err) {
            this.scheduler.updateTaskStatus(id, "error");
            return `Error: ${err instanceof Error ? err.message : String(err)}`;
        }
        finally {
            lock.release();
        }
    }
    /** @returns whatever the skill generator reports as its skill list. */
    listSkills() {
        return this.skillGenerator.listSkills();
    }
    /**
     * Generate a skill from the first log in `<workspace>/logs` whose file name
     * contains `logFileName`.
     * @returns the generator's result, or a not-found message.
     */
    async generateSkill(logFileName) {
        const logsDir = path.join(this.config.workspace, "logs");
        const logs = ExecutionLogger.listLogs(logsDir);
        const match = logs.find((l) => l.includes(logFileName));
        if (!match) {
            return `Log file not found: ${logFileName}`;
        }
        const logPath = path.join(logsDir, match);
        return await this.skillGenerator.generate(logPath);
    }
    /** @returns whatever the memory manager reports as its list. */
    listMemories() {
        return this.memory.list();
    }
    /**
     * Character count of one history message: its content (string length, or
     * the length of its JSON form for structured content) plus the JSON length
     * of any tool calls. Missing content counts as 0.
     */
    messageChars(msg) {
        let chars = 0;
        if (typeof msg.content === "string") {
            chars += msg.content.length;
        }
        else if (msg.content != null) {
            chars += JSON.stringify(msg.content).length;
        }
        if (msg.toolCalls) {
            chars += JSON.stringify(msg.toolCalls).length;
        }
        return chars;
    }
    /**
     * Estimate character count of the chat history (rough proxy for tokens).
     */
    estimateHistoryChars() {
        let total = 0;
        for (const msg of this.chatHistory) {
            total += this.messageChars(msg);
        }
        return total;
    }
    /**
     * Compress chat history: try LLM summarization first, fallback to simple trim.
     * Does nothing while the history is at or below the soft limit.
     */
    async compressHistory() {
        const SOFT_LIMIT = 20_000;
        const HARD_LIMIT = 30_000;
        const TARGET = 15_000;
        const currentChars = this.estimateHistoryChars();
        if (currentChars <= SOFT_LIMIT) {
            return;
        }
        // Try LLM summarization if not on cooldown
        if (this.summarizationCooldown <= 0) {
            try {
                await this.performSummarization(currentChars, TARGET);
                return;
            }
            catch (err) {
                console.error("⚠️ Summarization failed, falling back to trim:", err);
                // Skip summarization on the following passes before retrying.
                this.summarizationCooldown = 3;
            }
        }
        if (this.summarizationCooldown > 0) {
            this.summarizationCooldown--;
        }
        // Fallback: simple trim at hard limit
        if (this.estimateHistoryChars() > HARD_LIMIT) {
            this.simpleTrim(HARD_LIMIT);
        }
    }
    /**
     * Summarize old messages via LLM and replace them with a compressed summary.
     * The cut is always placed at a user-message boundary; the first boundary
     * whose preceding messages total at least `currentChars - target` chars is
     * used, otherwise the last boundary. No-op when there is no boundary after
     * index 0.
     * @param currentChars Current estimated history size.
     * @param target Desired history size after compression.
     */
    async performSummarization(currentChars, target) {
        const excess = currentChars - target;
        const history = this.chatHistory;
        // Collect safe cut indices (user message boundaries)
        const safeCuts = [];
        for (let i = 1; i < history.length; i++) {
            if (history[i].role === "user") {
                safeCuts.push(i);
            }
        }
        if (safeCuts.length === 0) {
            return;
        }
        // Find the cut that removes approximately `excess` chars.
        // Cut points are ascending, so a running prefix sum avoids re-scanning.
        let cutIndex = 0;
        let scanned = 0;
        let charsBeforeCut = 0;
        for (const sci of safeCuts) {
            for (; scanned < sci; scanned++) {
                charsBeforeCut += this.messageChars(history[scanned]);
            }
            cutIndex = sci;
            if (charsBeforeCut >= excess) {
                break;
            }
        }
        if (cutIndex <= 0) {
            return;
        }
        const messagesToSummarize = history.slice(0, cutIndex);
        console.log(`📝 Summarizing ${messagesToSummarize.length} messages (${currentChars} chars → ~${target} target)...`);
        const summary = await this.summarizeMessages(messagesToSummarize);
        this.conversationSummary = summary;
        this.chatHistory = history.slice(cutIndex);
        console.log(`📝 Summary complete (${summary.length} chars). History now: ${this.estimateHistoryChars()} chars.`);
    }
    /**
     * Format messages and call LLM to produce a conversation summary.
     * Any existing `conversationSummary` is included so the LLM can merge it.
     * @param messages History messages to be replaced by the summary.
     * @returns the summary text returned by the LLM.
     */
    async summarizeMessages(messages) {
        const formatted = this.formatMessagesForSummary(messages);
        let prompt = `Summarize the conversation between a user and a browser automation agent.
Your summary will replace the original messages to save context space.

`;
        if (this.conversationSummary) {
            prompt += `## Previous Summary
${this.conversationSummary}

`;
        }
        prompt += `## Messages to Summarize
${formatted}

## Instructions
Produce a structured summary (under 3000 characters) capturing:
1. **User Goals**: What the user wants to accomplish
2. **Actions Taken**: URLs visited, forms filled, data extracted
3. **Results**: Outcomes, findings, extracted data
4. **Errors & Recovery**: Problems encountered and resolutions
5. **Decisions**: Choices made by user or agent
6. **Current State**: Where things stand now
7. **Pending**: Anything not yet completed

Merge with previous summary if present. Omit empty sections.
Focus on facts, not conversational niceties.`;
        const response = await this.llmClient.chat([{ role: "user", content: prompt }], undefined, "You are a conversation summarizer. Be concise and precise.");
        return response.content;
    }
    /**
     * Format chat messages into readable text for the summarizer.
     * Skips snapshot content (ephemeral page state, too large for summaries)
     * and truncates other tool results to 500 characters.
     * @param messages History messages to render.
     * @returns one paragraph per message, separated by blank lines.
     */
    formatMessagesForSummary(messages) {
        // Collect snapshot toolCallIds to skip their results
        const snapshotToolCallIds = new Set();
        for (const msg of messages) {
            if (msg.role === "assistant" && msg.toolCalls) {
                for (const tc of msg.toolCalls) {
                    if (tc.name === "browser" && tc.arguments?.action === "snapshot") {
                        snapshotToolCallIds.add(tc.id);
                    }
                }
            }
        }
        return messages
            .map((msg) => {
            // Content may be a string, an array of blocks, or absent (e.g. an
            // assistant message that only carried tool calls).
            let content = "";
            if (typeof msg.content === "string") {
                content = msg.content;
            }
            else if (Array.isArray(msg.content)) {
                content = msg.content
                    .map((b) => (b.type === "image" ? "[image]" : b.text || ""))
                    .join("");
            }
            if (msg.role === "user") {
                return `User: ${content}`;
            }
            else if (msg.role === "assistant" && msg.toolCalls?.length) {
                const toolsSummary = msg.toolCalls
                    .map((tc) => `${tc.name}(${tc.arguments?.action || JSON.stringify(tc.arguments ?? {}).slice(0, 100)})`)
                    .join(", ");
                const textPart = content ? `${content}\n` : "";
                return `Agent: ${textPart}[Called: ${toolsSummary}]`;
            }
            else if (msg.role === "tool") {
                // Skip snapshot results entirely
                if (msg.toolCallId && snapshotToolCallIds.has(msg.toolCallId)) {
                    return `Tool result: [browser snapshot omitted]`;
                }
                const truncated = content.length > 500
                    ? content.slice(0, 500) + "...(truncated)"
                    : content;
                return `Tool result: ${truncated}`;
            }
            else {
                return `Agent: ${content}`;
            }
        })
            .join("\n\n");
    }
    /**
     * Simple trim: drop old messages at user-message boundaries to fit budget.
     * Fallback when summarization is unavailable.
     *
     * Keeps the longest suffix that starts at a user message and fits within
     * `maxChars`. If no such suffix fits, keeps the last user message and
     * everything after it, even if that still exceeds the budget.
     * @param maxChars Character budget for the retained history.
     */
    simpleTrim(maxChars) {
        const history = this.chatHistory;
        if (history.length === 0) {
            return;
        }
        let retained = this.estimateHistoryChars();
        if (retained <= maxChars) {
            return;
        }
        // Walk cut points oldest → newest so the first fitting suffix is the
        // largest one; `retained` shrinks as earlier messages are excluded.
        let scanned = 0;
        let lastUserIdx = -1;
        for (let i = 0; i < history.length; i++) {
            if (history[i].role !== "user") {
                continue;
            }
            lastUserIdx = i;
            if (i === 0) {
                continue; // cutting at 0 removes nothing
            }
            for (; scanned < i; scanned++) {
                retained -= this.messageChars(history[scanned]);
            }
            if (retained <= maxChars) {
                this.chatHistory = history.slice(i);
                return;
            }
        }
        // Fallback: keep only the last user message and following messages
        if (lastUserIdx >= 0) {
            this.chatHistory = history.slice(lastUserIdx);
            return;
        }
        this.chatHistory = [history[history.length - 1]];
    }
    /**
     * Build the message list sent to the LLM, with snapshot results managed:
     * - If the current turn's most recent tool-calling assistant message used
     *   the browser tool, keep only the most recent snapshot result.
     * - Otherwise, replace every snapshot result with a placeholder.
     * Returns `chatHistory` itself when it contains no snapshot calls; replaced
     * messages are copies, so stored history is never modified.
     */
    buildMessagesForLlm() {
        // 1. Collect all snapshot toolCallIds
        const snapshotToolCallIds = new Set();
        for (const msg of this.chatHistory) {
            if (msg.role === "assistant" && msg.toolCalls) {
                for (const tc of msg.toolCalls) {
                    if (tc.name === "browser" && tc.arguments?.action === "snapshot") {
                        snapshotToolCallIds.add(tc.id);
                    }
                }
            }
        }
        if (snapshotToolCallIds.size === 0) {
            return this.chatHistory;
        }
        // 2. Check if the most recent turn involves browser tool calls:
        //    scan backwards and stop at the latest user message or the first
        //    assistant message that carries tool calls.
        let lastTurnUsesBrowser = false;
        for (let i = this.chatHistory.length - 1; i >= 0; i--) {
            const msg = this.chatHistory[i];
            if (msg.role === "user") {
                // The latest user message — stop here, browser context depends on what follows
                break;
            }
            if (msg.role === "assistant" && msg.toolCalls) {
                if (msg.toolCalls.some((tc) => tc.name === "browser")) {
                    lastTurnUsesBrowser = true;
                }
                break;
            }
        }
        // 3. Find the last snapshot id (only needed if browser is active)
        let lastSnapshotId;
        if (lastTurnUsesBrowser) {
            for (let i = this.chatHistory.length - 1; i >= 0; i--) {
                const msg = this.chatHistory[i];
                if (msg.role === "tool" && msg.toolCallId && snapshotToolCallIds.has(msg.toolCallId)) {
                    lastSnapshotId = msg.toolCallId;
                    break;
                }
            }
        }
        // 4. Build messages: keep only the latest snapshot if browser active, omit all otherwise
        return this.chatHistory.map((msg) => {
            if (msg.role === "tool" &&
                msg.toolCallId &&
                snapshotToolCallIds.has(msg.toolCallId) &&
                msg.toolCallId !== lastSnapshotId) {
                return { ...msg, content: "(snapshot omitted)" };
            }
            return msg;
        });
    }
    // --- Private helpers ---
    /**
     * Dispatch one LLM tool call to the browser, memory or scheduler handler.
     * @param tc Tool call with `name` and optional `arguments` (`action` selects the operation).
     * @returns the handler's result, or an "Unknown tool" message.
     */
    async executeToolCall(tc) {
        // A tool call without arguments is treated as an empty argument object.
        const args = tc.arguments ?? {};
        const action = args.action;
        switch (tc.name) {
            case "browser":
                return await this.executeBrowserAction(action, args);
            case "memory":
                return this.executeMemoryAction(action, args);
            case "scheduler":
                return this.executeSchedulerAction(action, args);
            default:
                return `Unknown tool: ${tc.name}`;
        }
    }
    /**
     * Run a single browser action through `BrowserActions`.
     * The actions module is imported on demand at call time.
     */
    async executeBrowserAction(action, args) {
        const { BrowserActions } = await import("../browser/actions.js");
        const actions = new BrowserActions(this.browser);
        switch (action) {
            case "navigate":
                return await actions.navigate(args.url);
            case "snapshot":
                return await actions.snapshot();
            case "click":
                return await actions.click(args.ref);
            case "type":
                return await actions.type(args.ref, args.text, args.clear);
            case "scroll":
                return await actions.scroll(args.direction, args.amount, args.ref);
            case "screenshot": {
                // Only the textual part of the screenshot result is returned to the LLM.
                const ss = await actions.screenshot(args.fullPage);
                return ss.text;
            }
            case "evaluate":
                return await actions.evaluate(args.script);
            case "wait":
                return await actions.wait(args);
            case "press":
                return await actions.pressKey(args.key);
            case "select":
                return await actions.selectOption(args.ref, args.value);
            case "tabs":
                return await actions.listTabs();
            case "close_tab":
                return await actions.closeTab(args.index);
            case "focus_tab":
                return await actions.focusTab(args.index);
            default:
                return `Unknown browser action: ${action}`;
        }
    }
    /** Run a memory tool action (`save`, `read` or `list`). */
    executeMemoryAction(action, args) {
        switch (action) {
            case "save":
                return this.memory.save(args.key, args.title, args.content);
            case "read":
                return this.memory.read(args.key);
            case "list":
                return this.memory.list();
            default:
                return `Unknown memory action: ${action}`;
        }
    }
    /** Run a scheduler tool action (`list`, `add`, `remove` or `toggle`). */
    executeSchedulerAction(action, args) {
        switch (action) {
            case "list":
                return this.listTasks();
            case "add":
                return this.addTask(args.name, args.cron, args.prompt);
            case "remove":
                return this.removeTask(args.id);
            case "toggle":
                return this.toggleTask(args.id, args.enabled);
            default:
                return `Unknown scheduler action: ${action}`;
        }
    }
    /**
     * Build the chat system prompt. Appends the conversation summary when one
     * exists, and the memory index when it is longer than 20 characters after
     * trimming.
     * @param memoryIndex Text returned by `memory.loadIndex()`.
     */
    buildChatSystemPrompt(memoryIndex) {
        let prompt = `You are "${this.config.name}", a browser automation assistant. You help users by controlling a web browser and managing scheduled tasks.

## Capabilities
- Browse the web: navigate, click, type, scroll, take screenshots, extract data
- Manage scheduled tasks: add, remove, enable/disable cron-based tasks
- Remember things: save and recall important information across sessions
- Generate skills: create reusable automation scripts from execution logs

## How to use browser tool
1. Call browser with action "navigate" to go to a URL
2. Call browser with action "snapshot" to see the page (you'll get element refs like [e1], [e2])
3. Call browser with action "click", "type", etc. using element references
4. After actions, snapshot again to verify results

## Guidelines
- Always snapshot before interacting with page elements
- Be concise in responses
- Manage scheduled tasks with the scheduler tool

## Learning & Memory (IMPORTANT)
You have a persistent memory system. Proactively save knowledge that would be valuable in future sessions:
- **Website patterns**: Login flows, page structures, selectors that work, CAPTCHAs, rate limits
- **User preferences**: How the user likes data formatted, which sites they frequent, credentials notes (never passwords)
- **Task insights**: Workarounds discovered, timing that works best, error recovery strategies
- **Domain knowledge**: Business rules, data relationships, naming conventions found during browsing

When you complete a multi-step browser task, ask yourself: "Would this workflow be useful again?" If yes, suggest the user create a scheduled task — the system will automatically learn and optimize it over time.

Do NOT ask permission every time — just save important discoveries. Use descriptive keys like "site-twitter-login-flow" or "workaround-cloudflare-challenge".`;
        if (this.conversationSummary) {
            prompt += `\n\n## Earlier Conversation Summary\n${this.conversationSummary}`;
        }
        if (memoryIndex.trim().length > 20) {
            prompt += `\n\n## Saved Memories\n${memoryIndex}`;
        }
        return prompt;
    }
    /**
     * Acquire a serialized lock for browser access.
     * All operations that may use the browser (chat, scheduled tasks) must go through this
     * to prevent concurrent browser manipulation.
     *
     * @returns `{ promise, release }`: await `promise` to wait for every
     *   earlier holder, then call `release()` (in a `finally`) to let the next
     *   waiter proceed.
     */
    acquireBrowserLock() {
        let release;
        const next = new Promise((resolve) => {
            release = resolve;
        });
        // The caller waits on the current tail; later callers wait on `next`.
        const promise = this.browserLock;
        this.browserLock = this.browserLock.then(() => next);
        return { promise, release };
    }
}
725
+ //# sourceMappingURL=agent.js.map