daemora 1.0.3 → 1.0.5

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
Files changed (121)
  1. package/LICENSE +663 -0
  2. package/README.md +69 -19
  3. package/SOUL.md +25 -24
  4. package/daemora-ui/README.md +11 -0
  5. package/package.json +12 -2
  6. package/skills/api-development.md +35 -0
  7. package/skills/artifacts-builder/SKILL.md +74 -0
  8. package/skills/artifacts-builder/scripts/bundle-artifact.sh +54 -0
  9. package/skills/artifacts-builder/scripts/init-artifact.sh +322 -0
  10. package/skills/artifacts-builder/scripts/shadcn-components.tar.gz +0 -0
  11. package/skills/brand-guidelines.md +73 -0
  12. package/skills/browser.md +77 -0
  13. package/skills/changelog-generator.md +104 -0
  14. package/skills/coding.md +26 -10
  15. package/skills/content-research-writer.md +538 -0
  16. package/skills/data-analysis.md +27 -0
  17. package/skills/debugging.md +33 -0
  18. package/skills/devops.md +37 -0
  19. package/skills/document-docx.md +197 -0
  20. package/skills/document-pdf.md +294 -0
  21. package/skills/document-pptx.md +484 -0
  22. package/skills/document-xlsx.md +289 -0
  23. package/skills/domain-name-brainstormer.md +212 -0
  24. package/skills/file-organizer.md +433 -0
  25. package/skills/frontend-design.md +42 -0
  26. package/skills/image-enhancer.md +99 -0
  27. package/skills/invoice-organizer.md +446 -0
  28. package/skills/lead-research-assistant.md +199 -0
  29. package/skills/mcp-builder/SKILL.md +328 -0
  30. package/skills/mcp-builder/reference/evaluation.md +602 -0
  31. package/skills/mcp-builder/reference/mcp_best_practices.md +915 -0
  32. package/skills/mcp-builder/reference/node_mcp_server.md +916 -0
  33. package/skills/mcp-builder/reference/python_mcp_server.md +752 -0
  34. package/skills/mcp-builder/scripts/connections.py +151 -0
  35. package/skills/mcp-builder/scripts/evaluation.py +373 -0
  36. package/skills/mcp-builder/scripts/example_evaluation.xml +22 -0
  37. package/skills/mcp-builder/scripts/requirements.txt +2 -0
  38. package/skills/meeting-insights-analyzer.md +327 -0
  39. package/skills/orchestration.md +93 -0
  40. package/skills/raffle-winner-picker.md +159 -0
  41. package/skills/slack-gif-creator/SKILL.md +646 -0
  42. package/skills/slack-gif-creator/core/color_palettes.py +302 -0
  43. package/skills/slack-gif-creator/core/easing.py +230 -0
  44. package/skills/slack-gif-creator/core/frame_composer.py +469 -0
  45. package/skills/slack-gif-creator/core/gif_builder.py +246 -0
  46. package/skills/slack-gif-creator/core/typography.py +357 -0
  47. package/skills/slack-gif-creator/core/validators.py +264 -0
  48. package/skills/slack-gif-creator/core/visual_effects.py +494 -0
  49. package/skills/slack-gif-creator/requirements.txt +4 -0
  50. package/skills/slack-gif-creator/templates/bounce.py +106 -0
  51. package/skills/slack-gif-creator/templates/explode.py +331 -0
  52. package/skills/slack-gif-creator/templates/fade.py +329 -0
  53. package/skills/slack-gif-creator/templates/flip.py +291 -0
  54. package/skills/slack-gif-creator/templates/kaleidoscope.py +211 -0
  55. package/skills/slack-gif-creator/templates/morph.py +329 -0
  56. package/skills/slack-gif-creator/templates/move.py +293 -0
  57. package/skills/slack-gif-creator/templates/pulse.py +268 -0
  58. package/skills/slack-gif-creator/templates/shake.py +127 -0
  59. package/skills/slack-gif-creator/templates/slide.py +291 -0
  60. package/skills/slack-gif-creator/templates/spin.py +269 -0
  61. package/skills/slack-gif-creator/templates/wiggle.py +300 -0
  62. package/skills/slack-gif-creator/templates/zoom.py +312 -0
  63. package/skills/system-admin.md +44 -0
  64. package/skills/tailored-resume-generator.md +345 -0
  65. package/skills/theme-factory/SKILL.md +59 -0
  66. package/skills/theme-factory/theme-showcase.pdf +0 -0
  67. package/skills/theme-factory/themes/arctic-frost.md +19 -0
  68. package/skills/theme-factory/themes/botanical-garden.md +19 -0
  69. package/skills/theme-factory/themes/desert-rose.md +19 -0
  70. package/skills/theme-factory/themes/forest-canopy.md +19 -0
  71. package/skills/theme-factory/themes/golden-hour.md +19 -0
  72. package/skills/theme-factory/themes/midnight-galaxy.md +19 -0
  73. package/skills/theme-factory/themes/modern-minimalist.md +19 -0
  74. package/skills/theme-factory/themes/ocean-depths.md +19 -0
  75. package/skills/theme-factory/themes/sunset-boulevard.md +19 -0
  76. package/skills/theme-factory/themes/tech-innovation.md +19 -0
  77. package/skills/video-downloader.md +99 -0
  78. package/skills/web-development.md +32 -0
  79. package/skills/webapp-testing/SKILL.md +96 -0
  80. package/skills/webapp-testing/examples/console_logging.py +35 -0
  81. package/skills/webapp-testing/examples/element_discovery.py +40 -0
  82. package/skills/webapp-testing/examples/static_html_automation.py +33 -0
  83. package/skills/webapp-testing/scripts/with_server.py +106 -0
  84. package/src/agents/SubAgentManager.js +57 -12
  85. package/src/api/openai-compat.js +212 -0
  86. package/src/channels/TelegramChannel.js +5 -2
  87. package/src/channels/index.js +7 -10
  88. package/src/cli.js +129 -50
  89. package/src/config/agentProfiles.js +1 -0
  90. package/src/config/default.js +10 -0
  91. package/src/config/models.js +317 -71
  92. package/src/config/permissions.js +12 -0
  93. package/src/core/AgentLoop.js +70 -50
  94. package/src/core/Compaction.js +84 -2
  95. package/src/core/MessageQueue.js +90 -0
  96. package/src/core/Task.js +13 -0
  97. package/src/core/TaskQueue.js +1 -1
  98. package/src/core/TaskRunner.js +80 -5
  99. package/src/index.js +328 -48
  100. package/src/mcp/MCPAgentRunner.js +48 -11
  101. package/src/mcp/MCPManager.js +40 -2
  102. package/src/models/ModelRouter.js +67 -1
  103. package/src/safety/DockerSandbox.js +212 -0
  104. package/src/safety/ExecApproval.js +118 -0
  105. package/src/scheduler/Heartbeat.js +56 -21
  106. package/src/services/cleanup.js +106 -0
  107. package/src/services/sessions.js +39 -1
  108. package/src/setup/wizard.js +75 -4
  109. package/src/skills/SkillLoader.js +104 -17
  110. package/src/storage/TaskStore.js +19 -1
  111. package/src/systemPrompt.js +171 -328
  112. package/src/tools/browserAutomation.js +615 -104
  113. package/src/tools/executeCommand.js +19 -1
  114. package/src/tools/index.js +6 -0
  115. package/src/tools/manageAgents.js +55 -4
  116. package/src/tools/replyWithFile.js +62 -0
  117. package/src/tools/screenCapture.js +12 -1
  118. package/src/tools/taskManager.js +164 -0
  119. package/src/tools/useMCP.js +3 -1
  120. package/src/utils/Embeddings.js +157 -10
  121. package/src/webhooks/WebhookHandler.js +107 -0
@@ -1,52 +1,133 @@
1
1
  /**
2
- * Browser Automation - Playwright-based web interaction.
3
- * Upgraded: multi-tab, navigation guard, dialog handling, waitFor, cookies.
2
+ * Browser Automation Heavy Playwright-based web interaction.
3
+ *
4
+ * Features:
5
+ * - Accessibility snapshots (ARIA tree with numeric refs for agent navigation)
6
+ * - Multi-tab with targetId tracking
7
+ * - Session persistence (cookies, localStorage, sessionStorage)
8
+ * - Console/error capture
9
+ * - File upload/download handling
10
+ * - Drag & drop, viewport resize
11
+ * - Advanced waits (selector, text, URL, JS predicate, load state)
12
+ * - PDF generation, element highlight
13
+ * - Localhost allowed, private ranges blocked
4
14
  */
5
15
 
16
+ import { join } from "path";
17
+ import { mkdirSync, existsSync } from "fs";
18
+ import { config } from "../config/default.js";
19
+
6
20
  let browser = null;
7
21
  let browserContext = null;
8
- const pages = []; // Multi-tab support
22
+ const pages = new Map(); // targetId → page
23
+ let activeTargetId = null;
24
+ let targetCounter = 0;
25
+ let inactivityTimer = null;
26
+ const INACTIVITY_TIMEOUT = 5 * 60 * 1000;
27
+
28
+ // Console log buffer — per page, max 100 entries
29
+ const consoleLogs = new Map(); // targetId → [{type, text, timestamp}]
30
+ const MAX_CONSOLE_LOGS = 100;
31
+
32
+ // Snapshot ref cache — maps ref numbers to element handles
33
+ let snapshotRefs = new Map(); // "e1" → { selector, role, name }
34
+ let snapshotCounter = 0;
9
35
 
10
- // Blocked navigation patterns - SSRF / security guard
36
+ // ── Navigation guard ─────────────────────────────────────────────────────────
11
37
  const NAV_BLOCKLIST = [
12
38
  /^file:\/\//i,
13
- /^(http:\/\/|https:\/\/)(127\.|0\.|10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.|169\.254\.)/,
14
- /^(http:\/\/|https:\/\/)localhost/i,
39
+ /^(https?:\/\/)(10\.\d+\.\d+\.\d+)/,
40
+ /^(https?:\/\/)(172\.(1[6-9]|2[0-9]|3[01])\.\d+\.\d+)/,
41
+ /^(https?:\/\/)(192\.168\.\d+\.\d+)/,
42
+ /^(https?:\/\/)(169\.254\.\d+\.\d+)/,
15
43
  ];
16
44
 
17
45
  function isBlockedUrl(url) {
18
- return NAV_BLOCKLIST.some((pattern) => pattern.test(url));
46
+ return NAV_BLOCKLIST.some((p) => p.test(url));
47
+ }
48
+
49
+ // ── Inactivity timer ─────────────────────────────────────────────────────────
50
+ function resetInactivityTimer() {
51
+ if (inactivityTimer) clearTimeout(inactivityTimer);
52
+ inactivityTimer = setTimeout(async () => {
53
+ if (browser) {
54
+ console.log("[browser] Closing browser due to inactivity (5 min)");
55
+ await browser.close().catch(() => {});
56
+ cleanup();
57
+ }
58
+ }, INACTIVITY_TIMEOUT);
59
+ }
60
+
61
+ function cleanup() {
62
+ browser = null;
63
+ browserContext = null;
64
+ pages.clear();
65
+ consoleLogs.clear();
66
+ snapshotRefs.clear();
67
+ activeTargetId = null;
68
+ }
69
+
70
+ // ── Browser lifecycle ────────────────────────────────────────────────────────
71
+ function genTargetId() {
72
+ return `t${++targetCounter}`;
73
+ }
74
+
75
+ function attachConsoleLogs(targetId, page) {
76
+ const logs = [];
77
+ consoleLogs.set(targetId, logs);
78
+ page.on("console", (msg) => {
79
+ logs.push({ type: msg.type(), text: msg.text(), ts: Date.now() });
80
+ if (logs.length > MAX_CONSOLE_LOGS) logs.shift();
81
+ });
82
+ page.on("pageerror", (err) => {
83
+ logs.push({ type: "error", text: err.message, ts: Date.now() });
84
+ if (logs.length > MAX_CONSOLE_LOGS) logs.shift();
85
+ });
19
86
  }
20
87
 
21
- async function ensureBrowser() {
88
+ async function ensureBrowser(profileName = "default") {
89
+ resetInactivityTimer();
90
+
22
91
  if (browser && browser.isConnected()) {
23
- if (pages.length === 0 || pages[0].isClosed()) {
24
- pages[0] = await browserContext.newPage();
25
- pages[0].setDefaultTimeout(15000);
92
+ if (!activeTargetId || !pages.has(activeTargetId) || pages.get(activeTargetId).isClosed()) {
93
+ const page = await browserContext.newPage();
94
+ page.setDefaultTimeout(15000);
95
+ const tid = genTargetId();
96
+ pages.set(tid, page);
97
+ attachConsoleLogs(tid, page);
98
+ activeTargetId = tid;
26
99
  }
27
- return pages[0];
100
+ return pages.get(activeTargetId);
28
101
  }
29
102
 
30
103
  try {
31
104
  const { chromium } = await import("playwright");
32
- browser = await chromium.launch({ headless: true });
33
- browserContext = await browser.newContext({
34
- userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
105
+ const userDataDir = join(config.dataDir, "browser", profileName);
106
+ mkdirSync(userDataDir, { recursive: true });
107
+
108
+ browser = await chromium.launchPersistentContext(userDataDir, {
109
+ headless: true,
35
110
  viewport: { width: 1280, height: 720 },
111
+ acceptDownloads: true,
112
+ userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
36
113
  });
114
+ browserContext = browser;
37
115
 
38
- // Auto-handle dialogs (accept by default)
39
116
  browserContext.on("dialog", async (dialog) => {
40
- console.log(` [browser] Auto-dismissed dialog: ${dialog.type()} - "${dialog.message().slice(0, 80)}"`);
117
+ console.log(`[browser] Auto-dismissed dialog: ${dialog.type()} - "${dialog.message().slice(0, 80)}"`);
41
118
  await dialog.dismiss();
42
119
  });
43
120
 
44
- const page = await browserContext.newPage();
121
+ const existingPages = browserContext.pages();
122
+ const page = existingPages.length > 0 ? existingPages[0] : await browserContext.newPage();
45
123
  page.setDefaultTimeout(15000);
46
- pages.push(page);
47
- return pages[0];
124
+ const tid = genTargetId();
125
+ pages.set(tid, page);
126
+ attachConsoleLogs(tid, page);
127
+ activeTargetId = tid;
128
+ return page;
48
129
  } catch (error) {
49
- if (error.code === "ERR_MODULE_NOT_FOUND" || error.message.includes("playwright")) {
130
+ if (error.code === "ERR_MODULE_NOT_FOUND" || error.message?.includes("playwright")) {
50
131
  throw new Error("Playwright not installed. Run: pnpm add playwright && npx playwright install chromium");
51
132
  }
52
133
  throw error;
@@ -54,55 +135,320 @@ async function ensureBrowser() {
54
135
  }
55
136
 
56
137
  function currentPage() {
57
- if (pages.length === 0) throw new Error("No browser open. Use navigate first.");
58
- const activePage = pages[pages.length - 1];
59
- if (activePage.isClosed()) throw new Error("Current page is closed. Navigate to a URL first.");
60
- return activePage;
138
+ if (!activeTargetId || !pages.has(activeTargetId)) {
139
+ throw new Error("No browser open. Use navigate first.");
140
+ }
141
+ const p = pages.get(activeTargetId);
142
+ if (p.isClosed()) throw new Error("Current page is closed. Navigate to a URL first.");
143
+ resetInactivityTimer();
144
+ return p;
145
+ }
146
+
147
+ // ── Accessibility snapshot ───────────────────────────────────────────────────
148
+ // Builds an ARIA tree with numeric refs (e1, e2, ...) for agent navigation.
149
+ // Agent can then use click("e5") instead of CSS selectors.
150
+
151
+ async function buildAccessibilitySnapshot(page, opts = {}) {
152
+ const { selector, interactive, compact, maxChars = 50000 } = opts;
153
+
154
+ snapshotRefs.clear();
155
+ snapshotCounter = 0;
156
+
157
+ const tree = await page.accessibility.snapshot({ interestingOnly: interactive !== false });
158
+ if (!tree) return { text: "(empty page — no accessible content)", refs: {} };
159
+
160
+ const lines = [];
161
+ const refs = {};
162
+
163
+ function walk(node, depth = 0) {
164
+ if (!node) return;
165
+ const indent = " ".repeat(depth);
166
+ const ref = `e${++snapshotCounter}`;
167
+
168
+ // Skip non-interactive in interactive-only mode
169
+ const isInteractive = ["button", "link", "textbox", "checkbox", "radio", "combobox",
170
+ "menuitem", "tab", "switch", "slider", "spinbutton", "searchbox", "option"].includes(node.role);
171
+
172
+ if (interactive && !isInteractive && !node.children?.length) return;
173
+
174
+ const parts = [`${indent}[${ref}]`, node.role];
175
+ if (node.name) parts.push(`"${node.name}"`);
176
+ if (node.value) parts.push(`value="${node.value}"`);
177
+ if (node.checked !== undefined) parts.push(node.checked ? "checked" : "unchecked");
178
+ if (node.selected) parts.push("selected");
179
+ if (node.disabled) parts.push("disabled");
180
+ if (node.expanded !== undefined) parts.push(node.expanded ? "expanded" : "collapsed");
181
+ if (node.level) parts.push(`level=${node.level}`);
182
+
183
+ // Store ref mapping
184
+ refs[ref] = { role: node.role, name: node.name || "", selector: null };
185
+ snapshotRefs.set(ref, { role: node.role, name: node.name || "" });
186
+
187
+ if (!compact || isInteractive || depth <= 1) {
188
+ lines.push(parts.join(" "));
189
+ }
190
+
191
+ if (node.children) {
192
+ for (const child of node.children) walk(child, depth + 1);
193
+ }
194
+ }
195
+
196
+ walk(tree);
197
+ let text = lines.join("\n");
198
+ if (text.length > maxChars) {
199
+ text = text.slice(0, maxChars) + `\n... (truncated at ${maxChars} chars)`;
200
+ }
201
+
202
+ return { text, refs, count: snapshotCounter };
61
203
  }
62
204
 
205
+ // Resolve ref (e5) to a Playwright locator
206
+ async function resolveRef(page, ref) {
207
+ const info = snapshotRefs.get(ref);
208
+ if (!info) throw new Error(`Unknown ref "${ref}". Take a fresh snapshot first.`);
209
+
210
+ // Try role + name first (most reliable)
211
+ const { role, name } = info;
212
+ if (name) {
213
+ const locator = page.getByRole(role, { name, exact: false });
214
+ const count = await locator.count();
215
+ if (count === 1) return locator;
216
+ if (count > 1) return locator.first();
217
+ }
218
+
219
+ // Fallback to role only
220
+ const locator = page.getByRole(role);
221
+ const count = await locator.count();
222
+ if (count === 1) return locator;
223
+ if (count > 0) return locator.first();
224
+
225
+ throw new Error(`Could not locate element for ref "${ref}" (role=${role}, name="${name}"). Page may have changed — take a fresh snapshot.`);
226
+ }
227
+
228
+ // Check if param is a ref (e.g., "e5") or a CSS selector
229
+ function isRef(param) {
230
+ return /^e\d+$/.test(param);
231
+ }
232
+
233
+ // Get a locator from either ref or CSS selector
234
+ async function getLocator(page, selectorOrRef) {
235
+ if (isRef(selectorOrRef)) return resolveRef(page, selectorOrRef);
236
+ return page.locator(selectorOrRef);
237
+ }
238
+
239
+ // ── Error wrapping ───────────────────────────────────────────────────────────
240
+ function wrapError(error) {
241
+ const msg = error.message;
242
+ if (msg.includes("Timeout") && msg.includes("waiting for selector")) {
243
+ return `Element not found within timeout. Check the selector or take a fresh snapshot. Error: ${msg}`;
244
+ }
245
+ if (msg.includes("Target closed") || msg.includes("has been closed")) {
246
+ return `Browser/page was closed. Use navigate to open a new page. Error: ${msg}`;
247
+ }
248
+ if (msg.includes("net::ERR_CONNECTION_REFUSED")) {
249
+ return `Connection refused. Is the server running? Error: ${msg}`;
250
+ }
251
+ if (msg.includes("net::ERR_NAME_NOT_RESOLVED")) {
252
+ return `DNS resolution failed. Check the URL. Error: ${msg}`;
253
+ }
254
+ if (msg.includes("strict mode violation")) {
255
+ return `Multiple elements match. Use a more specific selector or take a snapshot and use refs. Error: ${msg}`;
256
+ }
257
+ if (msg.includes("not visible")) {
258
+ return `Element not visible. Try scrolling to it first: scroll("selector") or scroll("down"). Error: ${msg}`;
259
+ }
260
+ if (msg.includes("Unknown ref")) return msg;
261
+ return `Browser error: ${msg}`;
262
+ }
263
+
264
+ // ── Main action handler ──────────────────────────────────────────────────────
63
265
  export async function browserAction(action, param1, param2) {
64
266
  console.log(` [browser] ${action}: ${param1 || ""}`);
65
267
 
66
268
  try {
67
269
  switch (action) {
270
+
271
+ // ── Navigation ──────────────────────────────────────────────────────
68
272
  case "navigate":
69
273
  case "openPage": {
70
274
  if (!param1) return "Error: URL is required.";
71
- if (isBlockedUrl(param1)) return `Error: Navigation to "${param1}" is blocked for security (private/local addresses not allowed).`;
275
+ if (isBlockedUrl(param1)) return `Error: Navigation to "${param1}" is blocked (private network range). Localhost is allowed.`;
72
276
  const p = await ensureBrowser();
73
277
  await p.goto(param1, { waitUntil: "domcontentloaded" });
74
278
  const title = await p.title();
75
- return `Navigated to: ${param1}\nTitle: ${title}`;
279
+ return `Navigated to: ${param1}\nTitle: ${title}\nTab: ${activeTargetId}`;
76
280
  }
77
281
 
282
+ case "reload": {
283
+ await currentPage().reload({ waitUntil: "domcontentloaded" });
284
+ const title = await currentPage().title();
285
+ return `Reloaded. Title: ${title}`;
286
+ }
287
+
288
+ case "goBack": {
289
+ await currentPage().goBack({ waitUntil: "domcontentloaded" });
290
+ return `Back → ${currentPage().url()}`;
291
+ }
292
+
293
+ case "goForward": {
294
+ await currentPage().goForward({ waitUntil: "domcontentloaded" });
295
+ return `Forward → ${currentPage().url()}`;
296
+ }
297
+
298
+ // ── Snapshots (ARIA tree) ───────────────────────────────────────────
299
+ case "snapshot": {
300
+ const p = await ensureBrowser();
301
+ const opts = {};
302
+ if (param1) {
303
+ try { Object.assign(opts, JSON.parse(param1)); } catch {
304
+ opts.interactive = param1 === "interactive";
305
+ opts.compact = param1 === "compact";
306
+ }
307
+ }
308
+ const { text, refs, count } = await buildAccessibilitySnapshot(p, opts);
309
+ return `Accessibility snapshot (${count} elements):\n\n${text}\n\nUse refs like "e1", "e5" in click/fill/type actions instead of CSS selectors.`;
310
+ }
311
+
312
+ // ── Interaction ─────────────────────────────────────────────────────
78
313
  case "click": {
79
- if (!param1) return "Error: selector is required.";
80
- await currentPage().click(param1);
81
- return `Clicked: ${param1}`;
314
+ if (!param1) return "Error: selector or ref (e.g., e5) is required.";
315
+ const page = currentPage();
316
+ const locator = await getLocator(page, param1);
317
+ const opts = {};
318
+ if (param2) {
319
+ try { Object.assign(opts, JSON.parse(param2)); } catch {
320
+ if (param2 === "double") opts.clickCount = 2;
321
+ if (param2 === "right") opts.button = "right";
322
+ if (param2 === "middle") opts.button = "middle";
323
+ }
324
+ }
325
+ await locator.click(opts);
326
+ return `Clicked: ${param1}${opts.clickCount === 2 ? " (double-click)" : ""}${opts.button ? ` (${opts.button} button)` : ""}`;
82
327
  }
83
328
 
84
329
  case "fill": {
85
- if (!param1 || param2 === undefined) return "Error: selector and value are required.";
86
- await currentPage().fill(param1, param2);
330
+ if (!param1 || param2 === undefined) return "Error: selector/ref and value required.";
331
+ const locator = await getLocator(currentPage(), param1);
332
+ await locator.fill(param2);
87
333
  return `Filled "${param1}" with "${param2}"`;
88
334
  }
89
335
 
336
+ case "type": {
337
+ if (!param1 || param2 === undefined) return "Error: selector/ref and text required.";
338
+ const locator = await getLocator(currentPage(), param1);
339
+ await locator.click();
340
+ await currentPage().keyboard.type(param2, { delay: 50 });
341
+ return `Typed "${param2}" into "${param1}"`;
342
+ }
343
+
344
+ case "hover": {
345
+ if (!param1) return "Error: selector/ref required.";
346
+ const locator = await getLocator(currentPage(), param1);
347
+ await locator.hover();
348
+ return `Hovered: ${param1}`;
349
+ }
350
+
351
+ case "selectOption": {
352
+ if (!param1 || param2 === undefined) return "Error: selector/ref and value required.";
353
+ const locator = await getLocator(currentPage(), param1);
354
+ await locator.selectOption(param2);
355
+ return `Selected "${param2}" in "${param1}"`;
356
+ }
357
+
358
+ case "pressKey": {
359
+ if (!param1) return "Error: key required (Enter, Tab, Escape, ArrowDown, etc.)";
360
+ await currentPage().keyboard.press(param1);
361
+ return `Pressed: ${param1}`;
362
+ }
363
+
364
+ case "scroll": {
365
+ const page = currentPage();
366
+ const direction = param1 || "down";
367
+ const amount = parseInt(param2 || "500");
368
+ if (direction === "up") {
369
+ await page.evaluate((px) => window.scrollBy(0, -px), amount);
370
+ } else if (direction === "down") {
371
+ await page.evaluate((px) => window.scrollBy(0, px), amount);
372
+ } else if (direction === "left") {
373
+ await page.evaluate((px) => window.scrollBy(-px, 0), amount);
374
+ } else if (direction === "right") {
375
+ await page.evaluate((px) => window.scrollBy(px, 0), amount);
376
+ } else {
377
+ // Scroll to element (selector or ref)
378
+ if (isRef(direction)) {
379
+ const loc = await resolveRef(page, direction);
380
+ await loc.scrollIntoViewIfNeeded();
381
+ } else {
382
+ await page.evaluate((sel) => {
383
+ const el = document.querySelector(sel);
384
+ if (el) el.scrollIntoView({ behavior: "smooth", block: "center" });
385
+ }, direction);
386
+ }
387
+ }
388
+ return `Scrolled ${direction}${["up", "down", "left", "right"].includes(direction) ? ` ${amount}px` : ""}`;
389
+ }
390
+
391
+ case "drag": {
392
+ if (!param1 || !param2) return "Error: source and target selector/ref required.";
393
+ const page = currentPage();
394
+ const source = await getLocator(page, param1);
395
+ const target = await getLocator(page, param2);
396
+ await source.dragTo(target);
397
+ return `Dragged "${param1}" → "${param2}"`;
398
+ }
399
+
400
+ // ── Content extraction ──────────────────────────────────────────────
90
401
  case "getText": {
91
- const selector = param1 || "body";
92
- const text = await currentPage().textContent(selector);
93
- const trimmed = (text || "").trim().slice(0, 10000);
94
- return trimmed || "(empty)";
402
+ const sel = param1 || "body";
403
+ const locator = await getLocator(currentPage(), sel);
404
+ const text = await locator.textContent();
405
+ return (text || "").trim().slice(0, 10000) || "(empty)";
406
+ }
407
+
408
+ case "getContent": {
409
+ const sel = param1 || "body";
410
+ const html = await currentPage().evaluate((s) => {
411
+ const el = s === "body" ? document.body : document.querySelector(s);
412
+ return el ? el.innerHTML : null;
413
+ }, sel);
414
+ if (!html) return `No element found: ${sel}`;
415
+ return html.slice(0, 20000);
95
416
  }
96
417
 
418
+ // ── Screenshots & PDF ──────────────────────────────────────────────
97
419
  case "screenshot": {
98
420
  const p = await ensureBrowser();
99
- const path = param1 || `/tmp/screenshot-${Date.now()}.png`;
100
- await p.screenshot({ path, fullPage: param2 === "full" });
101
- return `Screenshot saved to: ${path}`;
421
+ const opts = { fullPage: false };
422
+ let path = `/tmp/screenshot-${Date.now()}.png`;
423
+
424
+ if (param1 && param1.startsWith("/")) {
425
+ path = param1;
426
+ } else if (param1) {
427
+ // param1 might be a selector/ref for element screenshot
428
+ try {
429
+ const locator = await getLocator(p, param1);
430
+ path = param2 || path;
431
+ await locator.screenshot({ path });
432
+ return `Element screenshot saved: ${path}`;
433
+ } catch {
434
+ // Not a valid selector, treat as path
435
+ path = param1;
436
+ }
437
+ }
438
+ if (param2 === "full") opts.fullPage = true;
439
+ await p.screenshot({ path, ...opts });
440
+ return `Screenshot saved: ${path}`;
441
+ }
442
+
443
+ case "pdf": {
444
+ const path = param1 || `/tmp/page-${Date.now()}.pdf`;
445
+ await currentPage().pdf({ path, format: "A4", printBackground: true });
446
+ return `PDF saved: ${path}`;
102
447
  }
103
448
 
449
+ // ── JavaScript evaluation ───────────────────────────────────────────
104
450
  case "evaluate": {
105
- if (!param1) return "Error: JavaScript expression is required.";
451
+ if (!param1) return "Error: JavaScript expression required.";
106
452
  const result = await currentPage().evaluate(param1);
107
453
  return JSON.stringify(result, null, 2);
108
454
  }
@@ -113,111 +459,276 @@ export async function browserAction(action, param1, param2) {
113
459
  .slice(0, 50)
114
460
  .map((a) => ({ text: a.textContent.trim().slice(0, 80), href: a.href }))
115
461
  );
116
- return links.map((l) => `${l.text} → ${l.href}`).join("\n") || "(no links found)";
462
+ return links.map((l) => `${l.text} → ${l.href}`).join("\n") || "(no links)";
463
+ }
464
+
465
+ // ── Console & errors ────────────────────────────────────────────────
466
+ case "console": {
467
+ const logs = consoleLogs.get(activeTargetId) || [];
468
+ const filter = param1 || "all"; // "all", "error", "warn", "log", "info"
469
+ const limit = parseInt(param2 || "30");
470
+ const filtered = filter === "all" ? logs : logs.filter(l => l.type === filter);
471
+ if (filtered.length === 0) return `No${filter !== "all" ? ` ${filter}` : ""} console messages.`;
472
+ return filtered.slice(-limit).map(l => {
473
+ const time = new Date(l.ts).toISOString().slice(11, 19);
474
+ return `[${time}] ${l.type.toUpperCase()}: ${l.text}`;
475
+ }).join("\n");
476
+ }
477
+
478
+ // ── Waiting ─────────────────────────────────────────────────────────
479
+ case "waitFor": {
480
+ if (!param1) return "Error: condition required.";
481
+ const page = currentPage();
482
+ const timeout = parseInt(param2 || "10000");
483
+
484
+ // Detect wait type
485
+ if (param1.startsWith("url:")) {
486
+ // Wait for URL to contain/match
487
+ const urlPattern = param1.slice(4);
488
+ await page.waitForURL(`**${urlPattern}**`, { timeout });
489
+ return `URL matched: ${page.url()}`;
490
+ }
491
+ if (param1.startsWith("text:")) {
492
+ // Wait for text to appear on page
493
+ const text = param1.slice(5);
494
+ await page.waitForFunction((t) => document.body.innerText.includes(t), text, { timeout });
495
+ return `Text "${text}" found on page.`;
496
+ }
497
+ if (param1.startsWith("js:")) {
498
+ // Wait for JS predicate
499
+ const predicate = param1.slice(3);
500
+ await page.waitForFunction(predicate, null, { timeout });
501
+ return `JS predicate satisfied.`;
502
+ }
503
+ if (param1 === "load" || param1 === "networkidle") {
504
+ await page.waitForLoadState(param1 === "load" ? "load" : "networkidle", { timeout });
505
+ return `Page reached ${param1} state.`;
506
+ }
507
+ // Default: CSS selector
508
+ await page.waitForSelector(param1, { timeout });
509
+ return `Element "${param1}" found.`;
117
510
  }
118
511
 
119
- // Multi-tab support
512
+ case "waitForNavigation": {
513
+ const timeout = param1 ? parseInt(param1) : 30000;
514
+ await currentPage().waitForNavigation({ timeout });
515
+ return `Navigation complete → ${currentPage().url()}`;
516
+ }
517
+
518
+ // ── Tab management ──────────────────────────────────────────────────
120
519
  case "newTab": {
121
- const url = param1;
122
- if (url && isBlockedUrl(url)) return `Error: Navigation to "${url}" is blocked.`;
520
+ if (param1 && isBlockedUrl(param1)) return `Error: URL "${param1}" is blocked.`;
123
521
  if (!browserContext) await ensureBrowser();
124
- const newPage = await browserContext.newPage();
125
- newPage.setDefaultTimeout(15000);
126
- pages.push(newPage);
127
- if (url) {
128
- await newPage.goto(url, { waitUntil: "domcontentloaded" });
129
- return `Opened new tab (${pages.length - 1}) at: ${url}`;
522
+ const page = await browserContext.newPage();
523
+ page.setDefaultTimeout(15000);
524
+ const tid = genTargetId();
525
+ pages.set(tid, page);
526
+ attachConsoleLogs(tid, page);
527
+ activeTargetId = tid;
528
+ if (param1) {
529
+ await page.goto(param1, { waitUntil: "domcontentloaded" });
530
+ return `Opened tab ${tid} at: ${param1}`;
130
531
  }
131
- return `Opened new blank tab (index: ${pages.length - 1})`;
532
+ return `Opened blank tab: ${tid}`;
132
533
  }
133
534
 
134
535
  case "switchTab": {
135
- const idx = parseInt(param1 || "0");
136
- if (isNaN(idx) || idx < 0 || idx >= pages.length) {
137
- return `Error: Tab index ${idx} out of range. Open tabs: 0–${pages.length - 1}`;
138
- }
139
- // Bring to focus (Playwright doesn't have focus concept, but we track active)
140
- pages.push(pages.splice(idx, 1)[0]); // Move selected to end (= current)
141
- return `Switched to tab ${idx} (now active)`;
536
+ if (!param1) return `Error: targetId required. Use listTabs to see open tabs.`;
537
+ if (!pages.has(param1)) return `Error: Tab "${param1}" not found. Use listTabs.`;
538
+ activeTargetId = param1;
539
+ const page = pages.get(param1);
540
+ const title = await page.title().catch(() => "?");
541
+ return `Switched to ${param1}: ${title} - ${page.url()}`;
142
542
  }
143
543
 
144
544
  case "listTabs": {
145
- if (pages.length === 0) return "No open tabs.";
146
- const titles = await Promise.all(
147
- pages.map(async (p, i) => {
148
- if (p.isClosed()) return ` ${i}: [closed]`;
149
- const title = await p.title().catch(() => "?");
150
- const url = p.url();
151
- return ` ${i}${i === pages.length - 1 ? " (active)" : ""}: ${title} - ${url}`;
152
- })
153
- );
154
- return `Open tabs (${pages.length}):\n${titles.join("\n")}`;
545
+ if (pages.size === 0) return "No open tabs.";
546
+ const entries = [];
547
+ for (const [tid, page] of pages) {
548
+ if (page.isClosed()) { entries.push(` ${tid}: [closed]`); continue; }
549
+ const title = await page.title().catch(() => "?");
550
+ const marker = tid === activeTargetId ? " (active)" : "";
551
+ entries.push(` ${tid}${marker}: ${title} - ${page.url()}`);
552
+ }
553
+ return `Open tabs (${pages.size}):\n${entries.join("\n")}`;
155
554
  }
156
555
 
157
556
  case "closeTab": {
158
- const idx = parseInt(param1 || `${pages.length - 1}`);
159
- if (idx < 0 || idx >= pages.length) return `Error: Tab index ${idx} out of range.`;
160
- await pages[idx].close();
161
- pages.splice(idx, 1);
162
- return `Closed tab ${idx}. Open tabs: ${pages.length}`;
557
+ const tid = param1 || activeTargetId;
558
+ if (!pages.has(tid)) return `Error: Tab "${tid}" not found.`;
559
+ await pages.get(tid).close();
560
+ pages.delete(tid);
561
+ consoleLogs.delete(tid);
562
+ if (activeTargetId === tid) {
563
+ activeTargetId = pages.size > 0 ? pages.keys().next().value : null;
564
+ }
565
+ return `Closed tab ${tid}. Remaining: ${pages.size}`;
163
566
  }
164
567
 
165
- // Waiting
166
- case "waitFor": {
167
- if (!param1) return "Error: selector is required.";
168
- const timeout = param2 ? parseInt(param2) : 10000;
169
- await currentPage().waitForSelector(param1, { timeout });
170
- return `Element "${param1}" found.`;
568
+ // ── Cookies ─────────────────────────────────────────────────────────
569
+ case "getCookies": {
570
+ if (!browserContext) return "No browser open.";
571
+ const cookies = await browserContext.cookies();
572
+ const filtered = param1 ? cookies.filter((c) => c.domain.includes(param1)) : cookies;
573
+ return JSON.stringify(filtered.slice(0, 30), null, 2);
171
574
  }
172
575
 
173
- // Dialog handling
576
+ case "setCookie": {
577
+ if (!param1) return 'Error: cookie JSON required ({"name":"x","value":"y","domain":"example.com"}).';
578
+ if (!browserContext) await ensureBrowser();
579
+ const cookie = JSON.parse(param1);
580
+ await browserContext.addCookies([cookie]);
581
+ return `Cookie "${cookie.name}" set.`;
582
+ }
583
+
584
+ case "clearCookies": {
585
+ if (!browserContext) return "No browser open.";
586
+ await browserContext.clearCookies();
587
+ return "All cookies cleared.";
588
+ }
589
+
590
+ // ── Local/Session Storage ───────────────────────────────────────────
591
+ case "getStorage": {
592
+ // param1: "local" or "session", param2: key (optional)
593
+ const kind = param1 || "local";
594
+ const key = param2;
595
+ const storageObj = kind === "session" ? "sessionStorage" : "localStorage";
596
+ const result = await currentPage().evaluate(([obj, k]) => {
597
+ const s = window[obj];
598
+ if (k) return { [k]: s.getItem(k) };
599
+ const all = {};
600
+ for (let i = 0; i < s.length; i++) { const key = s.key(i); all[key] = s.getItem(key); }
601
+ return all;
602
+ }, [storageObj, key]);
603
+ return JSON.stringify(result, null, 2);
604
+ }
605
+
606
+ case "setStorage": {
607
+ // param1: JSON {"kind":"local","key":"x","value":"y"}
608
+ if (!param1) return 'Error: JSON required {"kind":"local|session","key":"...","value":"..."}';
609
+ const { kind = "local", key, value } = JSON.parse(param1);
610
+ const storageObj = kind === "session" ? "sessionStorage" : "localStorage";
611
+ await currentPage().evaluate(([obj, k, v]) => window[obj].setItem(k, v), [storageObj, key, value]);
612
+ return `Set ${kind}Storage["${key}"]`;
613
+ }
614
+
615
+ case "clearStorage": {
616
+ const kind = param1 || "local";
617
+ const storageObj = kind === "session" ? "sessionStorage" : "localStorage";
618
+ await currentPage().evaluate((obj) => window[obj].clear(), storageObj);
619
+ return `${kind}Storage cleared.`;
620
+ }
621
+
622
+ // ── File upload ─────────────────────────────────────────────────────
623
+ case "upload": {
624
+ if (!param1 || !param2) return "Error: selector/ref and filePath required.";
625
+ const locator = await getLocator(currentPage(), param1);
626
+ await locator.setInputFiles(param2.includes(",") ? param2.split(",").map(f => f.trim()) : param2);
627
+ return `Uploaded file(s) to "${param1}": ${param2}`;
628
+ }
629
+
630
+ // ── Download ────────────────────────────────────────────────────────
631
+ case "download": {
632
+ // Click something that triggers download, wait for it
633
+ if (!param1) return "Error: selector/ref to click for download required.";
634
+ const page = currentPage();
635
+ const downloadDir = join(config.dataDir, "browser", "downloads");
636
+ mkdirSync(downloadDir, { recursive: true });
637
+ const [download] = await Promise.all([
638
+ page.waitForEvent("download", { timeout: 30000 }),
639
+ (await getLocator(page, param1)).click(),
640
+ ]);
641
+ const path = join(downloadDir, download.suggestedFilename());
642
+ await download.saveAs(path);
643
+ return `Downloaded: ${path} (${download.suggestedFilename()})`;
644
+ }
645
+
646
+ // ── Viewport ────────────────────────────────────────────────────────
647
+ case "resize": {
648
+ if (!param1) return 'Error: size required. e.g., "1920x1080" or JSON {"width":1920,"height":1080}';
649
+ let width, height;
650
+ if (param1.includes("x")) {
651
+ [width, height] = param1.split("x").map(Number);
652
+ } else {
653
+ const parsed = JSON.parse(param1);
654
+ width = parsed.width;
655
+ height = parsed.height;
656
+ }
657
+ await currentPage().setViewportSize({ width, height });
658
+ return `Viewport resized to ${width}x${height}`;
659
+ }
660
+
661
+ // ── Highlight ───────────────────────────────────────────────────────
662
+ case "highlight": {
663
+ if (!param1) return "Error: selector/ref required.";
664
+ const page = currentPage();
665
+ if (isRef(param1)) {
666
+ const loc = await resolveRef(page, param1);
667
+ await loc.evaluate((el) => {
668
+ el.style.outline = "3px solid red";
669
+ el.style.outlineOffset = "2px";
670
+ setTimeout(() => { el.style.outline = ""; el.style.outlineOffset = ""; }, 3000);
671
+ });
672
+ } else {
673
+ await page.evaluate((sel) => {
674
+ const el = document.querySelector(sel);
675
+ if (el) {
676
+ el.style.outline = "3px solid red";
677
+ el.style.outlineOffset = "2px";
678
+ setTimeout(() => { el.style.outline = ""; el.style.outlineOffset = ""; }, 3000);
679
+ }
680
+ }, param1);
681
+ }
682
+ return `Highlighted "${param1}" for 3 seconds.`;
683
+ }
684
+
685
+ // ── Dialog handling ─────────────────────────────────────────────────
174
686
  case "handleDialog": {
175
- // Override default dismiss behavior for next dialog
176
- const action = param1 || "accept"; // accept | dismiss
687
+ const dialogAction = param1 || "accept";
177
688
  const text = param2 || "";
178
689
  currentPage().once("dialog", async (dialog) => {
179
- if (action === "accept") await dialog.accept(text);
690
+ if (dialogAction === "accept") await dialog.accept(text);
180
691
  else await dialog.dismiss();
181
692
  });
182
- return `Next dialog will be ${action}ed${text ? ` with text: "${text}"` : ""}.`;
693
+ return `Next dialog will be ${dialogAction}ed${text ? ` with: "${text}"` : ""}.`;
183
694
  }
184
695
 
185
- // Cookies
186
- case "getCookies": {
187
- if (!browserContext) return "No browser open.";
188
- const cookies = await browserContext.cookies();
189
- const filtered = param1
190
- ? cookies.filter((c) => c.domain.includes(param1))
191
- : cookies;
192
- return JSON.stringify(filtered.slice(0, 20), null, 2);
696
+ // ── Session management ──────────────────────────────────────────────
697
+ case "newSession": {
698
+ if (browser) {
699
+ await browser.close().catch(() => {});
700
+ cleanup();
701
+ }
702
+ const profile = param1 || "default";
703
+ await ensureBrowser(profile);
704
+ return `New session started (profile: ${profile}). Auth/cookies from this profile are preserved.`;
193
705
  }
194
706
 
195
- case "setCookie": {
196
- if (!param1) return "Error: cookie JSON is required (e.g., {\"name\":\"token\",\"value\":\"abc\",\"domain\":\"example.com\"}).";
197
- if (!browserContext) await ensureBrowser();
198
- const cookie = JSON.parse(param1);
199
- await browserContext.addCookies([cookie]);
200
- return `Cookie "${cookie.name}" set.`;
707
+ case "status": {
708
+ const connected = browser && browser.isConnected();
709
+ const tabCount = pages.size;
710
+ const profile = "default"; // TODO: track current profile
711
+ if (!connected) return "Browser: not running";
712
+ return `Browser: running | Tabs: ${tabCount} | Active: ${activeTargetId} | URL: ${currentPage().url()}`;
201
713
  }
202
714
 
203
715
  case "close": {
716
+ if (inactivityTimer) clearTimeout(inactivityTimer);
204
717
  if (browser) {
205
718
  await browser.close();
206
- browser = null;
207
- browserContext = null;
208
- pages.length = 0;
719
+ cleanup();
209
720
  }
210
721
  return "Browser closed.";
211
722
  }
212
723
 
213
724
  default:
214
- return `Unknown action: "${action}". Available: navigate, click, fill, getText, screenshot, evaluate, getLinks, newTab, switchTab, listTabs, closeTab, waitFor, handleDialog, getCookies, setCookie, close`;
725
+ return `Unknown action: "${action}". Available: navigate, snapshot, click, fill, type, hover, selectOption, pressKey, scroll, drag, getText, getContent, screenshot, pdf, evaluate, getLinks, console, waitFor, waitForNavigation, reload, goBack, goForward, newTab, switchTab, listTabs, closeTab, getCookies, setCookie, clearCookies, getStorage, setStorage, clearStorage, upload, download, resize, highlight, handleDialog, newSession, status, close`;
215
726
  }
216
727
  } catch (error) {
217
728
  console.log(` [browser] Error: ${error.message}`);
218
- return `Browser error: ${error.message}`;
729
+ return wrapError(error);
219
730
  }
220
731
  }
221
732
 
222
733
  export const browserActionDescription =
223
- 'browserAction(action: string, param1?: string, param2?: string) - Browser automation via Playwright. Actions: navigate(url), click(selector), fill(selector,value), getText(selector), screenshot(path,full?), evaluate(js), getLinks, newTab(url?), switchTab(index), listTabs, closeTab(index), waitFor(selector,timeoutMs?), handleDialog(accept|dismiss,text?), getCookies(domain?), setCookie(json), close.';
734
+ 'browserAction(action, param1?, param2?) - Heavy Playwright browser automation. Actions: navigate(url), snapshot(opts?), click(selector|ref,opts?), fill(selector|ref,value), type(selector|ref,text), hover(selector|ref), selectOption(selector|ref,value), pressKey(key), scroll(direction|selector|ref,amount?), drag(source,target), getText(selector|ref?), getContent(selector?), screenshot(path|selector?,full?), pdf(path?), evaluate(js), getLinks, console(filter?,limit?), waitFor(condition,timeout?) — conditions: selector, "text:...", "url:...", "js:...", "load", "networkidle", waitForNavigation(timeout?), reload, goBack, goForward, newTab(url?), switchTab(targetId), listTabs, closeTab(targetId?), getCookies(domain?), setCookie(json), clearCookies, getStorage(local|session,key?), setStorage(json), clearStorage(local|session), upload(selector|ref,filePath), download(selector|ref), resize(WxH), highlight(selector|ref), handleDialog(accept|dismiss,text?), newSession(profile?), status, close. Supports ref-based interaction: take snapshot first, then use refs (e1, e5) instead of CSS selectors.';