camel-ai 0.2.72a8__py3-none-any.whl → 0.2.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (53)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +140 -345
  3. camel/memories/agent_memories.py +18 -17
  4. camel/societies/__init__.py +2 -0
  5. camel/societies/workforce/prompts.py +36 -10
  6. camel/societies/workforce/single_agent_worker.py +7 -5
  7. camel/societies/workforce/workforce.py +6 -4
  8. camel/storages/key_value_storages/mem0_cloud.py +48 -47
  9. camel/storages/vectordb_storages/__init__.py +1 -0
  10. camel/storages/vectordb_storages/surreal.py +100 -150
  11. camel/toolkits/__init__.py +6 -1
  12. camel/toolkits/base.py +60 -2
  13. camel/toolkits/excel_toolkit.py +153 -64
  14. camel/toolkits/file_write_toolkit.py +67 -0
  15. camel/toolkits/hybrid_browser_toolkit/config_loader.py +136 -413
  16. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +131 -1966
  17. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1177 -0
  18. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4356 -0
  19. camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
  20. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  21. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +945 -0
  22. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +226 -0
  23. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +522 -0
  24. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  25. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +110 -0
  26. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
  27. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +254 -0
  28. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -0
  29. camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
  30. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +447 -0
  31. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2077 -0
  32. camel/toolkits/mcp_toolkit.py +341 -46
  33. camel/toolkits/message_integration.py +719 -0
  34. camel/toolkits/note_taking_toolkit.py +18 -29
  35. camel/toolkits/notion_mcp_toolkit.py +234 -0
  36. camel/toolkits/screenshot_toolkit.py +116 -31
  37. camel/toolkits/search_toolkit.py +20 -2
  38. camel/toolkits/slack_toolkit.py +43 -48
  39. camel/toolkits/terminal_toolkit.py +288 -46
  40. camel/toolkits/video_analysis_toolkit.py +13 -13
  41. camel/toolkits/video_download_toolkit.py +11 -11
  42. camel/toolkits/web_deploy_toolkit.py +207 -12
  43. camel/types/enums.py +6 -0
  44. {camel_ai-0.2.72a8.dist-info → camel_ai-0.2.73.dist-info}/METADATA +49 -9
  45. {camel_ai-0.2.72a8.dist-info → camel_ai-0.2.73.dist-info}/RECORD +53 -36
  46. /camel/toolkits/{hybrid_browser_toolkit → hybrid_browser_toolkit_py}/actions.py +0 -0
  47. /camel/toolkits/{hybrid_browser_toolkit → hybrid_browser_toolkit_py}/agent.py +0 -0
  48. /camel/toolkits/{hybrid_browser_toolkit → hybrid_browser_toolkit_py}/browser_session.py +0 -0
  49. /camel/toolkits/{hybrid_browser_toolkit → hybrid_browser_toolkit_py}/snapshot.py +0 -0
  50. /camel/toolkits/{hybrid_browser_toolkit → hybrid_browser_toolkit_py}/stealth_script.js +0 -0
  51. /camel/toolkits/{hybrid_browser_toolkit → hybrid_browser_toolkit_py}/unified_analyzer.js +0 -0
  52. {camel_ai-0.2.72a8.dist-info → camel_ai-0.2.73.dist-info}/WHEEL +0 -0
  53. {camel_ai-0.2.72a8.dist-info → camel_ai-0.2.73.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,945 @@
1
+ import { Page, Browser, BrowserContext, chromium } from 'playwright';
2
+ import { BrowserToolkitConfig, SnapshotResult, SnapshotElement, ActionResult, TabInfo, BrowserAction, DetailedTiming } from './types';
3
+ import { ConfigLoader, StealthConfig } from './config-loader';
4
+
5
+ export class HybridBrowserSession {
6
/**
 * Browser handle. Assigned by ensureBrowser() when a browser is launched or
 * attached over CDP; the persistent-context launch path never assigns it.
 */
private browser: Browser | null = null;
/** Context whose pages this session works with; cleared again by close(). */
private context: BrowserContext | null = null;
/** Tracked pages keyed by the ids produced by generateTabId(). */
private pages = new Map<string, Page>();
/** Id of the tab that actions currently target, or null when there is none. */
private currentTabId: string | null = null;
/** Count of tab ids handed out so far; generateTabId() increments it. */
private tabCounter = 0;
/** Provides the browser and stealth settings read throughout the class. */
private configLoader: ConfigLoader;
/** Last scroll offset read from the page; reset to the origin by visitPage(). */
private scrollPosition: { x: number; y: number } = { x: 0, y: 0 };
/** Flipped to true by visitPage() once a navigation has succeeded. */
private hasNavigatedBefore = false;
14
+
15
/**
 * Create a session from toolkit configuration.
 *
 * @param config Toolkit configuration (defaults to an empty object). It is
 *   passed through ConfigLoader.fromPythonConfig, and the resulting loader is
 *   what every other method reads its settings from.
 */
constructor(config: BrowserToolkitConfig = {}) {
  const loader = ConfigLoader.fromPythonConfig(config);
  this.configLoader = loader;
}
19
+
20
/**
 * Start (or attach to) the browser on first use and register the initial tab.
 *
 * Three modes are supported, chosen from the browser configuration:
 *  - CDP: attach to `cdpUrl`, reuse the first existing context (or create
 *    one), and adopt every `about:blank` page as a tracked tab.
 *  - Persistent context: launch with `userDataDir` and adopt the first page
 *    the context already has, if any.
 *  - Plain launch: launch a browser, create a context and one new page.
 *
 * Afterwards every tracked page gets `navigationTimeout` as both its default
 * navigation timeout and its default action timeout.
 *
 * The call is a no-op once a browser or a context exists.
 *
 * @throws Error in CDP mode when the context has no pages, or when no
 *   `about:blank` page could be adopted as the current tab.
 */
async ensureBrowser(): Promise<void> {
  // The persistent-context path only ever sets this.context, so checking
  // this.browser alone would relaunch the browser on every call in that mode.
  if (this.browser || this.context) {
    return;
  }

  const browserConfig = this.configLoader.getBrowserConfig();
  const stealthConfig = this.configLoader.getStealthConfig();

  // Options for contexts created through browser.newContext(). The user agent
  // is a context-level option; chromium.launch() has no such option.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const contextOptions: any = {
    viewport: browserConfig.viewport
  };
  if (stealthConfig.enabled) {
    if (stealthConfig.extraHTTPHeaders) {
      contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
    }
    if (stealthConfig.userAgent) {
      contextOptions.userAgent = stealthConfig.userAgent;
    }
  }

  if (browserConfig.connectOverCdp && browserConfig.cdpUrl) {
    // Attach to an already running browser.
    this.browser = await chromium.connectOverCDP(browserConfig.cdpUrl);

    const contexts = this.browser.contexts();
    if (contexts.length > 0) {
      this.context = contexts[0];
    } else {
      this.context = await this.browser.newContext(contextOptions);
    }

    const pages = this.context.pages();
    if (pages.length === 0) {
      throw new Error('No pages available in CDP mode and newPage() is not supported. Ensure the frontend has pre-created blank tabs.');
    }

    // Only about:blank pages are treated as free for this session to use.
    let availablePageFound = false;
    for (const page of pages) {
      if (page.url() !== 'about:blank') {
        continue;
      }
      const tabId = this.generateTabId();
      this.pages.set(tabId, page);
      if (!this.currentTabId) {
        this.currentTabId = tabId;
        availablePageFound = true;
      }
    }

    if (!availablePageFound) {
      throw new Error('No available blank tabs found in CDP mode. The frontend should have pre-created blank tabs.');
    }
  } else {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const launchOptions: any = {
      headless: browserConfig.headless,
    };
    if (stealthConfig.enabled) {
      launchOptions.args = stealthConfig.args || [];
    }

    if (browserConfig.userDataDir) {
      // launchPersistentContext() takes launch and context options in a single
      // object, so the stealth user agent and headers are passed here directly.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const persistentOptions: any = { ...launchOptions };
      if (stealthConfig.enabled) {
        if (stealthConfig.userAgent) {
          persistentOptions.userAgent = stealthConfig.userAgent;
        }
        if (stealthConfig.extraHTTPHeaders) {
          persistentOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
        }
      }

      this.context = await chromium.launchPersistentContext(
        browserConfig.userDataDir,
        persistentOptions
      );

      const pages = this.context.pages();
      if (pages.length > 0) {
        const initialTabId = this.generateTabId();
        this.pages.set(initialTabId, pages[0]);
        this.currentTabId = initialTabId;
      }
    } else {
      this.browser = await chromium.launch(launchOptions);
      this.context = await this.browser.newContext(contextOptions);

      const initialPage = await this.context.newPage();
      const initialTabId = this.generateTabId();
      this.pages.set(initialTabId, initialPage);
      this.currentTabId = initialTabId;
    }
  }

  // navigationTimeout doubles as the default timeout for all page actions.
  for (const page of this.pages.values()) {
    page.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
    page.setDefaultTimeout(browserConfig.navigationTimeout);
  }
}
129
+
130
/**
 * Produce the next tab id.
 *
 * The id is the configured `tabIdPrefix` followed by the incremented tab
 * counter, left-padded with zeros to `tabCounterPadding` characters.
 *
 * @returns The newly generated tab id.
 */
private generateTabId(): string {
  const { tabIdPrefix, tabCounterPadding } = this.configLoader.getBrowserConfig();
  this.tabCounter += 1;
  const paddedCounter = String(this.tabCounter).padStart(tabCounterPadding, '0');
  return `${tabIdPrefix}${paddedCounter}`;
}
134
+
135
/**
 * Return the page behind the current tab id.
 *
 * @returns The tracked page registered under `currentTabId`.
 * @throws Error when no tab is current or the current id is not tracked.
 */
async getCurrentPage(): Promise<Page> {
  const activePage = this.currentTabId ? this.pages.get(this.currentTabId) : undefined;
  if (!activePage) {
    throw new Error('No active page available');
  }
  return activePage;
}
141
+
142
/**
 * Read the current page's scroll offset and cache it on the session.
 *
 * The offset is evaluated inside the page. If the page cannot be reached or
 * the evaluation fails, a warning is logged and the previously cached
 * position is returned instead of throwing.
 *
 * @returns The scroll offset as `{ x, y }`.
 */
private async getCurrentScrollPosition(): Promise<{ x: number; y: number }> {
  try {
    const page = await this.getCurrentPage();
    // Only x and y are consumed by callers, so nothing else is computed in
    // the page context.
    const scrollInfo = await page.evaluate(() => {
      return {
        x: window.pageXOffset || document.documentElement.scrollLeft || 0,
        y: window.pageYOffset || document.documentElement.scrollTop || 0
      };
    }) as { x: number; y: number };

    this.scrollPosition = { x: scrollInfo.x, y: scrollInfo.y };
    return this.scrollPosition;
  } catch (error) {
    console.warn('Failed to get scroll position:', error);
    return this.scrollPosition;
  }
}
165
+
166
/**
 * Capture a snapshot of the current page for AI consumption.
 *
 * Public entry point; the work is delegated unchanged to
 * getSnapshotForAINative().
 *
 * @param includeCoordinates Forwarded to the native implementation.
 * @param viewportLimit Forwarded to the native implementation.
 * @returns The snapshot result together with timing information.
 */
async getSnapshotForAI(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const nativeResult = this.getSnapshotForAINative(includeCoordinates, viewportLimit);
  return nativeResult;
}
170
+
171
/**
 * Build the AI snapshot of the current page from Playwright's own snapshot.
 *
 * Steps:
 *  1. Call the page's `_snapshotForAI()` to obtain the snapshot text.
 *  2. Extract every `[ref=...]` marker from that text.
 *  3. When coordinates are needed, resolve each ref through the `aria-ref`
 *     selector and record its rounded bounding box.
 *  4. When `viewportLimit` is set, keep only elements that pass
 *     filterElementsInViewport() and drop the other ref lines from the text.
 *
 * Coordinates are gathered when either flag is set: viewport filtering works
 * on coordinates, so skipping them with `viewportLimit` alone would leave the
 * mapping empty and strip every ref line from the snapshot.
 *
 * Failures are not thrown: the error is logged and a placeholder result with
 * an error message as snapshot text and zeroed timings is returned.
 *
 * @param includeCoordinates Whether to resolve a bounding box for each ref.
 * @param viewportLimit Whether to restrict the result to the viewport.
 * @returns Snapshot text, element mapping, metadata and timing.
 * @throws Error (from getCurrentPage) when there is no active page.
 */
private async getSnapshotForAINative(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  try {
    const snapshotStart = Date.now();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const snapshotText = await (page as any)._snapshotForAI();
    const snapshotTime = Date.now() - snapshotStart;

    // Collect the refs mentioned in the snapshot text.
    const refPattern = /\[ref=([^\]]+)\]/g;
    const refs: string[] = [];
    let match: RegExpExecArray | null;
    while ((match = refPattern.exec(snapshotText)) !== null) {
      refs.push(match[1]);
    }

    const mappingStart = Date.now();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const playwrightMapping: Record<string, any> = {};

    if (includeCoordinates || viewportLimit) {
      for (const ref of refs) {
        try {
          const element = page.locator(`aria-ref=${ref}`).first();
          if ((await element.count()) === 0) {
            continue;
          }

          const boundingBox = await element.boundingBox();
          if (!boundingBox) {
            continue;
          }

          playwrightMapping[ref] = {
            ref,
            coordinates: {
              x: Math.round(boundingBox.x),
              y: Math.round(boundingBox.y),
              width: Math.round(boundingBox.width),
              height: Math.round(boundingBox.height)
            }
          };
        } catch {
          // Best effort: a ref whose box cannot be read is left unmapped.
        }
      }
    }

    const mappingTime = Date.now() - mappingStart;

    let finalElements = playwrightMapping;
    let finalSnapshot = snapshotText;

    if (viewportLimit) {
      // Fall back to 1280x720 when the page reports no viewport size.
      const viewport = page.viewportSize() || { width: 1280, height: 720 };
      const scrollPos = await this.getCurrentScrollPosition();
      finalElements = this.filterElementsInViewport(playwrightMapping, viewport, scrollPos);
      finalSnapshot = this.rebuildSnapshotText(snapshotText, finalElements);
    }

    const totalTime = Date.now() - startTime;

    return {
      snapshot: finalSnapshot,
      elements: finalElements,
      metadata: {
        elementCount: Object.keys(finalElements).length,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: snapshotTime,
        coordinate_enrichment_time_ms: 0, // Integrated into mapping
        aria_mapping_time_ms: mappingTime,
      },
    };
  } catch (error) {
    console.error('Failed to get AI snapshot with native mapping:', error);
    const totalTime = Date.now() - startTime;

    return {
      snapshot: 'Error: Unable to capture page snapshot',
      elements: {},
      metadata: {
        elementCount: 0,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: 0,
        coordinate_enrichment_time_ms: 0,
        aria_mapping_time_ms: 0,
      },
    };
  }
}
274
+
275
+
276
+
277
/**
 * Click the element identified by an aria-ref and track any tab it opens.
 *
 * Navigable links (an `<a>` whose href is not an anchor or a void-javascript
 * value, per the configured prefixes) are forced to open in a new tab by
 * setting their `target` attribute before clicking. Elements that already
 * open a new tab are handled the same way. When a new page appears within
 * `popupTimeout` it is registered, given the default timeouts and made the
 * current tab.
 *
 * Other elements get a normal click first and a forced click as fallback.
 *
 * @param page Page that contains the element.
 * @param ref Ref taken from the AI snapshot.
 * @returns `success` plus the method used; `newTabId` when a tab was opened;
 *   `error` when the element is missing or the click threw. Never throws.
 */
private async performClick(page: Page, ref: string): Promise<{ success: boolean; method?: string; error?: string; newTabId?: string }> {
  try {
    // Re-run the snapshot first, as the other action helpers do, before the
    // aria-ref selector is used.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await (page as any)._snapshotForAI();

    const element = page.locator(`aria-ref=${ref}`).first();
    if ((await element.count()) === 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    const browserConfig = this.configLoader.getBrowserConfig();
    const target = await element.getAttribute(browserConfig.targetAttribute);
    const href = await element.getAttribute(browserConfig.hrefAttribute);
    const onclick = await element.getAttribute(browserConfig.onclickAttribute);
    const tagName = await element.evaluate(el => el.tagName.toLowerCase());

    // Does the element open a new tab without any help?
    const naturallyOpensNewTab = Boolean(
      target === browserConfig.blankTarget ||
      (onclick && onclick.includes(browserConfig.windowOpenString)) ||
      (tagName === 'a' && href && (href.includes(`javascript:${browserConfig.windowOpenString}`) || href.includes(browserConfig.blankTarget)))
    );

    // Is it a link that actually navigates somewhere?
    const isNavigableLink = Boolean(
      tagName === 'a' && href &&
      !href.startsWith(browserConfig.anchorOnly) &&
      !href.startsWith(browserConfig.javascriptVoidPrefix) &&
      href !== browserConfig.javascriptVoidEmpty &&
      href !== browserConfig.anchorOnly
    );

    if (!naturallyOpensNewTab && !isNavigableLink) {
      try {
        await element.click({ timeout: browserConfig.clickTimeout });
      } catch (clickError) {
        // The normal click failed (e.g. while scrolling into view); force it.
        await element.click({ force: browserConfig.forceClick });
      }
      return { success: true, method: 'playwright-aria-ref' };
    }

    // Every navigable link is opened in a new tab, so force the target on
    // links that would otherwise navigate in place.
    if (isNavigableLink && !naturallyOpensNewTab) {
      await element.evaluate((el, blankTarget) => {
        if (el.tagName.toLowerCase() === 'a') {
          el.setAttribute('target', blankTarget);
        }
      }, browserConfig.blankTarget);
    }

    // Start listening before the click. The rejection handler is attached
    // right away: if the click below throws, the pending wait would otherwise
    // time out later as an unhandled promise rejection.
    const popupPromise: Promise<Page | null> = page
      .context()
      .waitForEvent('page', { timeout: browserConfig.popupTimeout })
      .catch(() => null);

    await element.click({ force: browserConfig.forceClick });

    const newPage = await popupPromise;
    if (!newPage) {
      // No tab appeared in time; the click itself still succeeded.
      return { success: true, method: 'playwright-aria-ref' };
    }

    try {
      const newTabId = this.generateTabId();
      this.pages.set(newTabId, newPage);

      newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
      newPage.setDefaultTimeout(browserConfig.navigationTimeout);

      // Switch to the new tab.
      this.currentTabId = newTabId;
      await newPage.bringToFront();

      // Best-effort wait for the new page; a timeout here is ignored.
      await newPage.waitForLoadState('domcontentloaded', { timeout: browserConfig.popupTimeout }).catch(() => {});

      return { success: true, method: 'playwright-aria-ref-newtab', newTabId };
    } catch (popupError) {
      return { success: true, method: 'playwright-aria-ref' };
    }
  } catch (error) {
    console.error('[performClick] Exception during click for ref: %s', ref, error);
    return { success: false, error: `Click failed with exception: ${error}` };
  }
}
384
+
385
/**
 * Fill text into the element identified by an aria-ref.
 *
 * @param page Page that contains the element.
 * @param ref Ref taken from the AI snapshot.
 * @param text Text passed to the locator's `fill()`.
 * @returns `{ success: true }`, or `success: false` with an `error` message
 *   when the element is missing or filling threw. Never throws.
 */
private async performType(page: Page, ref: string, text: string): Promise<{ success: boolean; error?: string }> {
  try {
    // Re-run the snapshot before resolving the ref.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await (page as any)._snapshotForAI();

    const field = page.locator(`aria-ref=${ref}`).first();
    const matches = await field.count();
    if (matches <= 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    await field.fill(text);
    return { success: true };
  } catch (error) {
    return { success: false, error: `Type failed: ${error}` };
  }
}
410
+
411
/**
 * Select an option on the element identified by an aria-ref.
 *
 * @param page Page that contains the element.
 * @param ref Ref taken from the AI snapshot.
 * @param value Value passed to the locator's `selectOption()`.
 * @returns `{ success: true }`, or `success: false` with an `error` message
 *   when the element is missing or selecting threw. Never throws.
 */
private async performSelect(page: Page, ref: string, value: string): Promise<{ success: boolean; error?: string }> {
  try {
    // Re-run the snapshot before resolving the ref.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await (page as any)._snapshotForAI();

    const dropdown = page.locator(`aria-ref=${ref}`).first();
    const matches = await dropdown.count();
    if (matches <= 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    await dropdown.selectOption(value);
    return { success: true };
  } catch (error) {
    return { success: false, error: `Select failed: ${error}` };
  }
}
436
+
437
+
438
+
439
/**
 * Run one browser action against the current page and report timings.
 *
 * Supported action types are `click`, `type`, `select`, `scroll` and
 * `enter`. After the action, the method waits for the page to stabilise
 * (waitForPageStability) on the page that was current when the call began.
 *
 * Failures of the action itself are not thrown; they are returned as
 * `success: false` with the timings collected up to that point.
 *
 * @param action The action to perform.
 * @returns Result with `success`, a message and timing; `newTabId` is added
 *   when a click opened a new tab.
 * @throws Error (from getCurrentPage) when there is no active page.
 */
async executeAction(action: BrowserAction): Promise<ActionResult> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  let elementSearchTime = 0;
  let actionExecutionTime = 0;
  let stabilityWaitTime = 0;

  try {
    const elementSearchStart = Date.now();

    // Records the search time, runs the step, then records how long it took.
    // If the step throws, the execution time keeps its initial value of 0.
    const timed = async <T>(step: () => Promise<T>): Promise<T> => {
      elementSearchTime = Date.now() - elementSearchStart;
      const stepStart = Date.now();
      const outcome = await step();
      actionExecutionTime = Date.now() - stepStart;
      return outcome;
    };

    let newTabId: string | undefined;

    switch (action.type) {
      case 'click': {
        const { ref } = action;
        newTabId = await timed(async () => {
          const clickResult = await this.performClick(page, ref);
          if (!clickResult.success) {
            throw new Error(`Click failed: ${clickResult.error}`);
          }
          // Present only when the click opened a new tab.
          return clickResult.newTabId;
        });
        break;
      }

      case 'type': {
        const { ref, text } = action;
        await timed(async () => {
          const typeResult = await this.performType(page, ref, text);
          if (!typeResult.success) {
            throw new Error(`Type failed: ${typeResult.error}`);
          }
        });
        break;
      }

      case 'select': {
        const { ref, value } = action;
        await timed(async () => {
          const selectResult = await this.performSelect(page, ref, value);
          if (!selectResult.success) {
            throw new Error(`Select failed: ${selectResult.error}`);
          }
        });
        break;
      }

      case 'scroll': {
        const { direction, amount } = action;
        await timed(async () => {
          // 'up' scrolls by a negative amount; anything else scrolls down.
          const scrollAmount = direction === 'up' ? -amount : amount;
          await page.evaluate((delta: number) => {
            window.scrollBy(0, delta);
          }, scrollAmount);
          // Refresh the cached scroll position.
          await this.getCurrentScrollPosition();
        });
        break;
      }

      case 'enter': {
        await timed(async () => {
          const browserConfig = this.configLoader.getBrowserConfig();
          await page.keyboard.press(browserConfig.enterKey);
        });
        break;
      }

      default:
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        throw new Error(`Unknown action type: ${(action as any).type}`);
    }

    const stabilityStart = Date.now();
    const stabilityResult = await this.waitForPageStability(page);
    stabilityWaitTime = Date.now() - stabilityStart;

    const totalTime = Date.now() - startTime;

    return {
      success: true,
      message: `Action ${action.type} executed successfully`,
      timing: {
        total_time_ms: totalTime,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
        dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
        network_idle_time_ms: stabilityResult.networkIdleTime,
      },
      ...(newTabId && { newTabId }),
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Action ${action.type} failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
      },
    };
  }
}
561
+
562
/**
 * Wait for the page to reach the two configured load states and time each.
 *
 * The first wait uses `domContentLoadedState` with `pageStabilityTimeout`,
 * the second `networkIdleState` with `networkIdleTimeout`. A failed or
 * timed-out wait is ignored; its duration (and any later one) stays 0.
 *
 * @param page Page to wait on.
 * @returns Milliseconds spent in each wait.
 */
private async waitForPageStability(page: Page): Promise<{ domContentLoadedTime: number; networkIdleTime: number }> {
  const elapsed = { domContentLoadedTime: 0, networkIdleTime: 0 };

  try {
    const domStart = Date.now();
    const settings = this.configLoader.getBrowserConfig();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await page.waitForLoadState(settings.domContentLoadedState as any, { timeout: settings.pageStabilityTimeout });
    elapsed.domContentLoadedTime = Date.now() - domStart;

    const networkStart = Date.now();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await page.waitForLoadState(settings.networkIdleState as any, { timeout: settings.networkIdleTimeout });
    elapsed.networkIdleTime = Date.now() - networkStart;
  } catch (error) {
    // Deliberately best effort: callers continue even if the page never settles.
  }

  return {
    domContentLoadedTime: elapsed.domContentLoadedTime,
    networkIdleTime: elapsed.networkIdleTime,
  };
}
581
+
582
/**
 * Navigate to a URL, reusing the current tab or opening a new one.
 *
 * The current tab is reused when it shows a blank page (one of the configured
 * blank URLs, the default start URL, or a URL with the configured data
 * prefix) or when this session has not navigated yet. Otherwise the URL is
 * loaded in another tab, which then becomes the current tab:
 *  - CDP mode: an untracked `about:blank` page of the context is adopted.
 *  - Otherwise: a new page is created in the context.
 *
 * Errors are not thrown; they are reported as `success: false` with zeroed
 * navigation timings.
 *
 * @param url Address to open.
 * @returns Result with message and timing; `newTabId` when a new tab was used.
 */
async visitPage(url: string): Promise<ActionResult & { newTabId?: string }> {
  const startTime = Date.now();

  try {
    const currentPage = await this.getCurrentPage();
    const currentUrl = currentPage.url();
    const browserConfig = this.configLoader.getBrowserConfig();

    const onBlankPage =
      browserConfig.blankPageUrls.includes(currentUrl) ||
      currentUrl === browserConfig.defaultStartUrl ||
      currentUrl.startsWith(browserConfig.dataUrlPrefix);

    const gotoOptions = {
      timeout: browserConfig.navigationTimeout,
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      waitUntil: browserConfig.domContentLoadedState as any
    };

    if (onBlankPage || !this.hasNavigatedBefore) {
      // Reuse the current tab.
      const navigationStart = Date.now();
      await currentPage.goto(url, gotoOptions);

      this.scrollPosition = { x: 0, y: 0 };
      this.hasNavigatedBefore = true;

      const navigationTime = Date.now() - navigationStart;
      const stabilityResult = await this.waitForPageStability(currentPage);
      const totalTime = Date.now() - startTime;

      return {
        success: true,
        message: `Navigated to ${url}`,
        timing: {
          total_time_ms: totalTime,
          navigation_time_ms: navigationTime,
          dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
          network_idle_time_ms: stabilityResult.networkIdleTime,
        },
      };
    }

    // The current tab has content, so another tab is needed.
    if (!this.context) {
      throw new Error('Browser context not initialized');
    }

    const navigationStart = Date.now();

    let newPage: Page;
    if (browserConfig.connectOverCdp) {
      // CDP mode: adopt a blank page that this session is not tracking yet.
      const trackedPages = new Set(this.pages.values());
      const sparePage = this.context
        .pages()
        .find(candidate => !trackedPages.has(candidate) && candidate.url() === 'about:blank');

      if (!sparePage) {
        throw new Error('No available blank tabs in CDP mode. Frontend should create more blank tabs when half are used.');
      }
      newPage = sparePage;
    } else {
      newPage = await this.context.newPage();
    }

    const newTabId = this.generateTabId();
    this.pages.set(newTabId, newPage);

    newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
    newPage.setDefaultTimeout(browserConfig.navigationTimeout);

    await newPage.goto(url, gotoOptions);

    // Switch to the new tab only after the navigation succeeded.
    this.currentTabId = newTabId;
    await newPage.bringToFront();

    this.scrollPosition = { x: 0, y: 0 };
    this.hasNavigatedBefore = true;

    const navigationTime = Date.now() - navigationStart;
    const stabilityResult = await this.waitForPageStability(newPage);
    const totalTime = Date.now() - startTime;

    return {
      success: true,
      message: `Opened ${url} in new tab`,
      newTabId: newTabId,
      timing: {
        total_time_ms: totalTime,
        navigation_time_ms: navigationTime,
        dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
        network_idle_time_ms: stabilityResult.networkIdleTime,
      },
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Navigation to ${url} failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
        navigation_time_ms: 0,
        dom_content_loaded_time_ms: 0,
        network_idle_time_ms: 0,
      },
    };
  }
}
719
+
720
/**
 * Make a tracked tab the current tab.
 *
 * The internal current-tab id is updated first. Focusing the tab is best
 * effort: a focus event is dispatched inside the page, and when the browser
 * is not headless `bringToFront()` is scheduled in the background after
 * `navigationDelay` milliseconds without being awaited. Failures of either
 * step do not change the return value.
 *
 * @param tabId Id of the tab to switch to.
 * @returns false when the id is unknown or its page is already closed (the
 *   stale entry is removed in that case); true otherwise.
 */
async switchToTab(tabId: string): Promise<boolean> {
  const page = this.pages.get(tabId);
  if (!this.pages.has(tabId)) {
    return false;
  }

  if (page!.isClosed()) {
    this.pages.delete(tabId);
    return false;
  }

  try {
    console.log(`Switching to tab ${tabId}`);

    // Internal state changes before any attempt to focus the tab.
    this.currentTabId = tabId;

    try {
      // Step 1: focus from inside the page; errors here are ignored.
      await page!.evaluate(() => {
        window.focus();
        window.dispatchEvent(new Event('focus'));
      }).catch(() => {});

      // Step 2: outside headless mode, raise the tab later without blocking
      // this call (the original author notes this avoids disrupting the
      // WebSocket exchange).
      const headless = this.configLoader.getBrowserConfig().headless;
      if (!headless) {
        const raiseLater = async (): Promise<void> => {
          const browserConfig = this.configLoader.getBrowserConfig();
          await new Promise(resolve => setTimeout(resolve, browserConfig.navigationDelay));
          try {
            await page!.bringToFront();
          } catch (e) {
            // The tab is already switched internally, so this is only logged.
            console.debug(`bringToFront failed for ${tabId}, but tab is switched internally`);
          }
        };
        Promise.resolve().then(raiseLater);
      }
    } catch (error) {
      // Internal state is already updated, so only warn.
      console.warn(`Tab focus warning for ${tabId}:`, error);
    }

    console.log(`Successfully switched to tab ${tabId}`);
    return true;
  } catch (error) {
    console.error(`Error switching to tab ${tabId}:`, error);
    return false;
  }
}
777
+
778
/**
 * Close a tracked tab and stop tracking it.
 *
 * When the closed tab was the current one, the first remaining tracked tab
 * becomes current, or none if no tabs are left.
 *
 * @param tabId Id of the tab to close.
 * @returns false when the id is not tracked; true otherwise.
 */
async closeTab(tabId: string): Promise<boolean> {
  const doomed = this.pages.get(tabId);
  if (!this.pages.has(tabId)) {
    return false;
  }

  if (!doomed!.isClosed()) {
    await doomed!.close();
  }
  this.pages.delete(tabId);

  if (tabId !== this.currentTabId) {
    return true;
  }

  // The current tab went away: fall back to the first tab still tracked.
  const survivors = Array.from(this.pages.keys());
  this.currentTabId = survivors.length > 0 ? survivors[0] : null;
  return true;
}
802
+
803
/**
 * Describe every tracked tab that is still open.
 *
 * Closed pages are skipped, and so is any page whose title or URL cannot be
 * read.
 *
 * @returns One entry per readable tab with id, title, URL and whether it is
 *   the current tab.
 */
async getTabInfo(): Promise<TabInfo[]> {
  const summaries: TabInfo[] = [];

  for (const [tabId, page] of this.pages) {
    if (page.isClosed()) {
      continue;
    }

    try {
      const title = await page.title();
      summaries.push({
        tab_id: tabId,
        title,
        url: page.url(),
        is_current: tabId === this.currentTabId,
      });
    } catch (error) {
      // A tab that cannot be queried is left out of the listing.
    }
  }

  return summaries;
}
826
+
827
/**
 * Take a screenshot of the current page.
 *
 * Uses the configured `screenshotTimeout` and `fullPageScreenshot` settings.
 * The reported time covers looking up the page as well as the capture.
 *
 * @returns The image buffer and the elapsed time in milliseconds.
 * @throws Error when there is no active page, or whatever the screenshot
 *   call itself throws.
 */
async takeScreenshot(): Promise<{ buffer: Buffer; timing: { screenshot_time_ms: number } }> {
  const startedAt = Date.now();
  const page = await this.getCurrentPage();

  const { screenshotTimeout, fullPageScreenshot } = this.configLoader.getBrowserConfig();
  const buffer = await page.screenshot({
    timeout: screenshotTimeout,
    fullPage: fullPageScreenshot
  });

  return {
    buffer,
    timing: {
      screenshot_time_ms: Date.now() - startedAt,
    },
  };
}
846
+
847
/**
 * Tear the session down: close tracked pages, then the context, then the
 * browser handle, and reset the session's tab bookkeeping.
 *
 * The same `browser.close()` call is used whether the browser was launched
 * or attached over CDP.
 * NOTE(review): tracked pages and the context are closed in CDP mode too —
 * confirm that is intended for a browser owned by another process.
 */
async close(): Promise<void> {
  for (const page of this.pages.values()) {
    if (page.isClosed()) {
      continue;
    }
    await page.close();
  }

  this.pages.clear();
  this.currentTabId = null;

  const context = this.context;
  if (context) {
    await context.close();
    this.context = null;
  }

  const browser = this.browser;
  if (browser) {
    await browser.close();
    this.browser = null;
  }
}
875
+
876
/**
 * Keep only the elements whose box overlaps the viewport rectangle.
 *
 * The rectangle starts at the scroll position multiplied by the configured
 * `scrollPositionScale` and spans the viewport size. Elements that carry no
 * coordinates are always kept.
 * NOTE(review): the coordinates stored by the snapshot code come from
 * Playwright's boundingBox(); confirm that offsetting the rectangle by the
 * scroll position matches the coordinate space of those boxes.
 *
 * @param elements Elements keyed by ref.
 * @param viewport Viewport width and height.
 * @param scrollPos Current scroll offset.
 * @returns A new record holding the elements that passed the filter.
 */
private filterElementsInViewport(
  elements: Record<string, SnapshotElement>,
  viewport: { width: number, height: number },
  scrollPos: { x: number, y: number }
): Record<string, SnapshotElement> {
  const { scrollPositionScale } = this.configLoader.getBrowserConfig();

  // The viewport rectangle does not depend on the element, so compute it once.
  const left = scrollPos.x * scrollPositionScale;
  const top = scrollPos.y * scrollPositionScale;
  const right = left + viewport.width;
  const bottom = top + viewport.height;

  const visible: Record<string, SnapshotElement> = {};

  for (const [ref, element] of Object.entries(elements)) {
    const box = element.coordinates;
    if (!box) {
      // No coordinates to test against: keep the element.
      visible[ref] = element;
      continue;
    }

    // Two rectangles overlap unless one lies entirely to one side of the other.
    const outside =
      box.x >= right ||
      box.y >= bottom ||
      box.x + box.width <= left ||
      box.y + box.height <= top;

    if (!outside) {
      visible[ref] = element;
    }
  }

  return visible;
}
922
+
923
/**
 * Drop snapshot lines that refer to elements removed by filtering.
 *
 * A line is kept when it contains no `[ref=...]` marker, or when the first
 * ref on the line is present in `filteredElements`.
 *
 * @param originalSnapshot Snapshot text, one entry per line.
 * @param filteredElements Elements that survived filtering, keyed by ref.
 * @returns The remaining lines joined with newlines.
 */
private rebuildSnapshotText(originalSnapshot: string, filteredElements: Record<string, SnapshotElement>): string {
  const keptLines = originalSnapshot.split('\n').filter((line) => {
    const refMatch = line.match(/\[ref=([^\]]+)\]/);
    if (!refMatch) {
      // Lines without a ref (headers and the like) always stay.
      return true;
    }
    return Boolean(filteredElements[refMatch[1]]);
  });

  return keptLines.join('\n');
}
944
+
945
+ }