camel-ai 0.2.72a8__py3-none-any.whl → 0.2.73a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (37) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +113 -338
  3. camel/memories/agent_memories.py +18 -17
  4. camel/societies/workforce/prompts.py +10 -4
  5. camel/societies/workforce/single_agent_worker.py +7 -5
  6. camel/toolkits/__init__.py +4 -1
  7. camel/toolkits/base.py +57 -1
  8. camel/toolkits/hybrid_browser_toolkit/config_loader.py +136 -413
  9. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +796 -1631
  10. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4356 -0
  11. camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
  12. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  13. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +916 -0
  14. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +226 -0
  15. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +522 -0
  16. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  17. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +110 -0
  18. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
  19. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +210 -0
  20. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +533 -0
  21. camel/toolkits/message_integration.py +592 -0
  22. camel/toolkits/note_taking_toolkit.py +18 -29
  23. camel/toolkits/screenshot_toolkit.py +116 -31
  24. camel/toolkits/search_toolkit.py +20 -2
  25. camel/toolkits/terminal_toolkit.py +16 -2
  26. camel/toolkits/video_analysis_toolkit.py +13 -13
  27. camel/toolkits/video_download_toolkit.py +11 -11
  28. {camel_ai-0.2.72a8.dist-info → camel_ai-0.2.73a0.dist-info}/METADATA +10 -4
  29. {camel_ai-0.2.72a8.dist-info → camel_ai-0.2.73a0.dist-info}/RECORD +31 -25
  30. camel/toolkits/hybrid_browser_toolkit/actions.py +0 -417
  31. camel/toolkits/hybrid_browser_toolkit/agent.py +0 -311
  32. camel/toolkits/hybrid_browser_toolkit/browser_session.py +0 -740
  33. camel/toolkits/hybrid_browser_toolkit/snapshot.py +0 -227
  34. camel/toolkits/hybrid_browser_toolkit/stealth_script.js +0 -0
  35. camel/toolkits/hybrid_browser_toolkit/unified_analyzer.js +0 -1002
  36. {camel_ai-0.2.72a8.dist-info → camel_ai-0.2.73a0.dist-info}/WHEEL +0 -0
  37. {camel_ai-0.2.72a8.dist-info → camel_ai-0.2.73a0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,916 @@
1
+ import { Page, Browser, BrowserContext, chromium } from 'playwright';
2
+ import { BrowserToolkitConfig, SnapshotResult, SnapshotElement, ActionResult, TabInfo, BrowserAction, DetailedTiming } from './types';
3
+ import { ConfigLoader, StealthConfig } from './config-loader';
4
+
5
+ export class HybridBrowserSession {
6
+ private browser: Browser | null = null;
7
+ private context: BrowserContext | null = null;
8
+ private pages: Map<string, Page> = new Map();
9
+ private currentTabId: string | null = null;
10
+ private tabCounter = 0;
11
+ private configLoader: ConfigLoader;
12
+ private scrollPosition: { x: number; y: number } = {x: 0, y: 0};
13
+ private hasNavigatedBefore = false; // Track if we've navigated before
14
+
15
/**
 * Create a session from a toolkit configuration object.
 *
 * No browser is started here; the configuration is only converted and
 * stored. The browser itself is started by `ensureBrowser()`.
 *
 * @param config - Toolkit configuration, converted through
 *   `ConfigLoader.fromPythonConfig`. Defaults to an empty object.
 */
constructor(config: BrowserToolkitConfig = {}) {
  const loader = ConfigLoader.fromPythonConfig(config);
  this.configLoader = loader;
}
19
+
20
/**
 * Lazily start (or attach to) the browser and register the initial tab(s).
 *
 * The start-up mode is chosen from the browser configuration:
 *  - CDP: when `connectOverCdp` and `cdpUrl` are set, attach to the running
 *    browser, reuse its first context (or create one) and register every
 *    page that context already has.
 *  - Persistent: when `userDataDir` is set, launch a persistent context.
 *  - Default: launch a fresh browser, create one context and one page.
 *
 * Calling this again after a successful start is a no-op.
 */
async ensureBrowser(): Promise<void> {
  // The persistent-context path never stores a Browser handle, so the
  // context has to be checked too; otherwise every call would try to launch
  // another browser on the same profile directory.
  if (this.browser || this.context) {
    return;
  }

  const browserConfig = this.configLoader.getBrowserConfig();
  const stealthConfig = this.configLoader.getStealthConfig();

  // Options for contexts created by this method (not for reused ones).
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- loose option bag, as in the original code
  const buildContextOptions = (): any => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const options: any = { viewport: browserConfig.viewport };
    if (stealthConfig.enabled && stealthConfig.extraHTTPHeaders) {
      options.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
    }
    return options;
  };

  // Track a page under a fresh tab id; the first tracked page becomes current.
  const registerPage = (page: Page): void => {
    const tabId = this.generateTabId();
    this.pages.set(tabId, page);
    if (!this.currentTabId) {
      this.currentTabId = tabId;
    }
  };

  if (browserConfig.connectOverCdp && browserConfig.cdpUrl) {
    // Attach to an existing browser via CDP.
    const browser = await chromium.connectOverCDP(browserConfig.cdpUrl);
    this.browser = browser;

    const existingContexts = browser.contexts();
    const context = existingContexts.length > 0
      ? existingContexts[0]
      : await browser.newContext(buildContextOptions());
    this.context = context;

    const openPages = context.pages();
    if (openPages.length > 0) {
      for (const page of openPages) {
        registerPage(page);
      }
    } else {
      registerPage(await context.newPage());
    }
  } else {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const launchOptions: any = { headless: browserConfig.headless };
    if (stealthConfig.enabled) {
      launchOptions.args = stealthConfig.args ?? [];
    }
    const stealthUserAgent = stealthConfig.enabled ? stealthConfig.userAgent : undefined;

    if (browserConfig.userDataDir) {
      // launchPersistentContext takes context options as well, so the user
      // agent can be passed together with the launch options.
      if (stealthUserAgent) {
        launchOptions.userAgent = stealthUserAgent;
      }
      const context = await chromium.launchPersistentContext(
        browserConfig.userDataDir,
        launchOptions
      );
      this.context = context;

      // Make sure there is always a current tab, even if the persistent
      // context came up without any page.
      const openPages = context.pages();
      registerPage(openPages.length > 0 ? openPages[0] : await context.newPage());
    } else {
      const browser = await chromium.launch(launchOptions);
      this.browser = browser;

      // The user agent is a context option, not a launch option, so it must
      // be applied here to take effect.
      const contextOptions = buildContextOptions();
      if (stealthUserAgent) {
        contextOptions.userAgent = stealthUserAgent;
      }
      const context = await browser.newContext(contextOptions);
      this.context = context;

      registerPage(await context.newPage());
    }
  }

  // The navigation timeout is used for both navigation and general actions.
  for (const page of this.pages.values()) {
    page.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
    page.setDefaultTimeout(browserConfig.navigationTimeout);
  }
}
121
+
122
/**
 * Produce the next tab identifier.
 *
 * Increments the internal tab counter and formats it as the configured
 * prefix followed by the counter left-padded with zeros to the configured
 * width.
 *
 * @returns The newly generated tab id.
 */
private generateTabId(): string {
  const { tabIdPrefix, tabCounterPadding } = this.configLoader.getBrowserConfig();
  this.tabCounter += 1;
  const paddedCounter = String(this.tabCounter).padStart(tabCounterPadding, '0');
  return `${tabIdPrefix}${paddedCounter}`;
}
126
+
127
/**
 * Return the page registered under the current tab id.
 *
 * @returns The currently active page.
 * @throws Error when no tab is current or the current tab id is not tracked.
 */
async getCurrentPage(): Promise<Page> {
  const activePage = this.currentTabId ? this.pages.get(this.currentTabId) : undefined;
  if (!activePage) {
    throw new Error('No active page available');
  }
  return activePage;
}
133
+
134
/**
 * Read the scroll offset of the current page and cache it.
 *
 * On any failure (no active page, evaluation error) a warning is logged and
 * the previously cached position is returned instead.
 *
 * @returns The scroll offset `{ x, y }` as reported by the page, or the last
 *   cached value on failure.
 */
private async getCurrentScrollPosition(): Promise<{ x: number; y: number }> {
  try {
    const activePage = await this.getCurrentPage();
    const metrics = (await activePage.evaluate(() => ({
      x: window.pageXOffset || document.documentElement.scrollLeft || 0,
      y: window.pageYOffset || document.documentElement.scrollTop || 0,
      devicePixelRatio: window.devicePixelRatio || 1,
      zoomLevel: window.outerWidth / window.innerWidth || 1,
    }))) as { x: number; y: number; devicePixelRatio: number; zoomLevel: number };

    // Only the offsets are kept; the other metrics are currently unused.
    this.scrollPosition = { x: metrics.x, y: metrics.y };
  } catch (error) {
    console.warn('Failed to get scroll position:', error);
  }
  return this.scrollPosition;
}
157
+
158
/**
 * Capture an AI-oriented snapshot of the current page.
 *
 * Public entry point; delegates to the native Playwright-based
 * implementation.
 *
 * @param includeCoordinates - Whether element bounding boxes are collected.
 * @param viewportLimit - Whether the result is restricted by viewport filtering.
 * @returns The snapshot together with timing information.
 */
async getSnapshotForAI(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const nativeSnapshot = await this.getSnapshotForAINative(includeCoordinates, viewportLimit);
  return nativeSnapshot;
}
162
+
163
/**
 * Build the AI snapshot using Playwright's `_snapshotForAI()` and the
 * `aria-ref` selector engine.
 *
 * Steps:
 *  1. Take the snapshot text and extract every `[ref=...]` marker.
 *  2. When coordinates are needed, resolve each ref to a bounding box.
 *  3. When `viewportLimit` is set, drop elements outside the viewport and
 *     remove their lines from the snapshot text.
 *
 * Any failure inside these steps is logged and reported as an error snapshot
 * rather than thrown. A missing active page still throws.
 *
 * @param includeCoordinates - Include bounding boxes in `elements`. When
 *   false, `elements` is empty.
 * @param viewportLimit - Restrict the snapshot text (and `elements`) to what
 *   overlaps the viewport.
 * @returns Snapshot text, element map, metadata and timing.
 */
private async getSnapshotForAINative(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  try {
    // _snapshotForAI() also refreshes the mapping the aria-ref selector uses.
    const snapshotStart = Date.now();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- private Playwright API
    const snapshotText = await (page as any)._snapshotForAI();
    const snapshotTime = Date.now() - snapshotStart;

    // Extract refs from the snapshot text.
    const refPattern = /\[ref=([^\]]+)\]/g;
    const refs: string[] = [];
    let match: RegExpExecArray | null;
    while ((match = refPattern.exec(snapshotText)) !== null) {
      refs.push(match[1]);
    }

    const mappingStart = Date.now();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const playwrightMapping: Record<string, any> = {};

    // Viewport filtering works on bounding boxes, so they are needed even if
    // the caller did not ask for them. Without them the filter would see an
    // empty mapping and strip every element line from the snapshot.
    if (includeCoordinates || viewportLimit) {
      for (const ref of refs) {
        try {
          const element = page.locator(`aria-ref=${ref}`).first();
          if ((await element.count()) === 0) {
            continue;
          }

          const boundingBox = await element.boundingBox();
          if (boundingBox) {
            playwrightMapping[ref] = {
              ref,
              coordinates: {
                x: Math.round(boundingBox.x),
                y: Math.round(boundingBox.y),
                width: Math.round(boundingBox.width),
                height: Math.round(boundingBox.height)
              }
            };
          }
        } catch (error) {
          // Best effort: an element whose box cannot be read is left out.
        }
      }
    }

    const mappingTime = Date.now() - mappingStart;

    // Apply viewport filtering if requested.
    let visibleElements = playwrightMapping;
    let finalSnapshot = snapshotText;

    if (viewportLimit) {
      const viewport = page.viewportSize() || { width: 1280, height: 720 };
      const scrollPos = await this.getCurrentScrollPosition();
      visibleElements = this.filterElementsInViewport(playwrightMapping, viewport, scrollPos);
      finalSnapshot = this.rebuildSnapshotText(snapshotText, visibleElements);
    }

    // Coordinates gathered only for filtering are not exposed to the caller.
    const finalElements = includeCoordinates ? visibleElements : {};

    const totalTime = Date.now() - startTime;

    return {
      snapshot: finalSnapshot,
      elements: finalElements,
      metadata: {
        elementCount: Object.keys(finalElements).length,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: snapshotTime,
        coordinate_enrichment_time_ms: 0, // Integrated into mapping
        aria_mapping_time_ms: mappingTime,
      },
    };
  } catch (error) {
    console.error('Failed to get AI snapshot with native mapping:', error);
    const totalTime = Date.now() - startTime;

    return {
      snapshot: 'Error: Unable to capture page snapshot',
      elements: {},
      metadata: {
        elementCount: 0,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: 0,
        coordinate_enrichment_time_ms: 0,
        aria_mapping_time_ms: 0,
      },
    };
  }
}
266
+
267
+
268
+
269
/**
 * Click the element identified by an aria ref, detecting tabs it opens.
 *
 * Navigable links (and elements that open a new tab on their own) are
 * clicked so that the target opens in a new tab; that tab is registered and
 * becomes the current one. Other elements get a normal click, falling back
 * to a forced click when the normal one fails.
 *
 * @param page - Page containing the element.
 * @param ref - Aria ref taken from the latest snapshot.
 * @returns `success` plus the `method` used; `newTabId` when a new tab was
 *   opened and registered; `error` on failure. Never throws.
 */
private async performClick(page: Page, ref: string): Promise<{ success: boolean; method?: string; error?: string; newTabId?: string }> {
  try {
    // Refresh the snapshot so the aria-ref mapping is up to date.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- private Playwright API
    await (page as any)._snapshotForAI();

    const element = page.locator(`aria-ref=${ref}`).first();
    if ((await element.count()) === 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    const browserConfig = this.configLoader.getBrowserConfig();
    const target = await element.getAttribute(browserConfig.targetAttribute);
    const href = await element.getAttribute(browserConfig.hrefAttribute);
    const onclick = await element.getAttribute(browserConfig.onclickAttribute);
    const tagName = await element.evaluate(el => el.tagName.toLowerCase());

    // Does the element open a new tab without any help?
    const naturallyOpensNewTab = (
      target === browserConfig.blankTarget ||
      (onclick && onclick.includes(browserConfig.windowOpenString)) ||
      (tagName === 'a' && href && (href.includes(`javascript:${browserConfig.windowOpenString}`) || href.includes(browserConfig.blankTarget)))
    );

    // All real links are opened in new tabs; anchors and javascript-void
    // pseudo links are not considered navigable.
    const isNavigableLink = tagName === 'a' && href &&
      !href.startsWith(browserConfig.anchorOnly) &&
      !href.startsWith(browserConfig.javascriptVoidPrefix) &&
      href !== browserConfig.javascriptVoidEmpty &&
      href !== browserConfig.anchorOnly;

    const shouldOpenNewTab = naturallyOpensNewTab || isNavigableLink;

    if (!shouldOpenNewTab) {
      try {
        // First try a normal click.
        await element.click({ timeout: browserConfig.clickTimeout });
      } catch (clickError) {
        // If the normal click fails (e.g. scrolling issues), force it.
        await element.click({ force: browserConfig.forceClick });
      }
      return { success: true, method: 'playwright-aria-ref' };
    }

    // Force plain links to open in a new tab.
    if (isNavigableLink && !naturallyOpensNewTab) {
      await element.evaluate((el, blankTarget) => {
        if (el.tagName.toLowerCase() === 'a') {
          el.setAttribute('target', blankTarget);
        }
      }, browserConfig.blankTarget);
    }

    // Start listening before the click. The rejection handler is attached
    // right away: if the click below throws, this promise would otherwise
    // reject later (on timeout) with nobody awaiting it.
    const popupPromise = page
      .context()
      .waitForEvent('page', { timeout: browserConfig.popupTimeout })
      .catch(() => null);

    // Click with force to avoid scrolling issues.
    await element.click({ force: browserConfig.forceClick });

    const newPage = await popupPromise;
    if (!newPage) {
      // No tab appeared in time; the click itself still succeeded.
      return { success: true, method: 'playwright-aria-ref' };
    }

    try {
      const newTabId = this.generateTabId();
      this.pages.set(newTabId, newPage);

      newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
      newPage.setDefaultTimeout(browserConfig.navigationTimeout);

      // Automatically switch to the new tab.
      this.currentTabId = newTabId;
      await newPage.bringToFront();

      // Wait for the new page to be ready; a timeout here is not fatal.
      await newPage.waitForLoadState('domcontentloaded', { timeout: browserConfig.popupTimeout }).catch(() => {});

      return { success: true, method: 'playwright-aria-ref-newtab', newTabId };
    } catch (popupError) {
      return { success: true, method: 'playwright-aria-ref' };
    }
  } catch (error) {
    console.error('[performClick] Exception during click for ref: %s', ref, error);
    return { success: false, error: `Click failed with exception: ${error}` };
  }
}
376
+
377
/**
 * Fill text into the element identified by an aria ref.
 *
 * @param page - Page containing the element.
 * @param ref - Aria ref taken from the latest snapshot.
 * @param text - Text passed to Playwright's `fill`.
 * @returns `{ success: true }` on success, otherwise `success: false` with an
 *   `error` description. Never throws.
 */
private async performType(page: Page, ref: string, text: string): Promise<{ success: boolean; error?: string }> {
  try {
    // Refresh the snapshot so the aria-ref mapping is current.
    await (page as any)._snapshotForAI();

    const field = page.locator(`aria-ref=${ref}`).first();
    const matchCount = await field.count();
    if (matchCount <= 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    await field.fill(text);
    return { success: true };
  } catch (error) {
    return { success: false, error: `Type failed: ${error}` };
  }
}
402
+
403
/**
 * Select an option in the element identified by an aria ref.
 *
 * @param page - Page containing the element.
 * @param ref - Aria ref taken from the latest snapshot.
 * @param value - Value passed to Playwright's `selectOption`.
 * @returns `{ success: true }` on success, otherwise `success: false` with an
 *   `error` description. Never throws.
 */
private async performSelect(page: Page, ref: string, value: string): Promise<{ success: boolean; error?: string }> {
  try {
    // Refresh the snapshot so the aria-ref mapping is current.
    await (page as any)._snapshotForAI();

    const dropdown = page.locator(`aria-ref=${ref}`).first();
    const matchCount = await dropdown.count();
    if (matchCount <= 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    await dropdown.selectOption(value);
    return { success: true };
  } catch (error) {
    return { success: false, error: `Select failed: ${error}` };
  }
}
428
+
429
+
430
+
431
/**
 * Execute one browser action on the current page and wait for the page to
 * settle afterwards.
 *
 * Supported action types: `click`, `type`, `select`, `scroll`, `enter`.
 * Failures of the action itself are reported in the result (`success:
 * false`) instead of being thrown; only a missing active page throws.
 *
 * @param action - The action to perform.
 * @returns Result with a message, timing breakdown and, when a click opened
 *   a new tab, its `newTabId`.
 * @throws Error when there is no active page.
 */
async executeAction(action: BrowserAction): Promise<ActionResult> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  let elementSearchTime = 0;
  let actionExecutionTime = 0;
  let stabilityWaitTime = 0;

  try {
    const elementSearchStart = Date.now();

    // No snapshot is pre-fetched here: each perform* helper refreshes it.
    let newTabId: string | undefined;

    switch (action.type) {
      case 'click': {
        elementSearchTime = Date.now() - elementSearchStart;
        const clickStart = Date.now();

        const clickResult = await this.performClick(page, action.ref);
        if (!clickResult.success) {
          throw new Error(`Click failed: ${clickResult.error}`);
        }

        // Present only when the click opened (and switched to) a new tab.
        newTabId = clickResult.newTabId;

        actionExecutionTime = Date.now() - clickStart;
        break;
      }

      case 'type': {
        elementSearchTime = Date.now() - elementSearchStart;
        const typeStart = Date.now();

        const typeResult = await this.performType(page, action.ref, action.text);
        if (!typeResult.success) {
          throw new Error(`Type failed: ${typeResult.error}`);
        }

        actionExecutionTime = Date.now() - typeStart;
        break;
      }

      case 'select': {
        elementSearchTime = Date.now() - elementSearchStart;
        const selectStart = Date.now();

        const selectResult = await this.performSelect(page, action.ref, action.value);
        if (!selectResult.success) {
          throw new Error(`Select failed: ${selectResult.error}`);
        }

        actionExecutionTime = Date.now() - selectStart;
        break;
      }

      case 'scroll': {
        elementSearchTime = Date.now() - elementSearchStart;
        const scrollStart = Date.now();
        const scrollAmount = action.direction === 'up' ? -action.amount : action.amount;
        await page.evaluate((amount: number) => {
          window.scrollBy(0, amount);
        }, scrollAmount);
        // Refresh the cached scroll position.
        await this.getCurrentScrollPosition();
        actionExecutionTime = Date.now() - scrollStart;
        break;
      }

      case 'enter': {
        elementSearchTime = Date.now() - elementSearchStart;
        const enterStart = Date.now();
        const browserConfig = this.configLoader.getBrowserConfig();
        await page.keyboard.press(browserConfig.enterKey);
        actionExecutionTime = Date.now() - enterStart;
        break;
      }

      default:
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- unreachable for well-typed callers
        throw new Error(`Unknown action type: ${(action as any).type}`);
    }

    // If the action switched to a new tab, that tab is the one the caller
    // will look at next, so wait for it rather than for the page left behind.
    const pageToSettle = (newTabId ? this.pages.get(newTabId) : undefined) ?? page;

    const stabilityStart = Date.now();
    const stabilityResult = await this.waitForPageStability(pageToSettle);
    stabilityWaitTime = Date.now() - stabilityStart;

    const totalTime = Date.now() - startTime;

    return {
      success: true,
      message: `Action ${action.type} executed successfully`,
      timing: {
        total_time_ms: totalTime,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
        dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
        network_idle_time_ms: stabilityResult.networkIdleTime,
      },
      ...(newTabId && { newTabId }), // Include new tab ID if present
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Action ${action.type} failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
      },
    };
  }
}
553
+
554
/**
 * Wait for the page to reach the configured "DOM content loaded" state and
 * then the configured "network idle" state, timing each wait.
 *
 * Timeouts and other errors are swallowed on purpose: a page that never
 * settles must not fail the surrounding action. A phase that did not
 * complete is reported as 0.
 *
 * @param page - Page to wait on.
 * @returns Milliseconds spent in each completed phase.
 */
private async waitForPageStability(page: Page): Promise<{ domContentLoadedTime: number; networkIdleTime: number }> {
  const timings = { domContentLoadedTime: 0, networkIdleTime: 0 };

  try {
    const domPhaseStart = Date.now();
    const settings = this.configLoader.getBrowserConfig();
    await page.waitForLoadState(settings.domContentLoadedState as any, { timeout: settings.pageStabilityTimeout });
    timings.domContentLoadedTime = Date.now() - domPhaseStart;

    const networkPhaseStart = Date.now();
    await page.waitForLoadState(settings.networkIdleState as any, { timeout: settings.networkIdleTimeout });
    timings.networkIdleTime = Date.now() - networkPhaseStart;
  } catch (error) {
    // Continue even if the stability wait fails.
  }

  return timings;
}
573
+
574
/**
 * Navigate to a URL.
 *
 * The current tab is reused when it is blank (per the configured blank-page
 * URLs, default start URL or data-URL prefix) or when this session has not
 * navigated yet. Otherwise the URL is opened in a new tab, which becomes the
 * current one.
 *
 * Failures are reported in the result rather than thrown. If navigation in a
 * freshly created tab fails, that tab is closed and unregistered so that no
 * orphan tab is left behind.
 *
 * @param url - Destination URL.
 * @returns Result with timing; `newTabId` is set when a new tab was opened.
 */
async visitPage(url: string): Promise<ActionResult & { newTabId?: string }> {
  const startTime = Date.now();

  try {
    const currentPage = await this.getCurrentPage();
    const currentUrl = currentPage.url();

    const browserConfig = this.configLoader.getBrowserConfig();
    const isBlankPage = (
      browserConfig.blankPageUrls.includes(currentUrl) ||
      currentUrl === browserConfig.defaultStartUrl ||
      currentUrl.startsWith(browserConfig.dataUrlPrefix) // data URLs are often used for blank pages
    );
    const shouldUseCurrentTab = isBlankPage || !this.hasNavigatedBefore;

    const gotoOptions = {
      timeout: browserConfig.navigationTimeout,
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- config value is an untyped load-state name
      waitUntil: browserConfig.domContentLoadedState as any
    };

    let targetPage: Page;
    let newTabId: string | undefined;
    let navigationStart: number;

    if (shouldUseCurrentTab) {
      navigationStart = Date.now();
      await currentPage.goto(url, gotoOptions);
      targetPage = currentPage;
    } else {
      if (!this.context) {
        throw new Error('Browser context not initialized');
      }

      navigationStart = Date.now();

      const newPage = await this.context.newPage();
      const createdTabId = this.generateTabId();
      this.pages.set(createdTabId, newPage);

      try {
        newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
        newPage.setDefaultTimeout(browserConfig.navigationTimeout);
        await newPage.goto(url, gotoOptions);
      } catch (navigationError) {
        // Do not leave a half-loaded, untracked-by-the-caller tab behind.
        this.pages.delete(createdTabId);
        await newPage.close().catch(() => undefined);
        throw navigationError;
      }

      // Automatically switch to the new tab.
      this.currentTabId = createdTabId;
      await newPage.bringToFront();

      targetPage = newPage;
      newTabId = createdTabId;
    }

    // A fresh document starts at the top-left; remember that we navigated.
    this.scrollPosition = { x: 0, y: 0 };
    this.hasNavigatedBefore = true;

    const navigationTime = Date.now() - navigationStart;
    const stabilityResult = await this.waitForPageStability(targetPage);
    const totalTime = Date.now() - startTime;

    return {
      success: true,
      message: newTabId ? `Opened ${url} in new tab` : `Navigated to ${url}`,
      ...(newTabId ? { newTabId } : {}),
      timing: {
        total_time_ms: totalTime,
        navigation_time_ms: navigationTime,
        dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
        network_idle_time_ms: stabilityResult.networkIdleTime,
      },
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Navigation to ${url} failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
        navigation_time_ms: 0,
        dom_content_loaded_time_ms: 0,
        network_idle_time_ms: 0,
      },
    };
  }
}
690
+
691
/**
 * Make the given tab the current one.
 *
 * The internal current-tab id is updated first. Focusing the tab in the
 * browser is best effort: a focus event is dispatched inside the page and,
 * when not headless, `bringToFront()` is scheduled in the background after
 * the configured delay so the caller is not blocked by it.
 *
 * @param tabId - Id of the tab to activate.
 * @returns `true` when the tab is now current; `false` when the id is
 *   unknown, the page is already closed (it is then dropped from tracking),
 *   or an unexpected error occurred.
 */
async switchToTab(tabId: string): Promise<boolean> {
  const page = this.pages.get(tabId);
  if (!page) {
    return false;
  }

  if (page.isClosed()) {
    this.pages.delete(tabId);
    return false;
  }

  try {
    console.log(`Switching to tab ${tabId}`);

    // Internal state is switched before any focus attempt.
    this.currentTabId = tabId;

    try {
      // Step 1: ask the page itself to take focus; failures are ignored.
      try {
        await page.evaluate(() => {
          window.focus();
          window.dispatchEvent(new Event('focus'));
        });
      } catch {
        // ignored on purpose
      }

      // Step 2: in headed mode, raise the tab later without awaiting it.
      const isHeadless = this.configLoader.getBrowserConfig().headless;
      if (!isHeadless) {
        void (async () => {
          const settings = this.configLoader.getBrowserConfig();
          await new Promise(resolve => setTimeout(resolve, settings.navigationDelay));
          try {
            await page.bringToFront();
          } catch (e) {
            // The tab is already switched internally, so this is harmless.
            console.debug(`bringToFront failed for ${tabId}, but tab is switched internally`);
          }
        })();
      }
    } catch (error) {
      // Log but do not fail: internal state is already updated.
      console.warn(`Tab focus warning for ${tabId}:`, error);
    }

    console.log(`Successfully switched to tab ${tabId}`);
    return true;
  } catch (error) {
    console.error(`Error switching to tab ${tabId}:`, error);
    return false;
  }
}
748
+
749
/**
 * Close a tab and stop tracking it.
 *
 * When the closed tab was the current one, the first remaining tracked tab
 * becomes current, or no tab is current if none remain.
 *
 * @param tabId - Id of the tab to close.
 * @returns `false` when the id is unknown, otherwise `true`.
 */
async closeTab(tabId: string): Promise<boolean> {
  const page = this.pages.get(tabId);
  if (!page) {
    return false;
  }

  if (!page.isClosed()) {
    await page.close();
  }
  this.pages.delete(tabId);

  if (tabId === this.currentTabId) {
    // Map keys iterate in insertion order, so this picks the oldest tab left.
    const [fallbackTabId] = Array.from(this.pages.keys());
    this.currentTabId = fallbackTabId !== undefined ? fallbackTabId : null;
  }

  return true;
}
773
+
774
/**
 * Describe every tracked tab that is still open.
 *
 * Tabs whose title cannot be read are skipped silently.
 *
 * @returns One entry per accessible tab with id, title, URL and whether it is
 *   the current tab.
 */
async getTabInfo(): Promise<TabInfo[]> {
  const summaries: TabInfo[] = [];

  for (const [tabId, page] of this.pages) {
    if (page.isClosed()) {
      continue;
    }

    try {
      const title = await page.title();
      summaries.push({
        tab_id: tabId,
        title,
        url: page.url(),
        is_current: tabId === this.currentTabId,
      });
    } catch (error) {
      // Skip tabs that can't be accessed.
    }
  }

  return summaries;
}
797
+
798
/**
 * Take a screenshot of the current page using the configured timeout and
 * full-page setting.
 *
 * @returns The image buffer and the elapsed time in milliseconds (measured
 *   from method entry, including the page lookup).
 * @throws Error when there is no active page, or when Playwright fails to
 *   capture the screenshot.
 */
async takeScreenshot(): Promise<{ buffer: Buffer; timing: { screenshot_time_ms: number } }> {
  const begunAt = Date.now();
  const activePage = await this.getCurrentPage();

  const { screenshotTimeout, fullPageScreenshot } = this.configLoader.getBrowserConfig();
  const buffer = await activePage.screenshot({
    timeout: screenshotTimeout,
    fullPage: fullPageScreenshot
  });

  const timing = { screenshot_time_ms: Date.now() - begunAt };
  return { buffer, timing };
}
817
+
818
/**
 * Close all tracked pages, the context and the browser, and reset the
 * session's handles.
 *
 * Cleanup is exhaustive: a failure while closing one resource no longer
 * prevents the remaining ones from being closed, and the session's handles
 * are cleared in every case. If anything failed, the first error is rethrown
 * after cleanup has finished.
 *
 * `Browser.close()` is used for both launched and CDP-connected browsers.
 * NOTE(review): the original code's comment says a CDP connection should
 * only be disconnected, yet the tracked pages and the context are still
 * closed first in that mode — confirm this is intended.
 *
 * @throws The first error raised while closing a page, the context or the
 *   browser.
 */
async close(): Promise<void> {
  const trackedPages = Array.from(this.pages.values());
  const context = this.context;
  const browser = this.browser;

  // Drop the handles first so the session never keeps references to
  // resources that are closed (or failed to close) below.
  this.pages.clear();
  this.currentTabId = null;
  this.context = null;
  this.browser = null;

  const failures: unknown[] = [];
  const attempt = async (step: () => Promise<void>): Promise<void> => {
    try {
      await step();
    } catch (error) {
      failures.push(error);
    }
  };

  for (const page of trackedPages) {
    if (!page.isClosed()) {
      await attempt(() => page.close());
    }
  }

  if (context) {
    await attempt(() => context.close());
  }

  if (browser) {
    await attempt(() => browser.close());
  }

  if (failures.length > 0) {
    throw failures[0];
  }
}
846
+
847
/**
 * Keep only the elements whose box overlaps the viewport rectangle.
 *
 * The viewport rectangle starts at the scroll position multiplied by the
 * configured `scrollPositionScale` and has the given viewport size. Elements
 * without coordinates are always kept.
 *
 * NOTE(review): this compares element coordinates against a scroll-shifted
 * rectangle, i.e. it assumes the coordinates are in the same frame as the
 * scaled scroll offset — confirm against how the coordinates are produced.
 *
 * @param elements - Elements keyed by ref.
 * @param viewport - Viewport width and height.
 * @param scrollPos - Current scroll offset.
 * @returns A new map containing only the elements considered visible.
 */
private filterElementsInViewport(
  elements: Record<string, SnapshotElement>,
  viewport: { width: number, height: number },
  scrollPos: { x: number, y: number }
): Record<string, SnapshotElement> {
  const { scrollPositionScale } = this.configLoader.getBrowserConfig();

  // Viewport rectangle, computed once for all elements.
  const left = scrollPos.x * scrollPositionScale;
  const top = scrollPos.y * scrollPositionScale;
  const right = left + viewport.width;
  const bottom = top + viewport.height;

  const visible: Record<string, SnapshotElement> = {};

  Object.entries(elements).forEach(([ref, element]) => {
    const box = element.coordinates;

    // No coordinates: keep the element as a fallback.
    if (!box) {
      visible[ref] = element;
      return;
    }

    // Standard rectangle-overlap test on both axes.
    const overlapsHorizontally = box.x < right && box.x + box.width > left;
    const overlapsVertically = box.y < bottom && box.y + box.height > top;

    if (overlapsHorizontally && overlapsVertically) {
      visible[ref] = element;
    }
  });

  return visible;
}
893
+
894
/**
 * Remove from the snapshot text every line that refers to an element which
 * did not survive filtering.
 *
 * A line is matched to an element through its first `[ref=...]` marker.
 * Lines without such a marker (headers, etc.) are always kept.
 *
 * @param originalSnapshot - Full snapshot text, one node per line.
 * @param filteredElements - Elements that should remain, keyed by ref.
 * @returns The snapshot text with the excluded element lines removed.
 */
private rebuildSnapshotText(originalSnapshot: string, filteredElements: Record<string, SnapshotElement>): string {
  const keepLine = (line: string): boolean => {
    const marker = line.match(/\[ref=([^\]]+)\]/);
    if (!marker) {
      return true;
    }
    return Boolean(filteredElements[marker[1]]);
  };

  return originalSnapshot.split('\n').filter(keepLine).join('\n');
}
915
+
916
+ }