camel-ai 0.2.73a12__py3-none-any.whl → 0.2.74__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/models/anthropic_model.py +5 -3
- camel/societies/workforce/prompts.py +3 -19
- camel/societies/workforce/workforce.py +13 -8
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +3 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +225 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +164 -8
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +106 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +19 -1
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +20 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +41 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +312 -3
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/note_taking_toolkit.py +3 -4
- camel/toolkits/search_toolkit.py +192 -59
- camel/toolkits/terminal_toolkit.py +12 -2
- camel/types/enums.py +3 -0
- camel/utils/token_counting.py +13 -2
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/METADATA +3 -2
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/RECORD +28 -28
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Page, Browser, BrowserContext, chromium } from 'playwright';
|
|
1
|
+
import { Page, Browser, BrowserContext, chromium, ConsoleMessage } from 'playwright';
|
|
2
2
|
import { BrowserToolkitConfig, SnapshotResult, SnapshotElement, ActionResult, TabInfo, BrowserAction, DetailedTiming } from './types';
|
|
3
3
|
import { ConfigLoader, StealthConfig } from './config-loader';
|
|
4
4
|
|
|
@@ -6,18 +6,43 @@ export class HybridBrowserSession {
|
|
|
6
6
|
private browser: Browser | null = null;
|
|
7
7
|
private context: BrowserContext | null = null;
|
|
8
8
|
private pages: Map<string, Page> = new Map();
|
|
9
|
+
private consoleLogs: Map<string, ConsoleMessage[]> = new Map();
|
|
9
10
|
private currentTabId: string | null = null;
|
|
10
11
|
private tabCounter = 0;
|
|
11
12
|
private configLoader: ConfigLoader;
|
|
12
13
|
private scrollPosition: { x: number; y: number } = {x: 0, y: 0};
|
|
13
14
|
private hasNavigatedBefore = false; // Track if we've navigated before
|
|
15
|
+
private logLimit: number;
|
|
14
16
|
|
|
15
17
|
constructor(config: BrowserToolkitConfig = {}) {
|
|
16
18
|
// Use ConfigLoader's fromPythonConfig to handle conversion properly
|
|
17
19
|
this.configLoader = ConfigLoader.fromPythonConfig(config);
|
|
20
|
+
// Load browser configuration for console log limit, default to 1000
|
|
21
|
+
this.logLimit = this.configLoader.getBrowserConfig().consoleLogLimit || 1000;
|
|
18
22
|
}
|
|
19
23
|
|
|
20
|
-
|
|
24
|
+
private registerNewPage(tabId: string, page: Page): void {
|
|
25
|
+
// Register page and logs with tabId
|
|
26
|
+
this.pages.set(tabId, page);
|
|
27
|
+
this.consoleLogs.set(tabId, []);
|
|
28
|
+
// Set up console log listener for the page
|
|
29
|
+
page.on('console', (msg: ConsoleMessage) => {
|
|
30
|
+
const logs = this.consoleLogs.get(tabId);
|
|
31
|
+
if (logs) {
|
|
32
|
+
logs.push(msg);
|
|
33
|
+
if (logs.length > this.logLimit) {
|
|
34
|
+
logs.shift();
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
// Clean logs on page close
|
|
40
|
+
page.on('close', () => {
|
|
41
|
+
this.consoleLogs.delete(tabId);
|
|
42
|
+
});
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
async ensureBrowser(): Promise<void> {
|
|
21
46
|
if (this.browser) {
|
|
22
47
|
return;
|
|
23
48
|
}
|
|
@@ -57,7 +82,7 @@ export class HybridBrowserSession {
|
|
|
57
82
|
// In CDP mode, only consider pages with about:blank as available
|
|
58
83
|
if (pageUrl === 'about:blank') {
|
|
59
84
|
const tabId = this.generateTabId();
|
|
60
|
-
this.
|
|
85
|
+
this.registerNewPage(tabId, page);
|
|
61
86
|
if (!this.currentTabId) {
|
|
62
87
|
this.currentTabId = tabId;
|
|
63
88
|
availablePageFound = true;
|
|
@@ -97,7 +122,7 @@ export class HybridBrowserSession {
|
|
|
97
122
|
const pages = this.context.pages();
|
|
98
123
|
if (pages.length > 0) {
|
|
99
124
|
const initialTabId = this.generateTabId();
|
|
100
|
-
this.
|
|
125
|
+
this.registerNewPage(initialTabId, pages[0]);
|
|
101
126
|
this.currentTabId = initialTabId;
|
|
102
127
|
}
|
|
103
128
|
} else {
|
|
@@ -115,7 +140,7 @@ export class HybridBrowserSession {
|
|
|
115
140
|
|
|
116
141
|
const initialPage = await this.context.newPage();
|
|
117
142
|
const initialTabId = this.generateTabId();
|
|
118
|
-
this.
|
|
143
|
+
this.registerNewPage(initialTabId, initialPage);
|
|
119
144
|
this.currentTabId = initialTabId;
|
|
120
145
|
}
|
|
121
146
|
}
|
|
@@ -139,6 +164,13 @@ export class HybridBrowserSession {
|
|
|
139
164
|
return this.pages.get(this.currentTabId)!;
|
|
140
165
|
}
|
|
141
166
|
|
|
167
|
+
async getCurrentLogs(): Promise<ConsoleMessage[]> {
|
|
168
|
+
if (!this.currentTabId || !this.consoleLogs.has(this.currentTabId)) {
|
|
169
|
+
return [];
|
|
170
|
+
}
|
|
171
|
+
return this.consoleLogs.get(this.currentTabId) || [];
|
|
172
|
+
}
|
|
173
|
+
|
|
142
174
|
/**
|
|
143
175
|
* Get current scroll position from the page
|
|
144
176
|
*/
|
|
@@ -343,7 +375,7 @@ export class HybridBrowserSession {
|
|
|
343
375
|
|
|
344
376
|
// Generate tab ID for the new page
|
|
345
377
|
const newTabId = this.generateTabId();
|
|
346
|
-
this.
|
|
378
|
+
this.registerNewPage(newTabId, newPage);
|
|
347
379
|
|
|
348
380
|
// Set up page properties
|
|
349
381
|
const browserConfig = this.configLoader.getBrowserConfig();
|
|
@@ -434,7 +466,97 @@ export class HybridBrowserSession {
|
|
|
434
466
|
}
|
|
435
467
|
}
|
|
436
468
|
|
|
469
|
+
/**
|
|
470
|
+
* Simplified mouse control implementation
|
|
471
|
+
*/
|
|
472
|
+
private async performMouseControl(page: Page, control: string, x: number, y: number): Promise<{ success: boolean; error?: string }> {
|
|
473
|
+
try {
|
|
474
|
+
const viewport = page.viewportSize();
|
|
475
|
+
if (!viewport) {
|
|
476
|
+
return { success: false, error: 'Viewport size not available from page.' };
|
|
477
|
+
}
|
|
478
|
+
if (x < 0 || y < 0 || x > viewport.width || y > viewport.height) {
|
|
479
|
+
return { success: false, error: `Invalid coordinates, outside viewport bounds: (${x}, ${y})` };
|
|
480
|
+
}
|
|
481
|
+
switch (control) {
|
|
482
|
+
case 'click': {
|
|
483
|
+
await page.mouse.click(x, y);
|
|
484
|
+
break;
|
|
485
|
+
}
|
|
486
|
+
case 'right_click': {
|
|
487
|
+
await page.mouse.click(x, y, { button: 'right' });
|
|
488
|
+
break;
|
|
489
|
+
}
|
|
490
|
+
case 'dblclick': {
|
|
491
|
+
await page.mouse.dblclick(x, y);
|
|
492
|
+
break;
|
|
493
|
+
}
|
|
494
|
+
default:
|
|
495
|
+
return { success: false, error: `Invalid control action: ${control}` };
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
return { success: true };
|
|
499
|
+
} catch (error) {
|
|
500
|
+
return { success: false, error: `Mouse action failed: ${error}` };
|
|
501
|
+
}
|
|
502
|
+
}
|
|
437
503
|
|
|
504
|
+
/**
|
|
505
|
+
* Enhanced mouse drag and drop implementation using ref IDs
|
|
506
|
+
*/
|
|
507
|
+
private async performMouseDrag(page: Page, fromRef: string, toRef: string): Promise<{ success: boolean; error?: string }> {
|
|
508
|
+
try {
|
|
509
|
+
// Ensure we have the latest snapshot
|
|
510
|
+
await (page as any)._snapshotForAI();
|
|
511
|
+
|
|
512
|
+
// Get elements using Playwright's aria-ref selector
|
|
513
|
+
const fromSelector = `aria-ref=${fromRef}`;
|
|
514
|
+
const toSelector = `aria-ref=${toRef}`;
|
|
515
|
+
|
|
516
|
+
const fromElement = await page.locator(fromSelector).first();
|
|
517
|
+
const toElement = await page.locator(toSelector).first();
|
|
518
|
+
|
|
519
|
+
// Check if elements exist
|
|
520
|
+
const fromExists = await fromElement.count() > 0;
|
|
521
|
+
const toExists = await toElement.count() > 0;
|
|
522
|
+
|
|
523
|
+
if (!fromExists) {
|
|
524
|
+
return { success: false, error: `Source element with ref ${fromRef} not found` };
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
if (!toExists) {
|
|
528
|
+
return { success: false, error: `Target element with ref ${toRef} not found` };
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
// Get the center coordinates of both elements
|
|
532
|
+
const fromBox = await fromElement.boundingBox();
|
|
533
|
+
const toBox = await toElement.boundingBox();
|
|
534
|
+
|
|
535
|
+
if (!fromBox) {
|
|
536
|
+
return { success: false, error: `Could not get bounding box for source element with ref ${fromRef}` };
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
if (!toBox) {
|
|
540
|
+
return { success: false, error: `Could not get bounding box for target element with ref ${toRef}` };
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
const fromX = fromBox.x + fromBox.width / 2;
|
|
544
|
+
const fromY = fromBox.y + fromBox.height / 2;
|
|
545
|
+
const toX = toBox.x + toBox.width / 2;
|
|
546
|
+
const toY = toBox.y + toBox.height / 2;
|
|
547
|
+
|
|
548
|
+
// Perform the drag operation
|
|
549
|
+
await page.mouse.move(fromX, fromY);
|
|
550
|
+
await page.mouse.down();
|
|
551
|
+
// Destination coordinates
|
|
552
|
+
await page.mouse.move(toX, toY);
|
|
553
|
+
await page.mouse.up();
|
|
554
|
+
|
|
555
|
+
return { success: true };
|
|
556
|
+
} catch (error) {
|
|
557
|
+
return { success: false, error: `Mouse drag action failed: ${error}` };
|
|
558
|
+
}
|
|
559
|
+
}
|
|
438
560
|
|
|
439
561
|
async executeAction(action: BrowserAction): Promise<ActionResult> {
|
|
440
562
|
const startTime = Date.now();
|
|
@@ -519,6 +641,40 @@ export class HybridBrowserSession {
|
|
|
519
641
|
actionExecutionTime = Date.now() - enterStart;
|
|
520
642
|
break;
|
|
521
643
|
}
|
|
644
|
+
|
|
645
|
+
case 'mouse_control': {
|
|
646
|
+
elementSearchTime = Date.now() - elementSearchStart;
|
|
647
|
+
const mouseControlStart = Date.now();
|
|
648
|
+
const mouseControlResult = await this.performMouseControl(page, action.control, action.x, action.y);
|
|
649
|
+
|
|
650
|
+
if (!mouseControlResult.success) {
|
|
651
|
+
throw new Error(`Action failed: ${mouseControlResult.error}`);
|
|
652
|
+
}
|
|
653
|
+
actionExecutionTime = Date.now() - mouseControlStart;
|
|
654
|
+
break;
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
case 'mouse_drag': {
|
|
658
|
+
elementSearchTime = Date.now() - elementSearchStart;
|
|
659
|
+
const mouseDragStart = Date.now();
|
|
660
|
+
const mouseDragResult = await this.performMouseDrag(page, action.from_ref, action.to_ref);
|
|
661
|
+
|
|
662
|
+
if (!mouseDragResult.success) {
|
|
663
|
+
throw new Error(`Action failed: ${mouseDragResult.error}`);
|
|
664
|
+
}
|
|
665
|
+
actionExecutionTime = Date.now() - mouseDragStart;
|
|
666
|
+
break;
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
case 'press_key': {
|
|
670
|
+
elementSearchTime = Date.now() - elementSearchStart;
|
|
671
|
+
const keyPressStart = Date.now();
|
|
672
|
+
// concatenate keys with '+' for key combinations
|
|
673
|
+
const keys = action.keys.join('+');
|
|
674
|
+
await page.keyboard.press(keys);
|
|
675
|
+
actionExecutionTime = Date.now() - keyPressStart;
|
|
676
|
+
break;
|
|
677
|
+
}
|
|
522
678
|
|
|
523
679
|
default:
|
|
524
680
|
throw new Error(`Unknown action type: ${(action as any).type}`);
|
|
@@ -651,7 +807,7 @@ export class HybridBrowserSession {
|
|
|
651
807
|
if (!isTracked && pageUrl === 'about:blank') {
|
|
652
808
|
newPage = page;
|
|
653
809
|
newTabId = this.generateTabId();
|
|
654
|
-
this.
|
|
810
|
+
this.registerNewPage(newTabId, newPage);
|
|
655
811
|
break;
|
|
656
812
|
}
|
|
657
813
|
}
|
|
@@ -663,7 +819,7 @@ export class HybridBrowserSession {
|
|
|
663
819
|
// Non-CDP mode: create new page as usual
|
|
664
820
|
newPage = await this.context.newPage();
|
|
665
821
|
newTabId = this.generateTabId();
|
|
666
|
-
this.
|
|
822
|
+
this.registerNewPage(newTabId, newPage);
|
|
667
823
|
}
|
|
668
824
|
|
|
669
825
|
// Set up page properties
|
|
@@ -30,6 +30,7 @@ export interface BrowserConfig {
|
|
|
30
30
|
// Tab management
|
|
31
31
|
tabIdPrefix: string;
|
|
32
32
|
tabCounterPadding: number;
|
|
33
|
+
consoleLogLimit: number;
|
|
33
34
|
|
|
34
35
|
// Scroll and positioning
|
|
35
36
|
scrollPositionScale: number;
|
|
@@ -113,6 +114,7 @@ function getDefaultBrowserConfig(): BrowserConfig {
|
|
|
113
114
|
clickTimeout: 3000,
|
|
114
115
|
tabIdPrefix: 'tab-',
|
|
115
116
|
tabCounterPadding: 3,
|
|
117
|
+
consoleLogLimit: 1000,
|
|
116
118
|
scrollPositionScale: 0.1,
|
|
117
119
|
navigationDelay: 100,
|
|
118
120
|
blankPageUrls: ['about:blank', ''],
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {HybridBrowserSession} from './browser-session';
|
|
2
2
|
import {ActionResult, BrowserAction, BrowserToolkitConfig, SnapshotResult, TabInfo, VisualMarkResult} from './types';
|
|
3
3
|
import {ConfigLoader} from './config-loader';
|
|
4
|
+
import {ConsoleMessage} from 'playwright';
|
|
4
5
|
|
|
5
6
|
export class HybridBrowserToolkit {
|
|
6
7
|
private session: HybridBrowserSession;
|
|
@@ -382,6 +383,21 @@ export class HybridBrowserToolkit {
|
|
|
382
383
|
return this.executeActionWithSnapshot(action);
|
|
383
384
|
}
|
|
384
385
|
|
|
386
|
+
async mouseControl(control: 'click' | 'right_click'| 'dblclick', x: number, y: number): Promise<any> {
|
|
387
|
+
const action: BrowserAction = { type: 'mouse_control', control, x, y };
|
|
388
|
+
return this.executeActionWithSnapshot(action);
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
async mouseDrag(from_ref: string, to_ref: string): Promise<any> {
|
|
392
|
+
const action: BrowserAction = { type: 'mouse_drag', from_ref, to_ref };
|
|
393
|
+
return this.executeActionWithSnapshot(action);
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
async pressKeys(keys: string[]): Promise<any> {
|
|
397
|
+
const action: BrowserAction = { type: 'press_key', keys};
|
|
398
|
+
return this.executeActionWithSnapshot(action);
|
|
399
|
+
}
|
|
400
|
+
|
|
385
401
|
async back(): Promise<ActionResult> {
|
|
386
402
|
const startTime = Date.now();
|
|
387
403
|
|
|
@@ -519,4 +535,93 @@ export class HybridBrowserToolkit {
|
|
|
519
535
|
return await this.session.getTabInfo();
|
|
520
536
|
}
|
|
521
537
|
|
|
522
|
-
|
|
538
|
+
async getConsoleView(): Promise<any> {
|
|
539
|
+
const currentLogs = await this.session.getCurrentLogs();
|
|
540
|
+
// Format logs
|
|
541
|
+
return currentLogs.map(item => ({
|
|
542
|
+
type: item.type(),
|
|
543
|
+
text: item.text(),
|
|
544
|
+
}));
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
async consoleExecute(code: string): Promise<any> {
|
|
548
|
+
const startTime = Date.now();
|
|
549
|
+
try {
|
|
550
|
+
const page = await this.session.getCurrentPage();
|
|
551
|
+
|
|
552
|
+
// Wrap the code to capture console.log output
|
|
553
|
+
const wrappedCode = `
|
|
554
|
+
(function() {
|
|
555
|
+
const _logs = [];
|
|
556
|
+
const originalLog = console.log;
|
|
557
|
+
console.log = function(...args) {
|
|
558
|
+
_logs.push(args.map(arg => {
|
|
559
|
+
try {
|
|
560
|
+
return typeof arg === 'object' ? JSON.stringify(arg) : String(arg);
|
|
561
|
+
} catch (e) {
|
|
562
|
+
return String(arg);
|
|
563
|
+
}
|
|
564
|
+
}).join(' '));
|
|
565
|
+
originalLog.apply(console, args);
|
|
566
|
+
};
|
|
567
|
+
|
|
568
|
+
let result;
|
|
569
|
+
try {
|
|
570
|
+
result = eval(${JSON.stringify(code)});
|
|
571
|
+
} catch (e) {
|
|
572
|
+
try {
|
|
573
|
+
result = (function() { ${code} })();
|
|
574
|
+
} catch (error) {
|
|
575
|
+
console.log = originalLog;
|
|
576
|
+
throw error;
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
console.log = originalLog;
|
|
581
|
+
return { result, logs: _logs };
|
|
582
|
+
})()
|
|
583
|
+
`;
|
|
584
|
+
|
|
585
|
+
const evalResult = await page.evaluate(wrappedCode) as { result: any; logs: string[] };
|
|
586
|
+
const { result, logs } = evalResult;
|
|
587
|
+
|
|
588
|
+
const snapshotStart = Date.now();
|
|
589
|
+
const snapshot = await this.getPageSnapshot(this.viewportLimit);
|
|
590
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
591
|
+
const totalTime = Date.now() - startTime;
|
|
592
|
+
|
|
593
|
+
// Properly serialize the result
|
|
594
|
+
let resultStr: string;
|
|
595
|
+
try {
|
|
596
|
+
resultStr = JSON.stringify(result, null, 2);
|
|
597
|
+
} catch (e) {
|
|
598
|
+
// Fallback for non-serializable values
|
|
599
|
+
resultStr = String(result);
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
return {
|
|
603
|
+
result: `Console execution result: ${resultStr}`,
|
|
604
|
+
console_output: logs,
|
|
605
|
+
snapshot: snapshot,
|
|
606
|
+
timing: {
|
|
607
|
+
total_time_ms: totalTime,
|
|
608
|
+
snapshot_time_ms: snapshotTime,
|
|
609
|
+
},
|
|
610
|
+
};
|
|
611
|
+
|
|
612
|
+
} catch (error) {
|
|
613
|
+
const totalTime = Date.now() - startTime;
|
|
614
|
+
return {
|
|
615
|
+
result: `Console execution failed: ${error}`,
|
|
616
|
+
console_output: [],
|
|
617
|
+
snapshot: '',
|
|
618
|
+
timing: {
|
|
619
|
+
total_time_ms: totalTime,
|
|
620
|
+
snapshot_time_ms: 0,
|
|
621
|
+
},
|
|
622
|
+
};
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
}
|
|
627
|
+
|
|
@@ -101,7 +101,25 @@ export interface EnterAction {
|
|
|
101
101
|
type: 'enter';
|
|
102
102
|
}
|
|
103
103
|
|
|
104
|
-
export
|
|
104
|
+
export interface MouseAction {
|
|
105
|
+
type: 'mouse_control';
|
|
106
|
+
control: 'click' | 'right_click' | 'dblclick';
|
|
107
|
+
x: number;
|
|
108
|
+
y: number;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
export interface MouseDragAction {
|
|
112
|
+
type: 'mouse_drag';
|
|
113
|
+
from_ref: string;
|
|
114
|
+
to_ref: string;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
export interface PressKeyAction {
|
|
118
|
+
type: 'press_key';
|
|
119
|
+
keys: string[];
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
export type BrowserAction = ClickAction | TypeAction | SelectAction | ScrollAction | EnterAction | MouseAction | MouseDragAction | PressKeyAction;
|
|
105
123
|
|
|
106
124
|
export interface VisualMarkResult {
|
|
107
125
|
text: string;
|
|
@@ -173,6 +173,18 @@ class WebSocketBrowserServer {
|
|
|
173
173
|
case 'enter':
|
|
174
174
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
175
175
|
return await this.toolkit.enter();
|
|
176
|
+
|
|
177
|
+
case 'mouse_control':
|
|
178
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
179
|
+
return await this.toolkit.mouseControl(params.control, params.x, params.y);
|
|
180
|
+
|
|
181
|
+
case 'mouse_drag':
|
|
182
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
183
|
+
return await this.toolkit.mouseDrag(params.from_ref, params.to_ref);
|
|
184
|
+
|
|
185
|
+
case 'press_key':
|
|
186
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
187
|
+
return await this.toolkit.pressKeys(params.keys);
|
|
176
188
|
|
|
177
189
|
case 'back':
|
|
178
190
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
@@ -194,6 +206,14 @@ class WebSocketBrowserServer {
|
|
|
194
206
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
195
207
|
return await this.toolkit.getTabInfo();
|
|
196
208
|
|
|
209
|
+
case 'console_view':
|
|
210
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
211
|
+
return await this.toolkit.getConsoleView();
|
|
212
|
+
|
|
213
|
+
case 'console_exec':
|
|
214
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
215
|
+
return await this.toolkit.consoleExecute(params.code);
|
|
216
|
+
|
|
197
217
|
case 'wait_user':
|
|
198
218
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
199
219
|
return await this.toolkit.waitUser(params.timeout);
|
|
@@ -537,6 +537,31 @@ class WebSocketBrowserWrapper:
|
|
|
537
537
|
response = await self._send_command('enter', {})
|
|
538
538
|
return response
|
|
539
539
|
|
|
540
|
+
@action_logger
|
|
541
|
+
async def mouse_control(
|
|
542
|
+
self, control: str, x: float, y: float
|
|
543
|
+
) -> Dict[str, Any]:
|
|
544
|
+
"""Control the mouse to interact with browser with x, y coordinates."""
|
|
545
|
+
response = await self._send_command(
|
|
546
|
+
'mouse_control', {'control': control, 'x': x, 'y': y}
|
|
547
|
+
)
|
|
548
|
+
return response
|
|
549
|
+
|
|
550
|
+
@action_logger
|
|
551
|
+
async def mouse_drag(self, from_ref: str, to_ref: str) -> Dict[str, Any]:
|
|
552
|
+
"""Control the mouse to drag and drop in the browser using ref IDs."""
|
|
553
|
+
response = await self._send_command(
|
|
554
|
+
'mouse_drag',
|
|
555
|
+
{'from_ref': from_ref, 'to_ref': to_ref},
|
|
556
|
+
)
|
|
557
|
+
return response
|
|
558
|
+
|
|
559
|
+
@action_logger
|
|
560
|
+
async def press_key(self, keys: List[str]) -> Dict[str, Any]:
|
|
561
|
+
"""Press key and key combinations."""
|
|
562
|
+
response = await self._send_command('press_key', {'keys': keys})
|
|
563
|
+
return response
|
|
564
|
+
|
|
540
565
|
@action_logger
|
|
541
566
|
async def back(self) -> Dict[str, Any]:
|
|
542
567
|
"""Navigate back."""
|
|
@@ -571,6 +596,22 @@ class WebSocketBrowserWrapper:
|
|
|
571
596
|
# Fallback if wrapped in an object
|
|
572
597
|
return response.get('tabs', [])
|
|
573
598
|
|
|
599
|
+
@action_logger
|
|
600
|
+
async def console_view(self) -> List[Dict[str, Any]]:
|
|
601
|
+
"""Get current page console view"""
|
|
602
|
+
response = await self._send_command('console_view', {})
|
|
603
|
+
|
|
604
|
+
if isinstance(response, list):
|
|
605
|
+
return response
|
|
606
|
+
|
|
607
|
+
return response.get('logs', [])
|
|
608
|
+
|
|
609
|
+
@action_logger
|
|
610
|
+
async def console_exec(self, code: str) -> Dict[str, Any]:
|
|
611
|
+
"""Execute javascript code and get result."""
|
|
612
|
+
response = await self._send_command('console_exec', {'code': code})
|
|
613
|
+
return response
|
|
614
|
+
|
|
574
615
|
@action_logger
|
|
575
616
|
async def wait_user(
|
|
576
617
|
self, timeout_sec: Optional[float] = None
|
|
@@ -73,6 +73,9 @@ class ActionExecutor:
|
|
|
73
73
|
"extract": self._extract,
|
|
74
74
|
"scroll": self._scroll,
|
|
75
75
|
"enter": self._enter,
|
|
76
|
+
"mouse_control": self._mouse_control,
|
|
77
|
+
"mouse_drag": self._mouse_drag,
|
|
78
|
+
"press_key": self._press_key,
|
|
76
79
|
}.get(action_type)
|
|
77
80
|
|
|
78
81
|
if handler is None:
|
|
@@ -382,6 +385,150 @@ class ActionExecutor:
|
|
|
382
385
|
"details": details,
|
|
383
386
|
}
|
|
384
387
|
|
|
388
|
+
async def _mouse_control(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
|
389
|
+
r"""Handle mouse_control action based on the coordinates"""
|
|
390
|
+
control = action.get("control", "click")
|
|
391
|
+
x_coord = action.get("x", 0)
|
|
392
|
+
y_coord = action.get("y", 0)
|
|
393
|
+
|
|
394
|
+
details = {
|
|
395
|
+
"action_type": "mouse_control",
|
|
396
|
+
"target": f"coordinates : ({x_coord}, {y_coord})",
|
|
397
|
+
}
|
|
398
|
+
try:
|
|
399
|
+
if not self._valid_coordinates(x_coord, y_coord):
|
|
400
|
+
raise ValueError(
|
|
401
|
+
"Invalid coordinates, outside viewport bounds :"
|
|
402
|
+
f"({x_coord}, {y_coord})"
|
|
403
|
+
)
|
|
404
|
+
match control:
|
|
405
|
+
case "click":
|
|
406
|
+
await self.page.mouse.click(x_coord, y_coord)
|
|
407
|
+
message = "Action 'click' performed on the target"
|
|
408
|
+
|
|
409
|
+
case "right_click":
|
|
410
|
+
await self.page.mouse.click(
|
|
411
|
+
x_coord, y_coord, button="right"
|
|
412
|
+
)
|
|
413
|
+
message = "Action 'right_click' performed on the target"
|
|
414
|
+
|
|
415
|
+
case "dblclick":
|
|
416
|
+
await self.page.mouse.dblclick(x_coord, y_coord)
|
|
417
|
+
message = "Action 'dblclick' performed on the target"
|
|
418
|
+
|
|
419
|
+
case _:
|
|
420
|
+
return {
|
|
421
|
+
"message": f"Invalid control action {control}",
|
|
422
|
+
"details": details,
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
return {"message": message, "details": details}
|
|
426
|
+
except Exception as e:
|
|
427
|
+
return {"message": f"Action failed: {e}", "details": details}
|
|
428
|
+
|
|
429
|
+
async def _mouse_drag(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
|
430
|
+
r"""Handle mouse_drag action using ref IDs"""
|
|
431
|
+
from_ref = action.get("from_ref")
|
|
432
|
+
to_ref = action.get("to_ref")
|
|
433
|
+
|
|
434
|
+
if not from_ref or not to_ref:
|
|
435
|
+
return {
|
|
436
|
+
"message": "Error: mouse_drag requires from_ref and to_ref",
|
|
437
|
+
"details": {"error": "missing_refs"},
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
from_selector = f"[aria-ref='{from_ref}']"
|
|
441
|
+
to_selector = f"[aria-ref='{to_ref}']"
|
|
442
|
+
|
|
443
|
+
details = {
|
|
444
|
+
"action_type": "mouse_drag",
|
|
445
|
+
"from_ref": from_ref,
|
|
446
|
+
"to_ref": to_ref,
|
|
447
|
+
"from_selector": from_selector,
|
|
448
|
+
"to_selector": to_selector,
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
try:
|
|
452
|
+
# Get the source element
|
|
453
|
+
from_element = self.page.locator(from_selector)
|
|
454
|
+
from_count = await from_element.count()
|
|
455
|
+
if from_count == 0:
|
|
456
|
+
raise ValueError(
|
|
457
|
+
f"Source element with ref '{from_ref}' not found"
|
|
458
|
+
)
|
|
459
|
+
|
|
460
|
+
# Get the target element
|
|
461
|
+
to_element = self.page.locator(to_selector)
|
|
462
|
+
to_count = await to_element.count()
|
|
463
|
+
if to_count == 0:
|
|
464
|
+
raise ValueError(
|
|
465
|
+
f"Target element with ref '{to_ref}' not found"
|
|
466
|
+
)
|
|
467
|
+
|
|
468
|
+
# Get bounding boxes
|
|
469
|
+
from_box = await from_element.first.bounding_box()
|
|
470
|
+
to_box = await to_element.first.bounding_box()
|
|
471
|
+
|
|
472
|
+
if not from_box:
|
|
473
|
+
raise ValueError(
|
|
474
|
+
f"Could not get bounding box for source element "
|
|
475
|
+
f"with ref '{from_ref}'"
|
|
476
|
+
)
|
|
477
|
+
if not to_box:
|
|
478
|
+
raise ValueError(
|
|
479
|
+
f"Could not get bounding box for target element "
|
|
480
|
+
f"with ref '{to_ref}'"
|
|
481
|
+
)
|
|
482
|
+
|
|
483
|
+
# Calculate center coordinates
|
|
484
|
+
from_x = from_box['x'] + from_box['width'] / 2
|
|
485
|
+
from_y = from_box['y'] + from_box['height'] / 2
|
|
486
|
+
to_x = to_box['x'] + to_box['width'] / 2
|
|
487
|
+
to_y = to_box['y'] + to_box['height'] / 2
|
|
488
|
+
|
|
489
|
+
details.update(
|
|
490
|
+
{
|
|
491
|
+
"from_coordinates": {"x": from_x, "y": from_y},
|
|
492
|
+
"to_coordinates": {"x": to_x, "y": to_y},
|
|
493
|
+
}
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
# Perform the drag operation
|
|
497
|
+
await self.page.mouse.move(from_x, from_y)
|
|
498
|
+
await self.page.mouse.down()
|
|
499
|
+
# Destination coordinates
|
|
500
|
+
await self.page.mouse.move(to_x, to_y)
|
|
501
|
+
await self.page.mouse.up()
|
|
502
|
+
|
|
503
|
+
return {
|
|
504
|
+
"message": (
|
|
505
|
+
f"Dragged from element [ref={from_ref}] to element "
|
|
506
|
+
f"[ref={to_ref}]"
|
|
507
|
+
),
|
|
508
|
+
"details": details,
|
|
509
|
+
}
|
|
510
|
+
except Exception as e:
|
|
511
|
+
return {"message": f"Action failed: {e}", "details": details}
|
|
512
|
+
|
|
513
|
+
async def _press_key(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
|
514
|
+
r"""Handle press_key action by combining the keys in a list."""
|
|
515
|
+
keys = action.get("keys", [])
|
|
516
|
+
if not keys:
|
|
517
|
+
return {
|
|
518
|
+
"message": "Error: No keys specified",
|
|
519
|
+
"details": {"action_type": "press_key", "keys": ""},
|
|
520
|
+
}
|
|
521
|
+
combined_keys = "+".join(keys)
|
|
522
|
+
details = {"action_type": "press_key", "keys": combined_keys}
|
|
523
|
+
try:
|
|
524
|
+
await self.page.keyboard.press(combined_keys)
|
|
525
|
+
return {
|
|
526
|
+
"message": "Pressed keys in the browser",
|
|
527
|
+
"details": details,
|
|
528
|
+
}
|
|
529
|
+
except Exception as e:
|
|
530
|
+
return {"message": f"Action failed: {e}", "details": details}
|
|
531
|
+
|
|
385
532
|
# utilities
|
|
386
533
|
async def _wait_dom_stable(self) -> None:
|
|
387
534
|
r"""Wait for DOM to become stable before executing actions."""
|
|
@@ -402,6 +549,17 @@ class ActionExecutor:
|
|
|
402
549
|
except Exception:
|
|
403
550
|
pass # Don't fail if wait times out
|
|
404
551
|
|
|
552
|
+
def _valid_coordinates(self, x_coord: float, y_coord: float) -> bool:
|
|
553
|
+
r"""Validate given coordinates against viewport bounds."""
|
|
554
|
+
viewport = self.page.viewport_size
|
|
555
|
+
if not viewport:
|
|
556
|
+
raise ValueError("Viewport size not available from current page.")
|
|
557
|
+
|
|
558
|
+
return (
|
|
559
|
+
0 <= x_coord <= viewport['width']
|
|
560
|
+
and 0 <= y_coord <= viewport['height']
|
|
561
|
+
)
|
|
562
|
+
|
|
405
563
|
# static helpers
|
|
406
564
|
@staticmethod
|
|
407
565
|
def should_update_snapshot(action: Dict[str, Any]) -> bool:
|