camel-ai 0.2.74a5__py3-none-any.whl → 0.2.75__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +149 -95
- camel/configs/__init__.py +3 -0
- camel/configs/nebius_config.py +103 -0
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/models/__init__.py +2 -0
- camel/models/aiml_model.py +1 -16
- camel/models/anthropic_model.py +6 -22
- camel/models/aws_bedrock_model.py +1 -16
- camel/models/azure_openai_model.py +1 -16
- camel/models/base_model.py +0 -12
- camel/models/cohere_model.py +1 -16
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +1 -16
- camel/models/gemini_model.py +1 -16
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +1 -16
- camel/models/lmstudio_model.py +1 -17
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +2 -0
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +6 -22
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +0 -3
- camel/models/openai_model.py +1 -22
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +0 -32
- camel/models/sglang_model.py +1 -16
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +1 -16
- camel/societies/workforce/prompts.py +1 -8
- camel/societies/workforce/task_channel.py +120 -27
- camel/societies/workforce/workforce.py +35 -3
- camel/toolkits/__init__.py +0 -2
- camel/toolkits/github_toolkit.py +104 -17
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +3 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +260 -5
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +288 -37
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +3 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +209 -41
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +22 -3
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +28 -1
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +101 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +312 -3
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +39 -14
- camel/toolkits/openai_image_toolkit.py +55 -24
- camel/toolkits/search_toolkit.py +153 -29
- camel/types/__init__.py +2 -2
- camel/types/enums.py +54 -10
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +5 -0
- camel/utils/mcp.py +2 -2
- camel/utils/token_counting.py +18 -3
- {camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75.dist-info}/METADATA +9 -15
- {camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75.dist-info}/RECORD +79 -78
- camel/toolkits/openai_agent_toolkit.py +0 -135
- {camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {HybridBrowserSession} from './browser-session';
|
|
2
2
|
import {ActionResult, BrowserAction, BrowserToolkitConfig, SnapshotResult, TabInfo, VisualMarkResult} from './types';
|
|
3
3
|
import {ConfigLoader} from './config-loader';
|
|
4
|
+
import {ConsoleMessage} from 'playwright';
|
|
4
5
|
|
|
5
6
|
export class HybridBrowserToolkit {
|
|
6
7
|
private session: HybridBrowserSession;
|
|
@@ -68,35 +69,52 @@ export class HybridBrowserToolkit {
|
|
|
68
69
|
}
|
|
69
70
|
|
|
70
71
|
async visitPage(url: string): Promise<any> {
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
const response: any = {
|
|
75
|
-
result: result.message,
|
|
76
|
-
snapshot: '',
|
|
77
|
-
};
|
|
78
|
-
|
|
79
|
-
if (result.success) {
|
|
80
|
-
const snapshotStart = Date.now();
|
|
81
|
-
response.snapshot = await this.getPageSnapshot(this.viewportLimit);
|
|
82
|
-
const snapshotTime = Date.now() - snapshotStart;
|
|
72
|
+
try {
|
|
73
|
+
// Ensure browser is initialized before visiting page
|
|
74
|
+
await this.session.ensureBrowser();
|
|
83
75
|
|
|
76
|
+
const result = await this.session.visitPage(url);
|
|
77
|
+
|
|
78
|
+
// Format response for Python layer compatibility
|
|
79
|
+
const response: any = {
|
|
80
|
+
result: result.message,
|
|
81
|
+
snapshot: '',
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
if (result.success) {
|
|
85
|
+
const snapshotStart = Date.now();
|
|
86
|
+
response.snapshot = await this.getPageSnapshot(this.viewportLimit);
|
|
87
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
88
|
+
|
|
89
|
+
if (result.timing) {
|
|
90
|
+
result.timing.snapshot_time_ms = snapshotTime;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Include timing if available
|
|
84
95
|
if (result.timing) {
|
|
85
|
-
|
|
96
|
+
response.timing = result.timing;
|
|
86
97
|
}
|
|
98
|
+
|
|
99
|
+
// Include newTabId if present
|
|
100
|
+
if (result.newTabId) {
|
|
101
|
+
response.newTabId = result.newTabId;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
return response;
|
|
105
|
+
} catch (error) {
|
|
106
|
+
console.error('[visitPage] Error:', error);
|
|
107
|
+
return {
|
|
108
|
+
result: `Navigation to ${url} failed: ${error}`,
|
|
109
|
+
snapshot: '',
|
|
110
|
+
timing: {
|
|
111
|
+
total_time_ms: 0,
|
|
112
|
+
navigation_time_ms: 0,
|
|
113
|
+
dom_content_loaded_time_ms: 0,
|
|
114
|
+
network_idle_time_ms: 0,
|
|
115
|
+
}
|
|
116
|
+
};
|
|
87
117
|
}
|
|
88
|
-
|
|
89
|
-
// Include timing if available
|
|
90
|
-
if (result.timing) {
|
|
91
|
-
response.timing = result.timing;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
// Include newTabId if present
|
|
95
|
-
if (result.newTabId) {
|
|
96
|
-
response.newTabId = result.newTabId;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
return response;
|
|
100
118
|
}
|
|
101
119
|
|
|
102
120
|
async getPageSnapshot(viewportLimit: boolean = false): Promise<string> {
|
|
@@ -178,7 +196,40 @@ export class HybridBrowserToolkit {
|
|
|
178
196
|
// Use sharp for image processing
|
|
179
197
|
const sharp = require('sharp');
|
|
180
198
|
const page = await this.session.getCurrentPage();
|
|
181
|
-
|
|
199
|
+
let viewport = page.viewportSize();
|
|
200
|
+
|
|
201
|
+
// In CDP mode, viewportSize might be null, get it from window dimensions
|
|
202
|
+
if (!viewport) {
|
|
203
|
+
const windowSize = await page.evaluate(() => ({
|
|
204
|
+
width: window.innerWidth,
|
|
205
|
+
height: window.innerHeight
|
|
206
|
+
}));
|
|
207
|
+
viewport = windowSize;
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
// Get device pixel ratio to handle high DPI screens
|
|
211
|
+
const dpr = await page.evaluate(() => window.devicePixelRatio) || 1;
|
|
212
|
+
|
|
213
|
+
// Get actual screenshot dimensions
|
|
214
|
+
const metadata = await sharp(screenshotBuffer).metadata();
|
|
215
|
+
const screenshotWidth = metadata.width || viewport.width;
|
|
216
|
+
const screenshotHeight = metadata.height || viewport.height;
|
|
217
|
+
|
|
218
|
+
// Calculate scaling factor between CSS pixels and screenshot pixels
|
|
219
|
+
const scaleX = screenshotWidth / viewport.width;
|
|
220
|
+
const scaleY = screenshotHeight / viewport.height;
|
|
221
|
+
|
|
222
|
+
// Debug logging for CDP mode
|
|
223
|
+
if (process.env.HYBRID_BROWSER_DEBUG === '1') {
|
|
224
|
+
console.log('[CDP Debug] Viewport size:', viewport);
|
|
225
|
+
console.log('[CDP Debug] Device pixel ratio:', dpr);
|
|
226
|
+
console.log('[CDP Debug] Screenshot dimensions:', { width: screenshotWidth, height: screenshotHeight });
|
|
227
|
+
console.log('[CDP Debug] Scale factors:', { scaleX, scaleY });
|
|
228
|
+
console.log('[CDP Debug] Elements with coordinates:', elementsWithCoords.length);
|
|
229
|
+
elementsWithCoords.slice(0, 3).forEach(([ref, element]) => {
|
|
230
|
+
console.log(`[CDP Debug] Element ${ref}:`, element.coordinates);
|
|
231
|
+
});
|
|
232
|
+
}
|
|
182
233
|
|
|
183
234
|
// Filter elements visible in viewport
|
|
184
235
|
const visibleElements = elementsWithCoords.filter(([ref, element]) => {
|
|
@@ -197,18 +248,19 @@ export class HybridBrowserToolkit {
|
|
|
197
248
|
const coords = element.coordinates!;
|
|
198
249
|
const isClickable = clickableElements.has(ref);
|
|
199
250
|
|
|
200
|
-
//
|
|
201
|
-
|
|
202
|
-
const
|
|
203
|
-
const
|
|
204
|
-
const
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
const
|
|
251
|
+
// Scale coordinates from CSS pixels to screenshot pixels
|
|
252
|
+
const x = Math.max(0, coords.x * scaleX);
|
|
253
|
+
const y = Math.max(0, coords.y * scaleY);
|
|
254
|
+
const width = coords.width * scaleX;
|
|
255
|
+
const height = coords.height * scaleY;
|
|
256
|
+
|
|
257
|
+
// Clamp to screenshot bounds
|
|
258
|
+
const clampedWidth = Math.min(width, screenshotWidth - x);
|
|
259
|
+
const clampedHeight = Math.min(height, screenshotHeight - y);
|
|
208
260
|
|
|
209
261
|
// Position text to be visible even if element is partially cut off
|
|
210
|
-
const textX = Math.max(2, Math.min(x + 2,
|
|
211
|
-
const textY = Math.max(14, Math.min(y + 14,
|
|
262
|
+
const textX = Math.max(2, Math.min(x + 2, screenshotWidth - 40));
|
|
263
|
+
const textY = Math.max(14, Math.min(y + 14, screenshotHeight - 4));
|
|
212
264
|
|
|
213
265
|
// Different colors for clickable vs non-clickable elements
|
|
214
266
|
const colors = isClickable ? {
|
|
@@ -222,7 +274,7 @@ export class HybridBrowserToolkit {
|
|
|
222
274
|
};
|
|
223
275
|
|
|
224
276
|
return `
|
|
225
|
-
<rect x="${x}" y="${y}" width="${
|
|
277
|
+
<rect x="${x}" y="${y}" width="${clampedWidth}" height="${clampedHeight}"
|
|
226
278
|
fill="${colors.fill}" stroke="${colors.stroke}" stroke-width="2" rx="2"/>
|
|
227
279
|
<text x="${textX}" y="${textY}" font-family="Arial, sans-serif"
|
|
228
280
|
font-size="12" fill="${colors.textFill}" font-weight="bold">${ref}</text>
|
|
@@ -230,7 +282,7 @@ export class HybridBrowserToolkit {
|
|
|
230
282
|
}).join('');
|
|
231
283
|
|
|
232
284
|
const svgOverlay = `
|
|
233
|
-
<svg width="${
|
|
285
|
+
<svg width="${screenshotWidth}" height="${screenshotHeight}" xmlns="http://www.w3.org/2000/svg">
|
|
234
286
|
${marks}
|
|
235
287
|
</svg>
|
|
236
288
|
`;
|
|
@@ -362,8 +414,20 @@ export class HybridBrowserToolkit {
|
|
|
362
414
|
return this.executeActionWithSnapshot(action);
|
|
363
415
|
}
|
|
364
416
|
|
|
365
|
-
async type(ref: string
|
|
366
|
-
|
|
417
|
+
async type(refOrInputs: string | Array<{ ref: string; text: string }>, text?: string): Promise<any> {
|
|
418
|
+
let action: BrowserAction;
|
|
419
|
+
|
|
420
|
+
if (typeof refOrInputs === 'string') {
|
|
421
|
+
// Single input mode (backward compatibility)
|
|
422
|
+
if (text === undefined) {
|
|
423
|
+
throw new Error('Text parameter is required when ref is a string');
|
|
424
|
+
}
|
|
425
|
+
action = { type: 'type', ref: refOrInputs, text };
|
|
426
|
+
} else {
|
|
427
|
+
// Multiple inputs mode
|
|
428
|
+
action = { type: 'type', inputs: refOrInputs };
|
|
429
|
+
}
|
|
430
|
+
|
|
367
431
|
return this.executeActionWithSnapshot(action);
|
|
368
432
|
}
|
|
369
433
|
|
|
@@ -382,6 +446,21 @@ export class HybridBrowserToolkit {
|
|
|
382
446
|
return this.executeActionWithSnapshot(action);
|
|
383
447
|
}
|
|
384
448
|
|
|
449
|
+
async mouseControl(control: 'click' | 'right_click'| 'dblclick', x: number, y: number): Promise<any> {
|
|
450
|
+
const action: BrowserAction = { type: 'mouse_control', control, x, y };
|
|
451
|
+
return this.executeActionWithSnapshot(action);
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
async mouseDrag(from_ref: string, to_ref: string): Promise<any> {
|
|
455
|
+
const action: BrowserAction = { type: 'mouse_drag', from_ref, to_ref };
|
|
456
|
+
return this.executeActionWithSnapshot(action);
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
async pressKeys(keys: string[]): Promise<any> {
|
|
460
|
+
const action: BrowserAction = { type: 'press_key', keys};
|
|
461
|
+
return this.executeActionWithSnapshot(action);
|
|
462
|
+
}
|
|
463
|
+
|
|
385
464
|
async back(): Promise<ActionResult> {
|
|
386
465
|
const startTime = Date.now();
|
|
387
466
|
|
|
@@ -519,4 +598,93 @@ export class HybridBrowserToolkit {
|
|
|
519
598
|
return await this.session.getTabInfo();
|
|
520
599
|
}
|
|
521
600
|
|
|
522
|
-
|
|
601
|
+
async getConsoleView(): Promise<any> {
|
|
602
|
+
const currentLogs = await this.session.getCurrentLogs();
|
|
603
|
+
// Format logs
|
|
604
|
+
return currentLogs.map(item => ({
|
|
605
|
+
type: item.type(),
|
|
606
|
+
text: item.text(),
|
|
607
|
+
}));
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
async consoleExecute(code: string): Promise<any> {
|
|
611
|
+
const startTime = Date.now();
|
|
612
|
+
try {
|
|
613
|
+
const page = await this.session.getCurrentPage();
|
|
614
|
+
|
|
615
|
+
// Wrap the code to capture console.log output
|
|
616
|
+
const wrappedCode = `
|
|
617
|
+
(function() {
|
|
618
|
+
const _logs = [];
|
|
619
|
+
const originalLog = console.log;
|
|
620
|
+
console.log = function(...args) {
|
|
621
|
+
_logs.push(args.map(arg => {
|
|
622
|
+
try {
|
|
623
|
+
return typeof arg === 'object' ? JSON.stringify(arg) : String(arg);
|
|
624
|
+
} catch (e) {
|
|
625
|
+
return String(arg);
|
|
626
|
+
}
|
|
627
|
+
}).join(' '));
|
|
628
|
+
originalLog.apply(console, args);
|
|
629
|
+
};
|
|
630
|
+
|
|
631
|
+
let result;
|
|
632
|
+
try {
|
|
633
|
+
result = eval(${JSON.stringify(code)});
|
|
634
|
+
} catch (e) {
|
|
635
|
+
try {
|
|
636
|
+
result = (function() { ${code} })();
|
|
637
|
+
} catch (error) {
|
|
638
|
+
console.log = originalLog;
|
|
639
|
+
throw error;
|
|
640
|
+
}
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
console.log = originalLog;
|
|
644
|
+
return { result, logs: _logs };
|
|
645
|
+
})()
|
|
646
|
+
`;
|
|
647
|
+
|
|
648
|
+
const evalResult = await page.evaluate(wrappedCode) as { result: any; logs: string[] };
|
|
649
|
+
const { result, logs } = evalResult;
|
|
650
|
+
|
|
651
|
+
const snapshotStart = Date.now();
|
|
652
|
+
const snapshot = await this.getPageSnapshot(this.viewportLimit);
|
|
653
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
654
|
+
const totalTime = Date.now() - startTime;
|
|
655
|
+
|
|
656
|
+
// Properly serialize the result
|
|
657
|
+
let resultStr: string;
|
|
658
|
+
try {
|
|
659
|
+
resultStr = JSON.stringify(result, null, 2);
|
|
660
|
+
} catch (e) {
|
|
661
|
+
// Fallback for non-serializable values
|
|
662
|
+
resultStr = String(result);
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
return {
|
|
666
|
+
result: `Console execution result: ${resultStr}`,
|
|
667
|
+
console_output: logs,
|
|
668
|
+
snapshot: snapshot,
|
|
669
|
+
timing: {
|
|
670
|
+
total_time_ms: totalTime,
|
|
671
|
+
snapshot_time_ms: snapshotTime,
|
|
672
|
+
},
|
|
673
|
+
};
|
|
674
|
+
|
|
675
|
+
} catch (error) {
|
|
676
|
+
const totalTime = Date.now() - startTime;
|
|
677
|
+
return {
|
|
678
|
+
result: `Console execution failed: ${error}`,
|
|
679
|
+
console_output: [],
|
|
680
|
+
snapshot: '',
|
|
681
|
+
timing: {
|
|
682
|
+
total_time_ms: totalTime,
|
|
683
|
+
snapshot_time_ms: 0,
|
|
684
|
+
},
|
|
685
|
+
};
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
}
|
|
690
|
+
|
|
@@ -81,8 +81,9 @@ export interface ClickAction {
|
|
|
81
81
|
|
|
82
82
|
export interface TypeAction {
|
|
83
83
|
type: 'type';
|
|
84
|
-
ref
|
|
85
|
-
text
|
|
84
|
+
ref?: string; // Optional for backward compatibility
|
|
85
|
+
text?: string; // Optional for backward compatibility
|
|
86
|
+
inputs?: Array<{ ref: string; text: string }>; // New field for multiple inputs
|
|
86
87
|
}
|
|
87
88
|
|
|
88
89
|
export interface SelectAction {
|
|
@@ -101,7 +102,25 @@ export interface EnterAction {
|
|
|
101
102
|
type: 'enter';
|
|
102
103
|
}
|
|
103
104
|
|
|
104
|
-
export
|
|
105
|
+
export interface MouseAction {
|
|
106
|
+
type: 'mouse_control';
|
|
107
|
+
control: 'click' | 'right_click' | 'dblclick';
|
|
108
|
+
x: number;
|
|
109
|
+
y: number;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
export interface MouseDragAction {
|
|
113
|
+
type: 'mouse_drag';
|
|
114
|
+
from_ref: string;
|
|
115
|
+
to_ref: string;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
export interface PressKeyAction {
|
|
119
|
+
type: 'press_key';
|
|
120
|
+
keys: string[];
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
export type BrowserAction = ClickAction | TypeAction | SelectAction | ScrollAction | EnterAction | MouseAction | MouseDragAction | PressKeyAction;
|
|
105
124
|
|
|
106
125
|
export interface VisualMarkResult {
|
|
107
126
|
text: string;
|
|
@@ -160,7 +160,14 @@ class WebSocketBrowserServer {
|
|
|
160
160
|
|
|
161
161
|
case 'type':
|
|
162
162
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
163
|
-
|
|
163
|
+
// Handle both single input and multiple inputs
|
|
164
|
+
if (params.inputs) {
|
|
165
|
+
// Multiple inputs mode - pass inputs array directly
|
|
166
|
+
return await this.toolkit.type(params.inputs);
|
|
167
|
+
} else {
|
|
168
|
+
// Single input mode - pass ref and text
|
|
169
|
+
return await this.toolkit.type(params.ref, params.text);
|
|
170
|
+
}
|
|
164
171
|
|
|
165
172
|
case 'select':
|
|
166
173
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
@@ -173,6 +180,18 @@ class WebSocketBrowserServer {
|
|
|
173
180
|
case 'enter':
|
|
174
181
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
175
182
|
return await this.toolkit.enter();
|
|
183
|
+
|
|
184
|
+
case 'mouse_control':
|
|
185
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
186
|
+
return await this.toolkit.mouseControl(params.control, params.x, params.y);
|
|
187
|
+
|
|
188
|
+
case 'mouse_drag':
|
|
189
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
190
|
+
return await this.toolkit.mouseDrag(params.from_ref, params.to_ref);
|
|
191
|
+
|
|
192
|
+
case 'press_key':
|
|
193
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
194
|
+
return await this.toolkit.pressKeys(params.keys);
|
|
176
195
|
|
|
177
196
|
case 'back':
|
|
178
197
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
@@ -194,6 +213,14 @@ class WebSocketBrowserServer {
|
|
|
194
213
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
195
214
|
return await this.toolkit.getTabInfo();
|
|
196
215
|
|
|
216
|
+
case 'console_view':
|
|
217
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
218
|
+
return await this.toolkit.getConsoleView();
|
|
219
|
+
|
|
220
|
+
case 'console_exec':
|
|
221
|
+
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
222
|
+
return await this.toolkit.consoleExecute(params.code);
|
|
223
|
+
|
|
197
224
|
case 'wait_user':
|
|
198
225
|
if (!this.toolkit) throw new Error('Toolkit not initialized');
|
|
199
226
|
return await this.toolkit.waitUser(params.timeout);
|
|
@@ -396,6 +396,9 @@ class WebSocketBrowserWrapper:
|
|
|
396
396
|
"""Send a command to the WebSocket server and get response."""
|
|
397
397
|
await self._ensure_connection()
|
|
398
398
|
|
|
399
|
+
# Process params to ensure refs have 'e' prefix
|
|
400
|
+
params = self._process_refs_in_params(params)
|
|
401
|
+
|
|
399
402
|
message_id = str(uuid.uuid4())
|
|
400
403
|
message = {'id': message_id, 'command': command, 'params': params}
|
|
401
404
|
|
|
@@ -503,6 +506,55 @@ class WebSocketBrowserWrapper:
|
|
|
503
506
|
|
|
504
507
|
return ToolResult(text=response['text'], images=response['images'])
|
|
505
508
|
|
|
509
|
+
def _ensure_ref_prefix(self, ref: str) -> str:
|
|
510
|
+
"""Ensure ref has proper prefix"""
|
|
511
|
+
if not ref:
|
|
512
|
+
return ref
|
|
513
|
+
|
|
514
|
+
# If ref is purely numeric, add 'e' prefix for main frame
|
|
515
|
+
if ref.isdigit():
|
|
516
|
+
return f'e{ref}'
|
|
517
|
+
|
|
518
|
+
return ref
|
|
519
|
+
|
|
520
|
+
def _process_refs_in_params(
|
|
521
|
+
self, params: Dict[str, Any]
|
|
522
|
+
) -> Dict[str, Any]:
|
|
523
|
+
"""Process parameters to ensure all refs have 'e' prefix."""
|
|
524
|
+
if not params:
|
|
525
|
+
return params
|
|
526
|
+
|
|
527
|
+
# Create a copy to avoid modifying the original
|
|
528
|
+
processed = params.copy()
|
|
529
|
+
|
|
530
|
+
# Handle direct ref parameters
|
|
531
|
+
if 'ref' in processed:
|
|
532
|
+
processed['ref'] = self._ensure_ref_prefix(processed['ref'])
|
|
533
|
+
|
|
534
|
+
# Handle from_ref and to_ref for drag operations
|
|
535
|
+
if 'from_ref' in processed:
|
|
536
|
+
processed['from_ref'] = self._ensure_ref_prefix(
|
|
537
|
+
processed['from_ref']
|
|
538
|
+
)
|
|
539
|
+
if 'to_ref' in processed:
|
|
540
|
+
processed['to_ref'] = self._ensure_ref_prefix(processed['to_ref'])
|
|
541
|
+
|
|
542
|
+
# Handle inputs array for type_multiple
|
|
543
|
+
if 'inputs' in processed and isinstance(processed['inputs'], list):
|
|
544
|
+
processed_inputs = []
|
|
545
|
+
for input_item in processed['inputs']:
|
|
546
|
+
if isinstance(input_item, dict) and 'ref' in input_item:
|
|
547
|
+
processed_input = input_item.copy()
|
|
548
|
+
processed_input['ref'] = self._ensure_ref_prefix(
|
|
549
|
+
input_item['ref']
|
|
550
|
+
)
|
|
551
|
+
processed_inputs.append(processed_input)
|
|
552
|
+
else:
|
|
553
|
+
processed_inputs.append(input_item)
|
|
554
|
+
processed['inputs'] = processed_inputs
|
|
555
|
+
|
|
556
|
+
return processed
|
|
557
|
+
|
|
506
558
|
@action_logger
|
|
507
559
|
async def click(self, ref: str) -> Dict[str, Any]:
|
|
508
560
|
"""Click an element."""
|
|
@@ -515,6 +567,14 @@ class WebSocketBrowserWrapper:
|
|
|
515
567
|
response = await self._send_command('type', {'ref': ref, 'text': text})
|
|
516
568
|
return response
|
|
517
569
|
|
|
570
|
+
@action_logger
|
|
571
|
+
async def type_multiple(
|
|
572
|
+
self, inputs: List[Dict[str, str]]
|
|
573
|
+
) -> Dict[str, Any]:
|
|
574
|
+
"""Type text into multiple elements."""
|
|
575
|
+
response = await self._send_command('type', {'inputs': inputs})
|
|
576
|
+
return response
|
|
577
|
+
|
|
518
578
|
@action_logger
|
|
519
579
|
async def select(self, ref: str, value: str) -> Dict[str, Any]:
|
|
520
580
|
"""Select an option."""
|
|
@@ -537,6 +597,31 @@ class WebSocketBrowserWrapper:
|
|
|
537
597
|
response = await self._send_command('enter', {})
|
|
538
598
|
return response
|
|
539
599
|
|
|
600
|
+
@action_logger
|
|
601
|
+
async def mouse_control(
|
|
602
|
+
self, control: str, x: float, y: float
|
|
603
|
+
) -> Dict[str, Any]:
|
|
604
|
+
"""Control the mouse to interact with browser with x, y coordinates."""
|
|
605
|
+
response = await self._send_command(
|
|
606
|
+
'mouse_control', {'control': control, 'x': x, 'y': y}
|
|
607
|
+
)
|
|
608
|
+
return response
|
|
609
|
+
|
|
610
|
+
@action_logger
|
|
611
|
+
async def mouse_drag(self, from_ref: str, to_ref: str) -> Dict[str, Any]:
|
|
612
|
+
"""Control the mouse to drag and drop in the browser using ref IDs."""
|
|
613
|
+
response = await self._send_command(
|
|
614
|
+
'mouse_drag',
|
|
615
|
+
{'from_ref': from_ref, 'to_ref': to_ref},
|
|
616
|
+
)
|
|
617
|
+
return response
|
|
618
|
+
|
|
619
|
+
@action_logger
|
|
620
|
+
async def press_key(self, keys: List[str]) -> Dict[str, Any]:
|
|
621
|
+
"""Press key and key combinations."""
|
|
622
|
+
response = await self._send_command('press_key', {'keys': keys})
|
|
623
|
+
return response
|
|
624
|
+
|
|
540
625
|
@action_logger
|
|
541
626
|
async def back(self) -> Dict[str, Any]:
|
|
542
627
|
"""Navigate back."""
|
|
@@ -571,6 +656,22 @@ class WebSocketBrowserWrapper:
|
|
|
571
656
|
# Fallback if wrapped in an object
|
|
572
657
|
return response.get('tabs', [])
|
|
573
658
|
|
|
659
|
+
@action_logger
|
|
660
|
+
async def console_view(self) -> List[Dict[str, Any]]:
|
|
661
|
+
"""Get current page console view"""
|
|
662
|
+
response = await self._send_command('console_view', {})
|
|
663
|
+
|
|
664
|
+
if isinstance(response, list):
|
|
665
|
+
return response
|
|
666
|
+
|
|
667
|
+
return response.get('logs', [])
|
|
668
|
+
|
|
669
|
+
@action_logger
|
|
670
|
+
async def console_exec(self, code: str) -> Dict[str, Any]:
|
|
671
|
+
"""Execute javascript code and get result."""
|
|
672
|
+
response = await self._send_command('console_exec', {'code': code})
|
|
673
|
+
return response
|
|
674
|
+
|
|
574
675
|
@action_logger
|
|
575
676
|
async def wait_user(
|
|
576
677
|
self, timeout_sec: Optional[float] = None
|