camel-ai 0.2.74a5__py3-none-any.whl → 0.2.75a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +2 -2
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/models/aiml_model.py +1 -16
- camel/models/anthropic_model.py +6 -22
- camel/models/aws_bedrock_model.py +1 -16
- camel/models/azure_openai_model.py +1 -16
- camel/models/base_model.py +0 -12
- camel/models/cohere_model.py +1 -16
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +1 -16
- camel/models/gemini_model.py +1 -16
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +1 -16
- camel/models/lmstudio_model.py +1 -17
- camel/models/mistral_model.py +1 -16
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +6 -22
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +1 -16
- camel/models/openai_compatible_model.py +0 -3
- camel/models/openai_model.py +1 -16
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +0 -32
- camel/models/sglang_model.py +1 -16
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +1 -16
- camel/societies/workforce/prompts.py +1 -8
- camel/toolkits/__init__.py +0 -2
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +3 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +225 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +164 -8
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +106 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +19 -1
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +20 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +41 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +312 -3
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/search_toolkit.py +140 -27
- camel/types/__init__.py +2 -2
- camel/types/enums.py +20 -1
- camel/types/openai_types.py +2 -2
- camel/utils/mcp.py +2 -2
- camel/utils/token_counting.py +18 -3
- {camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75a2.dist-info}/METADATA +6 -6
- {camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75a2.dist-info}/RECORD +67 -68
- camel/toolkits/openai_agent_toolkit.py +0 -135
- {camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75a2.dist-info}/licenses/LICENSE +0 -0
|
@@ -64,11 +64,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
64
64
|
"browser_select",
|
|
65
65
|
"browser_scroll",
|
|
66
66
|
"browser_enter",
|
|
67
|
+
"browser_mouse_control",
|
|
68
|
+
"browser_mouse_drag",
|
|
69
|
+
"browser_press_key",
|
|
67
70
|
"browser_wait_user",
|
|
68
71
|
"browser_solve_task",
|
|
69
72
|
"browser_switch_tab",
|
|
70
73
|
"browser_close_tab",
|
|
71
74
|
"browser_get_tab_info",
|
|
75
|
+
"browser_console_view",
|
|
76
|
+
"browser_console_exec",
|
|
72
77
|
]
|
|
73
78
|
|
|
74
79
|
def __init__(
|
|
@@ -863,6 +868,156 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
863
868
|
"total_tabs": 0,
|
|
864
869
|
}
|
|
865
870
|
|
|
871
|
+
async def browser_mouse_control(
    self, *, control: str, x: float, y: float
) -> Dict[str, Any]:
    r"""Perform a mouse action at the given x, y coordinates in the browser.

    Args:
        control (str): The action to perform: 'click', 'right_click'
            or 'dblclick'.
        x (float): x-coordinate for the control action.
        y (float): y-coordinate for the control action.

    Returns:
        Dict[str, Any]: The mapping returned by the WebSocket wrapper for
            the action, extended with tab state. On failure a fresh
            dictionary is returned instead:
            - "result" (str): Confirmation of the action, or the error
              text on failure.
            - "snapshot" (str): A snapshot of the page after the mouse
              control action (empty string on failure).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.mouse_control(control, x, y)

        # Attach tab state; index 0 is reported when no tab is flagged
        # as current.
        tabs = await wrapper.get_tab_info()
        active_index = 0
        for position, tab in enumerate(tabs):
            if tab.get("is_current"):
                active_index = position
                break
        outcome.update(
            tabs=tabs,
            current_tab=active_index,
            total_tabs=len(tabs),
        )
        return outcome
    except Exception as exc:
        logger.error(f"Failed to control mouse: {exc}")
        return {
            "result": f"Error with mouse control: {exc}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
922
|
+
|
|
923
|
+
async def browser_mouse_drag(
    self, *, from_ref: str, to_ref: str
) -> Dict[str, Any]:
    r"""Drag one element onto another, both identified by ref IDs.

    Args:
        from_ref (str): The `ref` ID of the source element to drag from.
        to_ref (str): The `ref` ID of the target element to drag to.

    Returns:
        Dict[str, Any]: The mapping returned by the WebSocket wrapper for
            the action, extended with tab state. On failure a fresh
            dictionary is returned instead:
            - "result" (str): Confirmation of the action, or the error
              text on failure.
            - "snapshot" (str): A new page snapshot (empty string on
              failure).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.mouse_drag(from_ref, to_ref)

        # Attach tab state to the action result.
        tabs = await wrapper.get_tab_info()
        current_positions = (
            position
            for position, tab in enumerate(tabs)
            if tab.get("is_current")
        )
        active_index = next(current_positions, 0)
        tab_count = len(tabs)
        outcome.update(
            {
                "tabs": tabs,
                "current_tab": active_index,
                "total_tabs": tab_count,
            }
        )
        return outcome
    except Exception as exc:
        logger.error(f"Error with mouse drag and drop: {exc}")
        return {
            "result": f"Error with mouse drag and drop: {exc}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
971
|
+
|
|
972
|
+
async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
    r"""Press a single key or a key combination.

    The list of keys is forwarded unchanged to the WebSocket wrapper's
    ``press_key`` call.

    Args:
        keys (List[str]): key or list of keys.

    Returns:
        Dict[str, Any]: The mapping returned by the WebSocket wrapper for
            the action, extended with tab state. On failure a fresh
            dictionary is returned instead:
            - "result" (str): Confirmation of the action, or the error
              text on failure.
            - "snapshot" (str): A snapshot of the page after the press
              key action (empty string on failure).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.press_key(keys)

        # Attach tab state; index 0 is reported when no tab is flagged
        # as current.
        tabs = await wrapper.get_tab_info()
        active_index = 0
        for position, tab in enumerate(tabs):
            if tab.get("is_current"):
                active_index = position
                break
        outcome.update(
            tabs=tabs,
            current_tab=active_index,
            total_tabs=len(tabs),
        )
        return outcome
    except Exception as exc:
        logger.error(f"Failed to press key: {exc}")
        return {
            "result": f"Error with press key: {exc}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1020
|
+
|
|
866
1021
|
async def browser_switch_tab(self, *, tab_id: str) -> Dict[str, Any]:
|
|
867
1022
|
r"""Switches to a different browser tab using its ID.
|
|
868
1023
|
|
|
@@ -1002,6 +1157,71 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1002
1157
|
"total_tabs": 0,
|
|
1003
1158
|
}
|
|
1004
1159
|
|
|
1160
|
+
async def browser_console_view(self) -> Dict[str, Any]:
    r"""Return the console messages recorded for the current page.

    Returns:
        Dict[str, Any]: A dictionary with a single key:
            - "console_messages" (List[Dict]): Messages logged in the
              current page, as returned by the WebSocket wrapper. An
              empty list is returned if the request fails.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        messages = await wrapper.console_view()
    except Exception as exc:
        logger.error(f"Failed to get console view: {exc}")
        return {"console_messages": []}
    return {"console_messages": messages}
|
|
1177
|
+
|
|
1178
|
+
async def browser_console_exec(self, code: str) -> Dict[str, Any]:
    r"""Execute JavaScript code in the console of the current page and
    return the outcome.

    Args:
        code (str): JavaScript code to execute in the browser console.

    Returns:
        Dict[str, Any]: The mapping returned by the WebSocket wrapper for
            the execution, extended with tab state. On failure a fresh
            dictionary is returned instead:
            - "result" (str): Outcome of the execution, or the error
              text on failure.
            - "snapshot" (str): A snapshot of the active tab after the
              console execute action (empty string on failure).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.console_exec(code)

        # Attach tab state to the execution result.
        tabs = await wrapper.get_tab_info()
        current_positions = (
            position
            for position, tab in enumerate(tabs)
            if tab.get("is_current")
        )
        active_index = next(current_positions, 0)
        tab_count = len(tabs)
        outcome.update(
            {
                "tabs": tabs,
                "current_tab": active_index,
                "total_tabs": tab_count,
            }
        )
        return outcome
    except Exception as exc:
        logger.error(f"Failed to execute javascript in console: {exc}")
        return {
            "result": f"Error in code execution: {exc}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1224
|
+
|
|
1005
1225
|
# Additional methods for backward compatibility
|
|
1006
1226
|
async def browser_wait_user(
|
|
1007
1227
|
self, timeout_sec: Optional[float] = None
|
|
@@ -1146,10 +1366,15 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1146
1366
|
"browser_select": self.browser_select,
|
|
1147
1367
|
"browser_scroll": self.browser_scroll,
|
|
1148
1368
|
"browser_enter": self.browser_enter,
|
|
1369
|
+
"browser_mouse_click": self.browser_mouse_control,
|
|
1370
|
+
"browser_mouse_drag": self.browser_mouse_drag,
|
|
1371
|
+
"browser_press_key": self.browser_press_key,
|
|
1149
1372
|
"browser_wait_user": self.browser_wait_user,
|
|
1150
1373
|
"browser_switch_tab": self.browser_switch_tab,
|
|
1151
1374
|
"browser_close_tab": self.browser_close_tab,
|
|
1152
1375
|
"browser_get_tab_info": self.browser_get_tab_info,
|
|
1376
|
+
"browser_console_view": self.browser_console_view,
|
|
1377
|
+
"browser_console_exec": self.browser_console_exec,
|
|
1153
1378
|
}
|
|
1154
1379
|
|
|
1155
1380
|
enabled_tools = []
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Page, Browser, BrowserContext, chromium } from 'playwright';
|
|
1
|
+
import { Page, Browser, BrowserContext, chromium, ConsoleMessage } from 'playwright';
|
|
2
2
|
import { BrowserToolkitConfig, SnapshotResult, SnapshotElement, ActionResult, TabInfo, BrowserAction, DetailedTiming } from './types';
|
|
3
3
|
import { ConfigLoader, StealthConfig } from './config-loader';
|
|
4
4
|
|
|
@@ -6,18 +6,43 @@ export class HybridBrowserSession {
|
|
|
6
6
|
private browser: Browser | null = null;
|
|
7
7
|
private context: BrowserContext | null = null;
|
|
8
8
|
private pages: Map<string, Page> = new Map();
|
|
9
|
+
private consoleLogs: Map<string, ConsoleMessage[]> = new Map();
|
|
9
10
|
private currentTabId: string | null = null;
|
|
10
11
|
private tabCounter = 0;
|
|
11
12
|
private configLoader: ConfigLoader;
|
|
12
13
|
private scrollPosition: { x: number; y: number } = {x: 0, y: 0};
|
|
13
14
|
private hasNavigatedBefore = false; // Track if we've navigated before
|
|
15
|
+
private logLimit: number;
|
|
14
16
|
|
|
15
17
|
/**
 * Build the session from a Python-style config object.
 *
 * @param config Toolkit configuration; converted through
 *   ConfigLoader.fromPythonConfig.
 */
constructor(config: BrowserToolkitConfig = {}) {
  // Use ConfigLoader's fromPythonConfig to handle conversion properly
  this.configLoader = ConfigLoader.fromPythonConfig(config);

  // Per-tab console log cap; a falsy configured value falls back to 1000.
  const { consoleLogLimit } = this.configLoader.getBrowserConfig();
  this.logLimit = consoleLogLimit || 1000;
}
|
|
19
23
|
|
|
20
|
-
|
|
24
|
+
/**
 * Track a page under the given tab ID and start recording its console
 * messages.
 *
 * @param tabId Identifier the page is stored under.
 * @param page Playwright page to register.
 */
private registerNewPage(tabId: string, page: Page): void {
  this.pages.set(tabId, page);
  this.consoleLogs.set(tabId, []);

  // Record each console message, dropping the oldest entry once the
  // buffer grows past the configured limit.
  const recordMessage = (msg: ConsoleMessage): void => {
    const buffer = this.consoleLogs.get(tabId);
    if (!buffer) {
      return;
    }
    buffer.push(msg);
    if (buffer.length > this.logLimit) {
      buffer.shift();
    }
  };
  page.on('console', recordMessage);

  // Discard the tab's log buffer when the page closes.
  const dropLogs = (): void => {
    this.consoleLogs.delete(tabId);
  };
  page.on('close', dropLogs);
}
|
|
44
|
+
|
|
45
|
+
async ensureBrowser(): Promise<void> {
|
|
21
46
|
if (this.browser) {
|
|
22
47
|
return;
|
|
23
48
|
}
|
|
@@ -57,7 +82,7 @@ export class HybridBrowserSession {
|
|
|
57
82
|
// In CDP mode, only consider pages with about:blank as available
|
|
58
83
|
if (pageUrl === 'about:blank') {
|
|
59
84
|
const tabId = this.generateTabId();
|
|
60
|
-
this.
|
|
85
|
+
this.registerNewPage(tabId, page);
|
|
61
86
|
if (!this.currentTabId) {
|
|
62
87
|
this.currentTabId = tabId;
|
|
63
88
|
availablePageFound = true;
|
|
@@ -97,7 +122,7 @@ export class HybridBrowserSession {
|
|
|
97
122
|
const pages = this.context.pages();
|
|
98
123
|
if (pages.length > 0) {
|
|
99
124
|
const initialTabId = this.generateTabId();
|
|
100
|
-
this.
|
|
125
|
+
this.registerNewPage(initialTabId, pages[0]);
|
|
101
126
|
this.currentTabId = initialTabId;
|
|
102
127
|
}
|
|
103
128
|
} else {
|
|
@@ -115,7 +140,7 @@ export class HybridBrowserSession {
|
|
|
115
140
|
|
|
116
141
|
const initialPage = await this.context.newPage();
|
|
117
142
|
const initialTabId = this.generateTabId();
|
|
118
|
-
this.
|
|
143
|
+
this.registerNewPage(initialTabId, initialPage);
|
|
119
144
|
this.currentTabId = initialTabId;
|
|
120
145
|
}
|
|
121
146
|
}
|
|
@@ -139,6 +164,13 @@ export class HybridBrowserSession {
|
|
|
139
164
|
return this.pages.get(this.currentTabId)!;
|
|
140
165
|
}
|
|
141
166
|
|
|
167
|
+
/**
 * Return the console messages recorded for the current tab.
 *
 * @returns The stored messages, or an empty array when there is no
 *   current tab or nothing is recorded for it.
 */
async getCurrentLogs(): Promise<ConsoleMessage[]> {
  const tabId = this.currentTabId;
  if (tabId && this.consoleLogs.has(tabId)) {
    return this.consoleLogs.get(tabId) || [];
  }
  return [];
}
|
|
173
|
+
|
|
142
174
|
/**
|
|
143
175
|
* Get current scroll position from the page
|
|
144
176
|
*/
|
|
@@ -343,7 +375,7 @@ export class HybridBrowserSession {
|
|
|
343
375
|
|
|
344
376
|
// Generate tab ID for the new page
|
|
345
377
|
const newTabId = this.generateTabId();
|
|
346
|
-
this.
|
|
378
|
+
this.registerNewPage(newTabId, newPage);
|
|
347
379
|
|
|
348
380
|
// Set up page properties
|
|
349
381
|
const browserConfig = this.configLoader.getBrowserConfig();
|
|
@@ -434,7 +466,97 @@ export class HybridBrowserSession {
|
|
|
434
466
|
}
|
|
435
467
|
}
|
|
436
468
|
|
|
469
|
+
/**
 * Perform a click, right click or double click at page coordinates.
 *
 * @param page Page whose mouse is driven.
 * @param control One of 'click', 'right_click' or 'dblclick'.
 * @param x Horizontal coordinate; must lie within the viewport width.
 * @param y Vertical coordinate; must lie within the viewport height.
 * @returns `{ success: true }`, or `{ success: false, error }` when the
 *   viewport is unavailable, the point is out of bounds, the control is
 *   unknown, or the mouse call throws.
 */
private async performMouseControl(page: Page, control: string, x: number, y: number): Promise<{ success: boolean; error?: string }> {
  try {
    const size = page.viewportSize();
    if (!size) {
      return { success: false, error: 'Viewport size not available from page.' };
    }

    const outOfBounds = x < 0 || y < 0 || x > size.width || y > size.height;
    if (outOfBounds) {
      return { success: false, error: `Invalid coordinates, outside viewport bounds: (${x}, ${y})` };
    }

    if (control === 'click') {
      await page.mouse.click(x, y);
    } else if (control === 'right_click') {
      await page.mouse.click(x, y, { button: 'right' });
    } else if (control === 'dblclick') {
      await page.mouse.dblclick(x, y);
    } else {
      return { success: false, error: `Invalid control action: ${control}` };
    }

    return { success: true };
  } catch (error) {
    return { success: false, error: `Mouse action failed: ${error}` };
  }
}
|
|
437
503
|
|
|
504
|
+
/**
 * Drag from the center of one element to the center of another, both
 * located through `aria-ref` selectors built from the given ref IDs.
 *
 * @param page Page whose mouse is driven.
 * @param fromRef Ref ID of the element to drag from.
 * @param toRef Ref ID of the element to drag to.
 * @returns `{ success: true }`, or `{ success: false, error }` when an
 *   element is missing, has no bounding box, or a mouse call throws.
 */
private async performMouseDrag(page: Page, fromRef: string, toRef: string): Promise<{ success: boolean; error?: string }> {
  try {
    // Ensure we have the latest snapshot
    await (page as any)._snapshotForAI();

    // Get elements using Playwright's aria-ref selector
    const fromElement = page.locator(`aria-ref=${fromRef}`).first();
    const toElement = page.locator(`aria-ref=${toRef}`).first();

    // Check if elements exist
    const fromExists = await fromElement.count() > 0;
    const toExists = await toElement.count() > 0;

    if (!fromExists) {
      return { success: false, error: `Source element with ref ${fromRef} not found` };
    }

    if (!toExists) {
      return { success: false, error: `Target element with ref ${toRef} not found` };
    }

    // Get the center coordinates of both elements
    const fromBox = await fromElement.boundingBox();
    const toBox = await toElement.boundingBox();

    if (!fromBox) {
      return { success: false, error: `Could not get bounding box for source element with ref ${fromRef}` };
    }

    if (!toBox) {
      return { success: false, error: `Could not get bounding box for target element with ref ${toRef}` };
    }

    const fromX = fromBox.x + fromBox.width / 2;
    const fromY = fromBox.y + fromBox.height / 2;
    const toX = toBox.x + toBox.width / 2;
    const toY = toBox.y + toBox.height / 2;

    // Perform the drag operation
    await page.mouse.move(fromX, fromY);
    await page.mouse.down();
    try {
      // Destination coordinates
      await page.mouse.move(toX, toY);
    } catch (moveError) {
      // Release the button so a failed drag does not leave it held down
      // for later actions; the original error is still the one reported.
      await page.mouse.up().catch(() => undefined);
      throw moveError;
    }
    await page.mouse.up();

    return { success: true };
  } catch (error) {
    return { success: false, error: `Mouse drag action failed: ${error}` };
  }
}
|
|
438
560
|
|
|
439
561
|
async executeAction(action: BrowserAction): Promise<ActionResult> {
|
|
440
562
|
const startTime = Date.now();
|
|
@@ -519,6 +641,40 @@ export class HybridBrowserSession {
|
|
|
519
641
|
actionExecutionTime = Date.now() - enterStart;
|
|
520
642
|
break;
|
|
521
643
|
}
|
|
644
|
+
|
|
645
|
+
case 'mouse_control': {
|
|
646
|
+
elementSearchTime = Date.now() - elementSearchStart;
|
|
647
|
+
const mouseControlStart = Date.now();
|
|
648
|
+
const mouseControlResult = await this.performMouseControl(page, action.control, action.x, action.y);
|
|
649
|
+
|
|
650
|
+
if (!mouseControlResult.success) {
|
|
651
|
+
throw new Error(`Action failed: ${mouseControlResult.error}`);
|
|
652
|
+
}
|
|
653
|
+
actionExecutionTime = Date.now() - mouseControlStart;
|
|
654
|
+
break;
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
case 'mouse_drag': {
|
|
658
|
+
elementSearchTime = Date.now() - elementSearchStart;
|
|
659
|
+
const mouseDragStart = Date.now();
|
|
660
|
+
const mouseDragResult = await this.performMouseDrag(page, action.from_ref, action.to_ref);
|
|
661
|
+
|
|
662
|
+
if (!mouseDragResult.success) {
|
|
663
|
+
throw new Error(`Action failed: ${mouseDragResult.error}`);
|
|
664
|
+
}
|
|
665
|
+
actionExecutionTime = Date.now() - mouseDragStart;
|
|
666
|
+
break;
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
case 'press_key': {
|
|
670
|
+
elementSearchTime = Date.now() - elementSearchStart;
|
|
671
|
+
const keyPressStart = Date.now();
|
|
672
|
+
// concatenate keys with '+' for key combinations
|
|
673
|
+
const keys = action.keys.join('+');
|
|
674
|
+
await page.keyboard.press(keys);
|
|
675
|
+
actionExecutionTime = Date.now() - keyPressStart;
|
|
676
|
+
break;
|
|
677
|
+
}
|
|
522
678
|
|
|
523
679
|
default:
|
|
524
680
|
throw new Error(`Unknown action type: ${(action as any).type}`);
|
|
@@ -651,7 +807,7 @@ export class HybridBrowserSession {
|
|
|
651
807
|
if (!isTracked && pageUrl === 'about:blank') {
|
|
652
808
|
newPage = page;
|
|
653
809
|
newTabId = this.generateTabId();
|
|
654
|
-
this.
|
|
810
|
+
this.registerNewPage(newTabId, newPage);
|
|
655
811
|
break;
|
|
656
812
|
}
|
|
657
813
|
}
|
|
@@ -663,7 +819,7 @@ export class HybridBrowserSession {
|
|
|
663
819
|
// Non-CDP mode: create new page as usual
|
|
664
820
|
newPage = await this.context.newPage();
|
|
665
821
|
newTabId = this.generateTabId();
|
|
666
|
-
this.
|
|
822
|
+
this.registerNewPage(newTabId, newPage);
|
|
667
823
|
}
|
|
668
824
|
|
|
669
825
|
// Set up page properties
|
|
@@ -30,6 +30,7 @@ export interface BrowserConfig {
|
|
|
30
30
|
// Tab management
|
|
31
31
|
tabIdPrefix: string;
|
|
32
32
|
tabCounterPadding: number;
|
|
33
|
+
consoleLogLimit: number;
|
|
33
34
|
|
|
34
35
|
// Scroll and positioning
|
|
35
36
|
scrollPositionScale: number;
|
|
@@ -113,6 +114,7 @@ function getDefaultBrowserConfig(): BrowserConfig {
|
|
|
113
114
|
clickTimeout: 3000,
|
|
114
115
|
tabIdPrefix: 'tab-',
|
|
115
116
|
tabCounterPadding: 3,
|
|
117
|
+
consoleLogLimit: 1000,
|
|
116
118
|
scrollPositionScale: 0.1,
|
|
117
119
|
navigationDelay: 100,
|
|
118
120
|
blankPageUrls: ['about:blank', ''],
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {HybridBrowserSession} from './browser-session';
|
|
2
2
|
import {ActionResult, BrowserAction, BrowserToolkitConfig, SnapshotResult, TabInfo, VisualMarkResult} from './types';
|
|
3
3
|
import {ConfigLoader} from './config-loader';
|
|
4
|
+
import {ConsoleMessage} from 'playwright';
|
|
4
5
|
|
|
5
6
|
export class HybridBrowserToolkit {
|
|
6
7
|
private session: HybridBrowserSession;
|
|
@@ -382,6 +383,21 @@ export class HybridBrowserToolkit {
|
|
|
382
383
|
return this.executeActionWithSnapshot(action);
|
|
383
384
|
}
|
|
384
385
|
|
|
386
|
+
/**
 * Run a coordinate-based mouse action and return the action result
 * produced by executeActionWithSnapshot.
 *
 * @param control Mouse action to perform.
 * @param x Horizontal coordinate.
 * @param y Vertical coordinate.
 */
async mouseControl(control: 'click' | 'right_click'| 'dblclick', x: number, y: number): Promise<any> {
  return this.executeActionWithSnapshot({
    type: 'mouse_control',
    control,
    x,
    y,
  });
}
|
|
390
|
+
|
|
391
|
+
/**
 * Run a drag between two elements and return the action result produced
 * by executeActionWithSnapshot.
 *
 * @param from_ref Ref ID of the element to drag from.
 * @param to_ref Ref ID of the element to drag to.
 */
async mouseDrag(from_ref: string, to_ref: string): Promise<any> {
  return this.executeActionWithSnapshot({
    type: 'mouse_drag',
    from_ref,
    to_ref,
  });
}
|
|
395
|
+
|
|
396
|
+
/**
 * Run a key-press action and return the action result produced by
 * executeActionWithSnapshot.
 *
 * @param keys Keys to press.
 */
async pressKeys(keys: string[]): Promise<any> {
  return this.executeActionWithSnapshot({
    type: 'press_key',
    keys,
  });
}
|
|
400
|
+
|
|
385
401
|
async back(): Promise<ActionResult> {
|
|
386
402
|
const startTime = Date.now();
|
|
387
403
|
|
|
@@ -519,4 +535,93 @@ export class HybridBrowserToolkit {
|
|
|
519
535
|
return await this.session.getTabInfo();
|
|
520
536
|
}
|
|
521
537
|
|
|
522
|
-
|
|
538
|
+
/**
 * Return the current tab's console messages as plain objects.
 *
 * @returns An array of `{ type, text }` entries, one per recorded message.
 */
async getConsoleView(): Promise<any> {
  const messages = await this.session.getCurrentLogs();
  const formatted: { type: string; text: string }[] = [];
  for (const message of messages) {
    formatted.push({ type: message.type(), text: message.text() });
  }
  return formatted;
}
|
|
546
|
+
|
|
547
|
+
/**
 * Execute JavaScript in the current page and report its result, the
 * console.log output it produced, and a fresh page snapshot.
 *
 * NOTE(security): `code` is executed verbatim in the page (via eval and,
 * as a fallback, spliced into a function body); it must come from a
 * trusted caller.
 *
 * @param code JavaScript source to run in the page.
 * @returns An object with `result` (message containing the serialized
 *   value, or the failure text), `console_output` (captured console.log
 *   lines), `snapshot`, and `timing`.
 */
async consoleExecute(code: string): Promise<any> {
  const startTime = Date.now();
  try {
    const page = await this.session.getCurrentPage();

    // Wrap the code to capture console.log output.
    // The code is first evaluated as an expression/script. The function
    // body fallback (which allows a top-level `return`) is used only when
    // eval reports a SyntaxError or EvalError. Falling back on any error
    // would execute code that failed at runtime a second time, repeating
    // its side effects. console.log is restored in `finally` on every
    // path.
    const wrappedCode = `
      (function() {
        const _logs = [];
        const originalLog = console.log;
        console.log = function(...args) {
          _logs.push(args.map(arg => {
            try {
              return typeof arg === 'object' ? JSON.stringify(arg) : String(arg);
            } catch (e) {
              return String(arg);
            }
          }).join(' '));
          originalLog.apply(console, args);
        };

        let result;
        try {
          try {
            result = eval(${JSON.stringify(code)});
          } catch (e) {
            if (!(e instanceof SyntaxError || e instanceof EvalError)) {
              throw e;
            }
            result = (function() { ${code} })();
          }
        } finally {
          console.log = originalLog;
        }

        return { result, logs: _logs };
      })()
    `;

    const evalResult = await page.evaluate(wrappedCode) as { result: any; logs: string[] };
    const { result, logs } = evalResult;

    const snapshotStart = Date.now();
    const snapshot = await this.getPageSnapshot(this.viewportLimit);
    const snapshotTime = Date.now() - snapshotStart;
    const totalTime = Date.now() - startTime;

    // Properly serialize the result
    let resultStr: string;
    try {
      resultStr = JSON.stringify(result, null, 2);
    } catch (e) {
      // Fallback for non-serializable values
      resultStr = String(result);
    }

    return {
      result: `Console execution result: ${resultStr}`,
      console_output: logs,
      snapshot: snapshot,
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: snapshotTime,
      },
    };

  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      result: `Console execution failed: ${error}`,
      console_output: [],
      snapshot: '',
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: 0,
      },
    };
  }
}
|
|
625
|
+
|
|
626
|
+
}
|
|
627
|
+
|
|
@@ -101,7 +101,25 @@ export interface EnterAction {
|
|
|
101
101
|
type: 'enter';
|
|
102
102
|
}
|
|
103
103
|
|
|
104
|
-
export
|
|
104
|
+
/**
 * Action describing a mouse operation at explicit x/y coordinates.
 */
export interface MouseAction {
  // Discriminant for this action kind.
  type: 'mouse_control';
  // Which mouse operation to perform.
  control: 'click' | 'right_click' | 'dblclick';
  // Horizontal coordinate of the operation.
  x: number;
  // Vertical coordinate of the operation.
  y: number;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Action describing a drag from one referenced element to another.
 */
export interface MouseDragAction {
  // Discriminant for this action kind.
  type: 'mouse_drag';
  // Ref ID of the element the drag starts from.
  from_ref: string;
  // Ref ID of the element the drag ends on.
  to_ref: string;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Action describing a key press or key combination.
 */
export interface PressKeyAction {
  // Discriminant for this action kind.
  type: 'press_key';
  // Keys to press; more than one entry describes a combination.
  keys: string[];
}
|
|
121
|
+
|
|
122
|
+
// Union of every action shape; each member is distinguished by its `type` field.
export type BrowserAction = ClickAction | TypeAction | SelectAction | ScrollAction | EnterAction | MouseAction | MouseDragAction | PressKeyAction;
|
|
105
123
|
|
|
106
124
|
export interface VisualMarkResult {
|
|
107
125
|
text: string;
|