camel-ai 0.2.75a6__py3-none-any.whl → 0.2.76a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +151 -37
- camel/configs/__init__.py +3 -0
- camel/configs/amd_config.py +70 -0
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/models/__init__.py +2 -0
- camel/models/amd_model.py +101 -0
- camel/models/model_factory.py +2 -0
- camel/models/openai_model.py +0 -6
- camel/runtimes/daytona_runtime.py +11 -12
- camel/toolkits/__init__.py +5 -3
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/function_tool.py +6 -1
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +8 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +12 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +33 -14
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +135 -40
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +43 -207
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +231 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +39 -6
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +241 -56
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +5 -1
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +98 -31
- camel/toolkits/mcp_toolkit.py +39 -14
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/terminal_toolkit.py +12 -2
- camel/toolkits/video_analysis_toolkit.py +16 -10
- camel/types/enums.py +11 -0
- camel/utils/commons.py +2 -0
- camel/utils/mcp.py +136 -2
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/METADATA +5 -3
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/RECORD +38 -31
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/licenses/LICENSE +0 -0
camel/toolkits/code_execution.py
CHANGED
|
@@ -18,6 +18,7 @@ from camel.interpreters import (
|
|
|
18
18
|
E2BInterpreter,
|
|
19
19
|
InternalPythonInterpreter,
|
|
20
20
|
JupyterKernelInterpreter,
|
|
21
|
+
MicrosandboxInterpreter,
|
|
21
22
|
SubprocessInterpreter,
|
|
22
23
|
)
|
|
23
24
|
from camel.logger import get_logger
|
|
@@ -43,18 +44,31 @@ class CodeExecutionToolkit(BaseToolkit):
|
|
|
43
44
|
(default: :obj:`None`)
|
|
44
45
|
require_confirm (bool): Whether to require confirmation before
|
|
45
46
|
executing code. (default: :obj:`False`)
|
|
47
|
+
timeout (Optional[float]): General timeout for toolkit operations.
|
|
48
|
+
(default: :obj:`None`)
|
|
49
|
+
microsandbox_config (Optional[dict]): Configuration for microsandbox
|
|
50
|
+
interpreter. Available keys: 'server_url', 'api_key',
|
|
51
|
+
'namespace', 'sandbox_name', 'timeout'.
|
|
52
|
+
If None, uses default configuration. (default: :obj:`None`)
|
|
46
53
|
"""
|
|
47
54
|
|
|
48
55
|
def __init__(
|
|
49
56
|
self,
|
|
50
57
|
sandbox: Literal[
|
|
51
|
-
"internal_python",
|
|
58
|
+
"internal_python",
|
|
59
|
+
"jupyter",
|
|
60
|
+
"docker",
|
|
61
|
+
"subprocess",
|
|
62
|
+
"e2b",
|
|
63
|
+
"microsandbox",
|
|
52
64
|
] = "subprocess",
|
|
53
65
|
verbose: bool = False,
|
|
54
66
|
unsafe_mode: bool = False,
|
|
55
67
|
import_white_list: Optional[List[str]] = None,
|
|
56
68
|
require_confirm: bool = False,
|
|
57
69
|
timeout: Optional[float] = None,
|
|
70
|
+
# Microsandbox configuration dictionary
|
|
71
|
+
microsandbox_config: Optional[dict] = None,
|
|
58
72
|
) -> None:
|
|
59
73
|
super().__init__(timeout=timeout)
|
|
60
74
|
self.verbose = verbose
|
|
@@ -68,6 +82,7 @@ class CodeExecutionToolkit(BaseToolkit):
|
|
|
68
82
|
DockerInterpreter,
|
|
69
83
|
SubprocessInterpreter,
|
|
70
84
|
E2BInterpreter,
|
|
85
|
+
MicrosandboxInterpreter,
|
|
71
86
|
]
|
|
72
87
|
|
|
73
88
|
if sandbox == "internal_python":
|
|
@@ -95,6 +110,18 @@ class CodeExecutionToolkit(BaseToolkit):
|
|
|
95
110
|
)
|
|
96
111
|
elif sandbox == "e2b":
|
|
97
112
|
self.interpreter = E2BInterpreter(require_confirm=require_confirm)
|
|
113
|
+
elif sandbox == "microsandbox":
|
|
114
|
+
# Extract parameters with proper types for microsandbox
|
|
115
|
+
config = microsandbox_config or {}
|
|
116
|
+
|
|
117
|
+
self.interpreter = MicrosandboxInterpreter(
|
|
118
|
+
require_confirm=require_confirm,
|
|
119
|
+
server_url=config.get("server_url"),
|
|
120
|
+
api_key=config.get("api_key"),
|
|
121
|
+
namespace=config.get("namespace", "default"),
|
|
122
|
+
sandbox_name=config.get("sandbox_name"),
|
|
123
|
+
timeout=config.get("timeout", 30),
|
|
124
|
+
)
|
|
98
125
|
else:
|
|
99
126
|
raise RuntimeError(
|
|
100
127
|
f"The sandbox type `{sandbox}` is not supported."
|
camel/toolkits/function_tool.py
CHANGED
|
@@ -156,7 +156,12 @@ def get_openai_tool_schema(func: Callable) -> Dict[str, Any]:
|
|
|
156
156
|
if (name := param.arg_name) in parameters_dict["properties"] and (
|
|
157
157
|
description := param.description
|
|
158
158
|
):
|
|
159
|
-
|
|
159
|
+
# OpenAI does not allow descriptions on properties that use $ref.
|
|
160
|
+
# To avoid schema errors, we only add the description if "$ref" is
|
|
161
|
+
# not present.
|
|
162
|
+
prop = parameters_dict["properties"][name]
|
|
163
|
+
if "$ref" not in prop:
|
|
164
|
+
prop["description"] = description
|
|
160
165
|
|
|
161
166
|
short_description = docstring.short_description or ""
|
|
162
167
|
long_description = docstring.long_description or ""
|
camel/toolkits/hybrid_browser_toolkit/config_loader.py
CHANGED
|
@@ -44,6 +44,9 @@ class BrowserConfig:
|
|
|
44
44
|
connect_over_cdp: bool = False
|
|
45
45
|
cdp_url: Optional[str] = None
|
|
46
46
|
|
|
47
|
+
# Full visual mode configuration
|
|
48
|
+
full_visual_mode: bool = False
|
|
49
|
+
|
|
47
50
|
|
|
48
51
|
@dataclass
|
|
49
52
|
class ToolkitConfig:
|
|
@@ -51,6 +54,7 @@ class ToolkitConfig:
|
|
|
51
54
|
|
|
52
55
|
cache_dir: str = "tmp/"
|
|
53
56
|
browser_log_to_file: bool = False
|
|
57
|
+
log_dir: Optional[str] = None
|
|
54
58
|
session_id: Optional[str] = None
|
|
55
59
|
enabled_tools: Optional[list] = None
|
|
56
60
|
|
|
@@ -116,6 +120,8 @@ class ConfigLoader:
|
|
|
116
120
|
toolkit_kwargs["session_id"] = value
|
|
117
121
|
elif key == "enabledTools":
|
|
118
122
|
toolkit_kwargs["enabled_tools"] = value
|
|
123
|
+
elif key == "fullVisualMode":
|
|
124
|
+
browser_kwargs["full_visual_mode"] = value
|
|
119
125
|
|
|
120
126
|
browser_config = BrowserConfig(**browser_kwargs)
|
|
121
127
|
toolkit_config = ToolkitConfig(**toolkit_kwargs)
|
|
@@ -142,10 +148,12 @@ class ConfigLoader:
|
|
|
142
148
|
"screenshotTimeout": self.browser_config.screenshot_timeout,
|
|
143
149
|
"pageStabilityTimeout": self.browser_config.page_stability_timeout,
|
|
144
150
|
"browser_log_to_file": self.toolkit_config.browser_log_to_file,
|
|
151
|
+
"log_dir": self.toolkit_config.log_dir,
|
|
145
152
|
"session_id": self.toolkit_config.session_id,
|
|
146
153
|
"viewport_limit": self.browser_config.viewport_limit,
|
|
147
154
|
"connectOverCdp": self.browser_config.connect_over_cdp,
|
|
148
155
|
"cdpUrl": self.browser_config.cdp_url,
|
|
156
|
+
"fullVisualMode": self.browser_config.full_visual_mode,
|
|
149
157
|
}
|
|
150
158
|
|
|
151
159
|
def get_timeout_config(self) -> Dict[str, Optional[int]]:
|
camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py
CHANGED
|
@@ -38,6 +38,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
38
38
|
cache_dir: str = "tmp/",
|
|
39
39
|
enabled_tools: Optional[List[str]] = None,
|
|
40
40
|
browser_log_to_file: bool = False,
|
|
41
|
+
log_dir: Optional[str] = None,
|
|
41
42
|
session_id: Optional[str] = None,
|
|
42
43
|
default_start_url: str = "https://google.com/",
|
|
43
44
|
default_timeout: Optional[int] = None,
|
|
@@ -50,6 +51,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
50
51
|
viewport_limit: bool = False,
|
|
51
52
|
connect_over_cdp: bool = False,
|
|
52
53
|
cdp_url: Optional[str] = None,
|
|
54
|
+
full_visual_mode: bool = False,
|
|
53
55
|
**kwargs: Any,
|
|
54
56
|
) -> Any:
|
|
55
57
|
r"""Create a HybridBrowserToolkit instance with the specified mode.
|
|
@@ -72,6 +74,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
72
74
|
Defaults to None.
|
|
73
75
|
browser_log_to_file (bool): Whether to log browser actions to
|
|
74
76
|
file. Defaults to False.
|
|
77
|
+
log_dir (Optional[str]): Custom directory path for log files.
|
|
78
|
+
If None, defaults to "browser_log". Defaults to None.
|
|
75
79
|
session_id (Optional[str]): Session identifier. Defaults to None.
|
|
76
80
|
default_start_url (str): Default URL to start with. Defaults
|
|
77
81
|
to "https://google.com/".
|
|
@@ -98,6 +102,11 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
98
102
|
cdp_url (Optional[str]): WebSocket endpoint URL for CDP
|
|
99
103
|
connection. Required when connect_over_cdp is True.
|
|
100
104
|
Defaults to None. (Only supported in TypeScript mode)
|
|
105
|
+
full_visual_mode (bool): When True, browser actions like click,
|
|
106
|
+
browser_open, visit_page, etc. will return 'full visual mode'
|
|
107
|
+
as snapshot instead of actual page content. The
|
|
108
|
+
browser_get_page_snapshot method will still return the actual
|
|
109
|
+
snapshot. Defaults to False.
|
|
101
110
|
**kwargs: Additional keyword arguments passed to the
|
|
102
111
|
implementation.
|
|
103
112
|
|
|
@@ -117,6 +126,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
117
126
|
cache_dir=cache_dir,
|
|
118
127
|
enabled_tools=enabled_tools,
|
|
119
128
|
browser_log_to_file=browser_log_to_file,
|
|
129
|
+
log_dir=log_dir,
|
|
120
130
|
session_id=session_id,
|
|
121
131
|
default_start_url=default_start_url,
|
|
122
132
|
default_timeout=default_timeout,
|
|
@@ -129,6 +139,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
129
139
|
viewport_limit=viewport_limit,
|
|
130
140
|
connect_over_cdp=connect_over_cdp,
|
|
131
141
|
cdp_url=cdp_url,
|
|
142
|
+
full_visual_mode=full_visual_mode,
|
|
132
143
|
**kwargs,
|
|
133
144
|
)
|
|
134
145
|
elif mode == "python":
|
|
@@ -160,6 +171,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
160
171
|
cache_dir=cache_dir,
|
|
161
172
|
enabled_tools=enabled_tools,
|
|
162
173
|
browser_log_to_file=browser_log_to_file,
|
|
174
|
+
log_dir=log_dir,
|
|
163
175
|
session_id=session_id,
|
|
164
176
|
default_start_url=default_start_url,
|
|
165
177
|
default_timeout=default_timeout,
|
camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py
CHANGED
|
@@ -86,6 +86,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
86
86
|
cache_dir: str = "tmp/",
|
|
87
87
|
enabled_tools: Optional[List[str]] = None,
|
|
88
88
|
browser_log_to_file: bool = False,
|
|
89
|
+
log_dir: Optional[str] = None,
|
|
89
90
|
session_id: Optional[str] = None,
|
|
90
91
|
default_start_url: str = "https://google.com/",
|
|
91
92
|
default_timeout: Optional[int] = None,
|
|
@@ -98,6 +99,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
98
99
|
viewport_limit: bool = False,
|
|
99
100
|
connect_over_cdp: bool = False,
|
|
100
101
|
cdp_url: Optional[str] = None,
|
|
102
|
+
full_visual_mode: bool = False,
|
|
101
103
|
) -> None:
|
|
102
104
|
r"""Initialize the HybridBrowserToolkit.
|
|
103
105
|
|
|
@@ -115,6 +117,8 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
115
117
|
Defaults to None.
|
|
116
118
|
browser_log_to_file (bool): Whether to log browser actions to
|
|
117
119
|
file. Defaults to False.
|
|
120
|
+
log_dir (Optional[str]): Custom directory path for log files.
|
|
121
|
+
If None, defaults to "browser_log". Defaults to None.
|
|
118
122
|
session_id (Optional[str]): Session identifier. Defaults to None.
|
|
119
123
|
default_start_url (str): Default URL to start with. Defaults
|
|
120
124
|
to "https://google.com/".
|
|
@@ -143,6 +147,9 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
143
147
|
cdp_url (Optional[str]): WebSocket endpoint URL for CDP
|
|
144
148
|
connection (e.g., 'ws://localhost:9222/devtools/browser/...').
|
|
145
149
|
Required when connect_over_cdp is True. Defaults to None.
|
|
150
|
+
full_visual_mode (bool): When True, browser actions like click,
|
|
151
|
+
browser_open, visit_page, etc. will not return snapshots.
|
|
152
|
+
Defaults to False.
|
|
146
153
|
"""
|
|
147
154
|
super().__init__()
|
|
148
155
|
RegisteredAgentToolkit.__init__(self)
|
|
@@ -163,10 +170,12 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
163
170
|
viewport_limit=viewport_limit,
|
|
164
171
|
cache_dir=cache_dir,
|
|
165
172
|
browser_log_to_file=browser_log_to_file,
|
|
173
|
+
log_dir=log_dir,
|
|
166
174
|
session_id=session_id,
|
|
167
175
|
enabled_tools=enabled_tools,
|
|
168
176
|
connect_over_cdp=connect_over_cdp,
|
|
169
177
|
cdp_url=cdp_url,
|
|
178
|
+
full_visual_mode=full_visual_mode,
|
|
170
179
|
)
|
|
171
180
|
|
|
172
181
|
# Legacy attribute access for backward compatibility
|
|
@@ -182,6 +191,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
182
191
|
self._default_start_url = browser_config.default_start_url
|
|
183
192
|
self._session_id = toolkit_config.session_id or "default"
|
|
184
193
|
self._viewport_limit = browser_config.viewport_limit
|
|
194
|
+
self._full_visual_mode = browser_config.full_visual_mode
|
|
185
195
|
|
|
186
196
|
# Store timeout configuration for backward compatibility
|
|
187
197
|
self._default_timeout = browser_config.default_timeout
|
|
@@ -648,22 +658,29 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
648
658
|
|
|
649
659
|
# Add tab information
|
|
650
660
|
tab_info = await ws_wrapper.get_tab_info()
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
)
|
|
660
|
-
|
|
661
|
+
|
|
662
|
+
response = {
|
|
663
|
+
"result": result.get("result", ""),
|
|
664
|
+
"snapshot": result.get("snapshot", ""),
|
|
665
|
+
"tabs": tab_info,
|
|
666
|
+
"current_tab": next(
|
|
667
|
+
(
|
|
668
|
+
i
|
|
669
|
+
for i, tab in enumerate(tab_info)
|
|
670
|
+
if tab.get("is_current")
|
|
661
671
|
),
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
672
|
+
0,
|
|
673
|
+
),
|
|
674
|
+
"total_tabs": len(tab_info),
|
|
675
|
+
}
|
|
665
676
|
|
|
666
|
-
|
|
677
|
+
if "newTabId" in result:
|
|
678
|
+
response["newTabId"] = result["newTabId"]
|
|
679
|
+
|
|
680
|
+
if "timing" in result:
|
|
681
|
+
response["timing"] = result["timing"]
|
|
682
|
+
|
|
683
|
+
return response
|
|
667
684
|
except Exception as e:
|
|
668
685
|
logger.error(f"Failed to click element: {e}")
|
|
669
686
|
return {
|
|
@@ -1377,6 +1394,8 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1377
1394
|
screenshot_timeout=self._screenshot_timeout,
|
|
1378
1395
|
page_stability_timeout=self._page_stability_timeout,
|
|
1379
1396
|
dom_content_loaded_timeout=self._dom_content_loaded_timeout,
|
|
1397
|
+
viewport_limit=self._viewport_limit,
|
|
1398
|
+
full_visual_mode=self._full_visual_mode,
|
|
1380
1399
|
)
|
|
1381
1400
|
|
|
1382
1401
|
def get_tools(self) -> List[FunctionTool]:
|
camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts
CHANGED
|
@@ -59,14 +59,30 @@ export class HybridBrowserSession {
|
|
|
59
59
|
const contexts = this.browser.contexts();
|
|
60
60
|
if (contexts.length > 0) {
|
|
61
61
|
this.context = contexts[0];
|
|
62
|
+
|
|
63
|
+
// Apply stealth headers to existing context if configured
|
|
64
|
+
// Note: userAgent cannot be changed on an existing context
|
|
65
|
+
if (stealthConfig.enabled) {
|
|
66
|
+
if (stealthConfig.extraHTTPHeaders) {
|
|
67
|
+
await this.context.setExtraHTTPHeaders(stealthConfig.extraHTTPHeaders);
|
|
68
|
+
}
|
|
69
|
+
if (stealthConfig.userAgent) {
|
|
70
|
+
console.warn('[HybridBrowserSession] Cannot apply userAgent to existing context. Consider creating a new context if userAgent customization is required.');
|
|
71
|
+
}
|
|
72
|
+
}
|
|
62
73
|
} else {
|
|
63
74
|
const contextOptions: any = {
|
|
64
75
|
viewport: browserConfig.viewport
|
|
65
76
|
};
|
|
66
77
|
|
|
67
|
-
// Apply stealth headers if configured
|
|
68
|
-
if (stealthConfig.enabled
|
|
69
|
-
|
|
78
|
+
// Apply stealth headers and UA if configured
|
|
79
|
+
if (stealthConfig.enabled) {
|
|
80
|
+
if (stealthConfig.extraHTTPHeaders) {
|
|
81
|
+
contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
|
|
82
|
+
}
|
|
83
|
+
if (stealthConfig.userAgent) {
|
|
84
|
+
contextOptions.userAgent = stealthConfig.userAgent;
|
|
85
|
+
}
|
|
70
86
|
}
|
|
71
87
|
|
|
72
88
|
this.context = await this.browser.newContext(contextOptions);
|
|
@@ -105,13 +121,18 @@ export class HybridBrowserSession {
|
|
|
105
121
|
if (stealthConfig.enabled) {
|
|
106
122
|
launchOptions.args = stealthConfig.args || [];
|
|
107
123
|
|
|
108
|
-
// Apply stealth user agent if configured
|
|
124
|
+
// Apply stealth user agent/headers if configured
|
|
109
125
|
if (stealthConfig.userAgent) {
|
|
110
126
|
launchOptions.userAgent = stealthConfig.userAgent;
|
|
111
127
|
}
|
|
128
|
+
if (stealthConfig.extraHTTPHeaders) {
|
|
129
|
+
launchOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
|
|
130
|
+
}
|
|
112
131
|
}
|
|
113
132
|
|
|
114
133
|
if (browserConfig.userDataDir) {
|
|
134
|
+
// Ensure viewport is honored in persistent context
|
|
135
|
+
launchOptions.viewport = browserConfig.viewport;
|
|
115
136
|
this.context = await chromium.launchPersistentContext(
|
|
116
137
|
browserConfig.userDataDir,
|
|
117
138
|
launchOptions
|
|
@@ -129,9 +150,14 @@ export class HybridBrowserSession {
|
|
|
129
150
|
viewport: browserConfig.viewport
|
|
130
151
|
};
|
|
131
152
|
|
|
132
|
-
// Apply stealth headers if configured
|
|
133
|
-
if (stealthConfig.enabled
|
|
134
|
-
|
|
153
|
+
// Apply stealth headers and UA if configured
|
|
154
|
+
if (stealthConfig.enabled) {
|
|
155
|
+
if (stealthConfig.extraHTTPHeaders) {
|
|
156
|
+
contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
|
|
157
|
+
}
|
|
158
|
+
if (stealthConfig.userAgent) {
|
|
159
|
+
contextOptions.userAgent = stealthConfig.userAgent;
|
|
160
|
+
}
|
|
135
161
|
}
|
|
136
162
|
|
|
137
163
|
this.context = await this.browser.newContext(contextOptions);
|
|
@@ -173,26 +199,10 @@ export class HybridBrowserSession {
|
|
|
173
199
|
|
|
174
200
|
async getCurrentPage(): Promise<Page> {
|
|
175
201
|
if (!this.currentTabId || !this.pages.has(this.currentTabId)) {
|
|
176
|
-
// In CDP mode,
|
|
202
|
+
// In CDP mode, we cannot create new pages
|
|
177
203
|
const browserConfig = this.configLoader.getBrowserConfig();
|
|
178
|
-
if (browserConfig.connectOverCdp
|
|
179
|
-
|
|
180
|
-
try {
|
|
181
|
-
const newPage = await this.context.newPage();
|
|
182
|
-
const newTabId = this.generateTabId();
|
|
183
|
-
this.registerNewPage(newTabId, newPage);
|
|
184
|
-
this.currentTabId = newTabId;
|
|
185
|
-
|
|
186
|
-
// Set page timeouts
|
|
187
|
-
newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
|
|
188
|
-
newPage.setDefaultTimeout(browserConfig.navigationTimeout);
|
|
189
|
-
|
|
190
|
-
console.log(`[CDP] Created new page with tab ID: ${newTabId}`);
|
|
191
|
-
return newPage;
|
|
192
|
-
} catch (error) {
|
|
193
|
-
console.error('[CDP] Failed to create new page:', error);
|
|
194
|
-
throw new Error('No active page available and failed to create new page in CDP mode');
|
|
195
|
-
}
|
|
204
|
+
if (browserConfig.connectOverCdp) {
|
|
205
|
+
throw new Error('No active page available in CDP mode; frontend must pre-create blank tabs.');
|
|
196
206
|
}
|
|
197
207
|
throw new Error('No active page available');
|
|
198
208
|
}
|
|
@@ -235,6 +245,36 @@ export class HybridBrowserSession {
|
|
|
235
245
|
return this.getSnapshotForAINative(includeCoordinates, viewportLimit);
|
|
236
246
|
}
|
|
237
247
|
|
|
248
|
+
private parseElementFromSnapshot(snapshotText: string, ref: string): { role?: string; text?: string } {
|
|
249
|
+
const lines = snapshotText.split('\n');
|
|
250
|
+
for (const line of lines) {
|
|
251
|
+
if (line.includes(`[ref=${ref}]`)) {
|
|
252
|
+
const typeMatch = line.match(/^\s*-?\s*([\w-]+)/);
|
|
253
|
+
const role = typeMatch ? typeMatch[1] : undefined;
|
|
254
|
+
const textMatch = line.match(/"([^"]*)"/);
|
|
255
|
+
const text = textMatch ? textMatch[1] : undefined;
|
|
256
|
+
return { role, text };
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
return {};
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
private buildSnapshotIndex(snapshotText: string): Map<string, { role?: string; text?: string }> {
|
|
263
|
+
const index = new Map<string, { role?: string; text?: string }>();
|
|
264
|
+
const refRe = /\[ref=([^\]]+)\]/i;
|
|
265
|
+
for (const line of snapshotText.split('\n')) {
|
|
266
|
+
const m = line.match(refRe);
|
|
267
|
+
if (!m) continue;
|
|
268
|
+
const ref = m[1];
|
|
269
|
+
const roleMatch = line.match(/^\s*-?\s*([a-z0-9_-]+)/i);
|
|
270
|
+
const role = roleMatch ? roleMatch[1].toLowerCase() : undefined;
|
|
271
|
+
const textMatch = line.match(/"([^"]*)"/);
|
|
272
|
+
const text = textMatch ? textMatch[1] : undefined;
|
|
273
|
+
index.set(ref, { role, text });
|
|
274
|
+
}
|
|
275
|
+
return index;
|
|
276
|
+
}
|
|
277
|
+
|
|
238
278
|
private async getSnapshotForAINative(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
|
|
239
279
|
const startTime = Date.now();
|
|
240
280
|
const page = await this.getCurrentPage();
|
|
@@ -257,6 +297,17 @@ export class HybridBrowserSession {
|
|
|
257
297
|
const mappingStart = Date.now();
|
|
258
298
|
const playwrightMapping: Record<string, any> = {};
|
|
259
299
|
|
|
300
|
+
// Parse element info in a single pass
|
|
301
|
+
const snapshotIndex = this.buildSnapshotIndex(snapshotText);
|
|
302
|
+
for (const ref of refs) {
|
|
303
|
+
const elementInfo = snapshotIndex.get(ref) || {};
|
|
304
|
+
playwrightMapping[ref] = {
|
|
305
|
+
ref,
|
|
306
|
+
role: elementInfo.role || 'unknown',
|
|
307
|
+
text: elementInfo.text || '',
|
|
308
|
+
};
|
|
309
|
+
}
|
|
310
|
+
|
|
260
311
|
if (includeCoordinates) {
|
|
261
312
|
// Get coordinates for each ref using aria-ref selector
|
|
262
313
|
for (const ref of refs) {
|
|
@@ -270,8 +321,9 @@ export class HybridBrowserSession {
|
|
|
270
321
|
const boundingBox = await element.boundingBox();
|
|
271
322
|
|
|
272
323
|
if (boundingBox) {
|
|
324
|
+
// Add coordinates to existing element info
|
|
273
325
|
playwrightMapping[ref] = {
|
|
274
|
-
ref,
|
|
326
|
+
...playwrightMapping[ref],
|
|
275
327
|
coordinates: {
|
|
276
328
|
x: Math.round(boundingBox.x),
|
|
277
329
|
y: Math.round(boundingBox.y),
|
|
@@ -388,7 +440,6 @@ export class HybridBrowserSession {
|
|
|
388
440
|
|
|
389
441
|
if (shouldOpenNewTab) {
|
|
390
442
|
// Handle new tab opening
|
|
391
|
-
|
|
392
443
|
// If it's a link that doesn't naturally open in new tab, force it
|
|
393
444
|
if (isNavigableLink && !naturallyOpensNewTab) {
|
|
394
445
|
await element.evaluate((el, blankTarget) => {
|
|
@@ -803,6 +854,55 @@ export class HybridBrowserSession {
|
|
|
803
854
|
}
|
|
804
855
|
}
|
|
805
856
|
|
|
857
|
+
/**
|
|
858
|
+
* Wait for DOM to stop changing for a specified duration
|
|
859
|
+
*/
|
|
860
|
+
private async waitForDOMStability(page: Page, maxWaitTime: number = 500): Promise<void> {
|
|
861
|
+
const startTime = Date.now();
|
|
862
|
+
const stabilityThreshold = 100; // Consider stable if no changes for 100ms
|
|
863
|
+
let lastChangeTime = Date.now();
|
|
864
|
+
|
|
865
|
+
try {
|
|
866
|
+
// Monitor DOM changes
|
|
867
|
+
await page.evaluate(() => {
|
|
868
|
+
let changeCount = 0;
|
|
869
|
+
(window as any).__domStabilityCheck = { changeCount: 0, lastChange: Date.now() };
|
|
870
|
+
|
|
871
|
+
const observer = new MutationObserver(() => {
|
|
872
|
+
(window as any).__domStabilityCheck.changeCount++;
|
|
873
|
+
(window as any).__domStabilityCheck.lastChange = Date.now();
|
|
874
|
+
});
|
|
875
|
+
|
|
876
|
+
observer.observe(document.body, {
|
|
877
|
+
childList: true,
|
|
878
|
+
subtree: true,
|
|
879
|
+
attributes: true,
|
|
880
|
+
characterData: true
|
|
881
|
+
});
|
|
882
|
+
|
|
883
|
+
(window as any).__domStabilityObserver = observer;
|
|
884
|
+
});
|
|
885
|
+
|
|
886
|
+
// Wait until no changes for stabilityThreshold or timeout
|
|
887
|
+
await page.waitForFunction(
|
|
888
|
+
(threshold) => {
|
|
889
|
+
const check = (window as any).__domStabilityCheck;
|
|
890
|
+
return check && (Date.now() - check.lastChange) > threshold;
|
|
891
|
+
},
|
|
892
|
+
stabilityThreshold,
|
|
893
|
+
{ timeout: Math.max(0, maxWaitTime) }
|
|
894
|
+
).catch(() => {});
|
|
895
|
+
} finally {
|
|
896
|
+
// Cleanup
|
|
897
|
+
await page.evaluate(() => {
|
|
898
|
+
const observer = (window as any).__domStabilityObserver;
|
|
899
|
+
if (observer) observer.disconnect();
|
|
900
|
+
delete (window as any).__domStabilityObserver;
|
|
901
|
+
delete (window as any).__domStabilityCheck;
|
|
902
|
+
}).catch(() => {});
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
|
|
806
906
|
private async waitForPageStability(page: Page): Promise<{ domContentLoadedTime: number; networkIdleTime: number }> {
|
|
807
907
|
let domContentLoadedTime = 0;
|
|
808
908
|
let networkIdleTime = 0;
|
|
@@ -1132,12 +1232,12 @@ export class HybridBrowserSession {
|
|
|
1132
1232
|
const filtered: Record<string, SnapshotElement> = {};
|
|
1133
1233
|
|
|
1134
1234
|
|
|
1135
|
-
// Apply viewport filtering
|
|
1136
|
-
|
|
1137
|
-
const
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1235
|
+
// Apply viewport filtering
|
|
1236
|
+
// boundingBox() returns viewport-relative coordinates, so we don't need to add scroll offsets
|
|
1237
|
+
const viewportLeft = 0;
|
|
1238
|
+
const viewportTop = 0;
|
|
1239
|
+
const viewportRight = viewport.width;
|
|
1240
|
+
const viewportBottom = viewport.height;
|
|
1141
1241
|
|
|
1142
1242
|
for (const [ref, element] of Object.entries(elements)) {
|
|
1143
1243
|
// If element has no coordinates, include it (fallback)
|
|
@@ -1148,14 +1248,9 @@ export class HybridBrowserSession {
|
|
|
1148
1248
|
|
|
1149
1249
|
const { x, y, width, height } = element.coordinates;
|
|
1150
1250
|
|
|
1151
|
-
// Calculate viewport bounds using adjusted scroll position
|
|
1152
|
-
const viewportLeft = adjustedScrollPos.x;
|
|
1153
|
-
const viewportTop = adjustedScrollPos.y;
|
|
1154
|
-
const viewportRight = adjustedScrollPos.x + viewport.width;
|
|
1155
|
-
const viewportBottom = adjustedScrollPos.y + viewport.height;
|
|
1156
|
-
|
|
1157
1251
|
// Check if element is visible in current viewport
|
|
1158
1252
|
// Element is visible if it overlaps with viewport bounds
|
|
1253
|
+
// Since boundingBox() coords are viewport-relative, we compare directly
|
|
1159
1254
|
const isVisible = (
|
|
1160
1255
|
x < viewportRight && // Left edge is before viewport right
|
|
1161
1256
|
y < viewportBottom && // Top edge is before viewport bottom
|
camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts
CHANGED
|
@@ -79,6 +79,7 @@ export interface WebSocketConfig {
|
|
|
79
79
|
browser_log_to_file: boolean;
|
|
80
80
|
session_id?: string;
|
|
81
81
|
viewport_limit: boolean;
|
|
82
|
+
fullVisualMode?: boolean;
|
|
82
83
|
}
|
|
83
84
|
|
|
84
85
|
// Default stealth configuration
|
|
@@ -212,6 +213,7 @@ export class ConfigLoader {
|
|
|
212
213
|
if (config.browser_log_to_file !== undefined) wsConfig.browser_log_to_file = config.browser_log_to_file;
|
|
213
214
|
if (config.session_id !== undefined) wsConfig.session_id = config.session_id;
|
|
214
215
|
if (config.viewport_limit !== undefined) wsConfig.viewport_limit = config.viewport_limit;
|
|
216
|
+
if (config.fullVisualMode !== undefined) wsConfig.fullVisualMode = config.fullVisualMode;
|
|
215
217
|
|
|
216
218
|
// CDP connection options
|
|
217
219
|
if (config.connectOverCdp !== undefined) browserConfig.connectOverCdp = config.connectOverCdp;
|