camel-ai 0.2.70__py3-none-any.whl → 0.2.71a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/societies/workforce/role_playing_worker.py +4 -4
- camel/societies/workforce/single_agent_worker.py +4 -4
- camel/societies/workforce/workforce.py +236 -41
- camel/societies/workforce/workforce_logger.py +0 -1
- camel/tasks/task.py +83 -7
- camel/toolkits/human_toolkit.py +23 -8
- camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +23 -2
- camel/toolkits/non_visual_browser_toolkit/nv_browser_session.py +53 -11
- camel/toolkits/non_visual_browser_toolkit/snapshot.js +211 -131
- camel/toolkits/non_visual_browser_toolkit/snapshot.py +9 -8
- camel/toolkits/terminal_toolkit.py +17 -16
- camel/toolkits/video_download_toolkit.py +5 -1
- {camel_ai-0.2.70.dist-info → camel_ai-0.2.71a1.dist-info}/METADATA +1 -1
- {camel_ai-0.2.70.dist-info → camel_ai-0.2.71a1.dist-info}/RECORD +17 -17
- {camel_ai-0.2.70.dist-info → camel_ai-0.2.71a1.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.70.dist-info → camel_ai-0.2.71a1.dist-info}/licenses/LICENSE +0 -0
camel/toolkits/human_toolkit.py
CHANGED
|
@@ -25,13 +25,21 @@ class HumanToolkit(BaseToolkit):
|
|
|
25
25
|
r"""A class representing a toolkit for human interaction."""
|
|
26
26
|
|
|
27
27
|
def ask_human_via_console(self, question: str) -> str:
|
|
28
|
-
r"""
|
|
28
|
+
r"""Use this tool to ask a question to the user when you are stuck,
|
|
29
|
+
need clarification, or require a decision to be made. This is a
|
|
30
|
+
two-way communication channel that will wait for the user's response.
|
|
31
|
+
You should use it to:
|
|
32
|
+
- Clarify ambiguous instructions or requirements.
|
|
33
|
+
- Request missing information that you cannot find (e.g., login
|
|
34
|
+
credentials, file paths).
|
|
35
|
+
- Ask for a decision when there are multiple viable options.
|
|
36
|
+
- Seek help when you encounter an error you cannot resolve on your own.
|
|
29
37
|
|
|
30
38
|
Args:
|
|
31
|
-
question (str): The question to ask the
|
|
39
|
+
question (str): The question to ask the user.
|
|
32
40
|
|
|
33
41
|
Returns:
|
|
34
|
-
str: The
|
|
42
|
+
str: The user's response to the question.
|
|
35
43
|
"""
|
|
36
44
|
print(f"Question: {question}")
|
|
37
45
|
logger.info(f"Question: {question}")
|
|
@@ -40,11 +48,18 @@ class HumanToolkit(BaseToolkit):
|
|
|
40
48
|
return reply
|
|
41
49
|
|
|
42
50
|
def send_message_to_user(self, message: str) -> None:
|
|
43
|
-
r"""
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
51
|
+
r"""Use this tool to send a message to the user to keep them
|
|
52
|
+
informed about your progress, decisions, or actions.
|
|
53
|
+
This is a one-way communication channel from you to the user and does
|
|
54
|
+
not require a response. You should use it to:
|
|
55
|
+
- Announce what you are about to do
|
|
56
|
+
(e.g., "I will now search for papers on GUI Agents.")
|
|
57
|
+
- Report the result of an action
|
|
58
|
+
(e.g., "I have found 15 relevant papers.")
|
|
59
|
+
- State a decision
|
|
60
|
+
(e.g., "I will now analyze the top 10 papers.")
|
|
61
|
+
- Inform the user about your current state if you are performing a
|
|
62
|
+
task.
|
|
48
63
|
|
|
49
64
|
Args:
|
|
50
65
|
message (str): The message to send to the user.
|
|
@@ -80,7 +80,28 @@ class BrowserNonVisualToolkit(BaseToolkit):
|
|
|
80
80
|
return
|
|
81
81
|
|
|
82
82
|
if loop.is_closed():
|
|
83
|
-
#
|
|
83
|
+
# The default loop is closed, create a *temporary* loop just
|
|
84
|
+
# for cleanup so that Playwright / asyncio transports are
|
|
85
|
+
# gracefully shut down. This avoids noisy warnings such as
|
|
86
|
+
# "RuntimeError: Event loop is closed" when the program
|
|
87
|
+
# exits.
|
|
88
|
+
try:
|
|
89
|
+
tmp_loop = asyncio.new_event_loop()
|
|
90
|
+
try:
|
|
91
|
+
asyncio.set_event_loop(tmp_loop)
|
|
92
|
+
tmp_loop.run_until_complete(self.close_browser())
|
|
93
|
+
finally:
|
|
94
|
+
# Best-effort shutdown of async generators and loop
|
|
95
|
+
# itself (Python ≥3.6).
|
|
96
|
+
if hasattr(tmp_loop, "shutdown_asyncgens"):
|
|
97
|
+
tmp_loop.run_until_complete(
|
|
98
|
+
tmp_loop.shutdown_asyncgens()
|
|
99
|
+
)
|
|
100
|
+
tmp_loop.close()
|
|
101
|
+
finally:
|
|
102
|
+
# Ensure no subsequent get_event_loop() picks up a now
|
|
103
|
+
# closed temporary loop.
|
|
104
|
+
asyncio.set_event_loop(None)
|
|
84
105
|
return
|
|
85
106
|
|
|
86
107
|
if loop.is_running():
|
|
@@ -155,7 +176,7 @@ class BrowserNonVisualToolkit(BaseToolkit):
|
|
|
155
176
|
self._agent = None
|
|
156
177
|
|
|
157
178
|
# Close session
|
|
158
|
-
await
|
|
179
|
+
await NVBrowserSession.close_all_sessions()
|
|
159
180
|
return "Browser session closed."
|
|
160
181
|
|
|
161
182
|
async def visit_page(self, url: str) -> Dict[str, str]:
|
|
@@ -13,8 +13,11 @@
|
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
|
+
import asyncio
|
|
16
17
|
from pathlib import Path
|
|
17
|
-
from typing import TYPE_CHECKING, Any, Optional
|
|
18
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional
|
|
19
|
+
|
|
20
|
+
from camel.logger import get_logger
|
|
18
21
|
|
|
19
22
|
from .actions import ActionExecutor
|
|
20
23
|
from .snapshot import PageSnapshot
|
|
@@ -28,6 +31,9 @@ if TYPE_CHECKING:
|
|
|
28
31
|
)
|
|
29
32
|
|
|
30
33
|
|
|
34
|
+
logger = get_logger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
31
37
|
class NVBrowserSession:
|
|
32
38
|
"""Lightweight wrapper around Playwright for non-visual (headless)
|
|
33
39
|
browsing.
|
|
@@ -35,15 +41,37 @@ class NVBrowserSession:
|
|
|
35
41
|
It provides a single *Page* instance plus helper utilities (snapshot &
|
|
36
42
|
executor). Multiple toolkits or agents can reuse this class without
|
|
37
43
|
duplicating Playwright setup code.
|
|
44
|
+
|
|
45
|
+
This class is a singleton per event-loop.
|
|
38
46
|
"""
|
|
39
47
|
|
|
40
48
|
# Configuration constants
|
|
41
49
|
DEFAULT_NAVIGATION_TIMEOUT = 10000 # 10 seconds
|
|
42
50
|
NETWORK_IDLE_TIMEOUT = 5000 # 5 seconds
|
|
43
51
|
|
|
52
|
+
_sessions: ClassVar[
|
|
53
|
+
Dict[asyncio.AbstractEventLoop, "NVBrowserSession"]
|
|
54
|
+
] = {}
|
|
55
|
+
|
|
56
|
+
_initialized: bool
|
|
57
|
+
|
|
58
|
+
def __new__(
|
|
59
|
+
cls, *, headless: bool = True, user_data_dir: Optional[str] = None
|
|
60
|
+
):
|
|
61
|
+
loop = asyncio.get_running_loop()
|
|
62
|
+
if loop not in cls._sessions:
|
|
63
|
+
instance = super().__new__(cls)
|
|
64
|
+
instance._initialized = False
|
|
65
|
+
cls._sessions[loop] = instance
|
|
66
|
+
return cls._sessions[loop]
|
|
67
|
+
|
|
44
68
|
def __init__(
|
|
45
69
|
self, *, headless: bool = True, user_data_dir: Optional[str] = None
|
|
46
70
|
):
|
|
71
|
+
if self._initialized:
|
|
72
|
+
return
|
|
73
|
+
self._initialized = True
|
|
74
|
+
|
|
47
75
|
self._headless = headless
|
|
48
76
|
self._user_data_dir = user_data_dir
|
|
49
77
|
|
|
@@ -56,8 +84,6 @@ class NVBrowserSession:
|
|
|
56
84
|
self.executor: Optional[ActionExecutor] = None
|
|
57
85
|
|
|
58
86
|
# Protect browser initialisation against concurrent calls
|
|
59
|
-
import asyncio
|
|
60
|
-
|
|
61
87
|
self._ensure_lock: "asyncio.Lock" = asyncio.Lock()
|
|
62
88
|
|
|
63
89
|
# ------------------------------------------------------------------
|
|
@@ -93,10 +119,6 @@ class NVBrowserSession:
|
|
|
93
119
|
self._browser = await pl.chromium.launch(headless=self._headless)
|
|
94
120
|
self._context = await self._browser.new_context()
|
|
95
121
|
|
|
96
|
-
from camel.logger import get_logger
|
|
97
|
-
|
|
98
|
-
_dbg_logger = get_logger(__name__)
|
|
99
|
-
|
|
100
122
|
# Reuse an already open page (persistent context may restore last
|
|
101
123
|
# session)
|
|
102
124
|
if self._context.pages:
|
|
@@ -105,7 +127,7 @@ class NVBrowserSession:
|
|
|
105
127
|
self._page = await self._context.new_page()
|
|
106
128
|
|
|
107
129
|
# Debug information to help trace concurrency issues
|
|
108
|
-
|
|
130
|
+
logger.debug(
|
|
109
131
|
"Session %s created browser=%s context=%s page=%s (url=%s)",
|
|
110
132
|
hex(id(self)),
|
|
111
133
|
hex(id(self._browser)) if self._browser else None,
|
|
@@ -122,6 +144,11 @@ class NVBrowserSession:
|
|
|
122
144
|
r"""Close all browser resources, ensuring cleanup even if some
|
|
123
145
|
operations fail.
|
|
124
146
|
"""
|
|
147
|
+
# The close method will now only close the *current* event-loop's
|
|
148
|
+
# browser instance. Use `close_all_sessions` for a full cleanup.
|
|
149
|
+
await self._close_session()
|
|
150
|
+
|
|
151
|
+
async def _close_session(self) -> None:
|
|
125
152
|
errors: list[str] = []
|
|
126
153
|
|
|
127
154
|
# Close context first (which closes pages)
|
|
@@ -151,13 +178,28 @@ class NVBrowserSession:
|
|
|
151
178
|
|
|
152
179
|
# Log errors if any occurred during cleanup
|
|
153
180
|
if errors:
|
|
154
|
-
from camel.logger import get_logger
|
|
155
|
-
|
|
156
|
-
logger = get_logger(__name__)
|
|
157
181
|
logger.warning(
|
|
158
182
|
"Errors during browser session cleanup: %s", "; ".join(errors)
|
|
159
183
|
)
|
|
160
184
|
|
|
185
|
+
@classmethod
|
|
186
|
+
async def close_all_sessions(cls) -> None:
|
|
187
|
+
r"""Iterate over all stored sessions and close them."""
|
|
188
|
+
for loop, session in cls._sessions.items():
|
|
189
|
+
if loop.is_running():
|
|
190
|
+
await session._close_session()
|
|
191
|
+
else:
|
|
192
|
+
try:
|
|
193
|
+
if not loop.is_closed():
|
|
194
|
+
loop.run_until_complete(session._close_session())
|
|
195
|
+
except Exception as e:
|
|
196
|
+
logger.warning(
|
|
197
|
+
"Failed to close session for loop %s: %s",
|
|
198
|
+
hex(id(loop)),
|
|
199
|
+
e,
|
|
200
|
+
)
|
|
201
|
+
cls._sessions.clear()
|
|
202
|
+
|
|
161
203
|
# ------------------------------------------------------------------
|
|
162
204
|
# Convenience wrappers around common actions
|
|
163
205
|
# ------------------------------------------------------------------
|
|
@@ -1,188 +1,268 @@
|
|
|
1
1
|
(() => {
|
|
2
|
-
//
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
// Maximum lines allowed before we start dropping lower-priority nodes
|
|
6
|
-
const MAX_LINES = 400;
|
|
7
|
-
|
|
8
|
-
// Priority helper – lower number = higher priority
|
|
9
|
-
function getPriority(tag, role, text) {
|
|
10
|
-
// 1. Interactive elements
|
|
11
|
-
if (["input", "button", "a", "select", "textarea"].includes(tag)) return 1;
|
|
12
|
-
if (["checkbox", "radio"].includes(role)) return 1;
|
|
13
|
-
|
|
14
|
-
// 2. Labels / descriptive adjacent text (label elements)
|
|
15
|
-
if (tag === "label") return 2;
|
|
16
|
-
|
|
17
|
-
// 3. General visible text
|
|
18
|
-
if (text) return 3;
|
|
19
|
-
|
|
20
|
-
// 4. Low-value structural nodes
|
|
21
|
-
return 4;
|
|
22
|
-
}
|
|
2
|
+
// Playwright's snapshot logic focuses on semantics and visibility, not arbitrary limits.
|
|
3
|
+
// We will first build a semantic tree in memory, then render it.
|
|
23
4
|
|
|
24
5
|
function isVisible(node) {
|
|
25
|
-
|
|
26
|
-
if (rect.width === 0 || rect.height === 0) return false;
|
|
27
|
-
|
|
6
|
+
if (node.nodeType !== Node.ELEMENT_NODE) return true;
|
|
28
7
|
const style = window.getComputedStyle(node);
|
|
29
|
-
if (style.display === 'none' || style.visibility === 'hidden'
|
|
30
|
-
|
|
31
|
-
|
|
8
|
+
if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0')
|
|
9
|
+
return false;
|
|
10
|
+
// An element with `display: contents` is not rendered itself, but its children are.
|
|
11
|
+
if (style.display === 'contents')
|
|
12
|
+
return true;
|
|
13
|
+
const rect = node.getBoundingClientRect();
|
|
14
|
+
return rect.width > 0 && rect.height > 0;
|
|
32
15
|
}
|
|
33
16
|
|
|
34
17
|
function getRole(node) {
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
if (node.getAttribute('role')) return node.getAttribute('role');
|
|
18
|
+
const role = node.getAttribute('role');
|
|
19
|
+
if (role) return role;
|
|
39
20
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
21
|
+
const tagName = node.tagName.toLowerCase();
|
|
22
|
+
if (tagName === 'a') return 'link';
|
|
23
|
+
if (tagName === 'button') return 'button';
|
|
24
|
+
if (tagName === 'input') {
|
|
25
|
+
const type = node.getAttribute('type')?.toLowerCase();
|
|
26
|
+
if (['button', 'checkbox', 'radio', 'reset', 'submit'].includes(type)) return type;
|
|
27
|
+
return 'textbox';
|
|
44
28
|
}
|
|
45
|
-
|
|
46
|
-
if (
|
|
47
|
-
if (tag === 'a') return 'link';
|
|
48
|
-
if (tag === 'select') return 'select';
|
|
49
|
-
if (tag === 'textarea') return 'textarea';
|
|
50
|
-
if (tag === 'p') return 'paragraph';
|
|
51
|
-
if (tag === 'span') return 'text';
|
|
52
|
-
|
|
29
|
+
if (['select', 'textarea'].includes(tagName)) return tagName;
|
|
30
|
+
if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) return 'heading';
|
|
53
31
|
return 'generic';
|
|
54
32
|
}
|
|
55
33
|
|
|
56
34
|
function getAccessibleName(node) {
|
|
57
|
-
if (node.hasAttribute('aria-label'))
|
|
58
|
-
return node.getAttribute('aria-label');
|
|
59
|
-
}
|
|
35
|
+
if (node.hasAttribute('aria-label')) return node.getAttribute('aria-label') || '';
|
|
60
36
|
if (node.hasAttribute('aria-labelledby')) {
|
|
61
37
|
const id = node.getAttribute('aria-labelledby');
|
|
62
38
|
const labelEl = document.getElementById(id);
|
|
63
|
-
if (labelEl) return labelEl.textContent
|
|
39
|
+
if (labelEl) return labelEl.textContent || '';
|
|
64
40
|
}
|
|
65
|
-
|
|
66
|
-
|
|
41
|
+
// This is the new, visibility-aware text extraction logic.
|
|
42
|
+
const text = getVisibleTextContent(node);
|
|
43
|
+
|
|
44
|
+
// Add a heuristic to ignore code-like text that might be in the DOM
|
|
45
|
+
if ((text.match(/[;:{}]/g)?.length || 0) > 2) return '';
|
|
46
|
+
return text;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const textCache = new Map();
|
|
50
|
+
function getVisibleTextContent(_node) {
|
|
51
|
+
if (textCache.has(_node)) return textCache.get(_node);
|
|
52
|
+
|
|
53
|
+
if (_node.nodeType === Node.TEXT_NODE) {
|
|
54
|
+
// For a text node, its content is visible if its parent is.
|
|
55
|
+
// The isVisible check on the parent happens before this recursion.
|
|
56
|
+
return _node.nodeValue || '';
|
|
67
57
|
}
|
|
68
58
|
|
|
69
|
-
|
|
70
|
-
if (['style', 'script', 'meta', 'noscript', 'svg'].includes(tagName)) {
|
|
59
|
+
if (_node.nodeType !== Node.ELEMENT_NODE || !isVisible(_node) || ['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'HEAD'].includes(_node.tagName)) {
|
|
71
60
|
return '';
|
|
72
61
|
}
|
|
73
62
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
if ((text.match(/[;:{}]/g)?.length || 0) > 2) return '';
|
|
63
|
+
let result = '';
|
|
64
|
+
for (const child of _node.childNodes) {
|
|
65
|
+
result += getVisibleTextContent(child);
|
|
66
|
+
}
|
|
79
67
|
|
|
80
|
-
|
|
68
|
+
// Caching the result for performance.
|
|
69
|
+
textCache.set(_node, result);
|
|
70
|
+
return result;
|
|
81
71
|
}
|
|
82
72
|
|
|
83
73
|
let refCounter = 1;
|
|
74
|
+
function generateRef() {
|
|
75
|
+
return `e${refCounter++}`;
|
|
76
|
+
}
|
|
84
77
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
78
|
+
/**
|
|
79
|
+
* Phase 1: Build an in-memory representation of the accessibility tree.
|
|
80
|
+
*/
|
|
81
|
+
function buildAriaTree(rootElement) {
|
|
82
|
+
const visited = new Set();
|
|
88
83
|
|
|
89
|
-
|
|
90
|
-
|
|
84
|
+
function toAriaNode(element) {
|
|
85
|
+
// Only consider visible elements
|
|
86
|
+
if (!isVisible(element)) return null;
|
|
91
87
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
for (const child of node.children) {
|
|
96
|
-
traverse(child, depth + 1);
|
|
97
|
-
}
|
|
98
|
-
return;
|
|
99
|
-
}
|
|
88
|
+
const role = getRole(element);
|
|
89
|
+
// 'presentation' and 'none' roles are ignored, but their children are processed.
|
|
90
|
+
if (['presentation', 'none'].includes(role)) return null;
|
|
100
91
|
|
|
101
|
-
|
|
102
|
-
node.getAttribute('role') || text;
|
|
92
|
+
const name = getAccessibleName(element);
|
|
103
93
|
|
|
104
|
-
|
|
105
|
-
const
|
|
106
|
-
|
|
107
|
-
|
|
94
|
+
// Create the node
|
|
95
|
+
const node = {
|
|
96
|
+
role,
|
|
97
|
+
name,
|
|
98
|
+
children: [],
|
|
99
|
+
element: element,
|
|
100
|
+
ref: generateRef(),
|
|
101
|
+
};
|
|
108
102
|
|
|
109
|
-
//
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
const priority = getPriority(tagName, role, text);
|
|
103
|
+
// Add states for interactive elements, similar to Playwright
|
|
104
|
+
if (element.hasAttribute('disabled')) node.disabled = true;
|
|
105
|
+
if (element.hasAttribute('aria-checked')) node.checked = element.getAttribute('aria-checked');
|
|
106
|
+
if (element.hasAttribute('aria-expanded')) node.expanded = element.getAttribute('aria-expanded');
|
|
114
107
|
|
|
115
|
-
|
|
108
|
+
// Tag element with a ref for later lookup
|
|
109
|
+
element.setAttribute('aria-ref', node.ref);
|
|
116
110
|
|
|
117
|
-
|
|
118
|
-
node.setAttribute('aria-ref', ref);
|
|
111
|
+
return node;
|
|
119
112
|
}
|
|
120
113
|
|
|
121
|
-
|
|
122
|
-
|
|
114
|
+
function traverse(element, parentNode) {
|
|
115
|
+
if (visited.has(element)) return;
|
|
116
|
+
visited.add(element);
|
|
117
|
+
|
|
118
|
+
// FIX: Completely skip script and style tags and their children.
|
|
119
|
+
const tagName = element.tagName.toLowerCase();
|
|
120
|
+
if (['script', 'style', 'meta', 'noscript'].includes(tagName))
|
|
121
|
+
return;
|
|
122
|
+
|
|
123
|
+
// Check if element is explicitly hidden by CSS - if so, skip entirely including children
|
|
124
|
+
const style = window.getComputedStyle(element);
|
|
125
|
+
if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') {
|
|
126
|
+
return;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
const ariaNode = toAriaNode(element);
|
|
130
|
+
// If the element is not rendered or is presentational, its children
|
|
131
|
+
// are attached directly to the parent.
|
|
132
|
+
const newParent = ariaNode || parentNode;
|
|
133
|
+
if (ariaNode) parentNode.children.push(ariaNode);
|
|
134
|
+
|
|
135
|
+
for (const child of element.childNodes) {
|
|
136
|
+
if (child.nodeType === Node.ELEMENT_NODE) {
|
|
137
|
+
traverse(child, newParent);
|
|
138
|
+
} else if (child.nodeType === Node.TEXT_NODE) {
|
|
139
|
+
const text = (child.textContent || '').trim();
|
|
140
|
+
if (text) newParent.children.push(text);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Also traverse into shadow DOM if it exists
|
|
145
|
+
if (element.shadowRoot) {
|
|
146
|
+
for (const child of element.shadowRoot.childNodes) {
|
|
147
|
+
if (child.nodeType === Node.ELEMENT_NODE) {
|
|
148
|
+
traverse(child, newParent);
|
|
149
|
+
} else if (child.nodeType === Node.TEXT_NODE) {
|
|
150
|
+
const text = (child.textContent || '').trim();
|
|
151
|
+
if (text) newParent.children.push(text);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// FIX: If an element's name is the same as its only text child, remove the redundant child.
|
|
157
|
+
if (ariaNode && ariaNode.children.length === 1 && typeof ariaNode.children[0] === 'string' && ariaNode.name === ariaNode.children[0]) {
|
|
158
|
+
ariaNode.children = [];
|
|
159
|
+
}
|
|
123
160
|
}
|
|
161
|
+
|
|
162
|
+
const root = { role: 'Root', name: '', children: [], element: rootElement };
|
|
163
|
+
traverse(rootElement, root);
|
|
164
|
+
return root;
|
|
124
165
|
}
|
|
125
166
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
167
|
+
/**
|
|
168
|
+
* Phase 2: Normalize the tree by removing redundant generic wrappers.
|
|
169
|
+
* This is a key optimization in Playwright to simplify the structure.
|
|
170
|
+
*/
|
|
171
|
+
function normalizeTree(node) {
|
|
172
|
+
if (typeof node === 'string') return [node];
|
|
132
173
|
|
|
133
|
-
const
|
|
134
|
-
for (const
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
174
|
+
const newChildren = [];
|
|
175
|
+
for (const child of node.children) {
|
|
176
|
+
newChildren.push(...normalizeTree(child));
|
|
177
|
+
}
|
|
178
|
+
node.children = newChildren;
|
|
179
|
+
|
|
180
|
+
// Remove child elements that have the same name as their parent
|
|
181
|
+
if (node.children.length === 1 && typeof node.children[0] !== 'string') {
|
|
182
|
+
const child = node.children[0];
|
|
183
|
+
if (child.name && node.name && child.name.trim() === node.name.trim()) {
|
|
184
|
+
// Merge child's children into parent and remove the redundant child
|
|
185
|
+
node.children = child.children || [];
|
|
141
186
|
}
|
|
142
187
|
}
|
|
188
|
+
|
|
189
|
+
// A 'generic' role that just wraps a single other element is redundant.
|
|
190
|
+
// We lift its child up to replace it, simplifying the hierarchy.
|
|
191
|
+
const isRedundantWrapper = node.role === 'generic' && node.children.length === 1 && typeof node.children[0] !== 'string';
|
|
192
|
+
if (isRedundantWrapper) {
|
|
193
|
+
return node.children;
|
|
194
|
+
}
|
|
195
|
+
return [node];
|
|
143
196
|
}
|
|
144
197
|
|
|
145
|
-
processDocument(document);
|
|
146
198
|
|
|
147
|
-
|
|
148
|
-
|
|
199
|
+
/**
|
|
200
|
+
* Phase 3: Render the normalized tree into the final string format.
|
|
201
|
+
*/
|
|
202
|
+
function renderTree(node, indent = '') {
|
|
203
|
+
const lines = [];
|
|
204
|
+
let meaningfulProps = '';
|
|
205
|
+
if (node.disabled) meaningfulProps += ' disabled';
|
|
206
|
+
if (node.checked !== undefined) meaningfulProps += ` checked=${node.checked}`;
|
|
207
|
+
if (node.expanded !== undefined) meaningfulProps += ` expanded=${node.expanded}`;
|
|
208
|
+
|
|
209
|
+
const ref = node.ref ? ` [ref=${node.ref}]` : '';
|
|
210
|
+
const name = (node.name || '').replace(/\s+/g, ' ').trim();
|
|
149
211
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
212
|
+
// Skip elements with empty names and no meaningful props (ref is not considered meaningful)
|
|
213
|
+
if (!name && !meaningfulProps) {
|
|
214
|
+
// If element has no name and no meaningful props, render its children directly at current level
|
|
215
|
+
for (const child of node.children) {
|
|
216
|
+
if (typeof child === 'string') {
|
|
217
|
+
const childText = child.replace(/\s+/g, ' ').trim();
|
|
218
|
+
if (childText) { // Only add non-empty text
|
|
219
|
+
lines.push(`${indent}- text "${childText}"`);
|
|
220
|
+
}
|
|
221
|
+
} else {
|
|
222
|
+
lines.push(...renderTree(child, indent));
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
return lines;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
lines.push(`${indent}- ${node.role}${name ? ` "${name}"` : ''}${meaningfulProps}${ref}`);
|
|
153
229
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
230
|
+
for (const child of node.children) {
|
|
231
|
+
if (typeof child === 'string') {
|
|
232
|
+
const childText = child.replace(/\s+/g, ' ').trim();
|
|
233
|
+
if (childText) { // Only add non-empty text
|
|
234
|
+
lines.push(`${indent} - text "${childText}"`);
|
|
235
|
+
}
|
|
236
|
+
} else {
|
|
237
|
+
lines.push(...renderTree(child, indent + ' '));
|
|
160
238
|
}
|
|
161
239
|
}
|
|
240
|
+
return lines;
|
|
162
241
|
}
|
|
163
242
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
const depthStack = []; // keeps track of kept original depths
|
|
172
|
-
|
|
173
|
-
for (const el of finalElements) {
|
|
174
|
-
// Pop depths that are not ancestors of current element
|
|
175
|
-
while (depthStack.length && depthStack[depthStack.length - 1] >= el.depth) {
|
|
176
|
-
depthStack.pop();
|
|
177
|
-
}
|
|
243
|
+
function processDocument(doc) {
|
|
244
|
+
if (!doc.body) return [];
|
|
245
|
+
|
|
246
|
+
// Clear cache for each new document processing.
|
|
247
|
+
textCache.clear();
|
|
248
|
+
let tree = buildAriaTree(doc.body);
|
|
249
|
+
[tree] = normalizeTree(tree);
|
|
178
250
|
|
|
179
|
-
|
|
180
|
-
depthStack.push(el.depth);
|
|
251
|
+
const lines = renderTree(tree).slice(1); // Skip the root node line
|
|
181
252
|
|
|
182
|
-
const
|
|
183
|
-
const
|
|
184
|
-
|
|
253
|
+
const frames = doc.querySelectorAll('iframe');
|
|
254
|
+
for (const frame of frames) {
|
|
255
|
+
try {
|
|
256
|
+
if (frame.contentDocument) {
|
|
257
|
+
lines.push(...processDocument(frame.contentDocument));
|
|
258
|
+
}
|
|
259
|
+
} catch (e) {
|
|
260
|
+
// Skip cross-origin iframes
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
return lines;
|
|
185
264
|
}
|
|
186
265
|
|
|
266
|
+
const outputLines = processDocument(document);
|
|
187
267
|
return outputLines.join('\n');
|
|
188
268
|
})();
|
|
@@ -48,14 +48,15 @@ class PageSnapshot:
|
|
|
48
48
|
try:
|
|
49
49
|
current_url = self.page.url
|
|
50
50
|
|
|
51
|
-
#
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
51
|
+
# Previously we skipped regeneration when the URL had not changed
|
|
52
|
+
# and no explicit refresh was requested. This prevented the agent
|
|
53
|
+
# from seeing DOM updates that occur without a navigation (e.g.
|
|
54
|
+
# single-page apps, dynamic games such as Wordle). The early-exit
|
|
55
|
+
# logic has been removed so that we always capture a *fresh* DOM
|
|
56
|
+
# snapshot. If the snapshot happens to be byte-for-byte identical
|
|
57
|
+
# to the previous one we simply return it after the standard
|
|
58
|
+
# comparison step below; otherwise callers receive the updated
|
|
59
|
+
# snapshot even when the URL did not change.
|
|
59
60
|
|
|
60
61
|
# ensure DOM stability
|
|
61
62
|
await self.page.wait_for_load_state(
|