camel-ai 0.2.70__py3-none-any.whl → 0.2.71a1__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. More details are available via the link on the original diff page.

@@ -25,13 +25,21 @@ class HumanToolkit(BaseToolkit):
25
25
  r"""A class representing a toolkit for human interaction."""
26
26
 
27
27
  def ask_human_via_console(self, question: str) -> str:
28
- r"""Ask a question to the human via the console.
28
+ r"""Use this tool to ask a question to the user when you are stuck,
29
+ need clarification, or require a decision to be made. This is a
30
+ two-way communication channel that will wait for the user's response.
31
+ You should use it to:
32
+ - Clarify ambiguous instructions or requirements.
33
+ - Request missing information that you cannot find (e.g., login
34
+ credentials, file paths).
35
+ - Ask for a decision when there are multiple viable options.
36
+ - Seek help when you encounter an error you cannot resolve on your own.
29
37
 
30
38
  Args:
31
- question (str): The question to ask the human.
39
+ question (str): The question to ask the user.
32
40
 
33
41
  Returns:
34
- str: The answer from the human.
42
+ str: The user's response to the question.
35
43
  """
36
44
  print(f"Question: {question}")
37
45
  logger.info(f"Question: {question}")
@@ -40,11 +48,18 @@ class HumanToolkit(BaseToolkit):
40
48
  return reply
41
49
 
42
50
  def send_message_to_user(self, message: str) -> None:
43
- r"""Send a message to the user, without waiting for
44
- a response. This will send to stdout in a noticeable way.
45
-
46
- This is guaranteed to reach the user regardless of
47
- actual user interface.
51
+ r"""Use this tool to send a message to the user to keep them
52
+ informed about your progress, decisions, or actions.
53
+ This is a one-way communication channel from you to the user and does
54
+ not require a response. You should use it to:
55
+ - Announce what you are about to do
56
+ (e.g., "I will now search for papers on GUI Agents.")
57
+ - Report the result of an action
58
+ (e.g., "I have found 15 relevant papers.")
59
+ - State a decision
60
+ (e.g., "I will now analyze the top 10 papers.")
61
+ - Inform the user about your current state if you are performing a
62
+ task.
48
63
 
49
64
  Args:
50
65
  message (str): The message to send to the user.
@@ -80,7 +80,28 @@ class BrowserNonVisualToolkit(BaseToolkit):
80
80
  return
81
81
 
82
82
  if loop.is_closed():
83
- # Event loop already closed cannot run async cleanup
83
+ # The default loop is closed, create a *temporary* loop just
84
+ # for cleanup so that Playwright / asyncio transports are
85
+ # gracefully shut down. This avoids noisy warnings such as
86
+ # "RuntimeError: Event loop is closed" when the program
87
+ # exits.
88
+ try:
89
+ tmp_loop = asyncio.new_event_loop()
90
+ try:
91
+ asyncio.set_event_loop(tmp_loop)
92
+ tmp_loop.run_until_complete(self.close_browser())
93
+ finally:
94
+ # Best-effort shutdown of async generators and loop
95
+ # itself (Python ≥3.6).
96
+ if hasattr(tmp_loop, "shutdown_asyncgens"):
97
+ tmp_loop.run_until_complete(
98
+ tmp_loop.shutdown_asyncgens()
99
+ )
100
+ tmp_loop.close()
101
+ finally:
102
+ # Ensure no subsequent get_event_loop() picks up a now
103
+ # closed temporary loop.
104
+ asyncio.set_event_loop(None)
84
105
  return
85
106
 
86
107
  if loop.is_running():
@@ -155,7 +176,7 @@ class BrowserNonVisualToolkit(BaseToolkit):
155
176
  self._agent = None
156
177
 
157
178
  # Close session
158
- await self._session.close()
179
+ await NVBrowserSession.close_all_sessions()
159
180
  return "Browser session closed."
160
181
 
161
182
  async def visit_page(self, url: str) -> Dict[str, str]:
@@ -13,8 +13,11 @@
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
  from __future__ import annotations
15
15
 
16
+ import asyncio
16
17
  from pathlib import Path
17
- from typing import TYPE_CHECKING, Any, Optional
18
+ from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional
19
+
20
+ from camel.logger import get_logger
18
21
 
19
22
  from .actions import ActionExecutor
20
23
  from .snapshot import PageSnapshot
@@ -28,6 +31,9 @@ if TYPE_CHECKING:
28
31
  )
29
32
 
30
33
 
34
+ logger = get_logger(__name__)
35
+
36
+
31
37
  class NVBrowserSession:
32
38
  """Lightweight wrapper around Playwright for non-visual (headless)
33
39
  browsing.
@@ -35,15 +41,37 @@ class NVBrowserSession:
35
41
  It provides a single *Page* instance plus helper utilities (snapshot &
36
42
  executor). Multiple toolkits or agents can reuse this class without
37
43
  duplicating Playwright setup code.
44
+
45
+ This class is a singleton per event-loop.
38
46
  """
39
47
 
40
48
  # Configuration constants
41
49
  DEFAULT_NAVIGATION_TIMEOUT = 10000 # 10 seconds
42
50
  NETWORK_IDLE_TIMEOUT = 5000 # 5 seconds
43
51
 
52
+ _sessions: ClassVar[
53
+ Dict[asyncio.AbstractEventLoop, "NVBrowserSession"]
54
+ ] = {}
55
+
56
+ _initialized: bool
57
+
58
+ def __new__(
59
+ cls, *, headless: bool = True, user_data_dir: Optional[str] = None
60
+ ):
61
+ loop = asyncio.get_running_loop()
62
+ if loop not in cls._sessions:
63
+ instance = super().__new__(cls)
64
+ instance._initialized = False
65
+ cls._sessions[loop] = instance
66
+ return cls._sessions[loop]
67
+
44
68
  def __init__(
45
69
  self, *, headless: bool = True, user_data_dir: Optional[str] = None
46
70
  ):
71
+ if self._initialized:
72
+ return
73
+ self._initialized = True
74
+
47
75
  self._headless = headless
48
76
  self._user_data_dir = user_data_dir
49
77
 
@@ -56,8 +84,6 @@ class NVBrowserSession:
56
84
  self.executor: Optional[ActionExecutor] = None
57
85
 
58
86
  # Protect browser initialisation against concurrent calls
59
- import asyncio
60
-
61
87
  self._ensure_lock: "asyncio.Lock" = asyncio.Lock()
62
88
 
63
89
  # ------------------------------------------------------------------
@@ -93,10 +119,6 @@ class NVBrowserSession:
93
119
  self._browser = await pl.chromium.launch(headless=self._headless)
94
120
  self._context = await self._browser.new_context()
95
121
 
96
- from camel.logger import get_logger
97
-
98
- _dbg_logger = get_logger(__name__)
99
-
100
122
  # Reuse an already open page (persistent context may restore last
101
123
  # session)
102
124
  if self._context.pages:
@@ -105,7 +127,7 @@ class NVBrowserSession:
105
127
  self._page = await self._context.new_page()
106
128
 
107
129
  # Debug information to help trace concurrency issues
108
- _dbg_logger.debug(
130
+ logger.debug(
109
131
  "Session %s created browser=%s context=%s page=%s (url=%s)",
110
132
  hex(id(self)),
111
133
  hex(id(self._browser)) if self._browser else None,
@@ -122,6 +144,11 @@ class NVBrowserSession:
122
144
  r"""Close all browser resources, ensuring cleanup even if some
123
145
  operations fail.
124
146
  """
147
+ # The close method will now only close the *current* event-loop's
148
+ # browser instance. Use `close_all_sessions` for a full cleanup.
149
+ await self._close_session()
150
+
151
+ async def _close_session(self) -> None:
125
152
  errors: list[str] = []
126
153
 
127
154
  # Close context first (which closes pages)
@@ -151,13 +178,28 @@ class NVBrowserSession:
151
178
 
152
179
  # Log errors if any occurred during cleanup
153
180
  if errors:
154
- from camel.logger import get_logger
155
-
156
- logger = get_logger(__name__)
157
181
  logger.warning(
158
182
  "Errors during browser session cleanup: %s", "; ".join(errors)
159
183
  )
160
184
 
185
+ @classmethod
186
+ async def close_all_sessions(cls) -> None:
187
+ r"""Iterate over all stored sessions and close them."""
188
+ for loop, session in cls._sessions.items():
189
+ if loop.is_running():
190
+ await session._close_session()
191
+ else:
192
+ try:
193
+ if not loop.is_closed():
194
+ loop.run_until_complete(session._close_session())
195
+ except Exception as e:
196
+ logger.warning(
197
+ "Failed to close session for loop %s: %s",
198
+ hex(id(loop)),
199
+ e,
200
+ )
201
+ cls._sessions.clear()
202
+
161
203
  # ------------------------------------------------------------------
162
204
  # Convenience wrappers around common actions
163
205
  # ------------------------------------------------------------------
@@ -1,188 +1,268 @@
1
1
  (() => {
2
- // Store each element as {text, priority, depth}
3
- const elements = [];
4
-
5
- // Maximum lines allowed before we start dropping lower-priority nodes
6
- const MAX_LINES = 400;
7
-
8
- // Priority helper – lower number = higher priority
9
- function getPriority(tag, role, text) {
10
- // 1. Interactive elements
11
- if (["input", "button", "a", "select", "textarea"].includes(tag)) return 1;
12
- if (["checkbox", "radio"].includes(role)) return 1;
13
-
14
- // 2. Labels / descriptive adjacent text (label elements)
15
- if (tag === "label") return 2;
16
-
17
- // 3. General visible text
18
- if (text) return 3;
19
-
20
- // 4. Low-value structural nodes
21
- return 4;
22
- }
2
+ // Playwright's snapshot logic focuses on semantics and visibility, not arbitrary limits.
3
+ // We will first build a semantic tree in memory, then render it.
23
4
 
24
5
  function isVisible(node) {
25
- const rect = node.getBoundingClientRect();
26
- if (rect.width === 0 || rect.height === 0) return false;
27
-
6
+ if (node.nodeType !== Node.ELEMENT_NODE) return true;
28
7
  const style = window.getComputedStyle(node);
29
- if (style.display === 'none' || style.visibility === 'hidden') return false;
30
-
31
- return true;
8
+ if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0')
9
+ return false;
10
+ // An element with `display: contents` is not rendered itself, but its children are.
11
+ if (style.display === 'contents')
12
+ return true;
13
+ const rect = node.getBoundingClientRect();
14
+ return rect.width > 0 && rect.height > 0;
32
15
  }
33
16
 
34
17
  function getRole(node) {
35
- const tag = node.tagName.toLowerCase();
36
- const type = node.getAttribute('type');
37
-
38
- if (node.getAttribute('role')) return node.getAttribute('role');
18
+ const role = node.getAttribute('role');
19
+ if (role) return role;
39
20
 
40
- if (tag === 'input') {
41
- if (type === 'checkbox') return 'checkbox';
42
- if (type === 'radio') return 'radio';
43
- return 'input';
21
+ const tagName = node.tagName.toLowerCase();
22
+ if (tagName === 'a') return 'link';
23
+ if (tagName === 'button') return 'button';
24
+ if (tagName === 'input') {
25
+ const type = node.getAttribute('type')?.toLowerCase();
26
+ if (['button', 'checkbox', 'radio', 'reset', 'submit'].includes(type)) return type;
27
+ return 'textbox';
44
28
  }
45
-
46
- if (tag === 'button') return 'button';
47
- if (tag === 'a') return 'link';
48
- if (tag === 'select') return 'select';
49
- if (tag === 'textarea') return 'textarea';
50
- if (tag === 'p') return 'paragraph';
51
- if (tag === 'span') return 'text';
52
-
29
+ if (['select', 'textarea'].includes(tagName)) return tagName;
30
+ if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) return 'heading';
53
31
  return 'generic';
54
32
  }
55
33
 
56
34
  function getAccessibleName(node) {
57
- if (node.hasAttribute('aria-label')) {
58
- return node.getAttribute('aria-label');
59
- }
35
+ if (node.hasAttribute('aria-label')) return node.getAttribute('aria-label') || '';
60
36
  if (node.hasAttribute('aria-labelledby')) {
61
37
  const id = node.getAttribute('aria-labelledby');
62
38
  const labelEl = document.getElementById(id);
63
- if (labelEl) return labelEl.textContent.trim();
39
+ if (labelEl) return labelEl.textContent || '';
64
40
  }
65
- if (node.hasAttribute('title')) {
66
- return node.getAttribute('title');
41
+ // This is the new, visibility-aware text extraction logic.
42
+ const text = getVisibleTextContent(node);
43
+
44
+ // Add a heuristic to ignore code-like text that might be in the DOM
45
+ if ((text.match(/[;:{}]/g)?.length || 0) > 2) return '';
46
+ return text;
47
+ }
48
+
49
+ const textCache = new Map();
50
+ function getVisibleTextContent(_node) {
51
+ if (textCache.has(_node)) return textCache.get(_node);
52
+
53
+ if (_node.nodeType === Node.TEXT_NODE) {
54
+ // For a text node, its content is visible if its parent is.
55
+ // The isVisible check on the parent happens before this recursion.
56
+ return _node.nodeValue || '';
67
57
  }
68
58
 
69
- const tagName = node.tagName?.toLowerCase();
70
- if (['style', 'script', 'meta', 'noscript', 'svg'].includes(tagName)) {
59
+ if (_node.nodeType !== Node.ELEMENT_NODE || !isVisible(_node) || ['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'HEAD'].includes(_node.tagName)) {
71
60
  return '';
72
61
  }
73
62
 
74
- const text = node.textContent?.trim() || '';
75
-
76
- // Ignore styles, tokens, or long CSS-like expressions
77
- if (/^[.#]?[a-zA-Z0-9\-_]+\s*\{[^}]*\}/.test(text)) return '';
78
- if ((text.match(/[;:{}]/g)?.length || 0) > 2) return '';
63
+ let result = '';
64
+ for (const child of _node.childNodes) {
65
+ result += getVisibleTextContent(child);
66
+ }
79
67
 
80
- return text.replace(/[^\w\u4e00-\u9fa5\s\-.,?!'"()()]/g, '').trim();
68
+ // Caching the result for performance.
69
+ textCache.set(_node, result);
70
+ return result;
81
71
  }
82
72
 
83
73
  let refCounter = 1;
74
+ function generateRef() {
75
+ return `e${refCounter++}`;
76
+ }
84
77
 
85
- function traverse(node, depth) {
86
- if (node.nodeType !== Node.ELEMENT_NODE) return;
87
- if (!isVisible(node)) return;
78
+ /**
79
+ * Phase 1: Build an in-memory representation of the accessibility tree.
80
+ */
81
+ function buildAriaTree(rootElement) {
82
+ const visited = new Set();
88
83
 
89
- const tagName = node.tagName.toLowerCase();
90
- const text = getAccessibleName(node).slice(0, 50);
84
+ function toAriaNode(element) {
85
+ // Only consider visible elements
86
+ if (!isVisible(element)) return null;
91
87
 
92
- // Skip unlabeled links (anchors without any accessible name)
93
- if (tagName === 'a' && !text) {
94
- // Skip unlabeled links; process children if any
95
- for (const child of node.children) {
96
- traverse(child, depth + 1);
97
- }
98
- return;
99
- }
88
+ const role = getRole(element);
89
+ // 'presentation' and 'none' roles are ignored, but their children are processed.
90
+ if (['presentation', 'none'].includes(role)) return null;
100
91
 
101
- const hasRoleOrText = ['button', 'a', 'input', 'select', 'textarea', 'p', 'span'].includes(tagName) ||
102
- node.getAttribute('role') || text;
92
+ const name = getAccessibleName(element);
103
93
 
104
- if (hasRoleOrText) {
105
- const role = getRole(node);
106
- const ref = `e${refCounter++}`;
107
- const label = text ? `"${text}"` : '';
94
+ // Create the node
95
+ const node = {
96
+ role,
97
+ name,
98
+ children: [],
99
+ element: element,
100
+ ref: generateRef(),
101
+ };
108
102
 
109
- // Raw line (without indent) we will apply indentation later once we know
110
- // which ancestor lines survive filtering so that indentation always reflects
111
- // the visible hierarchy.
112
- const lineText = `- ${role}${label ? ` ${label}` : ''} [ref=${ref}]`;
113
- const priority = getPriority(tagName, role, text);
103
+ // Add states for interactive elements, similar to Playwright
104
+ if (element.hasAttribute('disabled')) node.disabled = true;
105
+ if (element.hasAttribute('aria-checked')) node.checked = element.getAttribute('aria-checked');
106
+ if (element.hasAttribute('aria-expanded')) node.expanded = element.getAttribute('aria-expanded');
114
107
 
115
- elements.push({ text: lineText, priority, depth });
108
+ // Tag element with a ref for later lookup
109
+ element.setAttribute('aria-ref', node.ref);
116
110
 
117
- // Always inject ref so Playwright can still locate the element even if line is later filtered out.
118
- node.setAttribute('aria-ref', ref);
111
+ return node;
119
112
  }
120
113
 
121
- for (const child of node.children) {
122
- traverse(child, depth + 1);
114
+ function traverse(element, parentNode) {
115
+ if (visited.has(element)) return;
116
+ visited.add(element);
117
+
118
+ // FIX: Completely skip script and style tags and their children.
119
+ const tagName = element.tagName.toLowerCase();
120
+ if (['script', 'style', 'meta', 'noscript'].includes(tagName))
121
+ return;
122
+
123
+ // Check if element is explicitly hidden by CSS - if so, skip entirely including children
124
+ const style = window.getComputedStyle(element);
125
+ if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') {
126
+ return;
127
+ }
128
+
129
+ const ariaNode = toAriaNode(element);
130
+ // If the element is not rendered or is presentational, its children
131
+ // are attached directly to the parent.
132
+ const newParent = ariaNode || parentNode;
133
+ if (ariaNode) parentNode.children.push(ariaNode);
134
+
135
+ for (const child of element.childNodes) {
136
+ if (child.nodeType === Node.ELEMENT_NODE) {
137
+ traverse(child, newParent);
138
+ } else if (child.nodeType === Node.TEXT_NODE) {
139
+ const text = (child.textContent || '').trim();
140
+ if (text) newParent.children.push(text);
141
+ }
142
+ }
143
+
144
+ // Also traverse into shadow DOM if it exists
145
+ if (element.shadowRoot) {
146
+ for (const child of element.shadowRoot.childNodes) {
147
+ if (child.nodeType === Node.ELEMENT_NODE) {
148
+ traverse(child, newParent);
149
+ } else if (child.nodeType === Node.TEXT_NODE) {
150
+ const text = (child.textContent || '').trim();
151
+ if (text) newParent.children.push(text);
152
+ }
153
+ }
154
+ }
155
+
156
+ // FIX: If an element's name is the same as its only text child, remove the redundant child.
157
+ if (ariaNode && ariaNode.children.length === 1 && typeof ariaNode.children[0] === 'string' && ariaNode.name === ariaNode.children[0]) {
158
+ ariaNode.children = [];
159
+ }
123
160
  }
161
+
162
+ const root = { role: 'Root', name: '', children: [], element: rootElement };
163
+ traverse(rootElement, root);
164
+ return root;
124
165
  }
125
166
 
126
- function processDocument(doc, depth = 0) {
127
- try {
128
- traverse(doc.body, depth);
129
- } catch (e) {
130
- // Handle docs without body (e.g., about:blank)
131
- }
167
+ /**
168
+ * Phase 2: Normalize the tree by removing redundant generic wrappers.
169
+ * This is a key optimization in Playwright to simplify the structure.
170
+ */
171
+ function normalizeTree(node) {
172
+ if (typeof node === 'string') return [node];
132
173
 
133
- const frames = doc.querySelectorAll('iframe');
134
- for (const frame of frames) {
135
- try {
136
- if (frame.contentDocument) {
137
- processDocument(frame.contentDocument, depth + 1);
138
- }
139
- } catch (e) {
140
- // Skip cross-origin iframes
174
+ const newChildren = [];
175
+ for (const child of node.children) {
176
+ newChildren.push(...normalizeTree(child));
177
+ }
178
+ node.children = newChildren;
179
+
180
+ // Remove child elements that have the same name as their parent
181
+ if (node.children.length === 1 && typeof node.children[0] !== 'string') {
182
+ const child = node.children[0];
183
+ if (child.name && node.name && child.name.trim() === node.name.trim()) {
184
+ // Merge child's children into parent and remove the redundant child
185
+ node.children = child.children || [];
141
186
  }
142
187
  }
188
+
189
+ // A 'generic' role that just wraps a single other element is redundant.
190
+ // We lift its child up to replace it, simplifying the hierarchy.
191
+ const isRedundantWrapper = node.role === 'generic' && node.children.length === 1 && typeof node.children[0] !== 'string';
192
+ if (isRedundantWrapper) {
193
+ return node.children;
194
+ }
195
+ return [node];
143
196
  }
144
197
 
145
- processDocument(document);
146
198
 
147
- // Always drop priority-4 nodes (low-value structural or invisible)
148
- let finalElements = elements.filter(el => el.priority <= 3);
199
+ /**
200
+ * Phase 3: Render the normalized tree into the final string format.
201
+ */
202
+ function renderTree(node, indent = '') {
203
+ const lines = [];
204
+ let meaningfulProps = '';
205
+ if (node.disabled) meaningfulProps += ' disabled';
206
+ if (node.checked !== undefined) meaningfulProps += ` checked=${node.checked}`;
207
+ if (node.expanded !== undefined) meaningfulProps += ` expanded=${node.expanded}`;
208
+
209
+ const ref = node.ref ? ` [ref=${node.ref}]` : '';
210
+ const name = (node.name || '').replace(/\s+/g, ' ').trim();
149
211
 
150
- // Additional size condensation when still exceeding MAX_LINES
151
- if (finalElements.length > MAX_LINES) {
152
- const filterBy = (maxPriority) => finalElements.filter(el => el.priority <= maxPriority);
212
+ // Skip elements with empty names and no meaningful props (ref is not considered meaningful)
213
+ if (!name && !meaningfulProps) {
214
+ // If element has no name and no meaningful props, render its children directly at current level
215
+ for (const child of node.children) {
216
+ if (typeof child === 'string') {
217
+ const childText = child.replace(/\s+/g, ' ').trim();
218
+ if (childText) { // Only add non-empty text
219
+ lines.push(`${indent}- text "${childText}"`);
220
+ }
221
+ } else {
222
+ lines.push(...renderTree(child, indent));
223
+ }
224
+ }
225
+ return lines;
226
+ }
227
+
228
+ lines.push(`${indent}- ${node.role}${name ? ` "${name}"` : ''}${meaningfulProps}${ref}`);
153
229
 
154
- // Progressively tighten: keep 1-3, then 1-2, finally only 1
155
- for (const limit of [3, 2, 1]) {
156
- const candidate = filterBy(limit);
157
- if (candidate.length <= MAX_LINES || limit === 1) {
158
- finalElements = candidate;
159
- break;
230
+ for (const child of node.children) {
231
+ if (typeof child === 'string') {
232
+ const childText = child.replace(/\s+/g, ' ').trim();
233
+ if (childText) { // Only add non-empty text
234
+ lines.push(`${indent} - text "${childText}"`);
235
+ }
236
+ } else {
237
+ lines.push(...renderTree(child, indent + ' '));
160
238
  }
161
239
  }
240
+ return lines;
162
241
  }
163
242
 
164
- // ------------------------------------------------------------------
165
- // Re-apply indentation so that it matches the *visible* hierarchy only.
166
- // Whenever an ancestor element is removed due to priority rules, its
167
- // children will be re-indented one level up so the structure remains
168
- // intuitive.
169
- // ------------------------------------------------------------------
170
- const outputLines = [];
171
- const depthStack = []; // keeps track of kept original depths
172
-
173
- for (const el of finalElements) {
174
- // Pop depths that are not ancestors of current element
175
- while (depthStack.length && depthStack[depthStack.length - 1] >= el.depth) {
176
- depthStack.pop();
177
- }
243
+ function processDocument(doc) {
244
+ if (!doc.body) return [];
245
+
246
+ // Clear cache for each new document processing.
247
+ textCache.clear();
248
+ let tree = buildAriaTree(doc.body);
249
+ [tree] = normalizeTree(tree);
178
250
 
179
- // Push the current depth so future descendants know their ancestor chain
180
- depthStack.push(el.depth);
251
+ const lines = renderTree(tree).slice(1); // Skip the root node line
181
252
 
182
- const compressedDepth = depthStack.length - 1; // root level has zero indent
183
- const indent = '\t'.repeat(compressedDepth);
184
- outputLines.push(indent + el.text);
253
+ const frames = doc.querySelectorAll('iframe');
254
+ for (const frame of frames) {
255
+ try {
256
+ if (frame.contentDocument) {
257
+ lines.push(...processDocument(frame.contentDocument));
258
+ }
259
+ } catch (e) {
260
+ // Skip cross-origin iframes
261
+ }
262
+ }
263
+ return lines;
185
264
  }
186
265
 
266
+ const outputLines = processDocument(document);
187
267
  return outputLines.join('\n');
188
268
  })();
@@ -48,14 +48,15 @@ class PageSnapshot:
48
48
  try:
49
49
  current_url = self.page.url
50
50
 
51
- # Serve cached copy (unless diff requested)
52
- if (
53
- not force_refresh
54
- and current_url == self._last_url
55
- and self.snapshot_data
56
- and not diff_only
57
- ):
58
- return self.snapshot_data
51
+ # Previously we skipped regeneration when the URL had not changed
52
+ # and no explicit refresh was requested. This prevented the agent
53
+ # from seeing DOM updates that occur without a navigation (e.g.
54
+ # single-page apps, dynamic games such as Wordle). The early-exit
55
+ # logic has been removed so that we always capture a *fresh* DOM
56
+ # snapshot. If the snapshot happens to be byte-for-byte identical
57
+ # to the previous one we simply return it after the standard
58
+ # comparison step below; otherwise callers receive the updated
59
+ # snapshot even when the URL did not change.
59
60
 
60
61
  # ensure DOM stability
61
62
  await self.page.wait_for_load_state(