connectonion-0.5.10-py3-none-any.whl → connectonion-0.6.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. connectonion/__init__.py +17 -16
  2. connectonion/cli/browser_agent/browser.py +488 -145
  3. connectonion/cli/browser_agent/scroll_strategies.py +276 -0
  4. connectonion/cli/commands/copy_commands.py +24 -1
  5. connectonion/cli/commands/deploy_commands.py +15 -0
  6. connectonion/cli/commands/eval_commands.py +286 -0
  7. connectonion/cli/commands/project_cmd_lib.py +1 -1
  8. connectonion/cli/main.py +11 -0
  9. connectonion/console.py +5 -5
  10. connectonion/core/__init__.py +53 -0
  11. connectonion/{agent.py → core/agent.py} +18 -15
  12. connectonion/{llm.py → core/llm.py} +9 -19
  13. connectonion/{tool_executor.py → core/tool_executor.py} +3 -2
  14. connectonion/{tool_factory.py → core/tool_factory.py} +3 -1
  15. connectonion/debug/__init__.py +51 -0
  16. connectonion/{interactive_debugger.py → debug/auto_debug.py} +7 -7
  17. connectonion/{auto_debug_exception.py → debug/auto_debug_exception.py} +3 -3
  18. connectonion/{debugger_ui.py → debug/auto_debug_ui.py} +1 -1
  19. connectonion/{debug_explainer → debug/debug_explainer}/explain_agent.py +1 -1
  20. connectonion/{debug_explainer → debug/debug_explainer}/explain_context.py +1 -1
  21. connectonion/{execution_analyzer → debug/execution_analyzer}/execution_analysis.py +1 -1
  22. connectonion/debug/runtime_inspector/__init__.py +13 -0
  23. connectonion/{debug_agent → debug/runtime_inspector}/agent.py +1 -1
  24. connectonion/{xray.py → debug/xray.py} +1 -1
  25. connectonion/llm_do.py +1 -1
  26. connectonion/logger.py +305 -135
  27. connectonion/network/__init__.py +37 -0
  28. connectonion/{announce.py → network/announce.py} +1 -1
  29. connectonion/{asgi.py → network/asgi.py} +122 -2
  30. connectonion/{connect.py → network/connect.py} +1 -1
  31. connectonion/network/connection.py +123 -0
  32. connectonion/{host.py → network/host.py} +31 -11
  33. connectonion/{trust.py → network/trust.py} +1 -1
  34. connectonion/tui/__init__.py +22 -0
  35. connectonion/tui/chat.py +647 -0
  36. connectonion/useful_events_handlers/reflect.py +2 -2
  37. connectonion/useful_plugins/__init__.py +4 -3
  38. connectonion/useful_plugins/calendar_plugin.py +2 -2
  39. connectonion/useful_plugins/eval.py +2 -2
  40. connectonion/useful_plugins/gmail_plugin.py +2 -2
  41. connectonion/useful_plugins/image_result_formatter.py +2 -2
  42. connectonion/useful_plugins/re_act.py +2 -2
  43. connectonion/useful_plugins/shell_approval.py +2 -2
  44. connectonion/useful_plugins/ui_stream.py +164 -0
  45. {connectonion-0.5.10.dist-info → connectonion-0.6.1.dist-info}/METADATA +4 -3
  46. connectonion-0.6.1.dist-info/RECORD +123 -0
  47. connectonion/debug_agent/__init__.py +0 -13
  48. connectonion-0.5.10.dist-info/RECORD +0 -115
  49. /connectonion/{events.py → core/events.py} +0 -0
  50. /connectonion/{tool_registry.py → core/tool_registry.py} +0 -0
  51. /connectonion/{usage.py → core/usage.py} +0 -0
  52. /connectonion/{debug_explainer → debug/debug_explainer}/__init__.py +0 -0
  53. /connectonion/{debug_explainer → debug/debug_explainer}/explainer_prompt.md +0 -0
  54. /connectonion/{debug_explainer → debug/debug_explainer}/root_cause_analysis_prompt.md +0 -0
  55. /connectonion/{decorators.py → debug/decorators.py} +0 -0
  56. /connectonion/{execution_analyzer → debug/execution_analyzer}/__init__.py +0 -0
  57. /connectonion/{execution_analyzer → debug/execution_analyzer}/execution_analysis_prompt.md +0 -0
  58. /connectonion/{debug_agent → debug/runtime_inspector}/prompts/debug_assistant.md +0 -0
  59. /connectonion/{debug_agent → debug/runtime_inspector}/runtime_inspector.py +0 -0
  60. /connectonion/{relay.py → network/relay.py} +0 -0
  61. /connectonion/{static → network/static}/docs.html +0 -0
  62. /connectonion/{trust_agents.py → network/trust_agents.py} +0 -0
  63. /connectonion/{trust_functions.py → network/trust_functions.py} +0 -0
  64. {connectonion-0.5.10.dist-info → connectonion-0.6.1.dist-info}/WHEEL +0 -0
  65. {connectonion-0.5.10.dist-info → connectonion-0.6.1.dist-info}/entry_points.txt +0 -0
@@ -1,22 +1,32 @@
1
1
  """Browser Agent for CLI - Natural language browser automation.
2
2
 
3
3
  This module provides a browser automation agent that understands natural language
4
- requests for taking screenshots and other browser operations via the ConnectOnion CLI.
4
+ requests for browser operations via the ConnectOnion CLI.
5
+
6
+ Features:
7
+ - Chrome profile support for persistent sessions (cookies, logins)
8
+ - AI-powered element finding using natural language
9
+ - Form handling: find, fill, submit
10
+ - Screenshot with viewport presets
11
+ - Universal scroll with AI strategy selection
12
+ - Manual login pause for 2FA/CAPTCHA
5
13
  """
6
14
 
7
15
  import os
16
+ import base64
8
17
  from pathlib import Path
9
18
  from datetime import datetime
10
- from connectonion import Agent, llm_do, xray
19
+ from typing import Optional, List, Dict, Any
20
+ from connectonion import Agent, llm_do
11
21
  from dotenv import load_dotenv
12
- from pydantic import BaseModel
22
+ from pydantic import BaseModel, Field
13
23
 
14
- # Default screenshots directory in current working directory
24
+ # Default screenshots directory
15
25
  SCREENSHOTS_DIR = Path.cwd() / ".tmp"
16
26
 
17
27
  # Check Playwright availability
18
28
  try:
19
- from playwright.sync_api import sync_playwright
29
+ from playwright.sync_api import sync_playwright, Page, Browser, Playwright
20
30
  PLAYWRIGHT_AVAILABLE = True
21
31
  except ImportError:
22
32
  PLAYWRIGHT_AVAILABLE = False
@@ -25,190 +35,527 @@ except ImportError:
25
35
  PROMPT_PATH = Path(__file__).parent / "prompt.md"
26
36
 
27
37
 
38
+ class FormField(BaseModel):
39
+ """A form field on a web page."""
40
+ name: str = Field(..., description="Field name or identifier")
41
+ label: str = Field(..., description="User-facing label")
42
+ type: str = Field(..., description="Input type (text, email, select, etc.)")
43
+ value: Optional[str] = Field(None, description="Current value")
44
+ required: bool = Field(False, description="Is this field required?")
45
+ options: List[str] = Field(default_factory=list, description="Available options for select/radio")
46
+
47
+
28
48
  class BrowserAutomation:
29
- """Browser automation for screenshots and interactions."""
30
-
31
- def __init__(self):
49
+ """Browser automation with natural language support.
50
+
51
+ Simple interface for complex web interactions.
52
+ Auto-initializes browser on creation for immediate use.
53
+ Supports Chrome profile for persistent sessions.
54
+ """
55
+
56
+ def __init__(self, use_chrome_profile: bool = False, headless: bool = True):
57
+ """Initialize browser automation.
58
+
59
+ Args:
60
+ use_chrome_profile: If True, uses your Chrome cookies/sessions.
61
+ Chrome must be closed before running.
62
+ headless: If True, browser runs without visible window (default True).
63
+ """
64
+ self.playwright: Optional[Playwright] = None
65
+ self.browser: Optional[Browser] = None
66
+ self.page: Optional[Page] = None
67
+ self.current_url: str = ""
68
+ self.form_data: Dict[str, Any] = {}
69
+ self.use_chrome_profile = use_chrome_profile
32
70
  self._screenshots = []
33
- self._playwright = None
34
- self._browser = None
35
- self._page = None
71
+ self._headless = headless
72
+ # Auto-initialize browser so it's ready immediately
36
73
  self._initialize_browser()
37
-
74
+
38
75
  def _initialize_browser(self):
39
- """Initialize the browser instance."""
76
+ """Initialize the browser instance on startup."""
40
77
  if not PLAYWRIGHT_AVAILABLE:
41
78
  return
42
- from playwright.sync_api import sync_playwright
43
- self._playwright = sync_playwright().start()
44
- self._browser = self._playwright.chromium.launch(headless=True)
45
- self._page = self._browser.new_page()
46
-
47
- def navigate_to(self, url: str) -> str:
79
+ self.open_browser(headless=self._headless)
80
+
81
+ def open_browser(self, headless: bool = True) -> str:
82
+ """Open a new browser window.
83
+
84
+ Args:
85
+ headless: If True, browser runs without visible window.
86
+
87
+ Note: If use_chrome_profile=True, Chrome must be completely closed.
88
+ """
89
+ if not PLAYWRIGHT_AVAILABLE:
90
+ return "Browser tools not installed. Run: pip install playwright && playwright install chromium"
91
+
92
+ if self.browser:
93
+ return "Browser already open"
94
+
95
+ self.playwright = sync_playwright().start()
96
+
97
+ if self.use_chrome_profile:
98
+ # Use Chromium with Chrome profile copy
99
+ chromium_profile = Path.cwd() / "chromium_automation_profile"
100
+
101
+ if not chromium_profile.exists():
102
+ import shutil
103
+ home = Path.home()
104
+ if os.name == 'nt': # Windows
105
+ source_profile = home / "AppData/Local/Google/Chrome/User Data"
106
+ elif os.uname().sysname == 'Darwin': # macOS
107
+ source_profile = home / "Library/Application Support/Google/Chrome"
108
+ else: # Linux
109
+ source_profile = home / ".config/google-chrome"
110
+
111
+ if source_profile.exists():
112
+ shutil.copytree(
113
+ source_profile,
114
+ chromium_profile,
115
+ ignore=shutil.ignore_patterns('*Cache*', '*cache*', 'Service Worker', 'ShaderCache'),
116
+ dirs_exist_ok=True
117
+ )
118
+
119
+ self.browser = self.playwright.chromium.launch_persistent_context(
120
+ str(chromium_profile),
121
+ headless=headless,
122
+ args=['--disable-blink-features=AutomationControlled'],
123
+ ignore_default_args=['--enable-automation'],
124
+ timeout=120000,
125
+ )
126
+ self.page = self.browser.pages[0] if self.browser.pages else self.browser.new_page()
127
+ self.page.add_init_script("""
128
+ Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
129
+ """)
130
+ return f"Browser opened with Chrome profile: {chromium_profile}"
131
+ else:
132
+ self.browser = self.playwright.chromium.launch(headless=headless)
133
+ self.page = self.browser.new_page()
134
+ return "Browser opened successfully"
135
+
136
+ def go_to(self, url: str) -> str:
48
137
  """Navigate to a URL."""
138
+ if not self.page:
139
+ self.open_browser()
140
+
49
141
  if not url.startswith(('http://', 'https://')):
50
142
  url = f'https://{url}' if '.' in url else f'http://{url}'
51
- self._page.goto(url, wait_until='networkidle', timeout=30000)
52
- # Sleep for 2 seconds to ensure page is fully loaded
53
- self._page.wait_for_timeout(2000)
54
- return f"Navigated to {url}"
55
-
56
- def set_viewport(self, width: int, height: int) -> str:
57
- """Set the browser viewport size.
58
-
143
+
144
+ self.page.goto(url, wait_until='networkidle', timeout=30000)
145
+ self.page.wait_for_timeout(2000)
146
+ self.current_url = self.page.url
147
+ return f"Navigated to {self.current_url}"
148
+
149
+ def find_element_by_description(self, description: str) -> str:
150
+ """Find element using natural language description.
151
+
152
+ Uses AI to analyze HTML and find the best matching element.
153
+
59
154
  Args:
60
- width: Viewport width in pixels
61
- height: Viewport height in pixels
62
-
155
+ description: e.g., "the submit button", "email input field"
156
+
63
157
  Returns:
64
- Success message
158
+ CSS selector for the element, or error message
65
159
  """
66
- if not PLAYWRIGHT_AVAILABLE:
67
- return 'Browser tools not installed. Run: pip install playwright && playwright install chromium'
68
- self._page.set_viewport_size({"width": width, "height": height})
69
- return f"Viewport set to {width}x{height}"
70
-
71
- def take_screenshot(self, url: str, path: str = "",
160
+ if not self.page:
161
+ return "Browser not open"
162
+
163
+ html = self.page.content()
164
+
165
+ class ElementSelector(BaseModel):
166
+ selector: str = Field(..., description="CSS selector for the element")
167
+ confidence: float = Field(..., description="Confidence score 0-1")
168
+ explanation: str = Field(..., description="Why this element matches")
169
+
170
+ result = llm_do(
171
+ f"""Analyze this HTML and find the CSS selector for: "{description}"
172
+
173
+ HTML (first 15000 chars): {html[:15000]}
174
+
175
+ Return the most specific CSS selector that uniquely identifies this element.
176
+ """,
177
+ output=ElementSelector,
178
+ model="gpt-4o",
179
+ temperature=0.1
180
+ )
181
+
182
+ if self.page.locator(result.selector).count() > 0:
183
+ return result.selector
184
+ else:
185
+ return f"Found selector {result.selector} but element not on page"
186
+
187
+ def click(self, description: str) -> str:
188
+ """Click on an element using natural language description.
189
+
190
+ Args:
191
+ description: e.g., "the blue submit button", "link to contact page"
192
+ """
193
+ if not self.page:
194
+ return "Browser not open"
195
+
196
+ selector = self.find_element_by_description(description)
197
+
198
+ if selector.startswith("Could not") or selector.startswith("Found selector"):
199
+ if self.page.locator(f"text='{description}'").count() > 0:
200
+ self.page.click(f"text='{description}'")
201
+ return f"Clicked on '{description}' (by text)"
202
+ return selector
203
+
204
+ self.page.click(selector)
205
+ return f"Clicked on '{description}'"
206
+
207
+ def type_text(self, field_description: str, text: str) -> str:
208
+ """Type text into a form field.
209
+
210
+ Args:
211
+ field_description: e.g., "email field", "password input"
212
+ text: The text to type
213
+ """
214
+ if not self.page:
215
+ return "Browser not open"
216
+
217
+ selector = self.find_element_by_description(field_description)
218
+
219
+ if selector.startswith("Could not") or selector.startswith("Found selector"):
220
+ for fallback in [
221
+ f"input[placeholder*='{field_description}' i]",
222
+ f"[aria-label*='{field_description}' i]",
223
+ f"input[name*='{field_description}' i]"
224
+ ]:
225
+ if self.page.locator(fallback).count() > 0:
226
+ self.page.fill(fallback, text)
227
+ self.form_data[field_description] = text
228
+ return f"Typed into {field_description}"
229
+ return f"Could not find field '{field_description}'"
230
+
231
+ self.page.fill(selector, text)
232
+ self.form_data[field_description] = text
233
+ return f"Typed into {field_description}"
234
+
235
+ def get_text(self) -> str:
236
+ """Get all visible text from the page."""
237
+ if not self.page:
238
+ return "Browser not open"
239
+ return self.page.inner_text("body")
240
+
241
+ def get_current_url(self) -> str:
242
+ """Get the current page URL."""
243
+ if not self.page:
244
+ return "Browser not open"
245
+ return self.page.url
246
+
247
+ def get_current_page_html(self) -> str:
248
+ """Get the HTML content of the current page."""
249
+ if not self.page:
250
+ return "Browser not open"
251
+ return self.page.content()
252
+
253
+ def take_screenshot(self, url: str = None, path: str = "",
72
254
  width: int = 1920, height: int = 1080,
73
255
  full_page: bool = False) -> str:
74
- """Take a screenshot of the specified URL.
75
-
256
+ """Take a screenshot of a URL or current page.
257
+
76
258
  Args:
77
- url: The URL to screenshot (e.g., "localhost:3000", "example.com")
78
- path: Optional path to save the screenshot (auto-generates if empty)
259
+ url: URL to screenshot (optional - uses current page if not provided)
260
+ path: Optional path to save (auto-generates if empty)
79
261
  width: Viewport width in pixels (default 1920)
80
262
  height: Viewport height in pixels (default 1080)
81
263
  full_page: If True, captures entire page height
82
-
264
+
83
265
  Returns:
84
- Success or error message
266
+ Path to saved screenshot
85
267
  """
86
268
  if not PLAYWRIGHT_AVAILABLE:
87
269
  return 'Browser tools not installed. Run: pip install playwright && playwright install chromium'
88
-
89
- # Navigate to URL
90
- self.navigate_to(url)
91
-
270
+
271
+ if not self.page:
272
+ return "Browser not open"
273
+
274
+ # Navigate if URL provided
275
+ if url:
276
+ self.go_to(url)
277
+
92
278
  # Set viewport size
93
- self._page.set_viewport_size({"width": width, "height": height})
94
-
279
+ self.page.set_viewport_size({"width": width, "height": height})
280
+
95
281
  # Generate filename if needed
96
282
  if not path:
97
- # Ensure screenshots directory exists
98
283
  SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)
99
284
  timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
100
285
  path = str(SCREENSHOTS_DIR / f'screenshot_{timestamp}.png')
101
- elif not path.startswith('/'): # Relative path
102
- # If relative path given, save to screenshots dir
286
+ elif not path.startswith('/'):
103
287
  SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)
104
288
  if not path.endswith(('.png', '.jpg', '.jpeg')):
105
289
  path += '.png'
106
290
  path = str(SCREENSHOTS_DIR / path)
107
291
  elif not path.endswith(('.png', '.jpg', '.jpeg')):
108
- # Absolute path without extension
109
292
  path += '.png'
110
-
293
+
111
294
  # Ensure directory exists
112
295
  Path(path).parent.mkdir(parents=True, exist_ok=True)
113
-
296
+
114
297
  # Take screenshot
115
- self._page.screenshot(path=path, full_page=full_page)
116
-
298
+ self.page.screenshot(path=path, full_page=full_page)
117
299
  self._screenshots.append(path)
118
300
  return f'Screenshot saved: {path}'
119
-
120
- def screenshot_with_iphone_viewport(self, url: str, path: str = "") -> str:
121
- """Take a screenshot with iPhone viewport (390x844)."""
122
- return self.take_screenshot(url, path, width=390, height=844)
123
-
124
- def screenshot_with_ipad_viewport(self, url: str, path: str = "") -> str:
125
- """Take a screenshot with iPad viewport (768x1024)."""
126
- return self.take_screenshot(url, path, width=768, height=1024)
127
-
128
- def screenshot_with_desktop_viewport(self, url: str, path: str = "") -> str:
129
- """Take a screenshot with desktop viewport (1920x1080)."""
130
- return self.take_screenshot(url, path, width=1920, height=1080)
131
-
132
- def get_current_page_html(self) -> str:
133
- """Get the HTML content of the current page.
134
-
135
- Returns:
136
- The HTML content of the current page
137
- """
138
- if not PLAYWRIGHT_AVAILABLE:
139
- return 'Browser tools not installed. Run: pip install playwright && playwright install chromium'
140
- return self._page.content()
141
-
142
- def get_current_url(self) -> str:
143
- """Get the current page URL.
144
-
145
- Returns:
146
- The current URL
147
- """
148
- if not PLAYWRIGHT_AVAILABLE:
149
- return 'Browser tools not installed. Run: pip install playwright && playwright install chromium'
150
- return self._page.url
151
-
301
+
302
+ def set_viewport(self, width: int, height: int) -> str:
303
+ """Set the browser viewport size."""
304
+ if not self.page:
305
+ return "Browser not open"
306
+ self.page.set_viewport_size({"width": width, "height": height})
307
+ return f"Viewport set to {width}x{height}"
308
+
309
+ def screenshot_mobile(self, url: str = None) -> str:
310
+ """Take screenshot with iPhone viewport (390x844)."""
311
+ if url:
312
+ self.go_to(url)
313
+ self.set_viewport(390, 844)
314
+ return self.take_screenshot()
315
+
316
+ def screenshot_tablet(self, url: str = None) -> str:
317
+ """Take screenshot with iPad viewport (768x1024)."""
318
+ if url:
319
+ self.go_to(url)
320
+ self.set_viewport(768, 1024)
321
+ return self.take_screenshot()
322
+
323
+ def screenshot_desktop(self, url: str = None) -> str:
324
+ """Take screenshot with desktop viewport (1920x1080)."""
325
+ if url:
326
+ self.go_to(url)
327
+ self.set_viewport(1920, 1080)
328
+ return self.take_screenshot()
329
+
330
+ def find_forms(self) -> List[FormField]:
331
+ """Find all form fields on the current page."""
332
+ if not self.page:
333
+ return []
334
+
335
+ fields_data = self.page.evaluate("""
336
+ () => {
337
+ const fields = [];
338
+ document.querySelectorAll('input, textarea, select').forEach(input => {
339
+ const label = input.labels?.[0]?.textContent ||
340
+ input.placeholder || input.name || input.id || 'Unknown';
341
+ fields.push({
342
+ name: input.name || input.id || label,
343
+ label: label.trim(),
344
+ type: input.type || input.tagName.toLowerCase(),
345
+ value: input.value || '',
346
+ required: input.required || false,
347
+ options: input.tagName === 'SELECT' ?
348
+ Array.from(input.options).map(o => o.text) : []
349
+ });
350
+ });
351
+ return fields;
352
+ }
353
+ """)
354
+ return [FormField(**field) for field in fields_data]
355
+
356
+ def fill_form(self, data: Dict[str, str]) -> str:
357
+ """Fill multiple form fields at once."""
358
+ if not self.page:
359
+ return "Browser not open"
360
+
361
+ results = []
362
+ for field_name, value in data.items():
363
+ result = self.type_text(field_name, value)
364
+ results.append(f"{field_name}: {result}")
365
+ return "\n".join(results)
366
+
367
+ def submit_form(self) -> str:
368
+ """Submit the current form."""
369
+ if not self.page:
370
+ return "Browser not open"
371
+
372
+ for selector in [
373
+ "button[type='submit']",
374
+ "input[type='submit']",
375
+ "button:has-text('Submit')",
376
+ "button:has-text('Send')",
377
+ "button:has-text('Continue')",
378
+ "button:has-text('Next')"
379
+ ]:
380
+ if self.page.locator(selector).count() > 0:
381
+ self.page.click(selector)
382
+ return "Form submitted"
383
+
384
+ return "Could not find submit button"
385
+
386
+ def select_option(self, field_description: str, option: str) -> str:
387
+ """Select an option from a dropdown."""
388
+ if not self.page:
389
+ return "Browser not open"
390
+
391
+ selector = self.find_element_by_description(field_description)
392
+ if selector.startswith("Could not"):
393
+ return selector
394
+
395
+ self.page.select_option(selector, label=option)
396
+ return f"Selected '{option}' in {field_description}"
397
+
398
+ def check_checkbox(self, description: str, checked: bool = True) -> str:
399
+ """Check or uncheck a checkbox."""
400
+ if not self.page:
401
+ return "Browser not open"
402
+
403
+ selector = self.find_element_by_description(description)
404
+ if selector.startswith("Could not"):
405
+ return selector
406
+
407
+ if checked:
408
+ self.page.check(selector)
409
+ return f"Checked {description}"
410
+ else:
411
+ self.page.uncheck(selector)
412
+ return f"Unchecked {description}"
413
+
414
+ def wait_for_element(self, description: str, timeout: int = 30) -> str:
415
+ """Wait for an element to appear."""
416
+ if not self.page:
417
+ return "Browser not open"
418
+
419
+ selector = self.find_element_by_description(description)
420
+ if selector.startswith("Could not"):
421
+ self.page.wait_for_selector(f"text='{description}'", timeout=timeout * 1000)
422
+ return f"Found text: '{description}'"
423
+
424
+ self.page.wait_for_selector(selector, timeout=timeout * 1000)
425
+ return f"Element appeared: {description}"
426
+
427
+ def wait_for_text(self, text: str, timeout: int = 30) -> str:
428
+ """Wait for specific text to appear on the page."""
429
+ if not self.page:
430
+ return "Browser not open"
431
+
432
+ self.page.wait_for_selector(f"text='{text}'", timeout=timeout * 1000)
433
+ return f"Found text: '{text}'"
434
+
152
435
  def wait(self, seconds: float) -> str:
153
- """Wait for a specified number of seconds.
154
-
436
+ """Wait for a specified number of seconds."""
437
+ if not self.page:
438
+ return "Browser not open"
439
+ self.page.wait_for_timeout(seconds * 1000)
440
+ return f"Waited for {seconds} seconds"
441
+
442
+ def scroll(self, times: int = 5, description: str = "the main content area") -> str:
443
+ """Universal scroll with automatic strategy selection.
444
+
445
+ Tries multiple strategies until one works:
446
+ 1. AI-generated strategy (analyzes page structure)
447
+ 2. Element scrolling
448
+ 3. Page scrolling
449
+
155
450
  Args:
156
- seconds: Number of seconds to wait
157
-
451
+ times: Number of scroll iterations
452
+ description: What to scroll (e.g., "the email list")
453
+
158
454
  Returns:
159
- Success message
455
+ Status message with successful strategy
160
456
  """
161
- if not PLAYWRIGHT_AVAILABLE:
162
- return 'Browser tools not installed. Run: pip install playwright && playwright install chromium'
163
- self._page.wait_for_timeout(seconds * 1000) # Convert to milliseconds
164
- return f"Waited for {seconds} seconds"
165
-
166
- @xray
167
- def get_debug_trace(self) -> str:
168
- """Get execution trace for debugging.
169
-
170
- Returns:
171
- Execution trace showing what happened
457
+ from . import scroll_strategies
458
+ return scroll_strategies.scroll_with_verification(
459
+ page=self.page,
460
+ take_screenshot=self.take_screenshot,
461
+ times=times,
462
+ description=description
463
+ )
464
+
465
+ def scroll_page(self, direction: str = "down", amount: int = 1000) -> str:
466
+ """Scroll the page in a direction.
467
+
468
+ Args:
469
+ direction: "down", "up", "top", or "bottom"
470
+ amount: Pixels to scroll (ignored for "bottom"/"top")
172
471
  """
173
- if hasattr(xray, 'trace'):
174
- return xray.trace()
175
- return "No trace available"
176
-
177
- def click_element_by_description(self, description: str) -> str:
178
- """Click an element on the current page based on natural language description.
179
-
472
+ if not self.page:
473
+ return "Browser not open"
474
+
475
+ if direction == "bottom":
476
+ self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
477
+ return "Scrolled to bottom of page"
478
+ elif direction == "top":
479
+ self.page.evaluate("window.scrollTo(0, 0)")
480
+ return "Scrolled to top of page"
481
+ elif direction == "down":
482
+ self.page.evaluate(f"window.scrollBy(0, {amount})")
483
+ return f"Scrolled down {amount} pixels"
484
+ elif direction == "up":
485
+ self.page.evaluate(f"window.scrollBy(0, -{amount})")
486
+ return f"Scrolled up {amount} pixels"
487
+ else:
488
+ return f"Unknown direction: {direction}"
489
+
490
+ def scroll_element(self, selector: str, amount: int = 1000) -> str:
491
+ """Scroll a specific element by CSS selector."""
492
+ if not self.page:
493
+ return "Browser not open"
494
+
495
+ result = self.page.evaluate(f"""
496
+ (() => {{
497
+ const element = document.querySelector('{selector}');
498
+ if (!element) return 'Element not found: {selector}';
499
+ const beforeScroll = element.scrollTop;
500
+ element.scrollTop += {amount};
501
+ const afterScroll = element.scrollTop;
502
+ return `Scrolled from ${{beforeScroll}}px to ${{afterScroll}}px`;
503
+ }})()
504
+ """)
505
+ return result
506
+
507
+ def wait_for_manual_login(self, site_name: str = "the website") -> str:
508
+ """Pause automation for user to login manually.
509
+
510
+ Useful for sites with 2FA or CAPTCHA.
511
+
180
512
  Args:
181
- description: Natural language description of what to click
182
-
513
+ site_name: Name of the site (e.g., "Gmail")
514
+
183
515
  Returns:
184
- Result message
516
+ Confirmation when user is ready to continue
185
517
  """
186
- if not PLAYWRIGHT_AVAILABLE:
187
- return 'Browser tools not installed. Run: pip install playwright && playwright install chromium'
188
-
189
- html_content = self._page.content()
190
-
191
- # Use llm_do to determine the selector
192
- class ElementSelector(BaseModel):
193
- selector: str
194
- method: str # "text" or "css"
195
-
196
- result = llm_do(
197
- f"Find selector for: {description}\n\nHTML:\n{html_content[:5000]}",
198
- output=ElementSelector,
199
- system_prompt="Return the best selector to click the element. Use method='text' for button text, method='css' for CSS selectors."
200
- )
201
-
202
- if result.method == "text":
203
- self._page.get_by_text(result.selector).click()
204
- else:
205
- self._page.locator(result.selector).click()
206
-
207
- self._page.wait_for_timeout(1000)
208
- return f"Clicked: {result.selector}"
518
+ if not self.page:
519
+ return "Browser not open"
209
520
 
521
+ print(f"\n{'='*60}")
522
+ print(f" MANUAL LOGIN REQUIRED")
523
+ print(f"{'='*60}")
524
+ print(f"Please login to {site_name} in the browser window.")
525
+ print(f"Once you're logged in and ready to continue:")
526
+ print(f" Type 'yes' or 'Y' and press Enter")
527
+ print(f"{'='*60}\n")
210
528
 
529
+ while True:
530
+ response = input("Ready to continue? (yes/Y): ").strip().lower()
531
+ if response in ['yes', 'y']:
532
+ print("Continuing automation...\n")
533
+ return f"User confirmed login to {site_name} - continuing"
534
+ else:
535
+ print("Please type 'yes' or 'Y' when ready.")
211
536
 
537
+ def extract_data(self, selector: str) -> List[str]:
538
+ """Extract text from elements matching a selector."""
539
+ if not self.page:
540
+ return []
541
+
542
+ elements = self.page.locator(selector)
543
+ count = elements.count()
544
+ return [elements.nth(i).inner_text() for i in range(count)]
545
+
546
+ def close(self) -> str:
547
+ """Close the browser."""
548
+ if self.page:
549
+ self.page.close()
550
+ if self.browser:
551
+ self.browser.close()
552
+ if self.playwright:
553
+ self.playwright.stop()
554
+
555
+ self.page = None
556
+ self.browser = None
557
+ self.playwright = None
558
+ return "Browser closed"
212
559
 
213
560
 
214
561
  def execute_browser_command(command: str) -> str:
@@ -216,11 +563,8 @@ def execute_browser_command(command: str) -> str:
216
563
 
217
564
  Returns the agent's natural language response directly.
218
565
  """
219
- # Framework auto-loads local .env, but CLI commands need global fallback
220
- # Check for API key in environment first
221
566
  api_key = os.getenv('OPENONION_API_KEY')
222
567
 
223
- # If not found, try loading from global config
224
568
  if not api_key:
225
569
  global_env = Path.home() / ".co" / "keys.env"
226
570
  if global_env.exists():
@@ -228,7 +572,7 @@ def execute_browser_command(command: str) -> str:
228
572
  api_key = os.getenv('OPENONION_API_KEY')
229
573
 
230
574
  if not api_key:
231
- return 'Browser agent requires authentication. Run: co auth'
575
+ return 'Browser agent requires authentication. Run: co auth'
232
576
 
233
577
  browser = BrowserAutomation()
234
578
  agent = Agent(
@@ -237,7 +581,6 @@ def execute_browser_command(command: str) -> str:
237
581
  api_key=api_key,
238
582
  system_prompt=PROMPT_PATH,
239
583
  tools=[browser],
240
- max_iterations=10
584
+ max_iterations=20
241
585
  )
242
586
  return agent.input(command)
243
-