connectonion 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- connectonion/__init__.py +3 -2
- connectonion/cli/browser_agent/browser.py +433 -147
- connectonion/cli/browser_agent/element_finder.py +139 -0
- connectonion/cli/browser_agent/highlight_screenshot.py +174 -0
- connectonion/cli/browser_agent/prompt.md +188 -105
- connectonion/cli/browser_agent/prompts/element_matcher.md +59 -0
- connectonion/cli/browser_agent/prompts/form_filler.md +19 -0
- connectonion/cli/browser_agent/prompts/scroll_strategy.md +36 -0
- connectonion/cli/browser_agent/scripts/extract_elements.js +126 -0
- connectonion/cli/browser_agent/scroll.py +137 -0
- connectonion/cli/commands/eval_commands.py +286 -0
- connectonion/cli/main.py +11 -0
- connectonion/console.py +5 -5
- connectonion/core/agent.py +13 -10
- connectonion/core/llm.py +9 -19
- connectonion/logger.py +305 -135
- connectonion/network/__init__.py +3 -0
- connectonion/network/asgi.py +122 -2
- connectonion/network/connection.py +123 -0
- connectonion/network/host.py +7 -5
- connectonion/useful_plugins/__init__.py +4 -3
- connectonion/useful_plugins/ui_stream.py +164 -0
- {connectonion-0.6.0.dist-info → connectonion-0.6.2.dist-info}/METADATA +1 -1
- {connectonion-0.6.0.dist-info → connectonion-0.6.2.dist-info}/RECORD +27 -17
- /connectonion/{static → network/static}/docs.html +0 -0
- {connectonion-0.6.0.dist-info → connectonion-0.6.2.dist-info}/WHEEL +0 -0
- {connectonion-0.6.0.dist-info → connectonion-0.6.2.dist-info}/entry_points.txt +0 -0
|
@@ -1,22 +1,33 @@
|
|
|
1
1
|
"""Browser Agent for CLI - Natural language browser automation.
|
|
2
2
|
|
|
3
3
|
This module provides a browser automation agent that understands natural language
|
|
4
|
-
requests for
|
|
4
|
+
requests for browser operations via the ConnectOnion CLI.
|
|
5
|
+
|
|
6
|
+
Features:
|
|
7
|
+
- Chrome profile support for persistent sessions (cookies, logins)
|
|
8
|
+
- AI-powered element finding using natural language
|
|
9
|
+
- Form handling: find, fill, submit
|
|
10
|
+
- Screenshot with viewport presets
|
|
11
|
+
- Universal scroll with AI strategy selection
|
|
12
|
+
- Manual login pause for 2FA/CAPTCHA
|
|
5
13
|
"""
|
|
6
14
|
|
|
7
15
|
import os
|
|
16
|
+
import base64
|
|
8
17
|
from pathlib import Path
|
|
9
18
|
from datetime import datetime
|
|
10
|
-
from
|
|
19
|
+
from typing import Optional, List, Dict, Any
|
|
20
|
+
from connectonion import Agent, llm_do
|
|
11
21
|
from dotenv import load_dotenv
|
|
12
|
-
from pydantic import BaseModel
|
|
22
|
+
from pydantic import BaseModel, Field
|
|
23
|
+
from . import element_finder
|
|
13
24
|
|
|
14
|
-
# Default screenshots directory
|
|
25
|
+
# Default screenshots directory
|
|
15
26
|
SCREENSHOTS_DIR = Path.cwd() / ".tmp"
|
|
16
27
|
|
|
17
28
|
# Check Playwright availability
|
|
18
29
|
try:
|
|
19
|
-
from playwright.sync_api import sync_playwright
|
|
30
|
+
from playwright.sync_api import sync_playwright, Page, Browser, Playwright
|
|
20
31
|
PLAYWRIGHT_AVAILABLE = True
|
|
21
32
|
except ImportError:
|
|
22
33
|
PLAYWRIGHT_AVAILABLE = False
|
|
@@ -25,190 +36,469 @@ except ImportError:
|
|
|
25
36
|
PROMPT_PATH = Path(__file__).parent / "prompt.md"
|
|
26
37
|
|
|
27
38
|
|
|
39
|
+
class FormField(BaseModel):
|
|
40
|
+
"""A form field on a web page."""
|
|
41
|
+
name: str = Field(..., description="Field name or identifier")
|
|
42
|
+
label: str = Field(..., description="User-facing label")
|
|
43
|
+
type: str = Field(..., description="Input type (text, email, select, etc.)")
|
|
44
|
+
value: Optional[str] = Field(None, description="Current value")
|
|
45
|
+
required: bool = Field(False, description="Is this field required?")
|
|
46
|
+
options: List[str] = Field(default_factory=list, description="Available options for select/radio")
|
|
47
|
+
|
|
48
|
+
|
|
28
49
|
class BrowserAutomation:
|
|
29
|
-
"""Browser automation
|
|
30
|
-
|
|
31
|
-
|
|
50
|
+
"""Browser automation with natural language support.
|
|
51
|
+
|
|
52
|
+
Simple interface for complex web interactions.
|
|
53
|
+
Auto-initializes browser on creation for immediate use.
|
|
54
|
+
Supports Chrome profile for persistent sessions.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
def __init__(self, use_chrome_profile: bool = True, headless: bool = True):
|
|
58
|
+
"""Initialize browser automation.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
use_chrome_profile: If True, uses your Chrome cookies/sessions.
|
|
62
|
+
Chrome must be closed before running.
|
|
63
|
+
headless: If True, browser runs without visible window (default True).
|
|
64
|
+
"""
|
|
65
|
+
self.playwright: Optional[Playwright] = None
|
|
66
|
+
self.browser: Optional[Browser] = None
|
|
67
|
+
self.page: Optional[Page] = None
|
|
68
|
+
self.current_url: str = ""
|
|
69
|
+
self.form_data: Dict[str, Any] = {}
|
|
70
|
+
self.use_chrome_profile = use_chrome_profile
|
|
32
71
|
self._screenshots = []
|
|
33
|
-
self.
|
|
34
|
-
|
|
35
|
-
self._page = None
|
|
72
|
+
self._headless = headless
|
|
73
|
+
# Auto-initialize browser so it's ready immediately
|
|
36
74
|
self._initialize_browser()
|
|
37
|
-
|
|
75
|
+
|
|
38
76
|
def _initialize_browser(self):
|
|
39
|
-
"""Initialize the browser instance."""
|
|
77
|
+
"""Initialize the browser instance on startup."""
|
|
40
78
|
if not PLAYWRIGHT_AVAILABLE:
|
|
41
79
|
return
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
80
|
+
self.open_browser(headless=self._headless)
|
|
81
|
+
|
|
82
|
+
def open_browser(self, headless: bool = True) -> str:
|
|
83
|
+
"""Open a new browser window.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
headless: If True, browser runs without visible window.
|
|
87
|
+
|
|
88
|
+
Note: If use_chrome_profile=True, Chrome must be completely closed.
|
|
89
|
+
"""
|
|
90
|
+
if not PLAYWRIGHT_AVAILABLE:
|
|
91
|
+
return "Browser tools not installed. Run: pip install playwright && playwright install chromium"
|
|
92
|
+
|
|
93
|
+
if self.browser:
|
|
94
|
+
return "Browser already open"
|
|
95
|
+
|
|
96
|
+
self.playwright = sync_playwright().start()
|
|
97
|
+
|
|
98
|
+
if self.use_chrome_profile:
|
|
99
|
+
# Use Chromium with Chrome profile copy
|
|
100
|
+
chromium_profile = Path.cwd() / "chromium_automation_profile"
|
|
101
|
+
|
|
102
|
+
if not chromium_profile.exists():
|
|
103
|
+
import shutil
|
|
104
|
+
home = Path.home()
|
|
105
|
+
if os.name == 'nt': # Windows
|
|
106
|
+
source_profile = home / "AppData/Local/Google/Chrome/User Data"
|
|
107
|
+
elif os.uname().sysname == 'Darwin': # macOS
|
|
108
|
+
source_profile = home / "Library/Application Support/Google/Chrome"
|
|
109
|
+
else: # Linux
|
|
110
|
+
source_profile = home / ".config/google-chrome"
|
|
111
|
+
|
|
112
|
+
if source_profile.exists():
|
|
113
|
+
def safe_copy(src, dst):
|
|
114
|
+
try:
|
|
115
|
+
shutil.copy2(src, dst)
|
|
116
|
+
except:
|
|
117
|
+
pass # Skip any file that can't be copied
|
|
118
|
+
|
|
119
|
+
shutil.copytree(
|
|
120
|
+
source_profile,
|
|
121
|
+
chromium_profile,
|
|
122
|
+
ignore=shutil.ignore_patterns(
|
|
123
|
+
'*Cache*', '*cache*', 'Service Worker', 'ShaderCache',
|
|
124
|
+
'Singleton*', '*lock*', '*Lock*', '*.tmp', 'GPUCache',
|
|
125
|
+
'Code Cache', 'DawnCache', 'GrShaderCache', 'blob_storage'
|
|
126
|
+
),
|
|
127
|
+
copy_function=safe_copy,
|
|
128
|
+
dirs_exist_ok=True
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
self.browser = self.playwright.chromium.launch_persistent_context(
|
|
132
|
+
str(chromium_profile),
|
|
133
|
+
headless=headless,
|
|
134
|
+
args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-blink-features=AutomationControlled'],
|
|
135
|
+
ignore_default_args=['--enable-automation'],
|
|
136
|
+
timeout=120000,
|
|
137
|
+
)
|
|
138
|
+
self.page = self.browser.pages[0] if self.browser.pages else self.browser.new_page()
|
|
139
|
+
self.page.add_init_script("""
|
|
140
|
+
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
|
|
141
|
+
""")
|
|
142
|
+
self.page.set_viewport_size({"width": 1920, "height": 1080})
|
|
143
|
+
return f"Browser opened with Chrome profile: {chromium_profile}"
|
|
144
|
+
else:
|
|
145
|
+
self.browser = self.playwright.chromium.launch(headless=headless)
|
|
146
|
+
self.page = self.browser.new_page()
|
|
147
|
+
self.page.set_viewport_size({"width": 1920, "height": 1080})
|
|
148
|
+
return "Browser opened successfully"
|
|
149
|
+
|
|
150
|
+
def go_to(self, url: str) -> str:
|
|
48
151
|
"""Navigate to a URL."""
|
|
152
|
+
if not self.page:
|
|
153
|
+
self.open_browser()
|
|
154
|
+
|
|
49
155
|
if not url.startswith(('http://', 'https://')):
|
|
50
156
|
url = f'https://{url}' if '.' in url else f'http://{url}'
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
self.
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
157
|
+
|
|
158
|
+
self.page.goto(url, wait_until='networkidle', timeout=30000)
|
|
159
|
+
self.page.wait_for_timeout(2000)
|
|
160
|
+
self.current_url = self.page.url
|
|
161
|
+
return f"Navigated to {self.current_url}"
|
|
162
|
+
|
|
163
|
+
def find_element_by_description(self, description: str) -> str:
|
|
164
|
+
"""Find element using natural language description.
|
|
165
|
+
|
|
166
|
+
Uses element_finder: LLM selects from indexed list, never generates CSS.
|
|
167
|
+
|
|
59
168
|
Args:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
169
|
+
description: e.g., "the submit button", "email input field"
|
|
170
|
+
|
|
63
171
|
Returns:
|
|
64
|
-
|
|
172
|
+
Pre-built locator string, or error message
|
|
65
173
|
"""
|
|
66
|
-
if not
|
|
67
|
-
return
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
174
|
+
if not self.page:
|
|
175
|
+
return "Browser not open"
|
|
176
|
+
|
|
177
|
+
element = element_finder.find_element(self.page, description)
|
|
178
|
+
if element:
|
|
179
|
+
return element.locator
|
|
180
|
+
return f"Could not find element matching: {description}"
|
|
181
|
+
|
|
182
|
+
def click(self, description: str) -> str:
|
|
183
|
+
"""Click on an element using natural language description.
|
|
184
|
+
|
|
185
|
+
Uses element_finder: LLM selects from pre-built locators, never generates CSS.
|
|
186
|
+
"""
|
|
187
|
+
if not self.page:
|
|
188
|
+
return "Browser not open"
|
|
189
|
+
|
|
190
|
+
element = element_finder.find_element(self.page, description)
|
|
191
|
+
|
|
192
|
+
if not element:
|
|
193
|
+
# Fallback to simple text matching
|
|
194
|
+
text_locator = self.page.get_by_text(description)
|
|
195
|
+
if text_locator.count() > 0:
|
|
196
|
+
text_locator.first.click()
|
|
197
|
+
return f"Clicked on '{description}' (by text fallback)"
|
|
198
|
+
return f"Could not find element matching: {description}"
|
|
199
|
+
|
|
200
|
+
# Try the locator with fresh bounding box
|
|
201
|
+
locator = self.page.locator(element.locator)
|
|
202
|
+
|
|
203
|
+
if locator.count() > 0:
|
|
204
|
+
box = locator.first.bounding_box()
|
|
205
|
+
if box:
|
|
206
|
+
x = box['x'] + box['width'] / 2
|
|
207
|
+
y = box['y'] + box['height'] / 2
|
|
208
|
+
self.page.mouse.click(x, y)
|
|
209
|
+
return f"Clicked [{element.index}] {element.tag} '{element.text}'"
|
|
210
|
+
|
|
211
|
+
locator.first.click(force=True)
|
|
212
|
+
return f"Clicked [{element.index}] {element.tag} '{element.text}' (force)"
|
|
213
|
+
|
|
214
|
+
# Fallback: use original coordinates
|
|
215
|
+
x = element.x + element.width // 2
|
|
216
|
+
y = element.y + element.height // 2
|
|
217
|
+
self.page.mouse.click(x, y)
|
|
218
|
+
return f"Clicked [{element.index}] '{element.text}' at ({x}, {y})"
|
|
219
|
+
|
|
220
|
+
def type_text(self, field_description: str, text: str) -> str:
|
|
221
|
+
"""Type text into a form field.
|
|
222
|
+
|
|
223
|
+
Uses element_finder: LLM selects from pre-built locators, never generates CSS.
|
|
224
|
+
"""
|
|
225
|
+
if not self.page:
|
|
226
|
+
return "Browser not open"
|
|
227
|
+
|
|
228
|
+
element = element_finder.find_element(self.page, field_description)
|
|
229
|
+
|
|
230
|
+
if not element:
|
|
231
|
+
# Fallback to placeholder matching
|
|
232
|
+
placeholder_locator = self.page.get_by_placeholder(field_description)
|
|
233
|
+
if placeholder_locator.count() > 0:
|
|
234
|
+
placeholder_locator.first.fill(text)
|
|
235
|
+
self.form_data[field_description] = text
|
|
236
|
+
return f"Typed into '{field_description}'"
|
|
237
|
+
return f"Could not find field: {field_description}"
|
|
238
|
+
|
|
239
|
+
# Try the pre-built locator
|
|
240
|
+
locator = self.page.locator(element.locator)
|
|
241
|
+
|
|
242
|
+
if locator.count() > 0:
|
|
243
|
+
locator.first.fill(text)
|
|
244
|
+
self.form_data[field_description] = text
|
|
245
|
+
return f"Typed into [{element.index}] {element.tag}"
|
|
246
|
+
|
|
247
|
+
# Fallback: click then type
|
|
248
|
+
x = element.x + element.width // 2
|
|
249
|
+
y = element.y + element.height // 2
|
|
250
|
+
self.page.mouse.click(x, y)
|
|
251
|
+
self.page.keyboard.type(text)
|
|
252
|
+
self.form_data[field_description] = text
|
|
253
|
+
return f"Typed into [{element.index}] at ({x}, {y})"
|
|
254
|
+
|
|
255
|
+
def get_text(self) -> str:
|
|
256
|
+
"""Get all visible text from the page."""
|
|
257
|
+
if not self.page:
|
|
258
|
+
return "Browser not open"
|
|
259
|
+
return self.page.inner_text("body")
|
|
260
|
+
|
|
261
|
+
def get_current_url(self) -> str:
|
|
262
|
+
"""Get the current page URL."""
|
|
263
|
+
if not self.page:
|
|
264
|
+
return "Browser not open"
|
|
265
|
+
return self.page.url
|
|
266
|
+
|
|
267
|
+
def get_current_page_html(self) -> str:
|
|
268
|
+
"""Get the HTML content of the current page."""
|
|
269
|
+
if not self.page:
|
|
270
|
+
return "Browser not open"
|
|
271
|
+
return self.page.content()
|
|
272
|
+
|
|
273
|
+
def take_screenshot(self, url: str = None, path: str = "",
|
|
72
274
|
width: int = 1920, height: int = 1080,
|
|
73
275
|
full_page: bool = False) -> str:
|
|
74
|
-
"""Take a screenshot of
|
|
75
|
-
|
|
276
|
+
"""Take a screenshot of a URL or current page.
|
|
277
|
+
|
|
76
278
|
Args:
|
|
77
|
-
url:
|
|
78
|
-
path: Optional path to save
|
|
279
|
+
url: URL to screenshot (optional - uses current page if not provided)
|
|
280
|
+
path: Optional path to save (auto-generates if empty)
|
|
79
281
|
width: Viewport width in pixels (default 1920)
|
|
80
282
|
height: Viewport height in pixels (default 1080)
|
|
81
283
|
full_page: If True, captures entire page height
|
|
82
|
-
|
|
284
|
+
|
|
83
285
|
Returns:
|
|
84
|
-
|
|
286
|
+
Path to saved screenshot
|
|
85
287
|
"""
|
|
86
288
|
if not PLAYWRIGHT_AVAILABLE:
|
|
87
289
|
return 'Browser tools not installed. Run: pip install playwright && playwright install chromium'
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
290
|
+
|
|
291
|
+
if not self.page:
|
|
292
|
+
return "Browser not open"
|
|
293
|
+
|
|
294
|
+
# Navigate if URL provided
|
|
295
|
+
if url:
|
|
296
|
+
self.go_to(url)
|
|
297
|
+
|
|
92
298
|
# Set viewport size
|
|
93
|
-
self.
|
|
94
|
-
|
|
299
|
+
self.page.set_viewport_size({"width": width, "height": height})
|
|
300
|
+
|
|
95
301
|
# Generate filename if needed
|
|
96
302
|
if not path:
|
|
97
|
-
# Ensure screenshots directory exists
|
|
98
303
|
SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
99
304
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
|
100
305
|
path = str(SCREENSHOTS_DIR / f'screenshot_{timestamp}.png')
|
|
101
|
-
elif not path.startswith('/'):
|
|
102
|
-
# If relative path given, save to screenshots dir
|
|
306
|
+
elif not path.startswith('/'):
|
|
103
307
|
SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
104
308
|
if not path.endswith(('.png', '.jpg', '.jpeg')):
|
|
105
309
|
path += '.png'
|
|
106
310
|
path = str(SCREENSHOTS_DIR / path)
|
|
107
311
|
elif not path.endswith(('.png', '.jpg', '.jpeg')):
|
|
108
|
-
# Absolute path without extension
|
|
109
312
|
path += '.png'
|
|
110
|
-
|
|
313
|
+
|
|
111
314
|
# Ensure directory exists
|
|
112
315
|
Path(path).parent.mkdir(parents=True, exist_ok=True)
|
|
113
|
-
|
|
316
|
+
|
|
114
317
|
# Take screenshot
|
|
115
|
-
self.
|
|
116
|
-
|
|
318
|
+
self.page.screenshot(path=path, full_page=full_page)
|
|
117
319
|
self._screenshots.append(path)
|
|
118
320
|
return f'Screenshot saved: {path}'
|
|
119
|
-
|
|
120
|
-
def
|
|
121
|
-
"""
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
"
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
321
|
+
|
|
322
|
+
def set_viewport(self, width: int, height: int) -> str:
|
|
323
|
+
"""Set the browser viewport size."""
|
|
324
|
+
if not self.page:
|
|
325
|
+
return "Browser not open"
|
|
326
|
+
self.page.set_viewport_size({"width": width, "height": height})
|
|
327
|
+
return f"Viewport set to {width}x{height}"
|
|
328
|
+
|
|
329
|
+
def find_forms(self) -> List[FormField]:
|
|
330
|
+
"""Find all form fields on the current page."""
|
|
331
|
+
if not self.page:
|
|
332
|
+
return []
|
|
333
|
+
|
|
334
|
+
fields_data = self.page.evaluate("""
|
|
335
|
+
() => {
|
|
336
|
+
const fields = [];
|
|
337
|
+
document.querySelectorAll('input, textarea, select').forEach(input => {
|
|
338
|
+
const label = input.labels?.[0]?.textContent ||
|
|
339
|
+
input.placeholder || input.name || input.id || 'Unknown';
|
|
340
|
+
fields.push({
|
|
341
|
+
name: input.name || input.id || label,
|
|
342
|
+
label: label.trim(),
|
|
343
|
+
type: input.type || input.tagName.toLowerCase(),
|
|
344
|
+
value: input.value || '',
|
|
345
|
+
required: input.required || false,
|
|
346
|
+
options: input.tagName === 'SELECT' ?
|
|
347
|
+
Array.from(input.options).map(o => o.text) : []
|
|
348
|
+
});
|
|
349
|
+
});
|
|
350
|
+
return fields;
|
|
351
|
+
}
|
|
352
|
+
""")
|
|
353
|
+
return [FormField(**field) for field in fields_data]
|
|
354
|
+
|
|
355
|
+
def fill_form(self, data: Dict[str, str]) -> str:
|
|
356
|
+
"""Fill multiple form fields at once."""
|
|
357
|
+
if not self.page:
|
|
358
|
+
return "Browser not open"
|
|
359
|
+
|
|
360
|
+
results = []
|
|
361
|
+
for field_name, value in data.items():
|
|
362
|
+
result = self.type_text(field_name, value)
|
|
363
|
+
results.append(f"{field_name}: {result}")
|
|
364
|
+
return "\n".join(results)
|
|
365
|
+
|
|
366
|
+
def submit_form(self) -> str:
|
|
367
|
+
"""Submit the current form."""
|
|
368
|
+
if not self.page:
|
|
369
|
+
return "Browser not open"
|
|
370
|
+
|
|
371
|
+
for selector in [
|
|
372
|
+
"button[type='submit']",
|
|
373
|
+
"input[type='submit']",
|
|
374
|
+
"button:has-text('Submit')",
|
|
375
|
+
"button:has-text('Send')",
|
|
376
|
+
"button:has-text('Continue')",
|
|
377
|
+
"button:has-text('Next')"
|
|
378
|
+
]:
|
|
379
|
+
if self.page.locator(selector).count() > 0:
|
|
380
|
+
self.page.click(selector)
|
|
381
|
+
return "Form submitted"
|
|
382
|
+
|
|
383
|
+
return "Could not find submit button"
|
|
384
|
+
|
|
385
|
+
def select_option(self, field_description: str, option: str) -> str:
|
|
386
|
+
"""Select an option from a dropdown."""
|
|
387
|
+
if not self.page:
|
|
388
|
+
return "Browser not open"
|
|
389
|
+
|
|
390
|
+
selector = self.find_element_by_description(field_description)
|
|
391
|
+
if selector.startswith("Could not"):
|
|
392
|
+
return selector
|
|
393
|
+
|
|
394
|
+
self.page.select_option(selector, label=option)
|
|
395
|
+
return f"Selected '{option}' in {field_description}"
|
|
396
|
+
|
|
397
|
+
def check_checkbox(self, description: str, checked: bool = True) -> str:
|
|
398
|
+
"""Check or uncheck a checkbox."""
|
|
399
|
+
if not self.page:
|
|
400
|
+
return "Browser not open"
|
|
401
|
+
|
|
402
|
+
selector = self.find_element_by_description(description)
|
|
403
|
+
if selector.startswith("Could not"):
|
|
404
|
+
return selector
|
|
405
|
+
|
|
406
|
+
if checked:
|
|
407
|
+
self.page.check(selector)
|
|
408
|
+
return f"Checked {description}"
|
|
409
|
+
else:
|
|
410
|
+
self.page.uncheck(selector)
|
|
411
|
+
return f"Unchecked {description}"
|
|
412
|
+
|
|
413
|
+
def wait_for_element(self, description: str, timeout: int = 30) -> str:
|
|
414
|
+
"""Wait for an element to appear."""
|
|
415
|
+
if not self.page:
|
|
416
|
+
return "Browser not open"
|
|
417
|
+
|
|
418
|
+
selector = self.find_element_by_description(description)
|
|
419
|
+
if selector.startswith("Could not"):
|
|
420
|
+
self.page.wait_for_selector(f"text='{description}'", timeout=timeout * 1000)
|
|
421
|
+
return f"Found text: '{description}'"
|
|
422
|
+
|
|
423
|
+
self.page.wait_for_selector(selector, timeout=timeout * 1000)
|
|
424
|
+
return f"Element appeared: {description}"
|
|
425
|
+
|
|
426
|
+
def wait_for_text(self, text: str, timeout: int = 30) -> str:
|
|
427
|
+
"""Wait for specific text to appear on the page."""
|
|
428
|
+
if not self.page:
|
|
429
|
+
return "Browser not open"
|
|
430
|
+
|
|
431
|
+
self.page.wait_for_selector(f"text='{text}'", timeout=timeout * 1000)
|
|
432
|
+
return f"Found text: '{text}'"
|
|
433
|
+
|
|
152
434
|
def wait(self, seconds: float) -> str:
|
|
153
|
-
"""Wait for a specified number of seconds.
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
Returns:
|
|
159
|
-
Success message
|
|
160
|
-
"""
|
|
161
|
-
if not PLAYWRIGHT_AVAILABLE:
|
|
162
|
-
return 'Browser tools not installed. Run: pip install playwright && playwright install chromium'
|
|
163
|
-
self._page.wait_for_timeout(seconds * 1000) # Convert to milliseconds
|
|
435
|
+
"""Wait for a specified number of seconds."""
|
|
436
|
+
if not self.page:
|
|
437
|
+
return "Browser not open"
|
|
438
|
+
self.page.wait_for_timeout(seconds * 1000)
|
|
164
439
|
return f"Waited for {seconds} seconds"
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
Execution trace showing what happened
|
|
440
|
+
|
|
441
|
+
def scroll(self, times: int = 5, description: str = "the main content area") -> str:
|
|
442
|
+
"""Universal scroll with AI strategy and fallback.
|
|
443
|
+
|
|
444
|
+
Tries: AI-generated → Element scroll → Page scroll
|
|
445
|
+
Verifies success with screenshot comparison.
|
|
172
446
|
"""
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
447
|
+
from . import scroll
|
|
448
|
+
return scroll.scroll(self.page, self.take_screenshot, times, description)
|
|
449
|
+
|
|
450
|
+
def wait_for_manual_login(self, site_name: str = "the website") -> str:
|
|
451
|
+
"""Pause automation for user to login manually.
|
|
452
|
+
|
|
453
|
+
Useful for sites with 2FA or CAPTCHA.
|
|
454
|
+
|
|
180
455
|
Args:
|
|
181
|
-
|
|
182
|
-
|
|
456
|
+
site_name: Name of the site (e.g., "Gmail")
|
|
457
|
+
|
|
183
458
|
Returns:
|
|
184
|
-
|
|
459
|
+
Confirmation when user is ready to continue
|
|
185
460
|
"""
|
|
186
|
-
if not
|
|
187
|
-
return
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
result = llm_do(
|
|
197
|
-
f"Find selector for: {description}\n\nHTML:\n{html_content[:5000]}",
|
|
198
|
-
output=ElementSelector,
|
|
199
|
-
system_prompt="Return the best selector to click the element. Use method='text' for button text, method='css' for CSS selectors."
|
|
200
|
-
)
|
|
201
|
-
|
|
202
|
-
if result.method == "text":
|
|
203
|
-
self._page.get_by_text(result.selector).click()
|
|
204
|
-
else:
|
|
205
|
-
self._page.locator(result.selector).click()
|
|
206
|
-
|
|
207
|
-
self._page.wait_for_timeout(1000)
|
|
208
|
-
return f"Clicked: {result.selector}"
|
|
461
|
+
if not self.page:
|
|
462
|
+
return "Browser not open"
|
|
463
|
+
|
|
464
|
+
print(f"\n{'='*60}")
|
|
465
|
+
print(f" MANUAL LOGIN REQUIRED")
|
|
466
|
+
print(f"{'='*60}")
|
|
467
|
+
print(f"Please login to {site_name} in the browser window.")
|
|
468
|
+
print(f"Once you're logged in and ready to continue:")
|
|
469
|
+
print(f" Type 'yes' or 'Y' and press Enter")
|
|
470
|
+
print(f"{'='*60}\n")
|
|
209
471
|
|
|
472
|
+
while True:
|
|
473
|
+
response = input("Ready to continue? (yes/Y): ").strip().lower()
|
|
474
|
+
if response in ['yes', 'y']:
|
|
475
|
+
print("Continuing automation...\n")
|
|
476
|
+
return f"User confirmed login to {site_name} - continuing"
|
|
477
|
+
else:
|
|
478
|
+
print("Please type 'yes' or 'Y' when ready.")
|
|
210
479
|
|
|
480
|
+
def extract_data(self, selector: str) -> List[str]:
|
|
481
|
+
"""Extract text from elements matching a selector."""
|
|
482
|
+
if not self.page:
|
|
483
|
+
return []
|
|
211
484
|
|
|
485
|
+
elements = self.page.locator(selector)
|
|
486
|
+
count = elements.count()
|
|
487
|
+
return [elements.nth(i).inner_text() for i in range(count)]
|
|
488
|
+
|
|
489
|
+
def close(self) -> str:
|
|
490
|
+
"""Close the browser."""
|
|
491
|
+
if self.page:
|
|
492
|
+
self.page.close()
|
|
493
|
+
if self.browser:
|
|
494
|
+
self.browser.close()
|
|
495
|
+
if self.playwright:
|
|
496
|
+
self.playwright.stop()
|
|
497
|
+
|
|
498
|
+
self.page = None
|
|
499
|
+
self.browser = None
|
|
500
|
+
self.playwright = None
|
|
501
|
+
return "Browser closed"
|
|
212
502
|
|
|
213
503
|
|
|
214
504
|
def execute_browser_command(command: str) -> str:
|
|
@@ -216,11 +506,8 @@ def execute_browser_command(command: str) -> str:
|
|
|
216
506
|
|
|
217
507
|
Returns the agent's natural language response directly.
|
|
218
508
|
"""
|
|
219
|
-
# Framework auto-loads local .env, but CLI commands need global fallback
|
|
220
|
-
# Check for API key in environment first
|
|
221
509
|
api_key = os.getenv('OPENONION_API_KEY')
|
|
222
510
|
|
|
223
|
-
# If not found, try loading from global config
|
|
224
511
|
if not api_key:
|
|
225
512
|
global_env = Path.home() / ".co" / "keys.env"
|
|
226
513
|
if global_env.exists():
|
|
@@ -228,7 +515,7 @@ def execute_browser_command(command: str) -> str:
|
|
|
228
515
|
api_key = os.getenv('OPENONION_API_KEY')
|
|
229
516
|
|
|
230
517
|
if not api_key:
|
|
231
|
-
return '
|
|
518
|
+
return 'Browser agent requires authentication. Run: co auth'
|
|
232
519
|
|
|
233
520
|
browser = BrowserAutomation()
|
|
234
521
|
agent = Agent(
|
|
@@ -237,7 +524,6 @@ def execute_browser_command(command: str) -> str:
|
|
|
237
524
|
api_key=api_key,
|
|
238
525
|
system_prompt=PROMPT_PATH,
|
|
239
526
|
tools=[browser],
|
|
240
|
-
max_iterations=
|
|
527
|
+
max_iterations=20
|
|
241
528
|
)
|
|
242
529
|
return agent.input(command)
|
|
243
|
-
|