connectonion 0.5.10__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- connectonion/__init__.py +17 -16
- connectonion/cli/browser_agent/browser.py +488 -145
- connectonion/cli/browser_agent/scroll_strategies.py +276 -0
- connectonion/cli/commands/copy_commands.py +24 -1
- connectonion/cli/commands/deploy_commands.py +15 -0
- connectonion/cli/commands/eval_commands.py +286 -0
- connectonion/cli/commands/project_cmd_lib.py +1 -1
- connectonion/cli/main.py +11 -0
- connectonion/console.py +5 -5
- connectonion/core/__init__.py +53 -0
- connectonion/{agent.py → core/agent.py} +18 -15
- connectonion/{llm.py → core/llm.py} +9 -19
- connectonion/{tool_executor.py → core/tool_executor.py} +3 -2
- connectonion/{tool_factory.py → core/tool_factory.py} +3 -1
- connectonion/debug/__init__.py +51 -0
- connectonion/{interactive_debugger.py → debug/auto_debug.py} +7 -7
- connectonion/{auto_debug_exception.py → debug/auto_debug_exception.py} +3 -3
- connectonion/{debugger_ui.py → debug/auto_debug_ui.py} +1 -1
- connectonion/{debug_explainer → debug/debug_explainer}/explain_agent.py +1 -1
- connectonion/{debug_explainer → debug/debug_explainer}/explain_context.py +1 -1
- connectonion/{execution_analyzer → debug/execution_analyzer}/execution_analysis.py +1 -1
- connectonion/debug/runtime_inspector/__init__.py +13 -0
- connectonion/{debug_agent → debug/runtime_inspector}/agent.py +1 -1
- connectonion/{xray.py → debug/xray.py} +1 -1
- connectonion/llm_do.py +1 -1
- connectonion/logger.py +305 -135
- connectonion/network/__init__.py +37 -0
- connectonion/{announce.py → network/announce.py} +1 -1
- connectonion/{asgi.py → network/asgi.py} +122 -2
- connectonion/{connect.py → network/connect.py} +1 -1
- connectonion/network/connection.py +123 -0
- connectonion/{host.py → network/host.py} +31 -11
- connectonion/{trust.py → network/trust.py} +1 -1
- connectonion/tui/__init__.py +22 -0
- connectonion/tui/chat.py +647 -0
- connectonion/useful_events_handlers/reflect.py +2 -2
- connectonion/useful_plugins/__init__.py +4 -3
- connectonion/useful_plugins/calendar_plugin.py +2 -2
- connectonion/useful_plugins/eval.py +2 -2
- connectonion/useful_plugins/gmail_plugin.py +2 -2
- connectonion/useful_plugins/image_result_formatter.py +2 -2
- connectonion/useful_plugins/re_act.py +2 -2
- connectonion/useful_plugins/shell_approval.py +2 -2
- connectonion/useful_plugins/ui_stream.py +164 -0
- {connectonion-0.5.10.dist-info → connectonion-0.6.1.dist-info}/METADATA +4 -3
- connectonion-0.6.1.dist-info/RECORD +123 -0
- connectonion/debug_agent/__init__.py +0 -13
- connectonion-0.5.10.dist-info/RECORD +0 -115
- /connectonion/{events.py → core/events.py} +0 -0
- /connectonion/{tool_registry.py → core/tool_registry.py} +0 -0
- /connectonion/{usage.py → core/usage.py} +0 -0
- /connectonion/{debug_explainer → debug/debug_explainer}/__init__.py +0 -0
- /connectonion/{debug_explainer → debug/debug_explainer}/explainer_prompt.md +0 -0
- /connectonion/{debug_explainer → debug/debug_explainer}/root_cause_analysis_prompt.md +0 -0
- /connectonion/{decorators.py → debug/decorators.py} +0 -0
- /connectonion/{execution_analyzer → debug/execution_analyzer}/__init__.py +0 -0
- /connectonion/{execution_analyzer → debug/execution_analyzer}/execution_analysis_prompt.md +0 -0
- /connectonion/{debug_agent → debug/runtime_inspector}/prompts/debug_assistant.md +0 -0
- /connectonion/{debug_agent → debug/runtime_inspector}/runtime_inspector.py +0 -0
- /connectonion/{relay.py → network/relay.py} +0 -0
- /connectonion/{static → network/static}/docs.html +0 -0
- /connectonion/{trust_agents.py → network/trust_agents.py} +0 -0
- /connectonion/{trust_functions.py → network/trust_functions.py} +0 -0
- {connectonion-0.5.10.dist-info → connectonion-0.6.1.dist-info}/WHEEL +0 -0
- {connectonion-0.5.10.dist-info → connectonion-0.6.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,22 +1,32 @@
|
|
|
1
1
|
"""Browser Agent for CLI - Natural language browser automation.
|
|
2
2
|
|
|
3
3
|
This module provides a browser automation agent that understands natural language
|
|
4
|
-
requests for
|
|
4
|
+
requests for browser operations via the ConnectOnion CLI.
|
|
5
|
+
|
|
6
|
+
Features:
|
|
7
|
+
- Chrome profile support for persistent sessions (cookies, logins)
|
|
8
|
+
- AI-powered element finding using natural language
|
|
9
|
+
- Form handling: find, fill, submit
|
|
10
|
+
- Screenshot with viewport presets
|
|
11
|
+
- Universal scroll with AI strategy selection
|
|
12
|
+
- Manual login pause for 2FA/CAPTCHA
|
|
5
13
|
"""
|
|
6
14
|
|
|
7
15
|
import os
|
|
16
|
+
import base64
|
|
8
17
|
from pathlib import Path
|
|
9
18
|
from datetime import datetime
|
|
10
|
-
from
|
|
19
|
+
from typing import Optional, List, Dict, Any
|
|
20
|
+
from connectonion import Agent, llm_do
|
|
11
21
|
from dotenv import load_dotenv
|
|
12
|
-
from pydantic import BaseModel
|
|
22
|
+
from pydantic import BaseModel, Field
|
|
13
23
|
|
|
14
|
-
# Default screenshots directory
|
|
24
|
+
# Default screenshots directory
|
|
15
25
|
SCREENSHOTS_DIR = Path.cwd() / ".tmp"
|
|
16
26
|
|
|
17
27
|
# Check Playwright availability
|
|
18
28
|
try:
|
|
19
|
-
from playwright.sync_api import sync_playwright
|
|
29
|
+
from playwright.sync_api import sync_playwright, Page, Browser, Playwright
|
|
20
30
|
PLAYWRIGHT_AVAILABLE = True
|
|
21
31
|
except ImportError:
|
|
22
32
|
PLAYWRIGHT_AVAILABLE = False
|
|
@@ -25,190 +35,527 @@ except ImportError:
|
|
|
25
35
|
PROMPT_PATH = Path(__file__).parent / "prompt.md"
|
|
26
36
|
|
|
27
37
|
|
|
38
|
+
class FormField(BaseModel):
|
|
39
|
+
"""A form field on a web page."""
|
|
40
|
+
name: str = Field(..., description="Field name or identifier")
|
|
41
|
+
label: str = Field(..., description="User-facing label")
|
|
42
|
+
type: str = Field(..., description="Input type (text, email, select, etc.)")
|
|
43
|
+
value: Optional[str] = Field(None, description="Current value")
|
|
44
|
+
required: bool = Field(False, description="Is this field required?")
|
|
45
|
+
options: List[str] = Field(default_factory=list, description="Available options for select/radio")
|
|
46
|
+
|
|
47
|
+
|
|
28
48
|
class BrowserAutomation:
|
|
29
|
-
"""Browser automation
|
|
30
|
-
|
|
31
|
-
|
|
49
|
+
"""Browser automation with natural language support.
|
|
50
|
+
|
|
51
|
+
Simple interface for complex web interactions.
|
|
52
|
+
Auto-initializes browser on creation for immediate use.
|
|
53
|
+
Supports Chrome profile for persistent sessions.
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
def __init__(self, use_chrome_profile: bool = False, headless: bool = True):
|
|
57
|
+
"""Initialize browser automation.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
use_chrome_profile: If True, uses your Chrome cookies/sessions.
|
|
61
|
+
Chrome must be closed before running.
|
|
62
|
+
headless: If True, browser runs without visible window (default True).
|
|
63
|
+
"""
|
|
64
|
+
self.playwright: Optional[Playwright] = None
|
|
65
|
+
self.browser: Optional[Browser] = None
|
|
66
|
+
self.page: Optional[Page] = None
|
|
67
|
+
self.current_url: str = ""
|
|
68
|
+
self.form_data: Dict[str, Any] = {}
|
|
69
|
+
self.use_chrome_profile = use_chrome_profile
|
|
32
70
|
self._screenshots = []
|
|
33
|
-
self.
|
|
34
|
-
|
|
35
|
-
self._page = None
|
|
71
|
+
self._headless = headless
|
|
72
|
+
# Auto-initialize browser so it's ready immediately
|
|
36
73
|
self._initialize_browser()
|
|
37
|
-
|
|
74
|
+
|
|
38
75
|
def _initialize_browser(self):
|
|
39
|
-
"""Initialize the browser instance."""
|
|
76
|
+
"""Initialize the browser instance on startup."""
|
|
40
77
|
if not PLAYWRIGHT_AVAILABLE:
|
|
41
78
|
return
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
79
|
+
self.open_browser(headless=self._headless)
|
|
80
|
+
|
|
81
|
+
def open_browser(self, headless: bool = True) -> str:
    """Open a new browser window.

    Starts the Playwright driver and launches Chromium. When
    ``self.use_chrome_profile`` is set, the user's Chrome profile is copied
    once into ``./chromium_automation_profile`` (caches excluded) and
    Chromium is launched as a persistent context on that copy; otherwise a
    fresh, profile-less browser is launched.

    Args:
        headless: If True, browser runs without visible window.

    Returns:
        A status message: an install hint when Playwright is missing,
        "Browser already open" when a browser is already attached, or a
        success message once the browser and first page are ready.

    Raises:
        Whatever Playwright or the profile copy raises. In that case the
        driver started by this call is stopped and the instance is reset,
        so a later call starts from a clean state.

    Note: If use_chrome_profile=True, Chrome must be completely closed.
    """
    if not PLAYWRIGHT_AVAILABLE:
        return "Browser tools not installed. Run: pip install playwright && playwright install chromium"

    if self.browser:
        return "Browser already open"

    self.playwright = sync_playwright().start()
    try:
        if self.use_chrome_profile:
            # Use Chromium with Chrome profile copy
            chromium_profile = Path.cwd() / "chromium_automation_profile"

            if not chromium_profile.exists():
                import shutil
                home = Path.home()
                if os.name == 'nt':  # Windows
                    source_profile = home / "AppData/Local/Google/Chrome/User Data"
                elif os.uname().sysname == 'Darwin':  # macOS
                    source_profile = home / "Library/Application Support/Google/Chrome"
                else:  # Linux
                    source_profile = home / ".config/google-chrome"

                if source_profile.exists():
                    shutil.copytree(
                        source_profile,
                        chromium_profile,
                        ignore=shutil.ignore_patterns('*Cache*', '*cache*', 'Service Worker', 'ShaderCache'),
                        dirs_exist_ok=True
                    )

            self.browser = self.playwright.chromium.launch_persistent_context(
                str(chromium_profile),
                headless=headless,
                args=['--disable-blink-features=AutomationControlled'],
                ignore_default_args=['--enable-automation'],
                timeout=120000,
            )
            # A persistent context may already have a page open; reuse it.
            self.page = self.browser.pages[0] if self.browser.pages else self.browser.new_page()
            self.page.add_init_script("""
                Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
            """)
            return f"Browser opened with Chrome profile: {chromium_profile}"

        self.browser = self.playwright.chromium.launch(headless=headless)
        self.page = self.browser.new_page()
        return "Browser opened successfully"
    except Exception:
        # The driver was started above; if anything after that fails, stop
        # it and clear the handles so the failure does not leave a running
        # driver behind or a half-initialized instance.
        self.playwright.stop()
        self.playwright = None
        self.browser = None
        self.page = None
        raise
|
|
135
|
+
|
|
136
|
+
def go_to(self, url: str) -> str:
    """Navigate to a URL.

    Opens the browser first if no page is attached. A URL without a scheme
    gets ``https://`` when it contains a dot and ``http://`` otherwise
    (e.g. ``localhost``).

    Args:
        url: Address to open, with or without ``http(s)://``.

    Returns:
        "Navigated to <final url>", or the message from ``open_browser``
        when no page could be created.
    """
    if not self.page:
        # Reopen with the headless setting chosen at construction time
        # rather than open_browser's own default.
        status = self.open_browser(headless=self._headless)
        if not self.page:
            # open_browser returned without attaching a page; report its
            # message instead of failing on a None page below.
            return status

    if not url.startswith(('http://', 'https://')):
        url = f'https://{url}' if '.' in url else f'http://{url}'

    self.page.goto(url, wait_until='networkidle', timeout=30000)
    # Extra settle time after the network goes idle.
    self.page.wait_for_timeout(2000)
    self.current_url = self.page.url
    return f"Navigated to {self.current_url}"
|
|
148
|
+
|
|
149
|
+
def find_element_by_description(self, description: str) -> str:
|
|
150
|
+
"""Find element using natural language description.
|
|
151
|
+
|
|
152
|
+
Uses AI to analyze HTML and find the best matching element.
|
|
153
|
+
|
|
59
154
|
Args:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
155
|
+
description: e.g., "the submit button", "email input field"
|
|
156
|
+
|
|
63
157
|
Returns:
|
|
64
|
-
|
|
158
|
+
CSS selector for the element, or error message
|
|
65
159
|
"""
|
|
66
|
-
if not
|
|
67
|
-
return
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
160
|
+
if not self.page:
|
|
161
|
+
return "Browser not open"
|
|
162
|
+
|
|
163
|
+
html = self.page.content()
|
|
164
|
+
|
|
165
|
+
class ElementSelector(BaseModel):
|
|
166
|
+
selector: str = Field(..., description="CSS selector for the element")
|
|
167
|
+
confidence: float = Field(..., description="Confidence score 0-1")
|
|
168
|
+
explanation: str = Field(..., description="Why this element matches")
|
|
169
|
+
|
|
170
|
+
result = llm_do(
|
|
171
|
+
f"""Analyze this HTML and find the CSS selector for: "{description}"
|
|
172
|
+
|
|
173
|
+
HTML (first 15000 chars): {html[:15000]}
|
|
174
|
+
|
|
175
|
+
Return the most specific CSS selector that uniquely identifies this element.
|
|
176
|
+
""",
|
|
177
|
+
output=ElementSelector,
|
|
178
|
+
model="gpt-4o",
|
|
179
|
+
temperature=0.1
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
if self.page.locator(result.selector).count() > 0:
|
|
183
|
+
return result.selector
|
|
184
|
+
else:
|
|
185
|
+
return f"Found selector {result.selector} but element not on page"
|
|
186
|
+
|
|
187
|
+
def click(self, description: str) -> str:
    """Click on an element using natural language description.

    The description is first resolved to a CSS selector via
    ``find_element_by_description``. If that lookup reports a failure, the
    description is tried as exact visible text instead.

    Args:
        description: e.g., "the blue submit button", "link to contact page"

    Returns:
        A confirmation message, "Browser not open", or the lookup's failure
        message when neither strategy found an element.
    """
    if not self.page:
        return "Browser not open"

    selector = self.find_element_by_description(description)

    if selector.startswith(("Could not", "Found selector")):
        # The lookup returned a failure message, not a selector. Fall back
        # to exact text; get_by_text takes the raw string, so quotes inside
        # the description cannot break the selector syntax.
        by_text = self.page.get_by_text(description, exact=True)
        if by_text.count() > 0:
            # .first: click the first match even if several elements match.
            by_text.first.click()
            return f"Clicked on '{description}' (by text)"
        return selector

    self.page.click(selector)
    return f"Clicked on '{description}'"
|
|
206
|
+
|
|
207
|
+
def type_text(self, field_description: str, text: str) -> str:
|
|
208
|
+
"""Type text into a form field.
|
|
209
|
+
|
|
210
|
+
Args:
|
|
211
|
+
field_description: e.g., "email field", "password input"
|
|
212
|
+
text: The text to type
|
|
213
|
+
"""
|
|
214
|
+
if not self.page:
|
|
215
|
+
return "Browser not open"
|
|
216
|
+
|
|
217
|
+
selector = self.find_element_by_description(field_description)
|
|
218
|
+
|
|
219
|
+
if selector.startswith("Could not") or selector.startswith("Found selector"):
|
|
220
|
+
for fallback in [
|
|
221
|
+
f"input[placeholder*='{field_description}' i]",
|
|
222
|
+
f"[aria-label*='{field_description}' i]",
|
|
223
|
+
f"input[name*='{field_description}' i]"
|
|
224
|
+
]:
|
|
225
|
+
if self.page.locator(fallback).count() > 0:
|
|
226
|
+
self.page.fill(fallback, text)
|
|
227
|
+
self.form_data[field_description] = text
|
|
228
|
+
return f"Typed into {field_description}"
|
|
229
|
+
return f"Could not find field '{field_description}'"
|
|
230
|
+
|
|
231
|
+
self.page.fill(selector, text)
|
|
232
|
+
self.form_data[field_description] = text
|
|
233
|
+
return f"Typed into {field_description}"
|
|
234
|
+
|
|
235
|
+
def get_text(self) -> str:
|
|
236
|
+
"""Get all visible text from the page."""
|
|
237
|
+
if not self.page:
|
|
238
|
+
return "Browser not open"
|
|
239
|
+
return self.page.inner_text("body")
|
|
240
|
+
|
|
241
|
+
def get_current_url(self) -> str:
|
|
242
|
+
"""Get the current page URL."""
|
|
243
|
+
if not self.page:
|
|
244
|
+
return "Browser not open"
|
|
245
|
+
return self.page.url
|
|
246
|
+
|
|
247
|
+
def get_current_page_html(self) -> str:
|
|
248
|
+
"""Get the HTML content of the current page."""
|
|
249
|
+
if not self.page:
|
|
250
|
+
return "Browser not open"
|
|
251
|
+
return self.page.content()
|
|
252
|
+
|
|
253
|
+
def take_screenshot(self, url: str = None, path: str = "",
|
|
72
254
|
width: int = 1920, height: int = 1080,
|
|
73
255
|
full_page: bool = False) -> str:
|
|
74
|
-
"""Take a screenshot of
|
|
75
|
-
|
|
256
|
+
"""Take a screenshot of a URL or current page.
|
|
257
|
+
|
|
76
258
|
Args:
|
|
77
|
-
url:
|
|
78
|
-
path: Optional path to save
|
|
259
|
+
url: URL to screenshot (optional - uses current page if not provided)
|
|
260
|
+
path: Optional path to save (auto-generates if empty)
|
|
79
261
|
width: Viewport width in pixels (default 1920)
|
|
80
262
|
height: Viewport height in pixels (default 1080)
|
|
81
263
|
full_page: If True, captures entire page height
|
|
82
|
-
|
|
264
|
+
|
|
83
265
|
Returns:
|
|
84
|
-
|
|
266
|
+
Path to saved screenshot
|
|
85
267
|
"""
|
|
86
268
|
if not PLAYWRIGHT_AVAILABLE:
|
|
87
269
|
return 'Browser tools not installed. Run: pip install playwright && playwright install chromium'
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
270
|
+
|
|
271
|
+
if not self.page:
|
|
272
|
+
return "Browser not open"
|
|
273
|
+
|
|
274
|
+
# Navigate if URL provided
|
|
275
|
+
if url:
|
|
276
|
+
self.go_to(url)
|
|
277
|
+
|
|
92
278
|
# Set viewport size
|
|
93
|
-
self.
|
|
94
|
-
|
|
279
|
+
self.page.set_viewport_size({"width": width, "height": height})
|
|
280
|
+
|
|
95
281
|
# Generate filename if needed
|
|
96
282
|
if not path:
|
|
97
|
-
# Ensure screenshots directory exists
|
|
98
283
|
SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
99
284
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
|
100
285
|
path = str(SCREENSHOTS_DIR / f'screenshot_{timestamp}.png')
|
|
101
|
-
elif not path.startswith('/'):
|
|
102
|
-
# If relative path given, save to screenshots dir
|
|
286
|
+
elif not path.startswith('/'):
|
|
103
287
|
SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
104
288
|
if not path.endswith(('.png', '.jpg', '.jpeg')):
|
|
105
289
|
path += '.png'
|
|
106
290
|
path = str(SCREENSHOTS_DIR / path)
|
|
107
291
|
elif not path.endswith(('.png', '.jpg', '.jpeg')):
|
|
108
|
-
# Absolute path without extension
|
|
109
292
|
path += '.png'
|
|
110
|
-
|
|
293
|
+
|
|
111
294
|
# Ensure directory exists
|
|
112
295
|
Path(path).parent.mkdir(parents=True, exist_ok=True)
|
|
113
|
-
|
|
296
|
+
|
|
114
297
|
# Take screenshot
|
|
115
|
-
self.
|
|
116
|
-
|
|
298
|
+
self.page.screenshot(path=path, full_page=full_page)
|
|
117
299
|
self._screenshots.append(path)
|
|
118
300
|
return f'Screenshot saved: {path}'
|
|
119
|
-
|
|
120
|
-
def
|
|
121
|
-
"""
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
"
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
301
|
+
|
|
302
|
+
def set_viewport(self, width: int, height: int) -> str:
|
|
303
|
+
"""Set the browser viewport size."""
|
|
304
|
+
if not self.page:
|
|
305
|
+
return "Browser not open"
|
|
306
|
+
self.page.set_viewport_size({"width": width, "height": height})
|
|
307
|
+
return f"Viewport set to {width}x{height}"
|
|
308
|
+
|
|
309
|
+
def screenshot_mobile(self, url: str = None) -> str:
    """Take screenshot with iPhone viewport (390x844).

    Args:
        url: Optional URL to navigate to first; the current page is used
            when omitted.

    Returns:
        The message returned by ``take_screenshot``.
    """
    if url:
        self.go_to(url)
    # take_screenshot applies its own width/height to the viewport (default
    # 1920x1080), so the preset has to be passed through; setting the
    # viewport beforehand would be overwritten.
    return self.take_screenshot(width=390, height=844)
|
|
315
|
+
|
|
316
|
+
def screenshot_tablet(self, url: str = None) -> str:
    """Take screenshot with iPad viewport (768x1024).

    Args:
        url: Optional URL to navigate to first; the current page is used
            when omitted.

    Returns:
        The message returned by ``take_screenshot``.
    """
    if url:
        self.go_to(url)
    # take_screenshot applies its own width/height to the viewport (default
    # 1920x1080), so the preset has to be passed through; setting the
    # viewport beforehand would be overwritten.
    return self.take_screenshot(width=768, height=1024)
|
|
322
|
+
|
|
323
|
+
def screenshot_desktop(self, url: str = None) -> str:
|
|
324
|
+
"""Take screenshot with desktop viewport (1920x1080)."""
|
|
325
|
+
if url:
|
|
326
|
+
self.go_to(url)
|
|
327
|
+
self.set_viewport(1920, 1080)
|
|
328
|
+
return self.take_screenshot()
|
|
329
|
+
|
|
330
|
+
def find_forms(self) -> List[FormField]:
|
|
331
|
+
"""Find all form fields on the current page."""
|
|
332
|
+
if not self.page:
|
|
333
|
+
return []
|
|
334
|
+
|
|
335
|
+
fields_data = self.page.evaluate("""
|
|
336
|
+
() => {
|
|
337
|
+
const fields = [];
|
|
338
|
+
document.querySelectorAll('input, textarea, select').forEach(input => {
|
|
339
|
+
const label = input.labels?.[0]?.textContent ||
|
|
340
|
+
input.placeholder || input.name || input.id || 'Unknown';
|
|
341
|
+
fields.push({
|
|
342
|
+
name: input.name || input.id || label,
|
|
343
|
+
label: label.trim(),
|
|
344
|
+
type: input.type || input.tagName.toLowerCase(),
|
|
345
|
+
value: input.value || '',
|
|
346
|
+
required: input.required || false,
|
|
347
|
+
options: input.tagName === 'SELECT' ?
|
|
348
|
+
Array.from(input.options).map(o => o.text) : []
|
|
349
|
+
});
|
|
350
|
+
});
|
|
351
|
+
return fields;
|
|
352
|
+
}
|
|
353
|
+
""")
|
|
354
|
+
return [FormField(**field) for field in fields_data]
|
|
355
|
+
|
|
356
|
+
def fill_form(self, data: Dict[str, str]) -> str:
|
|
357
|
+
"""Fill multiple form fields at once."""
|
|
358
|
+
if not self.page:
|
|
359
|
+
return "Browser not open"
|
|
360
|
+
|
|
361
|
+
results = []
|
|
362
|
+
for field_name, value in data.items():
|
|
363
|
+
result = self.type_text(field_name, value)
|
|
364
|
+
results.append(f"{field_name}: {result}")
|
|
365
|
+
return "\n".join(results)
|
|
366
|
+
|
|
367
|
+
def submit_form(self) -> str:
|
|
368
|
+
"""Submit the current form."""
|
|
369
|
+
if not self.page:
|
|
370
|
+
return "Browser not open"
|
|
371
|
+
|
|
372
|
+
for selector in [
|
|
373
|
+
"button[type='submit']",
|
|
374
|
+
"input[type='submit']",
|
|
375
|
+
"button:has-text('Submit')",
|
|
376
|
+
"button:has-text('Send')",
|
|
377
|
+
"button:has-text('Continue')",
|
|
378
|
+
"button:has-text('Next')"
|
|
379
|
+
]:
|
|
380
|
+
if self.page.locator(selector).count() > 0:
|
|
381
|
+
self.page.click(selector)
|
|
382
|
+
return "Form submitted"
|
|
383
|
+
|
|
384
|
+
return "Could not find submit button"
|
|
385
|
+
|
|
386
|
+
def select_option(self, field_description: str, option: str) -> str:
    """Select an option from a dropdown.

    Args:
        field_description: Natural-language description of the dropdown.
        option: Visible label of the option to select.

    Returns:
        A confirmation message, "Browser not open", or the failure message
        from ``find_element_by_description`` when no element was found.
    """
    if not self.page:
        return "Browser not open"

    selector = self.find_element_by_description(field_description)
    # The lookup reports failure as a sentence ("Found selector ... but
    # element not on page"), not a selector; never hand that to Playwright.
    if selector.startswith(("Could not", "Found selector")):
        return selector

    self.page.select_option(selector, label=option)
    return f"Selected '{option}' in {field_description}"
|
|
397
|
+
|
|
398
|
+
def check_checkbox(self, description: str, checked: bool = True) -> str:
    """Check or uncheck a checkbox.

    Args:
        description: Natural-language description of the checkbox.
        checked: True to check it (default), False to uncheck it.

    Returns:
        A confirmation message, "Browser not open", or the failure message
        from ``find_element_by_description`` when no element was found.
    """
    if not self.page:
        return "Browser not open"

    selector = self.find_element_by_description(description)
    # The lookup reports failure as a sentence ("Found selector ... but
    # element not on page"), not a selector; never hand that to Playwright.
    if selector.startswith(("Could not", "Found selector")):
        return selector

    if checked:
        self.page.check(selector)
        return f"Checked {description}"

    self.page.uncheck(selector)
    return f"Unchecked {description}"
|
|
413
|
+
|
|
414
|
+
def wait_for_element(self, description: str, timeout: int = 30) -> str:
    """Wait for an element to appear.

    Args:
        description: Natural-language description of the element.
        timeout: Maximum time to wait, in seconds.

    Returns:
        "Element appeared: <description>" when a selector was awaited,
        "Found text: '<description>'" when the text fallback was used, or
        "Browser not open".
    """
    if not self.page:
        return "Browser not open"

    selector = self.find_element_by_description(description)

    # find_element_by_description returns
    # "Found selector <sel> but element not on page" when its proposed
    # selector has no match yet. That is the normal situation when waiting,
    # so recover <sel> and wait for it rather than passing the whole
    # sentence to Playwright as a selector.
    prefix = "Found selector "
    suffix = " but element not on page"
    if selector.startswith(prefix) and selector.endswith(suffix):
        selector = selector[len(prefix):-len(suffix)]
    elif selector.startswith("Could not"):
        self.page.wait_for_selector(f"text='{description}'", timeout=timeout * 1000)
        return f"Found text: '{description}'"

    # Playwright timeouts are in milliseconds.
    self.page.wait_for_selector(selector, timeout=timeout * 1000)
    return f"Element appeared: {description}"
|
|
426
|
+
|
|
427
|
+
def wait_for_text(self, text: str, timeout: int = 30) -> str:
    """Wait for specific text to appear on the page.

    Args:
        text: Exact text to wait for.
        timeout: Maximum time to wait, in seconds.

    Returns:
        "Found text: '<text>'" once a match is found, or
        "Browser not open".
    """
    if not self.page:
        return "Browser not open"

    # get_by_text takes the raw string, so quotes inside `text` cannot break
    # the selector the way an interpolated text='...' selector would.
    # .first keeps "any match" semantics when the text occurs several times.
    # Playwright timeouts are in milliseconds.
    self.page.get_by_text(text, exact=True).first.wait_for(timeout=timeout * 1000)
    return f"Found text: '{text}'"
|
|
434
|
+
|
|
152
435
|
def wait(self, seconds: float) -> str:
|
|
153
|
-
"""Wait for a specified number of seconds.
|
|
154
|
-
|
|
436
|
+
"""Wait for a specified number of seconds."""
|
|
437
|
+
if not self.page:
|
|
438
|
+
return "Browser not open"
|
|
439
|
+
self.page.wait_for_timeout(seconds * 1000)
|
|
440
|
+
return f"Waited for {seconds} seconds"
|
|
441
|
+
|
|
442
|
+
def scroll(self, times: int = 5, description: str = "the main content area") -> str:
    """Universal scroll with automatic strategy selection.

    Delegates to ``scroll_strategies.scroll_with_verification``, which tries
    multiple strategies until one works:
    1. AI-generated strategy (analyzes page structure)
    2. Element scrolling
    3. Page scrolling

    Args:
        times: Number of scroll iterations
        description: What to scroll (e.g., "the email list")

    Returns:
        Status message with successful strategy, or "Browser not open"
        when no page is attached.
    """
    # Same guard as the other page-using methods; without it a None page
    # would be handed to the strategy module.
    if not self.page:
        return "Browser not open"

    from . import scroll_strategies
    return scroll_strategies.scroll_with_verification(
        page=self.page,
        take_screenshot=self.take_screenshot,
        times=times,
        description=description
    )
|
|
464
|
+
|
|
465
|
+
def scroll_page(self, direction: str = "down", amount: int = 1000) -> str:
|
|
466
|
+
"""Scroll the page in a direction.
|
|
467
|
+
|
|
468
|
+
Args:
|
|
469
|
+
direction: "down", "up", "top", or "bottom"
|
|
470
|
+
amount: Pixels to scroll (ignored for "bottom"/"top")
|
|
172
471
|
"""
|
|
173
|
-
if
|
|
174
|
-
return
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
472
|
+
if not self.page:
|
|
473
|
+
return "Browser not open"
|
|
474
|
+
|
|
475
|
+
if direction == "bottom":
|
|
476
|
+
self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
|
477
|
+
return "Scrolled to bottom of page"
|
|
478
|
+
elif direction == "top":
|
|
479
|
+
self.page.evaluate("window.scrollTo(0, 0)")
|
|
480
|
+
return "Scrolled to top of page"
|
|
481
|
+
elif direction == "down":
|
|
482
|
+
self.page.evaluate(f"window.scrollBy(0, {amount})")
|
|
483
|
+
return f"Scrolled down {amount} pixels"
|
|
484
|
+
elif direction == "up":
|
|
485
|
+
self.page.evaluate(f"window.scrollBy(0, -{amount})")
|
|
486
|
+
return f"Scrolled up {amount} pixels"
|
|
487
|
+
else:
|
|
488
|
+
return f"Unknown direction: {direction}"
|
|
489
|
+
|
|
490
|
+
def scroll_element(self, selector: str, amount: int = 1000) -> str:
    """Scroll a specific element by CSS selector.

    Args:
        selector: CSS selector of the scrollable element.
        amount: Pixels to add to the element's ``scrollTop``.

    Returns:
        "Scrolled from <before>px to <after>px",
        "Element not found: <selector>", or "Browser not open".
    """
    if not self.page:
        return "Browser not open"

    # The selector and amount are passed as an evaluate() argument instead
    # of being formatted into the script text, so a selector containing
    # quotes (e.g. div[data-id='inbox']) cannot corrupt the JavaScript.
    return self.page.evaluate(
        """
        ([selector, amount]) => {
            const element = document.querySelector(selector);
            if (!element) return 'Element not found: ' + selector;
            const beforeScroll = element.scrollTop;
            element.scrollTop += amount;
            const afterScroll = element.scrollTop;
            return `Scrolled from ${beforeScroll}px to ${afterScroll}px`;
        }
        """,
        [selector, amount],
    )
|
|
506
|
+
|
|
507
|
+
def wait_for_manual_login(self, site_name: str = "the website") -> str:
|
|
508
|
+
"""Pause automation for user to login manually.
|
|
509
|
+
|
|
510
|
+
Useful for sites with 2FA or CAPTCHA.
|
|
511
|
+
|
|
180
512
|
Args:
|
|
181
|
-
|
|
182
|
-
|
|
513
|
+
site_name: Name of the site (e.g., "Gmail")
|
|
514
|
+
|
|
183
515
|
Returns:
|
|
184
|
-
|
|
516
|
+
Confirmation when user is ready to continue
|
|
185
517
|
"""
|
|
186
|
-
if not
|
|
187
|
-
return
|
|
188
|
-
|
|
189
|
-
html_content = self._page.content()
|
|
190
|
-
|
|
191
|
-
# Use llm_do to determine the selector
|
|
192
|
-
class ElementSelector(BaseModel):
|
|
193
|
-
selector: str
|
|
194
|
-
method: str # "text" or "css"
|
|
195
|
-
|
|
196
|
-
result = llm_do(
|
|
197
|
-
f"Find selector for: {description}\n\nHTML:\n{html_content[:5000]}",
|
|
198
|
-
output=ElementSelector,
|
|
199
|
-
system_prompt="Return the best selector to click the element. Use method='text' for button text, method='css' for CSS selectors."
|
|
200
|
-
)
|
|
201
|
-
|
|
202
|
-
if result.method == "text":
|
|
203
|
-
self._page.get_by_text(result.selector).click()
|
|
204
|
-
else:
|
|
205
|
-
self._page.locator(result.selector).click()
|
|
206
|
-
|
|
207
|
-
self._page.wait_for_timeout(1000)
|
|
208
|
-
return f"Clicked: {result.selector}"
|
|
518
|
+
if not self.page:
|
|
519
|
+
return "Browser not open"
|
|
209
520
|
|
|
521
|
+
print(f"\n{'='*60}")
|
|
522
|
+
print(f" MANUAL LOGIN REQUIRED")
|
|
523
|
+
print(f"{'='*60}")
|
|
524
|
+
print(f"Please login to {site_name} in the browser window.")
|
|
525
|
+
print(f"Once you're logged in and ready to continue:")
|
|
526
|
+
print(f" Type 'yes' or 'Y' and press Enter")
|
|
527
|
+
print(f"{'='*60}\n")
|
|
210
528
|
|
|
529
|
+
while True:
|
|
530
|
+
response = input("Ready to continue? (yes/Y): ").strip().lower()
|
|
531
|
+
if response in ['yes', 'y']:
|
|
532
|
+
print("Continuing automation...\n")
|
|
533
|
+
return f"User confirmed login to {site_name} - continuing"
|
|
534
|
+
else:
|
|
535
|
+
print("Please type 'yes' or 'Y' when ready.")
|
|
211
536
|
|
|
537
|
+
def extract_data(self, selector: str) -> List[str]:
|
|
538
|
+
"""Extract text from elements matching a selector."""
|
|
539
|
+
if not self.page:
|
|
540
|
+
return []
|
|
541
|
+
|
|
542
|
+
elements = self.page.locator(selector)
|
|
543
|
+
count = elements.count()
|
|
544
|
+
return [elements.nth(i).inner_text() for i in range(count)]
|
|
545
|
+
|
|
546
|
+
def close(self) -> str:
    """Close the browser.

    Closes the page, then the browser, then stops Playwright, skipping any
    that are not set, and finally clears all three attributes.

    Returns:
        "Browser closed".

    Raises:
        Any exception raised by a close/stop call. The remaining cleanup
        steps and the attribute reset still run before it propagates.
    """
    from contextlib import ExitStack

    def _forget() -> None:
        self.page = None
        self.browser = None
        self.playwright = None

    # ExitStack runs callbacks in reverse registration order and keeps going
    # when one raises, so register them last-to-first:
    # page.close -> browser.close -> playwright.stop -> reset.
    with ExitStack() as cleanup:
        cleanup.callback(_forget)
        if self.playwright:
            cleanup.callback(self.playwright.stop)
        if self.browser:
            cleanup.callback(self.browser.close)
        if self.page:
            cleanup.callback(self.page.close)

    return "Browser closed"
|
|
212
559
|
|
|
213
560
|
|
|
214
561
|
def execute_browser_command(command: str) -> str:
|
|
@@ -216,11 +563,8 @@ def execute_browser_command(command: str) -> str:
|
|
|
216
563
|
|
|
217
564
|
Returns the agent's natural language response directly.
|
|
218
565
|
"""
|
|
219
|
-
# Framework auto-loads local .env, but CLI commands need global fallback
|
|
220
|
-
# Check for API key in environment first
|
|
221
566
|
api_key = os.getenv('OPENONION_API_KEY')
|
|
222
567
|
|
|
223
|
-
# If not found, try loading from global config
|
|
224
568
|
if not api_key:
|
|
225
569
|
global_env = Path.home() / ".co" / "keys.env"
|
|
226
570
|
if global_env.exists():
|
|
@@ -228,7 +572,7 @@ def execute_browser_command(command: str) -> str:
|
|
|
228
572
|
api_key = os.getenv('OPENONION_API_KEY')
|
|
229
573
|
|
|
230
574
|
if not api_key:
|
|
231
|
-
return '
|
|
575
|
+
return 'Browser agent requires authentication. Run: co auth'
|
|
232
576
|
|
|
233
577
|
browser = BrowserAutomation()
|
|
234
578
|
agent = Agent(
|
|
@@ -237,7 +581,6 @@ def execute_browser_command(command: str) -> str:
|
|
|
237
581
|
api_key=api_key,
|
|
238
582
|
system_prompt=PROMPT_PATH,
|
|
239
583
|
tools=[browser],
|
|
240
|
-
max_iterations=
|
|
584
|
+
max_iterations=20
|
|
241
585
|
)
|
|
242
586
|
return agent.input(command)
|
|
243
|
-
|