optexity 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optexity/cli.py +1 -1
- optexity/examples/__init__.py +0 -0
- optexity/examples/add_example.py +88 -0
- optexity/examples/download_pdf_url.py +29 -0
- optexity/examples/extract_price_stockanalysis.py +44 -0
- optexity/examples/file_upload.py +59 -0
- optexity/examples/i94.py +126 -0
- optexity/examples/i94_travel_history.py +126 -0
- optexity/examples/peachstate_medicaid.py +201 -0
- optexity/examples/supabase_login.py +75 -0
- optexity/inference/__init__.py +0 -0
- optexity/inference/agents/__init__.py +0 -0
- optexity/inference/agents/error_handler/__init__.py +0 -0
- optexity/inference/agents/error_handler/error_handler.py +39 -0
- optexity/inference/agents/error_handler/prompt.py +60 -0
- optexity/inference/agents/index_prediction/__init__.py +0 -0
- optexity/inference/agents/index_prediction/action_prediction_locator_axtree.py +45 -0
- optexity/inference/agents/index_prediction/prompt.py +14 -0
- optexity/inference/agents/select_value_prediction/__init__.py +0 -0
- optexity/inference/agents/select_value_prediction/prompt.py +20 -0
- optexity/inference/agents/select_value_prediction/select_value_prediction.py +39 -0
- optexity/inference/agents/two_fa_extraction/__init__.py +0 -0
- optexity/inference/agents/two_fa_extraction/prompt.py +23 -0
- optexity/inference/agents/two_fa_extraction/two_fa_extraction.py +47 -0
- optexity/inference/child_process.py +251 -0
- optexity/inference/core/__init__.py +0 -0
- optexity/inference/core/interaction/__init__.py +0 -0
- optexity/inference/core/interaction/handle_agentic_task.py +79 -0
- optexity/inference/core/interaction/handle_check.py +57 -0
- optexity/inference/core/interaction/handle_click.py +79 -0
- optexity/inference/core/interaction/handle_command.py +261 -0
- optexity/inference/core/interaction/handle_input.py +76 -0
- optexity/inference/core/interaction/handle_keypress.py +16 -0
- optexity/inference/core/interaction/handle_select.py +109 -0
- optexity/inference/core/interaction/handle_select_utils.py +132 -0
- optexity/inference/core/interaction/handle_upload.py +59 -0
- optexity/inference/core/interaction/utils.py +81 -0
- optexity/inference/core/logging.py +406 -0
- optexity/inference/core/run_assertion.py +55 -0
- optexity/inference/core/run_automation.py +463 -0
- optexity/inference/core/run_extraction.py +240 -0
- optexity/inference/core/run_interaction.py +254 -0
- optexity/inference/core/run_python_script.py +20 -0
- optexity/inference/core/run_two_fa.py +120 -0
- optexity/inference/core/two_factor_auth/__init__.py +0 -0
- optexity/inference/infra/__init__.py +0 -0
- optexity/inference/infra/browser.py +455 -0
- optexity/inference/infra/browser_extension.py +20 -0
- optexity/inference/models/__init__.py +22 -0
- optexity/inference/models/gemini.py +113 -0
- optexity/inference/models/human.py +20 -0
- optexity/inference/models/llm_model.py +210 -0
- optexity/inference/run_local.py +200 -0
- optexity/schema/__init__.py +0 -0
- optexity/schema/actions/__init__.py +0 -0
- optexity/schema/actions/assertion_action.py +66 -0
- optexity/schema/actions/extraction_action.py +143 -0
- optexity/schema/actions/interaction_action.py +330 -0
- optexity/schema/actions/misc_action.py +18 -0
- optexity/schema/actions/prompts.py +27 -0
- optexity/schema/actions/two_fa_action.py +24 -0
- optexity/schema/automation.py +432 -0
- optexity/schema/callback.py +16 -0
- optexity/schema/inference.py +87 -0
- optexity/schema/memory.py +100 -0
- optexity/schema/task.py +212 -0
- optexity/schema/token_usage.py +48 -0
- optexity/utils/__init__.py +0 -0
- optexity/utils/settings.py +54 -0
- optexity/utils/utils.py +76 -0
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/METADATA +20 -36
- optexity-0.1.4.dist-info/RECORD +80 -0
- optexity-0.1.2.dist-info/RECORD +0 -11
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/WHEEL +0 -0
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/entry_points.txt +0 -0
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/top_level.txt +0 -0

optexity/inference/infra/browser.py
@@ -0,0 +1,455 @@
import asyncio
import base64
import json
import logging
import re
from typing import Literal
from uuid import uuid4

from browser_use import Agent, BrowserSession, ChatGoogle
from browser_use.browser.views import BrowserStateSummary
from patchright._impl._errors import TimeoutError as PatchrightTimeoutError
from playwright._impl._errors import TimeoutError as PlaywrightTimeoutError
from playwright.async_api import Download, Locator, Page, Request, Response

from optexity.schema.memory import Memory, NetworkRequest, NetworkResponse
from optexity.utils.settings import settings

logger = logging.getLogger(__name__)


class Browser:
    def __init__(
        self,
        memory: Memory,
        user_data_dir: str = None,
        headless: bool = False,
        proxy: str = None,
        stealth: bool = True,
        backend: Literal["browser-use", "browserbase"] = "browser-use",
        debug_port: int = 9222,
        channel: Literal["chromium", "chrome"] = "chromium",
        use_proxy: bool = False,
        proxy_session_id: str | None = None,
    ):

        if proxy:
            proxy = proxy.removeprefix("http://").removeprefix("https://")
            self.proxy = "http://" + proxy

        self.headless = headless
        self.stealth = stealth
        self.user_data_dir = user_data_dir
        self.backend = backend
        self.debug_port = debug_port
        self.use_proxy = use_proxy
        self.proxy_session_id = proxy_session_id
        self.playwright = None
        self.browser = None
        self.context = None
        self.page = None
        self.cdp_url = f"http://localhost:{self.debug_port}"
        self.backend_agent = None
        self.channel = channel
        self.memory = memory
        self.page_to_target_id = []
        self.previous_total_pages = 0

        self.active_downloads = 0
        self.all_active_downloads_done = asyncio.Event()
        self.all_active_downloads_done.set()

        self.network_calls: list[NetworkResponse | NetworkRequest] = []

    async def start(self):
        logger.debug("Starting browser")
        try:
            if self.playwright is not None:
                await self.playwright.stop()

            if self.stealth:
                from patchright.async_api import async_playwright
            else:
                from playwright.async_api import async_playwright

            proxy = None
            if self.use_proxy:
                if settings.PROXY_URL is None:
                    raise ValueError("PROXY_URL is not set")
                proxy = {"server": settings.PROXY_URL}
                if settings.PROXY_USERNAME is not None:
                    if settings.PROXY_PROVIDER == "oxylabs":
                        assert settings.PROXY_COUNTRY, "PROXY_COUNTRY is not set"
                        assert settings.PROXY_USERNAME, "PROXY_USERNAME is not set"
                        assert settings.PROXY_PASSWORD, "PROXY_PASSWORD is not set"

                        proxy["username"] = (
                            f"customer-{settings.PROXY_USERNAME}-cc-{settings.PROXY_COUNTRY}-sessid-{self.proxy_session_id}-sesstime-20"
                        )
                    elif settings.PROXY_PROVIDER == "brightdata":

                        proxy["username"] = (
                            f"{settings.PROXY_USERNAME}-session-{self.proxy_session_id}"
                        )

                    else:
                        proxy["username"] = settings.PROXY_USERNAME

                if settings.PROXY_PASSWORD is not None:
                    proxy["password"] = settings.PROXY_PASSWORD

            self.playwright = await async_playwright().start()
            self.browser = await self.playwright.chromium.launch(
                channel=self.channel,
                headless=self.headless,
                proxy=proxy,
                args=[
                    "--start-fullscreen",
                    "--disable-popup-blocking",
                    "--window-size=1920,1080",
                    f"--remote-debugging-port={self.debug_port}",
                    "--disable-gpu",
                    "--disable-extensions",
                    "--disable-background-networking",
                ],
                chromium_sandbox=False,
            )

            self.context = await self.browser.new_context(
                no_viewport=True, ignore_https_errors=True
            )

            async def log_request(req: Request):
                await self.log_request(req)

            async def handle_random_download(download: Download):
                await self.handle_random_download(download)

            async def handle_random_url_downloads(resp: Response):
                await self.handle_random_url_downloads(resp)

            self.context.on("request", log_request)
            self.context.on("response", handle_random_url_downloads)

            self.context.on(
                "page", lambda p: (p.on("download", handle_random_download))
            )

            self.page = await self.context.new_page()

            browser_session = BrowserSession(cdp_url=self.cdp_url, keep_alive=True)

            self.backend_agent = Agent(
                task="",
                llm=ChatGoogle(model="gemini-flash-latest"),
                browser_session=browser_session,
                use_vision=False,
            )

            await self.backend_agent.browser_session.start()

            tabs = await self.backend_agent.browser_session.get_tabs()

            for tab in tabs[::-1]:
                if tab.target_id not in self.page_to_target_id:
                    self.page_to_target_id.append(tab.target_id)
            self.previous_total_pages = len(self.context.pages)

            logger.debug("Browser started successfully")

        except Exception as e:
            logger.error(f"Error starting playwright: {e}")
            raise e

    async def stop(self):
        logger.debug("Stopping full system")
        if self.backend_agent is not None:
            logger.debug("Stopping backend agent")
            self.backend_agent.stop()
            if self.backend_agent.browser_session:
                logger.debug("Resetting browser session")
                await self.backend_agent.browser_session.stop()
                # await self.backend_agent.browser_session._storage_state_watchdog._stop_monitoring()
                # await self.backend_agent.browser_session.reset()
                logger.debug("Browser session reset")
            self.backend_agent = None

        if self.context is not None:
            logger.debug("Stopping context")
            await self.context.close()
            self.context = None

        if self.browser is not None:
            logger.debug("Stopping browser")
            await self.browser.close()
            self.browser = None

        if self.playwright is not None:
            logger.debug("Stopping playwright")
            await self.playwright.stop()
            self.playwright = None
        logger.debug("Full system stopped")

    async def get_current_page(self) -> Page | None:
        if self.context is None:
            return None
        pages = self.context.pages
        if len(pages) == 0:
            self.page = await self.context.new_page()
        else:
            self.page = pages[-1]

        return self.page

    async def handle_new_tabs(self, max_wait_time: float) -> bool:

        total_time = 0
        while total_time < max_wait_time:
            pages = self.context.pages
            if len(pages) > self.previous_total_pages:
                break
            await asyncio.sleep(1)
            total_time += 1

        pages = self.context.pages
        if len(pages) == self.previous_total_pages:
            return False, total_time

        tabs = await self.backend_agent.browser_session.get_tabs()

        for tab in tabs[::-1]:
            if tab.target_id not in self.page_to_target_id:
                self.page_to_target_id.append(tab.target_id)
        self.previous_total_pages = len(pages)

        tab_id = self.page_to_target_id[-1][-4:]
        action_model = self.backend_agent.ActionModel(**{"switch": {"tab_id": tab_id}})
        await self.backend_agent.multi_act([action_model])
        return True, total_time

    async def close_current_tab(self):
        if self.context is None:
            return None

        pages = self.context.pages

        if len(pages) == 1:
            logger.warning("Atleast one tab should be open, skipping close current tab")
            return False

        if len(self.page_to_target_id) > 1:
            tab_id_after_close = self.page_to_target_id[-2][-4:]
            action_model = self.backend_agent.ActionModel(
                **{"switch": {"tab_id": tab_id_after_close}}
            )
            await self.backend_agent.multi_act([action_model])
            self.page_to_target_id.pop()

        last_page = pages[-1]
        await last_page.close()

    async def switch_tab(self, tab_index: int):
        if self.context is None:
            return None

        pages = self.context.pages

        if len(pages) == 1:
            logger.warning("Atleast one tab should be open, skipping close current tab")
            return False

        tab_id = self.page_to_target_id[tab_index][-4:]
        page = pages[tab_index]

        await page.bring_to_front()

        action_model = self.backend_agent.ActionModel(**{"switch": {"tab_id": tab_id}})
        await self.backend_agent.multi_act([action_model])

    async def get_locator_from_command(self, command: str) -> Locator:
        page = await self.get_current_page()
        if page is None:
            return None
        locator: Locator = eval(f"page.{command}")
        return locator

    def get_xpath_from_index(self, index: int) -> str:
        raise NotImplementedError("Not implemented")

    async def go_to_url(self, url: str):
        try:
            if url == "about:blank":
                return
            page = await self.get_current_page()
            if page is None:
                return None
            await page.goto(url, timeout=10000)
        except TimeoutError as e:
            pass
        except PatchrightTimeoutError as e:
            pass
        except PlaywrightTimeoutError as e:
            pass

    async def get_browser_state_summary(self) -> BrowserStateSummary:
        browser_state_summary = await self.backend_agent.browser_session.get_browser_state_summary(
            include_screenshot=True,  # always capture even if use_vision=False so that cloud sync is useful (it's fast now anyway)
            include_recent_events=self.backend_agent.include_recent_events,
            cached=False,
        )

        return browser_state_summary

    async def get_current_page_url(self) -> str:
        try:
            page = await self.get_current_page()
            if page is None:
                return None
            return page.url
        except Exception as e:
            logger.error(f"Error getting current page URL: {e}")
            return None

    async def get_current_page_title(self) -> str:
        try:
            page = await self.get_current_page()
            if page is None:
                return None
            return await page.title()
        except Exception as e:
            logger.error(f"Error getting current page title: {e}")
            return None

    async def handle_random_download(self, download: Download):
        self.active_downloads += 1
        self.all_active_downloads_done.clear()

        temp_path = await download.path()
        async with self.memory.download_lock:
            if temp_path not in self.memory.raw_downloads:
                self.memory.raw_downloads[temp_path] = (False, download)
        self.active_downloads -= 1

        if self.active_downloads == 0:
            self.all_active_downloads_done.set()

    async def handle_random_url_downloads(self, resp: Response):
        try:

            if "application/pdf" in resp.headers.get("content-type", ""):
                self.active_downloads += 1
                self.all_active_downloads_done.clear()

                # Default filename fallback
                filename = f"{uuid4()}.pdf"

                # Try to get suggested filename from headers
                content_disposition = resp.headers.get("content-disposition")
                if content_disposition:
                    match = re.search(
                        r'filename\*?=(?:UTF-8\'\')?"?([^";]+)"?',
                        content_disposition,
                    )
                    if match:
                        filename = match.group(1)

                self.memory.urls_to_downloads.append((resp.url, filename))
                logger.info(f"Added URL to downloads: {resp.url}, {filename}")
                self.active_downloads -= 1
        except Exception as e:
            logger.error(f"Error handling random responses: {e}")

        if self.active_downloads == 0:
            self.all_active_downloads_done.set()

    async def log_request(self, req: Request):
        try:
            body = req.post_data  # this is None for GET/HEAD
            # Rebuild cookies exactly like curl -b
            cookies = await req.frame.page.context.cookies()
            cookie_header = "; ".join(f"{c['name']}={c['value']}" for c in cookies)

            # Rebuild headers
            headers = dict(req.headers)
            headers["cookie"] = cookie_header

            # Body as raw bytes
            body = req.post_data

            self.network_calls.append(
                NetworkRequest(
                    url=req.url, method=req.method, headers=headers, body=body
                )
            )

        except Exception as e:
            # logger.error(f"Could not get body: {e}")
            pass

    async def attach_network_listeners(self):
        page = await self.get_current_page()

        # remove old listeners first
        try:
            page.remove_listener("response", self._on_response)
        except Exception:
            pass

        page.on("response", self._on_response)

    async def detach_network_listeners(self):
        page = await self.get_current_page()
        try:
            page.remove_listener("response", self._on_response)
        except Exception:
            pass

    async def _on_response(self, response: Response):
        try:
            body = await response.json()
        except Exception:
            try:
                body = await response.text()
            except Exception:
                body = None

        # Try to enrich response with request method and content length
        method = None
        try:
            # Playwright provides request object for a response
            method = response.request.method
        except Exception:
            pass

        content_length = 0
        try:
            if body is not None:
                if isinstance(body, (str, bytes)):
                    content_length = len(body)
                elif isinstance(body, dict):
                    content_length = len(json.dumps(body))
        except Exception:
            pass

        self.network_calls.append(
            NetworkResponse(
                url=response.url,
                method=method,
                status=response.status,
                headers=response.headers,
                body=body,
                content_length=content_length,
            )
        )

    async def clear_network_calls(self):
        self.network_calls.clear()

    async def get_screenshot(self, full_page: bool = False) -> str | None:
        page = await self.get_current_page()
        if page is None:
            return None
        screenshot_bytes = await page.screenshot(full_page=full_page)
        screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")

        return screenshot_base64
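
Taken together, Browser.start() launches Chromium via Playwright (or Patchright when stealth=True), registers request/response/download listeners on the context, and attaches a browser-use Agent over the local CDP endpoint. Below is a minimal usage sketch, not part of the package: it assumes a Memory instance can be built with defaults (the real constructor in optexity/schema/memory.py may require arguments) and that GOOGLE_API_KEY is set for the backend agent.

```python
import asyncio

from optexity.inference.infra.browser import Browser
from optexity.schema.memory import Memory


async def main() -> None:
    memory = Memory()  # assumption: defaults suffice; the real constructor may differ
    browser = Browser(memory=memory, headless=True)
    await browser.start()
    try:
        await browser.go_to_url("https://example.com")
        title = await browser.get_current_page_title()
        screenshot_b64 = await browser.get_screenshot()
        print(title, len(screenshot_b64 or ""))
    finally:
        await browser.stop()


asyncio.run(main())
```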

optexity/inference/infra/browser_extension.py
@@ -0,0 +1,20 @@
from browser_use.browser.profile import BrowserProfile


class BrowserExtension:
    def __init__(self, browser_profile: BrowserProfile = None):
        self.browser_profile = (
            browser_profile if browser_profile is not None else BrowserProfile()
        )

    def get_extension_paths(self):
        return self.browser_profile._get_extension_args()


if __name__ == "__main__":
    browser_profile = BrowserProfile(
        user_data_dir="~/.config/browseruse/profiles/default",
        headless=True,
    )
    paths = browser_profile._get_extension_args()
    print(paths)

optexity/inference/models/__init__.py
@@ -0,0 +1,22 @@
from .llm_model import GeminiModels, HumanModels, OpenAIModels


def get_llm_model(
    model_name: GeminiModels | HumanModels | OpenAIModels, use_structured_output: bool
):
    if isinstance(model_name, GeminiModels):
        from .gemini import Gemini

        return Gemini(model_name, use_structured_output)

    # if isinstance(model_name, OpenAIModels):
    #     from .openai import OpenAI

    #     return OpenAI(model_name, use_structured_output)

    # if isinstance(model_name, HumanModels):
    #     from .human import HumanModel

    #     return HumanModel(model_name, use_structured_output)

    raise ValueError(f"Invalid model type: {model_name}")
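
Only the Gemini branch of this factory is active in this release; the OpenAI and Human branches are commented out. A sketch of how the factory might be called, with the enum member name as an illustrative assumption (the real values live in optexity/inference/models/llm_model.py):

```python
from optexity.inference.models import get_llm_model
from optexity.inference.models.llm_model import GeminiModels

# GeminiModels.GEMINI_FLASH_LATEST is a hypothetical member name used for
# illustration; check llm_model.py for the enum values actually shipped.
model = get_llm_model(GeminiModels.GEMINI_FLASH_LATEST, use_structured_output=True)
# The returned object is the Gemini wrapper shown in gemini.py below.
```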

optexity/inference/models/gemini.py
@@ -0,0 +1,113 @@
import base64
import logging
import os
from typing import Optional

import httpx
from google import genai
from google.genai import types
from pydantic import BaseModel, ValidationError

from .llm_model import GeminiModels, LLMModel, TokenUsage

logger = logging.getLogger(__name__)


class Gemini(LLMModel):

    def __init__(self, model_name: GeminiModels, use_structured_output: bool):
        super().__init__(model_name, use_structured_output)

        self.api_key = os.environ["GOOGLE_API_KEY"]
        try:
            self.client = genai.Client(api_key=self.api_key)
            self.client.models.list()
        except Exception as e:
            raise ValueError("Invalid GOOGLE_API_KEY")

    def _get_model_response_with_structured_output(
        self,
        prompt: str,
        response_schema: BaseModel,
        screenshot: Optional[str] = None,
        pdf_url: Optional[str] = None,
        system_instruction: Optional[str] = None,
    ) -> tuple[BaseModel, TokenUsage]:

        if pdf_url is not None and screenshot is not None:
            raise ValueError("Cannot use both screenshot and pdf_url")

        if screenshot is not None:
            prompt = [
                types.Part.from_bytes(
                    data=base64.b64decode(screenshot),
                    mime_type="image/png",
                ),
                prompt,
            ]
        if pdf_url is not None:
            doc_data = httpx.get(pdf_url).content
            prompt = [
                types.Part.from_bytes(
                    data=doc_data,
                    mime_type="application/pdf",
                ),
                prompt,
            ]

        try:
            if self.use_structured_output:
                response = self.client.models.generate_content(
                    model=self.model_name.value,
                    contents=prompt,
                    config={
                        "response_mime_type": "application/json",
                        "system_instruction": system_instruction,
                        "response_json_schema": response_schema.model_json_schema(),
                    },
                )

                if isinstance(response.parsed, BaseModel):
                    parsed_response: BaseModel = response.parsed
                else:
                    parsed_response = response_schema.model_validate(response.parsed)
            else:
                response = self.client.models.generate_content(
                    model=self.model_name.value,
                    contents=prompt,
                    config={"system_instruction": system_instruction},
                )

                parsed_response: BaseModel = self.parse_from_completion(
                    response.candidates[0].content.parts[0].text, response_schema
                )
        except ValidationError as e:
            response = None
            parsed_response = None

        if response is not None:
            token_usage = self.get_token_usage(
                input_tokens=response.usage_metadata.prompt_token_count,
                output_tokens=response.usage_metadata.candidates_token_count,
                tool_use_tokens=response.usage_metadata.tool_use_prompt_token_count,
                thoughts_tokens=response.usage_metadata.thoughts_token_count,
                total_tokens=response.usage_metadata.total_token_count,
            )
        else:
            token_usage = TokenUsage()
        return parsed_response, token_usage

    def _get_model_response(
        self, prompt: str, system_instruction: Optional[str] = None
    ) -> tuple[str, TokenUsage]:

        response = self.client.models.generate_content(
            model=self.model_name.value,
            contents=prompt,
            config={"system_instruction": system_instruction},
        )
        token_usage = self.get_token_usage(
            input_tokens=response.usage_metadata.prompt_token_count,
            output_tokens=response.usage_metadata.candidates_token_count,
        )
        return response.candidates[0].content.parts[0].text, token_usage
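
Gemini._get_model_response_with_structured_output above forwards a Pydantic JSON schema via the response_json_schema config key and validates whatever the model returns. The same pattern can be exercised directly against the google-genai client, independent of the optexity wrapper; this is a sketch, and the model id and schema below are illustrative only.

```python
import os

from google import genai
from pydantic import BaseModel


class Extraction(BaseModel):
    title: str
    price: float


client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
response = client.models.generate_content(
    model="gemini-2.0-flash",  # illustrative model id
    contents="Extract the product title and price from: 'Acme Mug - $12.50'",
    config={
        "response_mime_type": "application/json",
        "response_json_schema": Extraction.model_json_schema(),
    },
)
# Validate the raw JSON text against the schema, mirroring the fallback
# branch in gemini.py above when response.parsed is not already a BaseModel.
parsed = Extraction.model_validate_json(response.text)
print(parsed)
```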

optexity/inference/models/human.py
@@ -0,0 +1,20 @@
import asyncio

import aiofiles


class Human:

    def __init__(self):
        pass

    async def get_next_action(self, axtree: str):

        async with aiofiles.open("/tmp/axtree.txt", "w", encoding="utf-8") as f:
            await f.write(axtree)

        value = await asyncio.to_thread(
            input, "Input the index of the element to click: "
        )

        return int(value)