sentienceapi 0.90.12__py3-none-any.whl → 0.90.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

sentience/async_api.py ADDED
@@ -0,0 +1,1160 @@
1
+ """
2
+ Async API for Sentience SDK - Use this in asyncio contexts
3
+
4
+ This module provides async versions of all Sentience SDK functions.
5
+ Use AsyncSentienceBrowser when working with async/await code.
6
+ """
7
+
8
+ import asyncio
9
+ import base64
10
+ import os
11
+ import shutil
12
+ import tempfile
13
+ import time
14
+ from pathlib import Path
15
+ from typing import Any, Optional
16
+ from urllib.parse import urlparse
17
+
18
+ from playwright.async_api import BrowserContext, Page, Playwright, async_playwright
19
+
20
+ from sentience._extension_loader import find_extension_path
21
+ from sentience.models import (
22
+ ActionResult,
23
+ BBox,
24
+ Element,
25
+ ProxyConfig,
26
+ Snapshot,
27
+ SnapshotOptions,
28
+ StorageState,
29
+ Viewport,
30
+ WaitResult,
31
+ )
32
+
33
+ # Import stealth for bot evasion (optional - graceful fallback if not available)
34
+ try:
35
+ from playwright_stealth import stealth_async
36
+
37
+ STEALTH_AVAILABLE = True
38
+ except ImportError:
39
+ STEALTH_AVAILABLE = False
40
+
41
+
42
class AsyncSentienceBrowser:
    """Async version of SentienceBrowser for use in asyncio contexts.

    Launches a persistent Chromium context through Playwright with the
    Sentience extension loaded, and exposes the resulting ``context`` and
    ``page``. Can be used as an async context manager::

        async with AsyncSentienceBrowser() as browser:
            await browser.goto("https://example.com")
    """

    def __init__(
        self,
        api_key: str | None = None,
        api_url: str | None = None,
        headless: bool | None = None,
        proxy: str | None = None,
        user_data_dir: str | Path | None = None,
        storage_state: str | Path | StorageState | dict | None = None,
        record_video_dir: str | Path | None = None,
        record_video_size: dict[str, int] | None = None,
        viewport: Viewport | dict[str, int] | None = None,
    ):
        """
        Initialize Async Sentience browser

        Args:
            api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
                If None, uses free tier (local extension only)
            api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
            headless: Whether to run in headless mode. If None, defaults to True in CI, False otherwise
            proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
                Falls back to the SENTIENCE_PROXY environment variable.
            user_data_dir: Optional path to user data directory for persistent sessions
            storage_state: Optional storage state to inject (cookies + localStorage)
            record_video_dir: Optional directory path to save video recordings
            record_video_size: Optional video resolution as dict with 'width' and 'height' keys
            viewport: Optional viewport size as Viewport object or dict with 'width' and 'height' keys.
                Examples: Viewport(width=1280, height=800) (default)
                    Viewport(width=1920, height=1080) (Full HD)
                    {"width": 1280, "height": 800} (dict also supported)
                If None, defaults to Viewport(width=1280, height=800).
        """
        self.api_key = api_key
        # Only set api_url if api_key is provided, otherwise None (free tier)
        if self.api_key and not api_url:
            self.api_url = "https://api.sentienceapi.com"
        else:
            self.api_url = api_url

        # Determine headless mode
        if headless is None:
            # Default to False for local dev, True for CI
            self.headless = os.environ.get("CI", "").lower() == "true"
        else:
            self.headless = headless

        # Support proxy from argument or environment variable
        self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")

        # Auth injection support
        self.user_data_dir = user_data_dir
        self.storage_state = storage_state

        # Video recording support
        self.record_video_dir = record_video_dir
        self.record_video_size = record_video_size or {"width": 1280, "height": 800}

        # Viewport configuration - convert dict to Viewport if needed
        if viewport is None:
            self.viewport = Viewport(width=1280, height=800)
        elif isinstance(viewport, dict):
            self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
        else:
            self.viewport = viewport

        # Populated by start() (or by from_existing()/from_page()).
        self.playwright: Playwright | None = None
        self.context: BrowserContext | None = None
        self.page: Page | None = None
        # Temporary copy of the extension created by start(), removed by close().
        self._extension_path: str | None = None

    def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
        """
        Parse proxy connection string into ProxyConfig.

        Args:
            proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')

        Returns:
            ProxyConfig object or None if invalid. Invalid input is reported
            with a printed warning instead of an exception.
        """
        if not proxy_string:
            return None

        try:
            parsed = urlparse(proxy_string)

            # Validate scheme
            if parsed.scheme not in ("http", "https", "socks5"):
                print(f"⚠️ [Sentience] Unsupported proxy scheme: {parsed.scheme}")
                print(" Supported: http, https, socks5")
                return None

            # Validate host and port (accessing .port raises ValueError for a
            # malformed port, which the handler below reports).
            if not parsed.hostname or not parsed.port:
                print("⚠️ [Sentience] Proxy URL must include hostname and port")
                print(" Expected format: http://username:password@host:port")
                return None

            # urlparse strips the brackets from IPv6 literals; restore them so
            # that the rebuilt "host:port" authority stays unambiguous.
            host = parsed.hostname
            if ":" in host:
                host = f"[{host}]"

            # Build server URL (credentials are passed separately)
            server = f"{parsed.scheme}://{host}:{parsed.port}"

            # Create ProxyConfig with optional credentials
            return ProxyConfig(
                server=server,
                username=parsed.username if parsed.username else None,
                password=parsed.password if parsed.password else None,
            )

        except Exception as e:
            print(f"⚠️ [Sentience] Invalid proxy configuration: {e}")
            print(" Expected format: http://username:password@host:port")
            return None

    async def start(self) -> None:
        """Launch browser with extension loaded (async)

        If any step fails, everything acquired so far (temporary extension
        copy, Playwright driver, browser context) is released through
        close() before the original error is re-raised.
        """
        # Get extension source path using shared utility
        extension_source = find_extension_path()

        # Create temporary extension bundle
        self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")

        try:
            shutil.copytree(extension_source, self._extension_path, dirs_exist_ok=True)

            self.playwright = await async_playwright().start()

            # Build launch arguments
            args = [
                f"--disable-extensions-except={self._extension_path}",
                f"--load-extension={self._extension_path}",
                "--disable-blink-features=AutomationControlled",
                "--no-sandbox",
                "--disable-infobars",
                "--disable-features=WebRtcHideLocalIpsWithMdns",
                "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
            ]

            if self.headless:
                args.append("--headless=new")

            # Parse proxy configuration if provided
            proxy_config = self._parse_proxy(self.proxy) if self.proxy else None

            # Handle User Data Directory
            if self.user_data_dir:
                user_data_dir = str(self.user_data_dir)
                Path(user_data_dir).mkdir(parents=True, exist_ok=True)
            else:
                user_data_dir = ""

            # Build launch_persistent_context parameters.
            # NOTE(review): Playwright's own "headless" flag is always off here;
            # headless operation is requested through the "--headless=new"
            # argument above - presumably so the extension still loads. Confirm.
            launch_params = {
                "user_data_dir": user_data_dir,
                "headless": False,
                "args": args,
                "viewport": {"width": self.viewport.width, "height": self.viewport.height},
                "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            }

            # Add proxy if configured
            if proxy_config:
                launch_params["proxy"] = proxy_config.to_playwright_dict()
                launch_params["ignore_https_errors"] = True
                print(f"🌐 [Sentience] Using proxy: {proxy_config.server}")

            # Add video recording if configured
            if self.record_video_dir:
                video_dir = Path(self.record_video_dir)
                video_dir.mkdir(parents=True, exist_ok=True)
                launch_params["record_video_dir"] = str(video_dir)
                launch_params["record_video_size"] = self.record_video_size
                print(f"🎥 [Sentience] Recording video to: {video_dir}")
                print(
                    f" Resolution: {self.record_video_size['width']}x{self.record_video_size['height']}"
                )

            # Launch persistent context
            self.context = await self.playwright.chromium.launch_persistent_context(
                **launch_params
            )

            self.page = (
                self.context.pages[0] if self.context.pages else await self.context.new_page()
            )

            # Inject storage state if provided
            if self.storage_state:
                await self._inject_storage_state(self.storage_state)

            # Apply stealth if available
            if STEALTH_AVAILABLE:
                await stealth_async(self.page)

            # Wait a moment for extension to initialize
            await asyncio.sleep(0.5)
        except BaseException:
            # __aexit__ is never invoked when __aenter__ raises, so release
            # whatever was acquired here. A secondary cleanup failure must not
            # replace the launch error, which is the one worth reporting.
            try:
                await self.close()
            except Exception:
                pass
            raise

    async def goto(self, url: str) -> None:
        """Navigate to a URL and ensure extension is ready (async)

        Raises:
            RuntimeError: If the browser was not started, or if the extension
                does not become ready after navigation.
        """
        if not self.page:
            raise RuntimeError("Browser not started. Call await start() first.")

        await self.page.goto(url, wait_until="domcontentloaded")

        # Wait for extension to be ready
        if not await self._wait_for_extension():
            # Collect page-side diagnostics for the error message (best effort)
            try:
                diag = await self.page.evaluate(
                    """() => ({
                        sentience_defined: typeof window.sentience !== 'undefined',
                        registry_defined: typeof window.sentience_registry !== 'undefined',
                        snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
                        extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
                        url: window.location.href
                    })"""
                )
            except Exception as e:
                diag = f"Failed to get diagnostics: {str(e)}"

            raise RuntimeError(
                "Extension failed to load after navigation. Make sure:\n"
                "1. Extension is built (cd sentience-chrome && ./build.sh)\n"
                "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n"
                "3. Check browser console for errors (run with headless=False to see console)\n"
                f"4. Extension path: {self._extension_path}\n"
                f"5. Diagnostic info: {diag}"
            )

    async def _inject_storage_state(self, storage_state: str | Path | StorageState | dict) -> None:
        """Inject storage state (cookies + localStorage) into browser context (async)

        Args:
            storage_state: Path to a JSON file, a StorageState object, or a
                dict accepted by StorageState.from_dict.

        Raises:
            ValueError: If storage_state is of an unsupported type.
        """
        import json

        # Load storage state
        if isinstance(storage_state, (str, Path)):
            with open(storage_state, encoding="utf-8") as f:
                state_dict = json.load(f)
            state = StorageState.from_dict(state_dict)
        elif isinstance(storage_state, StorageState):
            state = storage_state
        elif isinstance(storage_state, dict):
            state = StorageState.from_dict(storage_state)
        else:
            raise ValueError(
                f"Invalid storage_state type: {type(storage_state)}. "
                "Expected str, Path, StorageState, or dict."
            )

        # Inject cookies
        if state.cookies:
            playwright_cookies = []
            for cookie in state.cookies:
                cookie_dict = cookie.model_dump()
                playwright_cookie = {
                    "name": cookie_dict["name"],
                    "value": cookie_dict["value"],
                    "domain": cookie_dict["domain"],
                    "path": cookie_dict["path"],
                }
                # Optional attributes are forwarded only when set (truthy)
                for key in ("expires", "httpOnly", "secure", "sameSite"):
                    if cookie_dict.get(key):
                        playwright_cookie[key] = cookie_dict[key]
                playwright_cookies.append(playwright_cookie)

            await self.context.add_cookies(playwright_cookies)
            print(f"✅ [Sentience] Injected {len(state.cookies)} cookie(s)")

        # Inject LocalStorage
        if state.origins:
            for origin_data in state.origins:
                origin = origin_data.origin
                if not origin:
                    continue

                # localStorage is written from a page on that origin, so visit
                # the origin first. Failures are reported and do not abort the
                # remaining origins.
                try:
                    await self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

                    if origin_data.localStorage:
                        localStorage_dict = {
                            item.name: item.value for item in origin_data.localStorage
                        }
                        await self.page.evaluate(
                            """(localStorage_data) => {
                                for (const [key, value] of Object.entries(localStorage_data)) {
                                    localStorage.setItem(key, value);
                                }
                            }""",
                            localStorage_dict,
                        )
                        print(
                            f"✅ [Sentience] Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
                        )
                except Exception as e:
                    print(f"⚠️ [Sentience] Failed to inject localStorage for {origin}: {e}")

    async def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
        """Poll for window.sentience to be available (async)

        Args:
            timeout_sec: Maximum time to keep polling, in seconds.

        Returns:
            True once the page reports the extension as ready, False on
            timeout (a warning with the last observed status is emitted).
        """
        # A monotonic clock keeps the timeout immune to wall-clock adjustments.
        deadline = time.monotonic() + timeout_sec
        last_error = None

        while time.monotonic() < deadline:
            try:
                result = await self.page.evaluate(
                    """() => {
                        if (typeof window.sentience === 'undefined') {
                            return { ready: false, reason: 'window.sentience undefined' };
                        }
                        if (window.sentience._wasmModule === null) {
                            return { ready: false, reason: 'WASM module not fully loaded' };
                        }
                        return { ready: true };
                    }
                    """
                )

                if isinstance(result, dict):
                    if result.get("ready"):
                        return True
                    last_error = result.get("reason", "Unknown error")
            except Exception as e:
                # Evaluation errors are remembered for the warning and polling continues.
                last_error = f"Evaluation error: {str(e)}"

            await asyncio.sleep(0.3)

        if last_error:
            import warnings

            warnings.warn(f"Extension wait timeout. Last status: {last_error}")

        return False

    async def close(self, output_path: str | Path | None = None) -> str | None:
        """
        Close browser and cleanup (async)

        Every cleanup step runs even if an earlier one raises, so a failing
        context shutdown no longer leaves the Playwright driver running or the
        temporary extension directory behind. Calling close() more than once
        is harmless.

        Args:
            output_path: Optional path to rename the video file to

        Returns:
            Path to video file if recording was enabled, None otherwise
        """
        temp_video_path = None

        # The video path has to be queried before the context is closed.
        if self.record_video_dir:
            try:
                if self.page and self.page.video:
                    temp_video_path = await self.page.video.path()
                elif self.context:
                    for page in self.context.pages:
                        if page.video:
                            temp_video_path = await page.video.path()
                            break
            except Exception:
                # Best effort: a missing video must not prevent shutdown.
                pass

        try:
            if self.context:
                await self.context.close()
        finally:
            # The page belongs to the context; drop both references together.
            self.context = None
            self.page = None
            try:
                if self.playwright:
                    await self.playwright.stop()
            finally:
                self.playwright = None
                extension_path = self._extension_path
                self._extension_path = None
                if extension_path and os.path.exists(extension_path):
                    shutil.rmtree(extension_path)

        final_path = temp_video_path
        if temp_video_path and output_path and os.path.exists(temp_video_path):
            try:
                output_path = str(output_path)
                Path(output_path).parent.mkdir(parents=True, exist_ok=True)
                shutil.move(temp_video_path, output_path)
                final_path = output_path
            except Exception as e:
                import warnings

                warnings.warn(f"Failed to rename video file: {e}")
                final_path = temp_video_path

        return final_path

    async def __aenter__(self):
        """Async context manager entry"""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit"""
        await self.close()

    @classmethod
    async def from_existing(
        cls,
        context: BrowserContext,
        api_key: str | None = None,
        api_url: str | None = None,
    ) -> "AsyncSentienceBrowser":
        """
        Create AsyncSentienceBrowser from an existing Playwright BrowserContext.

        Uses the context's first page, or opens a new page if it has none.

        Args:
            context: Existing Playwright BrowserContext
            api_key: Optional API key for server-side processing
            api_url: Optional API URL

        Returns:
            AsyncSentienceBrowser instance configured to use the existing context
        """
        instance = cls(api_key=api_key, api_url=api_url)
        instance.context = context
        pages = context.pages
        instance.page = pages[0] if pages else await context.new_page()

        # Apply stealth if available
        if STEALTH_AVAILABLE:
            await stealth_async(instance.page)

        # Wait for extension to be ready
        await asyncio.sleep(0.5)

        return instance

    @classmethod
    async def from_page(
        cls,
        page: Page,
        api_key: str | None = None,
        api_url: str | None = None,
    ) -> "AsyncSentienceBrowser":
        """
        Create AsyncSentienceBrowser from an existing Playwright Page.

        Args:
            page: Existing Playwright Page
            api_key: Optional API key for server-side processing
            api_url: Optional API URL

        Returns:
            AsyncSentienceBrowser instance configured to use the existing page
        """
        instance = cls(api_key=api_key, api_url=api_url)
        instance.page = page
        instance.context = page.context

        # Apply stealth if available
        if STEALTH_AVAILABLE:
            await stealth_async(instance.page)

        # Wait for extension to be ready
        await asyncio.sleep(0.5)

        return instance
496
+
497
+
498
+ # ========== Async Snapshot Functions ==========
499
+
500
+
501
async def snapshot_async(
    browser: AsyncSentienceBrowser,
    options: SnapshotOptions | None = None,
) -> Snapshot:
    """
    Capture a snapshot of the page currently shown by ``browser`` (async).

    The server-side API is used when it is requested (explicitly through
    ``options.use_api``, or implicitly because the browser has an API key)
    and the browser actually holds an API key; otherwise the local
    extension produces the snapshot.

    Args:
        browser: AsyncSentienceBrowser instance
        options: Snapshot options (screenshot, limit, filter, etc.).
            Default options are used when None.

    Returns:
        Snapshot object

    Example:
        # Basic snapshot with defaults
        snap = await snapshot_async(browser)

        # With options
        snap = await snapshot_async(browser, SnapshotOptions(
            screenshot=True,
            limit=100,
            show_overlay=True
        ))
    """
    opts = SnapshotOptions() if options is None else options

    # An explicit use_api setting wins; otherwise having a key implies the API.
    if opts.use_api is None:
        wants_api = browser.api_key is not None
    else:
        wants_api = opts.use_api

    if wants_api and browser.api_key:
        # Server-side API (Pro/Enterprise tier)
        return await _snapshot_via_api_async(browser, opts)

    # Local extension (Free tier)
    return await _snapshot_via_extension_async(browser, opts)
542
+
543
+
544
async def _snapshot_via_extension_async(
    browser: AsyncSentienceBrowser,
    options: SnapshotOptions,
) -> Snapshot:
    """Produce a snapshot through the local extension (Free tier) - async.

    Args:
        browser: Started AsyncSentienceBrowser.
        options: Snapshot options; only non-default screenshot/limit/filter
            values are forwarded to the extension.

    Returns:
        Snapshot built from the extension's result.

    Raises:
        RuntimeError: If the browser has no page, or window.sentience does
            not appear within 5 seconds.
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    # Give the extension up to 5 s to expose its page API.
    try:
        await browser.page.wait_for_function(
            "typeof window.sentience !== 'undefined'",
            timeout=5000,
        )
    except Exception as wait_error:
        # Gather whatever the page can still tell us for the error message.
        try:
            diag = await browser.page.evaluate(
                """() => ({
                    sentience_defined: typeof window.sentience !== 'undefined',
                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
                    url: window.location.href
                })"""
            )
        except Exception:
            diag = {"error": "Could not gather diagnostics"}

        message = (
            f"Sentience extension failed to inject window.sentience API. "
            f"Is the extension loaded? Diagnostics: {diag}"
        )
        raise RuntimeError(message) from wait_error

    # Forward only the settings that differ from the defaults assumed here.
    ext_options: dict[str, Any] = {}
    if options.screenshot is not False:
        ext_options["screenshot"] = options.screenshot
    if options.limit != 50:
        ext_options["limit"] = options.limit
    if options.filter is not None:
        if hasattr(options.filter, "model_dump"):
            ext_options["filter"] = options.filter.model_dump()
        else:
            ext_options["filter"] = options.filter

    # Ask the extension for the snapshot.
    snap_data = await browser.page.evaluate(
        """
        (options) => {
            return window.sentience.snapshot(options);
        }
        """,
        ext_options,
    )

    # Persist the raw elements when tracing was requested.
    if options.save_trace:
        from sentience.snapshot import _save_trace_to_file

        _save_trace_to_file(snap_data.get("raw_elements", []), options.trace_path)

    # Draw the overlay, but only when there is something to draw.
    if options.show_overlay and (overlay_elements := snap_data.get("raw_elements", [])):
        await browser.page.evaluate(
            """
            (elements) => {
                if (window.sentience && window.sentience.showOverlay) {
                    window.sentience.showOverlay(elements, null);
                }
            }
            """,
            overlay_elements,
        )

    # Constructing Snapshot validates the payload.
    return Snapshot(**snap_data)
620
+
621
+
622
async def _snapshot_via_api_async(
    browser: AsyncSentienceBrowser,
    options: SnapshotOptions,
) -> Snapshot:
    """Take snapshot using server-side API (Pro/Enterprise tier) - async

    Raw element data is collected from the local extension, posted to
    ``{browser.api_url}/v1/snapshot``, and the server's result is merged
    with the locally captured screenshot.

    Args:
        browser: Started AsyncSentienceBrowser with api_key and api_url set.
        options: Snapshot options (goal, limit, filter, screenshot, ...).

    Returns:
        Snapshot built from the API response.

    Raises:
        RuntimeError: If the browser has no page, the extension is not
            injected, aiohttp is missing, or the API request/processing fails.
        ValueError: If the API key or URL is missing, or the payload exceeds
            MAX_PAYLOAD_BYTES.
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    if not browser.api_key:
        raise ValueError("API key required for server-side processing")

    if not browser.api_url:
        raise ValueError("API URL required for server-side processing")

    # Wait for extension injection
    try:
        await browser.page.wait_for_function(
            "typeof window.sentience !== 'undefined'", timeout=5000
        )
    except Exception as e:
        raise RuntimeError(
            "Sentience extension failed to inject. Cannot collect raw data for API processing."
        ) from e

    # Step 1: Get raw data from local extension
    raw_options: dict[str, Any] = {}
    if options.screenshot is not False:
        raw_options["screenshot"] = options.screenshot

    raw_result = await browser.page.evaluate(
        """
        (options) => {
            return window.sentience.snapshot(options);
        }
        """,
        raw_options,
    )

    # Save trace if requested
    if options.save_trace:
        from sentience.snapshot import _save_trace_to_file

        _save_trace_to_file(raw_result.get("raw_elements", []), options.trace_path)

    # Step 2: Send to server for smart ranking/filtering
    import json

    from sentience.snapshot import MAX_PAYLOAD_BYTES

    # Accept both model objects and plain dicts as filter, matching the
    # extension code path.
    filter_payload = None
    if options.filter:
        filter_payload = (
            options.filter.model_dump()
            if hasattr(options.filter, "model_dump")
            else options.filter
        )

    payload = {
        "raw_elements": raw_result.get("raw_elements", []),
        "url": raw_result.get("url", ""),
        "viewport": raw_result.get("viewport"),
        "goal": options.goal,
        "options": {
            "limit": options.limit,
            "filter": filter_payload,
        },
    }

    # Check payload size
    payload_json = json.dumps(payload)
    payload_size = len(payload_json.encode("utf-8"))
    if payload_size > MAX_PAYLOAD_BYTES:
        raise ValueError(
            f"Payload size ({payload_size / 1024 / 1024:.2f}MB) exceeds server limit "
            f"({MAX_PAYLOAD_BYTES / 1024 / 1024:.0f}MB). "
            f"Try reducing the number of elements on the page or filtering elements."
        )

    headers = {
        "Authorization": f"Bearer {browser.api_key}",
        "Content-Type": "application/json",
    }

    # Only the import itself is guarded, so an unrelated ImportError raised
    # later is not misreported as "aiohttp missing".
    try:
        import aiohttp
    except ImportError as e:
        raise RuntimeError(
            "aiohttp is required for async API calls. Install it with: pip install aiohttp"
        ) from e

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{browser.api_url}/v1/snapshot",
                data=payload_json,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=30),
            ) as response:
                response.raise_for_status()
                api_result = await response.json()

        # Merge API result with local data (the screenshot only exists locally)
        snapshot_data = {
            "status": api_result.get("status", "success"),
            "timestamp": api_result.get("timestamp"),
            "url": api_result.get("url", raw_result.get("url", "")),
            "viewport": api_result.get("viewport", raw_result.get("viewport")),
            "elements": api_result.get("elements", []),
            "screenshot": raw_result.get("screenshot"),
            "screenshot_format": raw_result.get("screenshot_format"),
            "error": api_result.get("error"),
        }

        # Show visual overlay if requested
        if options.show_overlay:
            elements = api_result.get("elements", [])
            if elements:
                await browser.page.evaluate(
                    """
                    (elements) => {
                        if (window.sentience && window.sentience.showOverlay) {
                            window.sentience.showOverlay(elements, null);
                        }
                    }
                    """,
                    elements,
                )

        return Snapshot(**snapshot_data)
    except Exception as e:
        # Keep the RuntimeError contract, but chain the cause for debugging.
        raise RuntimeError(f"API request failed: {e}") from e
745
+
746
+
747
+ # ========== Async Action Functions ==========
748
+
749
+
750
async def click_async(
    browser: AsyncSentienceBrowser,
    element_id: int,
    use_mouse: bool = True,
    take_snapshot: bool = False,
) -> ActionResult:
    """
    Click the element with the given snapshot ID (async).

    With ``use_mouse`` the element is located in a fresh snapshot and clicked
    at the centre of its bounding box with Playwright's mouse; if it cannot
    be located (or the snapshot fails) the extension's own
    ``window.sentience.click`` is used instead.

    Args:
        browser: AsyncSentienceBrowser instance
        element_id: Element ID from snapshot
        use_mouse: If True, use Playwright's mouse.click() at element center
        take_snapshot: Whether to take snapshot after action

    Returns:
        ActionResult

    Raises:
        RuntimeError: If the browser has not been started.
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    async def _extension_click():
        """Click through the extension; errors propagate to the caller."""
        return await browser.page.evaluate(
            """
            (id) => {
                return window.sentience.click(id);
            }
            """,
            element_id,
        )

    async def _extension_click_lenient():
        """Click through the extension, reporting success even on error.

        NOTE(review): presumably errors here are expected when the click
        itself triggers a navigation - confirm.
        """
        try:
            return await _extension_click()
        except Exception:
            return True

    started_at = time.time()
    url_before = browser.page.url

    if not use_mouse:
        success = await _extension_click()
    else:
        try:
            snap = await snapshot_async(browser)
            target = next((el for el in snap.elements if el.id == element_id), None)

            if target:
                center_x = target.bbox.x + target.bbox.width / 2
                center_y = target.bbox.y + target.bbox.height / 2
                try:
                    await browser.page.mouse.click(center_x, center_y)
                except Exception:
                    # A failing mouse click is still reported as success.
                    pass
                success = True
            else:
                success = await _extension_click_lenient()
        except Exception:
            # Snapshot or element lookup failed: fall back to the extension.
            success = await _extension_click_lenient()

    # Give a navigation or DOM update a moment to happen.
    try:
        await browser.page.wait_for_timeout(500)
    except Exception:
        pass

    duration_ms = int((time.time() - started_at) * 1000)

    # Compare URLs; being unable to read the URL counts as a change.
    try:
        url_after = browser.page.url
        url_changed = url_before != url_after
    except Exception:
        url_after = url_before
        url_changed = True

    outcome: str | None
    if url_changed:
        outcome = "navigated"
    else:
        outcome = "dom_updated" if success else "error"

    # Optional follow-up snapshot (best effort).
    snapshot_after: Snapshot | None = None
    if take_snapshot:
        try:
            snapshot_after = await snapshot_async(browser)
        except Exception:
            pass

    if success:
        failure = None
    else:
        failure = {
            "code": "click_failed",
            "reason": "Element not found or not clickable",
        }

    return ActionResult(
        success=success,
        duration_ms=duration_ms,
        outcome=outcome,
        url_changed=url_changed,
        snapshot_after=snapshot_after,
        error=failure,
    )
873
+
874
+
875
async def type_text_async(
    browser: AsyncSentienceBrowser, element_id: int, text: str, take_snapshot: bool = False
) -> ActionResult:
    """
    Focus an element and type text into it (async).

    The element is looked up in ``window.sentience_registry`` and focused;
    the text is then sent through Playwright's keyboard.

    Args:
        browser: AsyncSentienceBrowser instance
        element_id: Element ID from snapshot
        text: Text to type
        take_snapshot: Whether to take snapshot after action

    Returns:
        ActionResult; a failed result with code "focus_failed" when the
        element is not in the registry.

    Raises:
        RuntimeError: If the browser has not been started.
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    started_at = time.time()
    url_before = browser.page.url

    # Keyboard input goes to the focused element, so focus the target first.
    did_focus = await browser.page.evaluate(
        """
        (id) => {
            const el = window.sentience_registry[id];
            if (el) {
                el.focus();
                return true;
            }
            return false;
        }
        """,
        element_id,
    )

    if not did_focus:
        elapsed_ms = int((time.time() - started_at) * 1000)
        return ActionResult(
            success=False,
            duration_ms=elapsed_ms,
            outcome="error",
            error={"code": "focus_failed", "reason": "Element not found"},
        )

    # Send the text through Playwright's keyboard.
    await browser.page.keyboard.type(text)

    duration_ms = int((time.time() - started_at) * 1000)
    url_changed = url_before != browser.page.url

    if url_changed:
        outcome = "navigated"
    else:
        outcome = "dom_updated"

    snapshot_after: Snapshot | None = (
        await snapshot_async(browser) if take_snapshot else None
    )

    return ActionResult(
        success=True,
        duration_ms=duration_ms,
        outcome=outcome,
        url_changed=url_changed,
        snapshot_after=snapshot_after,
    )
939
+
940
+
941
async def press_async(
    browser: AsyncSentienceBrowser, key: str, take_snapshot: bool = False
) -> ActionResult:
    """
    Press a single keyboard key on the current page (async).

    Args:
        browser: AsyncSentienceBrowser instance
        key: Key to press (e.g., "Enter", "Escape", "Tab")
        take_snapshot: Whether to take snapshot after action

    Returns:
        ActionResult (always successful when no exception is raised)

    Raises:
        RuntimeError: If the browser has not been started.
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    started_at = time.time()
    url_before = browser.page.url

    # Send the key through Playwright, then allow 500 ms for any resulting
    # navigation or DOM update.
    await browser.page.keyboard.press(key)
    await browser.page.wait_for_timeout(500)

    duration_ms = int((time.time() - started_at) * 1000)
    url_changed = url_before != browser.page.url

    if url_changed:
        outcome = "navigated"
    else:
        outcome = "dom_updated"

    snapshot_after: Snapshot | None = (
        await snapshot_async(browser) if take_snapshot else None
    )

    return ActionResult(
        success=True,
        duration_ms=duration_ms,
        outcome=outcome,
        url_changed=url_changed,
        snapshot_after=snapshot_after,
    )
984
+
985
+
986
async def _highlight_rect_async(
    browser: AsyncSentienceBrowser, rect: dict[str, float], duration_sec: float = 2.0
) -> None:
    """Draw a temporary red-bordered overlay over a rectangle (async).

    Does nothing when the browser has no page.

    Args:
        browser: AsyncSentienceBrowser instance
        rect: Mapping with "x", "y", "w" and "h" keys (CSS pixels, used for
            a position: fixed element).
        duration_sec: Seconds before the overlay fades out and is removed.
    """
    if not browser.page:
        return

    # Millisecond timestamp used as the overlay element's DOM id.
    overlay_id = f"sentience_highlight_{int(time.time() * 1000)}"

    payload = {
        "rect": {key: rect[key] for key in ("x", "y", "w", "h")},
        "highlightId": overlay_id,
        "durationSec": duration_sec,
    }

    await browser.page.evaluate(
        """
        (args) => {
            const { rect, highlightId, durationSec } = args;
            const overlay = document.createElement('div');
            overlay.id = highlightId;
            overlay.style.position = 'fixed';
            overlay.style.left = `${rect.x}px`;
            overlay.style.top = `${rect.y}px`;
            overlay.style.width = `${rect.w}px`;
            overlay.style.height = `${rect.h}px`;
            overlay.style.border = '3px solid red';
            overlay.style.borderRadius = '2px';
            overlay.style.boxSizing = 'border-box';
            overlay.style.pointerEvents = 'none';
            overlay.style.zIndex = '999999';
            overlay.style.backgroundColor = 'rgba(255, 0, 0, 0.1)';
            overlay.style.transition = 'opacity 0.3s ease-out';

            document.body.appendChild(overlay);

            setTimeout(() => {
                overlay.style.opacity = '0';
                setTimeout(() => {
                    if (overlay.parentNode) {
                        overlay.parentNode.removeChild(overlay);
                    }
                }, 300);
            }, durationSec * 1000);
        }
        """,
        payload,
    )
1039
+
1040
+
1041
async def click_rect_async(
    browser: AsyncSentienceBrowser,
    rect: dict[str, float] | BBox,
    highlight: bool = True,
    highlight_duration: float = 2.0,
    take_snapshot: bool = False,
) -> ActionResult:
    """
    Click at the center of a rectangle (async)

    Args:
        browser: AsyncSentienceBrowser instance
        rect: BBox object, or dictionary with "x", "y" and either "w"/"h" or
            "width"/"height" keys. Missing or None entries are treated as 0.
        highlight: Whether to show a red border highlight when clicking
        highlight_duration: How long to show the highlight in seconds
        take_snapshot: Whether to take snapshot after action

    Returns:
        ActionResult. A non-positive width or height yields success=False with
        error code "invalid_rect" (no click is attempted). A click that raises
        yields success=False with error code "click_failed"; the exception is
        not propagated.

    Raises:
        RuntimeError: If the browser has no page yet
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    # Handle BBox object or dict
    if isinstance(rect, BBox):
        x, y, w, h = rect.x, rect.y, rect.width, rect.height
    else:
        # Chained `or` treats a missing key and an explicit None/0 alike, so a
        # malformed dict ends up in the invalid_rect branch below instead of
        # raising TypeError during comparison or arithmetic.
        x = rect.get("x") or 0
        y = rect.get("y") or 0
        w = rect.get("w") or rect.get("width") or 0
        h = rect.get("h") or rect.get("height") or 0

    if w <= 0 or h <= 0:
        return ActionResult(
            success=False,
            duration_ms=0,
            outcome="error",
            error={
                "code": "invalid_rect",
                "reason": "Rectangle width and height must be positive",
            },
        )

    start_time = time.time()
    url_before = browser.page.url

    # Calculate center of rectangle
    center_x = x + w / 2
    center_y = y + h / 2

    # Show highlight before clicking
    if highlight:
        await _highlight_rect_async(browser, {"x": x, "y": y, "w": w, "h": h}, highlight_duration)
        await browser.page.wait_for_timeout(50)

    # Use Playwright's native mouse click. Any failure is reported through the
    # ActionResult rather than raised, so the catch is deliberately broad.
    success = True
    error_msg: str | None = None
    try:
        await browser.page.mouse.click(center_x, center_y)
    except Exception as e:
        success = False
        # Fall back to a generic reason when the exception has no message
        error_msg = str(e) or "Click failed"

    # Wait a bit for navigation/DOM updates
    await browser.page.wait_for_timeout(500)

    duration_ms = int((time.time() - start_time) * 1000)
    url_changed = url_before != browser.page.url

    # Determine outcome: a URL change takes precedence over the click status
    if url_changed:
        outcome = "navigated"
    elif success:
        outcome = "dom_updated"
    else:
        outcome = "error"

    # Optional snapshot after
    snapshot_after: Snapshot | None = None
    if take_snapshot:
        snapshot_after = await snapshot_async(browser)

    return ActionResult(
        success=success,
        duration_ms=duration_ms,
        outcome=outcome,
        url_changed=url_changed,
        snapshot_after=snapshot_after,
        error=None if success else {"code": "click_failed", "reason": error_msg},
    )
1143
+
1144
+
1145
# ========== Re-export Query Functions (Pure Functions - No Async Needed) ==========

# Query functions (find, query) are pure functions that work with Snapshot objects.
# They don't need async versions, but we re-export them here for convenience.
from sentience.query import find, query

# Names exported by a star-import of this module. The private helper
# _highlight_rect_async is not listed.
__all__ = [
    "AsyncSentienceBrowser",
    "snapshot_async",
    "click_async",
    "type_text_async",
    "press_async",
    "click_rect_async",
    "find",
    "query",
]