sentienceapi 0.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (82) hide show
  1. sentience/__init__.py +253 -0
  2. sentience/_extension_loader.py +195 -0
  3. sentience/action_executor.py +215 -0
  4. sentience/actions.py +1020 -0
  5. sentience/agent.py +1181 -0
  6. sentience/agent_config.py +46 -0
  7. sentience/agent_runtime.py +424 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +108 -0
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +343 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +427 -0
  21. sentience/base_agent.py +196 -0
  22. sentience/browser.py +1215 -0
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cli.py +130 -0
  26. sentience/cloud_tracing.py +807 -0
  27. sentience/constants.py +6 -0
  28. sentience/conversational_agent.py +543 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +188 -0
  31. sentience/extension/background.js +104 -0
  32. sentience/extension/content.js +161 -0
  33. sentience/extension/injected_api.js +914 -0
  34. sentience/extension/manifest.json +36 -0
  35. sentience/extension/pkg/sentience_core.d.ts +51 -0
  36. sentience/extension/pkg/sentience_core.js +323 -0
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  39. sentience/extension/release.json +115 -0
  40. sentience/formatting.py +15 -0
  41. sentience/generator.py +202 -0
  42. sentience/inspector.py +367 -0
  43. sentience/llm_interaction_handler.py +191 -0
  44. sentience/llm_provider.py +875 -0
  45. sentience/llm_provider_utils.py +120 -0
  46. sentience/llm_response_builder.py +153 -0
  47. sentience/models.py +846 -0
  48. sentience/ordinal.py +280 -0
  49. sentience/overlay.py +222 -0
  50. sentience/protocols.py +228 -0
  51. sentience/query.py +303 -0
  52. sentience/read.py +188 -0
  53. sentience/recorder.py +589 -0
  54. sentience/schemas/trace_v1.json +335 -0
  55. sentience/screenshot.py +100 -0
  56. sentience/sentience_methods.py +86 -0
  57. sentience/snapshot.py +706 -0
  58. sentience/snapshot_diff.py +126 -0
  59. sentience/text_search.py +262 -0
  60. sentience/trace_event_builder.py +148 -0
  61. sentience/trace_file_manager.py +197 -0
  62. sentience/trace_indexing/__init__.py +27 -0
  63. sentience/trace_indexing/index_schema.py +199 -0
  64. sentience/trace_indexing/indexer.py +414 -0
  65. sentience/tracer_factory.py +322 -0
  66. sentience/tracing.py +449 -0
  67. sentience/utils/__init__.py +40 -0
  68. sentience/utils/browser.py +46 -0
  69. sentience/utils/element.py +257 -0
  70. sentience/utils/formatting.py +59 -0
  71. sentience/utils.py +296 -0
  72. sentience/verification.py +380 -0
  73. sentience/visual_agent.py +2058 -0
  74. sentience/wait.py +139 -0
  75. sentienceapi-0.95.0.dist-info/METADATA +984 -0
  76. sentienceapi-0.95.0.dist-info/RECORD +82 -0
  77. sentienceapi-0.95.0.dist-info/WHEEL +5 -0
  78. sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
  79. sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
  80. sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
  81. sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
  82. sentienceapi-0.95.0.dist-info/top_level.txt +1 -0
sentience/browser.py ADDED
@@ -0,0 +1,1215 @@
1
+ """
2
+ Playwright browser harness with extension loading
3
+ """
4
+
5
+ import asyncio
6
+ import logging
7
+ import os
8
+ import platform
9
+ import shutil
10
+ import tempfile
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Optional, Union
14
+ from urllib.parse import urlparse
15
+
16
+ from playwright.async_api import BrowserContext as AsyncBrowserContext
17
+ from playwright.async_api import Page as AsyncPage
18
+ from playwright.async_api import Playwright as AsyncPlaywright
19
+ from playwright.async_api import async_playwright
20
+ from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
21
+
22
+ from sentience._extension_loader import find_extension_path
23
+ from sentience.constants import SENTIENCE_API_URL
24
+ from sentience.models import ProxyConfig, StorageState, Viewport
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ # Import stealth for bot evasion (optional - graceful fallback if not available)
29
+ try:
30
+ from playwright_stealth import stealth_async, stealth_sync
31
+
32
+ STEALTH_AVAILABLE = True
33
+ except ImportError:
34
+ STEALTH_AVAILABLE = False
35
+
36
+
37
+ class SentienceBrowser:
38
+ """Main browser session with Sentience extension loaded"""
39
+
40
+ def __init__(
41
+ self,
42
+ api_key: str | None = None,
43
+ api_url: str | None = None,
44
+ headless: bool | None = None,
45
+ proxy: str | None = None,
46
+ user_data_dir: str | None = None,
47
+ storage_state: str | Path | StorageState | dict | None = None,
48
+ record_video_dir: str | Path | None = None,
49
+ record_video_size: dict[str, int] | None = None,
50
+ viewport: Viewport | dict[str, int] | None = None,
51
+ device_scale_factor: float | None = None,
52
+ ):
53
+ """
54
+ Initialize Sentience browser
55
+
56
+ Args:
57
+ api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
58
+ If None, uses free tier (local extension only)
59
+ api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
60
+ If None and api_key is provided, uses default URL
61
+ If None and no api_key, uses free tier (local extension only)
62
+ If 'local' or Docker sidecar URL, uses Enterprise tier
63
+ headless: Whether to run in headless mode. If None, defaults to True in CI, False otherwise
64
+ proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
65
+ Supports HTTP, HTTPS, and SOCKS5 proxies
66
+ Falls back to SENTIENCE_PROXY environment variable if not provided
67
+ user_data_dir: Optional path to user data directory for persistent sessions.
68
+ If None, uses temporary directory (session not persisted).
69
+ If provided, cookies and localStorage persist across browser restarts.
70
+ storage_state: Optional storage state to inject (cookies + localStorage).
71
+ Can be:
72
+ - Path to JSON file (str or Path)
73
+ - StorageState object
74
+ - Dictionary with 'cookies' and/or 'origins' keys
75
+ If provided, browser starts with pre-injected authentication.
76
+ record_video_dir: Optional directory path to save video recordings.
77
+ If provided, browser will record video of all pages.
78
+ Videos are saved as .webm files in the specified directory.
79
+ If None, no video recording is performed.
80
+ record_video_size: Optional video resolution as dict with 'width' and 'height' keys.
81
+ Examples: {"width": 1280, "height": 800} (default)
82
+ {"width": 1920, "height": 1080} (1080p)
83
+ If None, defaults to 1280x800.
84
+ viewport: Optional viewport size as Viewport object or dict with 'width' and 'height' keys.
85
+ Examples: Viewport(width=1280, height=800) (default)
86
+ Viewport(width=1920, height=1080) (Full HD)
87
+ {"width": 1280, "height": 800} (dict also supported)
88
+ If None, defaults to Viewport(width=1280, height=800).
89
+ """
90
+ self.api_key = api_key
91
+ # Only set api_url if api_key is provided, otherwise None (free tier)
92
+ # Defaults to production API if key is present but url is missing
93
+ if self.api_key and not api_url:
94
+ self.api_url = SENTIENCE_API_URL
95
+ else:
96
+ self.api_url = api_url
97
+
98
+ # Determine headless mode
99
+ if headless is None:
100
+ # Default to False for local dev, True for CI
101
+ self.headless = os.environ.get("CI", "").lower() == "true"
102
+ else:
103
+ self.headless = headless
104
+
105
+ # Support proxy from argument or environment variable
106
+ self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")
107
+
108
+ # Auth injection support
109
+ self.user_data_dir = user_data_dir
110
+ self.storage_state = storage_state
111
+
112
+ # Video recording support
113
+ self.record_video_dir = record_video_dir
114
+ self.record_video_size = record_video_size or {"width": 1280, "height": 800}
115
+
116
+ # Viewport configuration - convert dict to Viewport if needed
117
+ if viewport is None:
118
+ self.viewport = Viewport(width=1280, height=800)
119
+ elif isinstance(viewport, dict):
120
+ self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
121
+ else:
122
+ self.viewport = viewport
123
+
124
+ # Device scale factor for high-DPI emulation
125
+ self.device_scale_factor = device_scale_factor
126
+
127
+ self.playwright: Playwright | None = None
128
+ self.context: BrowserContext | None = None
129
+ self.page: Page | None = None
130
+ self._extension_path: str | None = None
131
+
132
def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
    """
    Parse proxy connection string into ProxyConfig.

    This method never raises for malformed input: every failure is logged as a
    warning and reported to the caller as None.

    Args:
        proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')

    Returns:
        ProxyConfig object, or None if the string is empty, uses an unsupported
        scheme, lacks a hostname/port, or cannot be parsed
    """
    if not proxy_string:
        return None

    try:
        parsed = urlparse(proxy_string)

        # Validate scheme
        if parsed.scheme not in ("http", "https", "socks5"):
            logger.warning(
                f"Unsupported proxy scheme: {parsed.scheme}. Supported: http, https, socks5"
            )
            return None

        # Validate host and port. Accessing parsed.port raises ValueError for a
        # non-numeric or out-of-range port; that is handled by the except below.
        if not parsed.hostname or not parsed.port:
            logger.warning(
                "Proxy URL must include hostname and port. Expected format: http://username:password@host:port"
            )
            return None

        # urlparse strips the square brackets from IPv6 literals; they must be
        # restored or "host:port" becomes ambiguous (e.g. "::1:8080").
        host = parsed.hostname
        if ":" in host:
            host = f"[{host}]"

        # Build server URL
        server = f"{parsed.scheme}://{host}:{parsed.port}"

        # Create ProxyConfig with optional credentials (empty strings become None)
        return ProxyConfig(
            server=server,
            username=parsed.username or None,
            password=parsed.password or None,
        )

    except Exception as e:
        # Deliberately broad: proxy parsing is best-effort and must not abort startup
        logger.warning(
            f"Invalid proxy configuration: {e}. Expected format: http://username:password@host:port"
        )
        return None
180
+
181
def start(self) -> None:
    """
    Launch browser with extension loaded

    Copies the extension into a temporary directory, starts Playwright, opens a
    persistent Chromium context with the extension enabled, then applies the
    optional storage state and stealth patches.

    If any step fails, everything this call created (context, Playwright
    driver, temporary extension directory) is torn down before the original
    exception is re-raised, so a failed start() leaks nothing.
    """
    # Get extension source path using shared utility
    extension_source = find_extension_path()

    # Create temporary extension bundle
    # We copy it to a temp dir to avoid file locking issues and ensure clean state
    self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")

    # Track only what THIS call creates so rollback never touches foreign handles
    started_playwright = None
    opened_context = None
    try:
        shutil.copytree(extension_source, self._extension_path, dirs_exist_ok=True)

        started_playwright = sync_playwright().start()
        self.playwright = started_playwright

        # Chromium keeps only the last occurrence of a repeated switch, so all
        # features to disable are collected and emitted as ONE --disable-features.
        disabled_features = [
            # WebRTC leak protection (prevents real IP exposure when using proxies/VPNs)
            "WebRtcHideLocalIpsWithMdns",
        ]

        # Build launch arguments
        args = [
            f"--disable-extensions-except={self._extension_path}",
            f"--load-extension={self._extension_path}",
            "--disable-blink-features=AutomationControlled",  # Hides 'navigator.webdriver'
            "--disable-infobars",
            "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
        ]

        system = platform.system()

        # Only add --no-sandbox on Linux (causes crashes on macOS)
        # macOS sandboxing works fine and the flag actually causes crashes
        if system == "Linux":
            args.append("--no-sandbox")

        # Add GPU-disabling flags for macOS to prevent Chrome for Testing crash-on-exit
        # These flags help avoid EXC_BAD_ACCESS crashes during browser shutdown
        if system == "Darwin":  # macOS
            disabled_features.append("TranslateUI")  # Disable translate UI
            args.extend(
                [
                    "--disable-gpu",
                    "--disable-software-rasterizer",
                    "--disable-dev-shm-usage",
                    "--disable-breakpad",  # Disable crash reporter to prevent macOS crash dialogs
                    "--disable-crash-reporter",  # Disable crash reporter UI
                    "--disable-crash-handler",  # Disable crash handler completely
                    "--disable-in-process-stack-traces",  # Disable stack trace collection
                    "--disable-hang-monitor",  # Disable hang detection
                    "--disable-background-networking",  # Disable background networking
                    "--disable-background-timer-throttling",  # Disable background throttling
                    "--disable-backgrounding-occluded-windows",  # Disable backgrounding
                    "--disable-renderer-backgrounding",  # Disable renderer backgrounding
                    "--disable-ipc-flooding-protection",  # Disable IPC flooding protection
                    "--disable-logging",  # Disable logging to reduce stderr noise
                    "--log-level=3",  # Set log level to fatal only (suppresses warnings)
                ]
            )

        args.append("--disable-features=" + ",".join(disabled_features))

        # Handle headless mode correctly for extensions
        # 'headless=True' DOES NOT support extensions in standard Chrome
        # We must use 'headless="new"' (Chrome 112+) or run visible
        if self.headless:
            args.append("--headless=new")  # Use new headless mode via args

        # Parse proxy configuration if provided
        proxy_config = self._parse_proxy(self.proxy) if self.proxy else None

        # Handle User Data Directory (Persistence)
        if self.user_data_dir:
            user_data_dir = str(self.user_data_dir)
            Path(user_data_dir).mkdir(parents=True, exist_ok=True)
        else:
            user_data_dir = ""  # Ephemeral temp dir (existing behavior)

        # Build launch_persistent_context parameters
        launch_params = {
            "user_data_dir": user_data_dir,
            "headless": False,  # IMPORTANT: headless is driven by --headless=new above
            "args": args,
            "viewport": {"width": self.viewport.width, "height": self.viewport.height},
            # Remove "HeadlessChrome" from User Agent automatically
            "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            # Note: Don't set "channel" - let Playwright use its default managed Chromium
            # Setting channel=None doesn't force bundled Chromium and can still pick Chrome for Testing
        }

        # Add device scale factor if configured
        if self.device_scale_factor is not None:
            launch_params["device_scale_factor"] = self.device_scale_factor

        # Add proxy if configured
        if proxy_config:
            launch_params["proxy"] = proxy_config.to_playwright_dict()
            # Ignore HTTPS errors when using proxy (many residential proxies use self-signed certs)
            launch_params["ignore_https_errors"] = True
            logger.info(f"Using proxy: {proxy_config.server}")

        # Add video recording if configured
        if self.record_video_dir:
            video_dir = Path(self.record_video_dir)
            video_dir.mkdir(parents=True, exist_ok=True)
            launch_params["record_video_dir"] = str(video_dir)
            launch_params["record_video_size"] = self.record_video_size
            logger.info(
                f"Recording video to: {video_dir} (Resolution: {self.record_video_size['width']}x{self.record_video_size['height']})"
            )

        # Launch persistent context (required for extensions)
        # Note: We pass headless=False to launch_persistent_context because we handle
        # headless mode via the --headless=new arg above. This is a Playwright workaround.
        opened_context = self.playwright.chromium.launch_persistent_context(**launch_params)
        self.context = opened_context

        self.page = self.context.pages[0] if self.context.pages else self.context.new_page()

        # Inject storage state if provided (must be after context creation)
        if self.storage_state:
            self._inject_storage_state(self.storage_state)

        # Apply stealth if available
        if STEALTH_AVAILABLE:
            stealth_sync(self.page)
    except BaseException:
        # __exit__ is not invoked when __enter__ fails, so roll back here.
        # Each step is guarded so a teardown error cannot mask the real failure.
        if opened_context is not None:
            try:
                opened_context.close()
            except Exception as cleanup_error:
                logger.debug(f"Ignoring context close error during rollback: {cleanup_error}")
            self.context = None
            self.page = None
        if started_playwright is not None:
            try:
                started_playwright.stop()
            except Exception as cleanup_error:
                logger.debug(f"Ignoring playwright stop error during rollback: {cleanup_error}")
            self.playwright = None
        shutil.rmtree(self._extension_path, ignore_errors=True)
        self._extension_path = None
        raise

    # Wait a moment for extension to initialize
    time.sleep(0.5)
300
+
301
+ def goto(self, url: str) -> None:
302
+ """Navigate to a URL and ensure extension is ready"""
303
+ if not self.page:
304
+ raise RuntimeError("Browser not started. Call start() first.")
305
+
306
+ self.page.goto(url, wait_until="domcontentloaded")
307
+
308
+ # Wait for extension to be ready (injected into page)
309
+ if not self._wait_for_extension():
310
+ # Gather diagnostic info before failing
311
+ try:
312
+ diag = self.page.evaluate(
313
+ """() => ({
314
+ sentience_defined: typeof window.sentience !== 'undefined',
315
+ registry_defined: typeof window.sentience_registry !== 'undefined',
316
+ snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
317
+ extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
318
+ url: window.location.href
319
+ })"""
320
+ )
321
+ except Exception as e:
322
+ diag = f"Failed to get diagnostics: {str(e)}"
323
+
324
+ raise RuntimeError(
325
+ "Extension failed to load after navigation. Make sure:\n"
326
+ "1. Extension is built (cd sentience-chrome && ./build.sh)\n"
327
+ "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n"
328
+ "3. Check browser console for errors (run with headless=False to see console)\n"
329
+ f"4. Extension path: {self._extension_path}\n"
330
+ f"5. Diagnostic info: {diag}"
331
+ )
332
+
333
def _inject_storage_state(
    self, storage_state: str | Path | StorageState | dict
) -> None:  # noqa: C901
    """
    Apply saved cookies and localStorage entries to the running browser.

    Cookies are added to the context in one call. localStorage can only be
    written from a page on the matching origin, so the page is navigated to
    each stored origin in turn; a failure for one origin is logged and does not
    stop the remaining origins.

    Args:
        storage_state: JSON file path (str or Path), StorageState object, or a
            dict in storage-state form.

    Raises:
        ValueError: If storage_state is none of the supported types.
    """
    import json

    # Normalise every accepted input form to a StorageState
    if isinstance(storage_state, (str, Path)):
        with open(storage_state, encoding="utf-8") as handle:
            loaded = json.load(handle)
        state = StorageState.from_dict(loaded)
    elif isinstance(storage_state, StorageState):
        state = storage_state
    elif isinstance(storage_state, dict):
        state = StorageState.from_dict(storage_state)
    else:
        raise ValueError(
            f"Invalid storage_state type: {type(storage_state)}. "
            "Expected str, Path, StorageState, or dict."
        )

    # Cookies apply context-wide
    if state.cookies:
        required_fields = ("name", "value", "domain", "path")
        optional_fields = ("expires", "httpOnly", "secure", "sameSite")
        playwright_cookies = []
        for cookie in state.cookies:
            raw = cookie.model_dump()
            entry = {field: raw[field] for field in required_fields}
            # Optional attributes are forwarded only when set to a truthy value
            for field in optional_fields:
                if raw.get(field):
                    entry[field] = raw[field]
            playwright_cookies.append(entry)

        self.context.add_cookies(playwright_cookies)
        logger.debug(f"Injected {len(state.cookies)} cookie(s)")

    # localStorage must be written from a page loaded on the owning origin
    if state.origins:
        for origin_data in state.origins:
            origin = origin_data.origin
            if not origin:
                continue

            try:
                self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

                if origin_data.localStorage:
                    # Flatten to a plain {name: value} mapping for the page script
                    localStorage_dict = {}
                    for item in origin_data.localStorage:
                        localStorage_dict[item.name] = item.value
                    self.page.evaluate(
                        """(localStorage_data) => {
                            for (const [key, value] of Object.entries(localStorage_data)) {
                                localStorage.setItem(key, value);
                            }
                        }""",
                        localStorage_dict,
                    )
                    logger.debug(
                        f"Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
                    )
            except Exception as e:
                logger.warning(f"Failed to inject localStorage for {origin}: {e}")
418
+
419
+ def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
420
+ """Poll for window.sentience to be available"""
421
+ start_time = time.time()
422
+ last_error = None
423
+
424
+ while time.time() - start_time < timeout_sec:
425
+ try:
426
+ # Check if API exists and WASM is ready (optional check for _wasmModule)
427
+ result = self.page.evaluate(
428
+ """() => {
429
+ if (typeof window.sentience === 'undefined') {
430
+ return { ready: false, reason: 'window.sentience undefined' };
431
+ }
432
+ // Check if WASM loaded (if exposed) or if basic API works
433
+ // Note: injected_api.js defines window.sentience immediately,
434
+ // but _wasmModule might take a few ms to load.
435
+ if (window.sentience._wasmModule === null) {
436
+ // It's defined but WASM isn't linked yet
437
+ return { ready: false, reason: 'WASM module not fully loaded' };
438
+ }
439
+ // If _wasmModule is not exposed, that's okay - it might be internal
440
+ // Just verify the API structure is correct
441
+ return { ready: true };
442
+ }
443
+ """
444
+ )
445
+
446
+ if isinstance(result, dict):
447
+ if result.get("ready"):
448
+ return True
449
+ last_error = result.get("reason", "Unknown error")
450
+ except Exception as e:
451
+ # Continue waiting on errors
452
+ last_error = f"Evaluation error: {str(e)}"
453
+
454
+ time.sleep(0.3)
455
+
456
+ # Log the last error for debugging
457
+ if last_error:
458
+ import warnings
459
+
460
+ warnings.warn(f"Extension wait timeout. Last status: {last_error}")
461
+
462
+ return False
463
+
464
+ def close(self, output_path: str | Path | None = None) -> str | None:
465
+ """
466
+ Close browser and cleanup
467
+
468
+ Args:
469
+ output_path: Optional path to rename the video file to.
470
+ If provided, the recorded video will be moved to this location.
471
+ Useful for giving videos meaningful names instead of random hashes.
472
+
473
+ Returns:
474
+ Path to video file if recording was enabled, None otherwise
475
+ Note: Video files are saved automatically by Playwright when context closes.
476
+ If multiple pages exist, returns the path to the first page's video.
477
+ """
478
+ # CRITICAL: Don't access page.video.path() BEFORE closing context
479
+ # This can poke the video subsystem at an awkward time and cause crashes on macOS
480
+ # Instead, we'll locate the video file after context closes
481
+
482
+ # Close context (this triggers video file finalization)
483
+ if self.context:
484
+ self.context.close()
485
+ # Small grace period to ensure video file is fully flushed to disk
486
+ time.sleep(0.5)
487
+
488
+ # Close playwright
489
+ if self.playwright:
490
+ self.playwright.stop()
491
+
492
+ # Clean up extension directory
493
+ if self._extension_path and os.path.exists(self._extension_path):
494
+ shutil.rmtree(self._extension_path)
495
+
496
+ # NOW resolve video path after context is closed and video is finalized
497
+ temp_video_path = None
498
+ if self.record_video_dir:
499
+ try:
500
+ # Locate the newest .webm file in record_video_dir
501
+ # This avoids touching page.video during teardown
502
+ video_dir = Path(self.record_video_dir)
503
+ if video_dir.exists():
504
+ webm_files = list(video_dir.glob("*.webm"))
505
+ if webm_files:
506
+ # Get the most recently modified file
507
+ temp_video_path = max(webm_files, key=lambda p: p.stat().st_mtime)
508
+ logger.debug(f"Found video file: {temp_video_path}")
509
+ except Exception as e:
510
+ logger.warning(f"Could not locate video file: {e}")
511
+
512
+ # Rename/move video if output_path is specified
513
+ final_path = str(temp_video_path) if temp_video_path else None
514
+ if temp_video_path and output_path and os.path.exists(temp_video_path):
515
+ try:
516
+ output_path = str(output_path)
517
+ # Ensure parent directory exists
518
+ Path(output_path).parent.mkdir(parents=True, exist_ok=True)
519
+ shutil.move(temp_video_path, output_path)
520
+ final_path = output_path
521
+ except Exception as e:
522
+ import warnings
523
+
524
+ warnings.warn(f"Failed to rename video file: {e}")
525
+ # Return original path if rename fails
526
+ final_path = str(temp_video_path)
527
+
528
+ return final_path
529
+
530
@classmethod
def from_existing(
    cls,
    context: BrowserContext,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "SentienceBrowser":
    """
    Wrap an already-created Playwright BrowserContext in a SentienceBrowser.

    Use this when you launch the browser yourself and only want the Sentience
    SDK layered on top. The first open page of the context is adopted; if the
    context has no pages, a new one is opened.

    Args:
        context: Existing Playwright BrowserContext
        api_key: Optional API key for server-side processing
        api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)

    Returns:
        SentienceBrowser instance bound to the given context

    Example:
        from playwright.sync_api import sync_playwright
        from sentience import SentienceBrowser, snapshot

        with sync_playwright() as p:
            context = p.chromium.launch_persistent_context(...)
            browser = SentienceBrowser.from_existing(context)
            browser.page.goto("https://example.com")
            snap = snapshot(browser)
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.context = context

    # Reuse the first open page when there is one, otherwise open a fresh page
    if context.pages:
        browser.page = context.pages[0]
    else:
        browser.page = context.new_page()

    # Stealth patches are optional and only applied when the package is installed
    if STEALTH_AVAILABLE:
        stealth_sync(browser.page)

    # Brief pause so an already-loaded extension can finish initialising
    time.sleep(0.5)

    return browser
573
+
574
@classmethod
def from_page(
    cls,
    page: Page,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "SentienceBrowser":
    """
    Wrap an already-created Playwright Page in a SentienceBrowser.

    Use this when you create the page yourself and only want the Sentience SDK
    layered on top. The page's own context becomes the instance's context.

    Args:
        page: Existing Playwright Page
        api_key: Optional API key for server-side processing
        api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)

    Returns:
        SentienceBrowser instance bound to the given page

    Example:
        from playwright.sync_api import sync_playwright
        from sentience import SentienceBrowser, snapshot

        with sync_playwright() as p:
            browser_instance = p.chromium.launch()
            context = browser_instance.new_context()
            page = context.new_page()
            page.goto("https://example.com")

            browser = SentienceBrowser.from_page(page)
            snap = snapshot(browser)
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.page = page
    browser.context = page.context

    # Stealth patches are optional and only applied when the package is installed
    if STEALTH_AVAILABLE:
        stealth_sync(browser.page)

    # Brief pause so an already-loaded extension can finish initialising
    time.sleep(0.5)

    return browser
620
+
621
def __enter__(self):
    """Start the browser and return this instance for use in a ``with`` block."""
    self.start()
    return self
625
+
626
+ def __exit__(self, exc_type, exc_val, exc_tb):
627
+ """Context manager exit"""
628
+ self.close()
629
+
630
+
631
+ class AsyncSentienceBrowser:
632
+ """Async version of SentienceBrowser for use in asyncio contexts."""
633
+
634
+ def __init__(
635
+ self,
636
+ api_key: str | None = None,
637
+ api_url: str | None = None,
638
+ headless: bool | None = None,
639
+ proxy: str | None = None,
640
+ user_data_dir: str | Path | None = None,
641
+ storage_state: str | Path | StorageState | dict | None = None,
642
+ record_video_dir: str | Path | None = None,
643
+ record_video_size: dict[str, int] | None = None,
644
+ viewport: Viewport | dict[str, int] | None = None,
645
+ device_scale_factor: float | None = None,
646
+ executable_path: str | None = None,
647
+ ):
648
+ """
649
+ Initialize Async Sentience browser
650
+
651
+ Args:
652
+ api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
653
+ If None, uses free tier (local extension only)
654
+ api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
655
+ headless: Whether to run in headless mode. If None, defaults to True in CI, False otherwise
656
+ proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
657
+ user_data_dir: Optional path to user data directory for persistent sessions
658
+ storage_state: Optional storage state to inject (cookies + localStorage)
659
+ record_video_dir: Optional directory path to save video recordings
660
+ record_video_size: Optional video resolution as dict with 'width' and 'height' keys
661
+ viewport: Optional viewport size as Viewport object or dict with 'width' and 'height' keys.
662
+ Examples: Viewport(width=1280, height=800) (default)
663
+ Viewport(width=1920, height=1080) (Full HD)
664
+ {"width": 1280, "height": 800} (dict also supported)
665
+ If None, defaults to Viewport(width=1280, height=800).
666
+ device_scale_factor: Optional device scale factor to emulate high-DPI (Retina) screens.
667
+ Examples: 1.0 (default, standard DPI)
668
+ 2.0 (Retina/high-DPI, like MacBook Pro)
669
+ 3.0 (very high DPI)
670
+ If None, defaults to 1.0 (standard DPI).
671
+ executable_path: Optional path to Chromium executable. If provided, forces use of
672
+ this specific browser binary instead of Playwright's managed browser.
673
+ Useful to guarantee Chromium (not Chrome for Testing) on macOS.
674
+ Example: "/path/to/playwright/chromium-1234/chrome-mac/Chromium.app/Contents/MacOS/Chromium"
675
+ """
676
+ self.api_key = api_key
677
+ # Only set api_url if api_key is provided, otherwise None (free tier)
678
+ if self.api_key and not api_url:
679
+ self.api_url = SENTIENCE_API_URL
680
+ else:
681
+ self.api_url = api_url
682
+
683
+ # Determine headless mode
684
+ if headless is None:
685
+ # Default to False for local dev, True for CI
686
+ self.headless = os.environ.get("CI", "").lower() == "true"
687
+ else:
688
+ self.headless = headless
689
+
690
+ # Support proxy from argument or environment variable
691
+ self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")
692
+
693
+ # Auth injection support
694
+ self.user_data_dir = user_data_dir
695
+ self.storage_state = storage_state
696
+
697
+ # Video recording support
698
+ self.record_video_dir = record_video_dir
699
+ self.record_video_size = record_video_size or {"width": 1280, "height": 800}
700
+
701
+ # Viewport configuration - convert dict to Viewport if needed
702
+ if viewport is None:
703
+ self.viewport = Viewport(width=1280, height=800)
704
+ elif isinstance(viewport, dict):
705
+ self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
706
+ else:
707
+ self.viewport = viewport
708
+
709
+ # Device scale factor for high-DPI emulation
710
+ self.device_scale_factor = device_scale_factor
711
+
712
+ # Executable path override (for forcing specific Chromium binary)
713
+ self.executable_path = executable_path
714
+
715
+ self.playwright: AsyncPlaywright | None = None
716
+ self.context: AsyncBrowserContext | None = None
717
+ self.page: AsyncPage | None = None
718
+ self._extension_path: str | None = None
719
+
720
def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
    """
    Parse a proxy connection string into a ProxyConfig.

    Args:
        proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080').
            Credentials containing reserved characters (such as '@' or ':')
            must be percent-encoded in the URL; they are decoded here before
            being stored on the ProxyConfig.

    Returns:
        ProxyConfig object, or None if the string is empty or invalid.
        A warning is logged for every invalid case.
    """
    # Local import: percent-decoding is only needed by this method.
    from urllib.parse import unquote

    if not proxy_string:
        return None

    try:
        parsed = urlparse(proxy_string)

        # Validate scheme
        if parsed.scheme not in ("http", "https", "socks5"):
            logger.warning(
                f"Unsupported proxy scheme: {parsed.scheme}. Supported: http, https, socks5"
            )
            return None

        # Validate host and port. Reading ``parsed.port`` raises ValueError for
        # a malformed port; that is reported by the handler at the bottom.
        if not parsed.hostname or not parsed.port:
            logger.warning(
                "Proxy URL must include hostname and port. Expected format: http://username:password@host:port"
            )
            return None

        # ``hostname`` comes back without the square brackets of an IPv6
        # literal; put them back so the rebuilt server URL stays well-formed.
        host = parsed.hostname
        if ":" in host:
            host = f"[{host}]"

        # Build server URL (credentials are passed separately, not embedded)
        server = f"{parsed.scheme}://{host}:{parsed.port}"

        # urlparse does not percent-decode the userinfo part, so decode it
        # here; otherwise "p%40ss" would be sent to the proxy literally.
        return ProxyConfig(
            server=server,
            username=unquote(parsed.username) if parsed.username else None,
            password=unquote(parsed.password) if parsed.password else None,
        )

    except Exception as e:
        logger.warning(
            f"Invalid proxy configuration: {e}. Expected format: http://username:password@host:port"
        )
        return None
765
+
766
async def start(self) -> None:
    """
    Launch browser with extension loaded (async).

    Copies the extension into a fresh temporary directory, starts Playwright,
    launches a persistent Chromium context with the extension loaded, picks the
    first page (creating one if the context has none), optionally injects
    storage state and applies stealth, then pauses briefly so the extension can
    initialize.

    Raises:
        Exception: Any error raised while preparing or launching the browser is
            re-raised after the partially acquired resources (browser context,
            Playwright driver, temporary extension directory) have been
            released, so a failed start does not leak them.
    """
    # Get extension source path using shared utility
    extension_source = find_extension_path()

    # Create temporary extension bundle
    self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")

    try:
        shutil.copytree(extension_source, self._extension_path, dirs_exist_ok=True)

        self.playwright = await async_playwright().start()

        # Build launch arguments
        args = [
            f"--disable-extensions-except={self._extension_path}",
            f"--load-extension={self._extension_path}",
            "--disable-blink-features=AutomationControlled",
            "--disable-infobars",
            "--disable-features=WebRtcHideLocalIpsWithMdns",
            "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
        ]

        system_name = platform.system()

        # Only add --no-sandbox on Linux (causes crashes on macOS)
        # macOS sandboxing works fine and the flag actually causes crashes
        if system_name == "Linux":
            args.append("--no-sandbox")

        # Add GPU-disabling flags for macOS to prevent Chrome for Testing crash-on-exit
        # These flags help avoid EXC_BAD_ACCESS crashes during browser shutdown
        if system_name == "Darwin":  # macOS
            args.extend(
                [
                    "--disable-gpu",
                    "--disable-software-rasterizer",
                    "--disable-dev-shm-usage",
                    "--disable-breakpad",  # Disable crash reporter to prevent macOS crash dialogs
                    "--disable-crash-reporter",  # Disable crash reporter UI
                    "--disable-crash-handler",  # Disable crash handler completely
                    "--disable-in-process-stack-traces",  # Disable stack trace collection
                    "--disable-hang-monitor",  # Disable hang detection
                    "--disable-background-networking",  # Disable background networking
                    "--disable-background-timer-throttling",  # Disable background throttling
                    "--disable-backgrounding-occluded-windows",  # Disable backgrounding
                    "--disable-renderer-backgrounding",  # Disable renderer backgrounding
                    "--disable-features=TranslateUI",  # Disable translate UI
                    "--disable-ipc-flooding-protection",  # Disable IPC flooding protection
                    "--disable-logging",  # Disable logging to reduce stderr noise
                    "--log-level=3",  # Set log level to fatal only (suppresses warnings)
                ]
            )

        if self.headless:
            args.append("--headless=new")

        # Parse proxy configuration if provided
        proxy_config = self._parse_proxy(self.proxy) if self.proxy else None

        # Handle User Data Directory
        if self.user_data_dir:
            user_data_dir = str(self.user_data_dir)
            Path(user_data_dir).mkdir(parents=True, exist_ok=True)
        else:
            user_data_dir = ""

        # Build launch_persistent_context parameters.
        # Playwright's own headless switch is always off here; headless
        # operation is requested through the "--headless=new" argument above.
        launch_params = {
            "user_data_dir": user_data_dir,
            "headless": False,
            "args": args,
            "viewport": {"width": self.viewport.width, "height": self.viewport.height},
            "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            # Note: Don't set "channel" - let Playwright use its default managed Chromium
            # Setting channel=None doesn't force bundled Chromium and can still pick Chrome for Testing
        }

        # If executable_path is provided, use it to force specific Chromium binary
        # This guarantees we use Chromium (not Chrome for Testing) on macOS
        if self.executable_path:
            launch_params["executable_path"] = self.executable_path
            logger.info(f"Using explicit executable: {self.executable_path}")

        # Add device scale factor if configured
        if self.device_scale_factor is not None:
            launch_params["device_scale_factor"] = self.device_scale_factor

        # Add proxy if configured
        if proxy_config:
            launch_params["proxy"] = proxy_config.to_playwright_dict()
            launch_params["ignore_https_errors"] = True
            logger.info(f"Using proxy: {proxy_config.server}")

        # Add video recording if configured
        if self.record_video_dir:
            video_dir = Path(self.record_video_dir)
            video_dir.mkdir(parents=True, exist_ok=True)
            launch_params["record_video_dir"] = str(video_dir)
            launch_params["record_video_size"] = self.record_video_size
            logger.info(
                f"Recording video to: {video_dir} (Resolution: {self.record_video_size['width']}x{self.record_video_size['height']})"
            )

        # Launch persistent context
        self.context = await self.playwright.chromium.launch_persistent_context(**launch_params)

        self.page = self.context.pages[0] if self.context.pages else await self.context.new_page()

        # Inject storage state if provided
        if self.storage_state:
            await self._inject_storage_state(self.storage_state)

        # Apply stealth if available
        if STEALTH_AVAILABLE:
            await stealth_async(self.page)

        # Wait a moment for extension to initialize
        await asyncio.sleep(0.5)
    except Exception:
        # Roll back whatever was acquired so a failed start leaks nothing.
        # This matters under ``async with``: __aexit__ (and therefore close())
        # is never invoked when __aenter__ raises.
        self.page = None
        if self.context is not None:
            try:
                await self.context.close()
            except Exception as cleanup_error:
                logger.debug(f"Error closing context after failed start: {cleanup_error}")
            self.context = None
        if self.playwright is not None:
            try:
                await self.playwright.stop()
            except Exception as cleanup_error:
                logger.debug(f"Error stopping playwright after failed start: {cleanup_error}")
            self.playwright = None
        if self._extension_path:
            shutil.rmtree(self._extension_path, ignore_errors=True)
            self._extension_path = None
        raise
881
+
882
async def goto(self, url: str) -> None:
    """
    Navigate the current page to ``url`` and verify the extension is usable (async).

    Args:
        url: Address to load; navigation waits for the "domcontentloaded" event.

    Raises:
        RuntimeError: If no page exists yet (``start()`` was not awaited), or if
            the extension does not report ready after navigation. In the latter
            case the message carries the extension path and page diagnostics.
    """
    if not self.page:
        raise RuntimeError("Browser not started. Call await start() first.")

    await self.page.goto(url, wait_until="domcontentloaded")

    # Happy path: the extension answered within the polling window.
    extension_ready = await self._wait_for_extension()
    if extension_ready:
        return

    # Collect a best-effort snapshot of the page-side state for the error text.
    diagnostics_script = """() => ({
        sentience_defined: typeof window.sentience !== 'undefined',
        registry_defined: typeof window.sentience_registry !== 'undefined',
        snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
        extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
        url: window.location.href
        })"""
    try:
        diag = await self.page.evaluate(diagnostics_script)
    except Exception as e:
        diag = f"Failed to get diagnostics: {e!s}"

    message_lines = (
        "Extension failed to load after navigation. Make sure:\n",
        "1. Extension is built (cd sentience-chrome && ./build.sh)\n",
        "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n",
        "3. Check browser console for errors (run with headless=False to see console)\n",
        f"4. Extension path: {self._extension_path}\n",
        f"5. Diagnostic info: {diag}",
    )
    raise RuntimeError("".join(message_lines))
912
+
913
async def _inject_storage_state(self, storage_state: str | Path | StorageState | dict) -> None:
    """
    Load cookies and localStorage entries into the running browser (async).

    Args:
        storage_state: A path (str or Path) to a JSON file, a ready
            StorageState object, or a dict accepted by StorageState.from_dict.

    Raises:
        ValueError: If ``storage_state`` is none of the supported types.

    Cookies are added to the browser context in one call. localStorage is
    written per origin by navigating the current page to that origin first;
    a failure for one origin is logged as a warning and does not stop the rest.
    """
    import json

    # --- Normalise the input into a StorageState instance -------------------
    if isinstance(storage_state, (str, Path)):
        with open(storage_state, encoding="utf-8") as handle:
            raw_state = json.load(handle)
        state = StorageState.from_dict(raw_state)
    elif isinstance(storage_state, StorageState):
        state = storage_state
    elif isinstance(storage_state, dict):
        state = StorageState.from_dict(storage_state)
    else:
        raise ValueError(
            f"Invalid storage_state type: {type(storage_state)}. "
            "Expected str, Path, StorageState, or dict."
        )

    # --- Cookies -------------------------------------------------------------
    if state.cookies:
        required_fields = ("name", "value", "domain", "path")
        # Optional fields are forwarded only when they hold a truthy value.
        optional_fields = ("expires", "httpOnly", "secure", "sameSite")

        cookie_payload = []
        for stored_cookie in state.cookies:
            dumped = stored_cookie.model_dump()
            entry = {field: dumped[field] for field in required_fields}
            for field in optional_fields:
                if dumped.get(field):
                    entry[field] = dumped[field]
            cookie_payload.append(entry)

        await self.context.add_cookies(cookie_payload)
        logger.debug(f"Injected {len(state.cookies)} cookie(s)")

    # --- localStorage --------------------------------------------------------
    if not state.origins:
        return

    for origin_data in state.origins:
        origin = origin_data.origin
        if not origin:
            continue

        try:
            # The page is navigated to the origin before its localStorage is written.
            await self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

            if origin_data.localStorage:
                items = {}
                for item in origin_data.localStorage:
                    items[item.name] = item.value
                await self.page.evaluate(
                    """(localStorage_data) => {
                    for (const [key, value] of Object.entries(localStorage_data)) {
                    localStorage.setItem(key, value);
                    }
                    }""",
                    items,
                )
                logger.debug(
                    f"Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
                )
        except Exception as e:
            logger.warning(f"Failed to inject localStorage for {origin}: {e}")
983
+
984
+ async def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
985
+ """Poll for window.sentience to be available (async)"""
986
+ start_time = time.time()
987
+ last_error = None
988
+
989
+ while time.time() - start_time < timeout_sec:
990
+ try:
991
+ result = await self.page.evaluate(
992
+ """() => {
993
+ if (typeof window.sentience === 'undefined') {
994
+ return { ready: false, reason: 'window.sentience undefined' };
995
+ }
996
+ if (window.sentience._wasmModule === null) {
997
+ return { ready: false, reason: 'WASM module not fully loaded' };
998
+ }
999
+ return { ready: true };
1000
+ }
1001
+ """
1002
+ )
1003
+
1004
+ if isinstance(result, dict):
1005
+ if result.get("ready"):
1006
+ return True
1007
+ last_error = result.get("reason", "Unknown error")
1008
+ except Exception as e:
1009
+ last_error = f"Evaluation error: {str(e)}"
1010
+
1011
+ await asyncio.sleep(0.3)
1012
+
1013
+ if last_error:
1014
+ import warnings
1015
+
1016
+ warnings.warn(f"Extension wait timeout. Last status: {last_error}")
1017
+
1018
+ return False
1019
+
1020
async def close(self, output_path: str | Path | None = None) -> tuple[str | None, bool]:
    """
    Close browser and cleanup (async)

    Args:
        output_path: Optional path to rename the video file to

    Returns:
        Tuple of (video_path, shutdown_clean)
        - video_path: Path to video file if recording was enabled, None otherwise
        - shutdown_clean: True if shutdown completed without errors, False if there were issues

    Note: Video path is resolved AFTER context close to avoid touching video
    subsystem during teardown, which can cause crashes on macOS.
    """
    # CRITICAL: Don't access page.video.path() BEFORE closing context
    # This can poke the video subsystem at an awkward time and cause crashes
    # Instead, we'll locate the video file after context closes

    is_macos = platform.system() == "Darwin"

    # CRITICAL: Wait before closing to ensure all operations are complete
    # This is especially important for video recording - we need to ensure
    # all frames are written and the encoder is ready to finalize
    if is_macos:
        # On macOS, give extra time for video encoder to finish writing frames
        # 4K video recording needs more time to flush buffers
        logger.debug("Waiting for video recording to stabilize before closing (macOS)...")
        await asyncio.sleep(2.0)
    else:
        await asyncio.sleep(1.0)

    # Graceful shutdown: close context first, then playwright
    # Use longer timeouts on macOS where video finalization can take longer
    context_close_success = True
    if self.context:
        try:
            # Give context time to close gracefully (especially for video finalization)
            await asyncio.wait_for(self.context.close(), timeout=30.0)
            logger.debug("Context closed successfully")
        except asyncio.TimeoutError:
            # asyncio.wait_for raises asyncio.TimeoutError, which is only an
            # alias of the builtin TimeoutError from Python 3.11 onwards.
            logger.warning("Context close timed out, continuing with cleanup...")
            context_close_success = False
        except Exception as e:
            logger.warning(f"Error closing context: {e}")
            context_close_success = False
        finally:
            self.context = None

    # Give Chrome a moment to fully flush video + release resources
    # This avoids stopping the driver while the browser is still finishing the .webm write/encoder shutdown
    # Increased grace period on macOS to allow more time for process cleanup
    grace_period = 2.0 if is_macos else 1.0
    await asyncio.sleep(grace_period)

    playwright_stop_success = True
    if self.playwright:
        try:
            # Give playwright time to stop gracefully
            await asyncio.wait_for(self.playwright.stop(), timeout=15.0)
            logger.debug("Playwright stopped successfully")
        except asyncio.TimeoutError:
            logger.warning("Playwright stop timed out, continuing with cleanup...")
            playwright_stop_success = False
        except Exception as e:
            logger.warning(f"Error stopping playwright: {e}")
            playwright_stop_success = False
        finally:
            self.playwright = None

    # Additional cleanup: On macOS, wait a bit more to ensure all browser processes are terminated
    # This helps prevent crash dialogs from appearing
    if is_macos:
        await asyncio.sleep(0.5)

    # NOW resolve video path after context is closed and video is finalized
    temp_video_path = None
    if self.record_video_dir:
        try:
            # Locate the newest .webm file in record_video_dir
            # This avoids touching page.video during teardown
            video_dir = Path(self.record_video_dir)
            if video_dir.exists():
                webm_files = list(video_dir.glob("*.webm"))
                if webm_files:
                    # Get the most recently modified file
                    temp_video_path = max(webm_files, key=lambda p: p.stat().st_mtime)
                    logger.debug(f"Found video file: {temp_video_path}")
        except Exception as e:
            logger.warning(f"Could not locate video file: {e}")

    # Remove the temporary extension copy. A failure here is logged instead of
    # raised so the rest of the teardown (and the return value) is not lost.
    if self._extension_path and os.path.exists(self._extension_path):
        try:
            shutil.rmtree(self._extension_path)
        except OSError as e:
            logger.warning(
                f"Failed to remove temporary extension directory {self._extension_path}: {e}"
            )

    # Clear page reference after closing context
    self.page = None

    final_path = temp_video_path
    if temp_video_path and output_path and os.path.exists(temp_video_path):
        try:
            output_path = str(output_path)
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            shutil.move(temp_video_path, output_path)
            final_path = output_path
        except Exception as e:
            import warnings

            warnings.warn(f"Failed to rename video file: {e}")
            final_path = temp_video_path

    # Log shutdown status (useful for detecting crashes in headless mode)
    shutdown_clean = context_close_success and playwright_stop_success
    if not shutdown_clean:
        logger.warning(
            f"Browser shutdown had issues - may indicate a crash "
            f"(context_close: {context_close_success}, playwright_stop: {playwright_stop_success})"
        )
    else:
        logger.debug("Browser shutdown completed cleanly")

    # Return tuple: (video_path, shutdown_clean)
    # This allows callers to detect crashes even in headless mode
    return (final_path, shutdown_clean)
1143
+
1144
async def __aenter__(self):
    """Enter ``async with``: start the browser, then hand back this instance."""
    await self.start()
    return self
1148
+
1149
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Leave ``async with``: close the browser whether or not the body raised."""
    # close() returns (video_path, shutdown_clean); it is deliberately discarded
    # here, so the method returns None and exceptions from the body propagate.
    _ = await self.close()
1153
+
1154
@classmethod
async def from_existing(
    cls,
    context: AsyncBrowserContext,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already-open Playwright BrowserContext in an AsyncSentienceBrowser.

    Args:
        context: Existing Playwright BrowserContext
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance bound to ``context`` and to its first
        page (a new page is opened when the context has none)
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.context = context

    # Reuse the first open page, otherwise create one.
    open_pages = context.pages
    if open_pages:
        browser.page = open_pages[0]
    else:
        browser.page = await context.new_page()

    # Stealth patches are optional and applied only when the helper is available.
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed short pause for the extension; no readiness check is done here.
    await asyncio.sleep(0.5)

    return browser
1185
+
1186
@classmethod
async def from_page(
    cls,
    page: AsyncPage,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already-open Playwright Page in an AsyncSentienceBrowser.

    Args:
        page: Existing Playwright Page
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance bound to ``page`` and to the context
        that page belongs to
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.page = page
    browser.context = page.context

    # Stealth patches are optional and applied only when the helper is available.
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed short pause for the extension; no readiness check is done here.
    await asyncio.sleep(0.5)

    return browser