sentienceapi 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (90)
  1. sentience/__init__.py +120 -6
  2. sentience/_extension_loader.py +156 -1
  3. sentience/action_executor.py +217 -0
  4. sentience/actions.py +758 -30
  5. sentience/agent.py +806 -293
  6. sentience/agent_config.py +3 -0
  7. sentience/agent_runtime.py +840 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +89 -1141
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +372 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +483 -0
  21. sentience/base_agent.py +95 -0
  22. sentience/browser.py +678 -39
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cloud_tracing.py +507 -42
  26. sentience/constants.py +6 -0
  27. sentience/conversational_agent.py +77 -43
  28. sentience/cursor_policy.py +142 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +98 -2
  31. sentience/extension/background.js +56 -185
  32. sentience/extension/content.js +150 -287
  33. sentience/extension/injected_api.js +1088 -1368
  34. sentience/extension/manifest.json +1 -1
  35. sentience/extension/pkg/sentience_core.d.ts +22 -22
  36. sentience/extension/pkg/sentience_core.js +275 -433
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/release.json +47 -47
  39. sentience/failure_artifacts.py +241 -0
  40. sentience/formatting.py +9 -53
  41. sentience/inspector.py +183 -1
  42. sentience/integrations/__init__.py +6 -0
  43. sentience/integrations/langchain/__init__.py +12 -0
  44. sentience/integrations/langchain/context.py +18 -0
  45. sentience/integrations/langchain/core.py +326 -0
  46. sentience/integrations/langchain/tools.py +180 -0
  47. sentience/integrations/models.py +46 -0
  48. sentience/integrations/pydanticai/__init__.py +15 -0
  49. sentience/integrations/pydanticai/deps.py +20 -0
  50. sentience/integrations/pydanticai/toolset.py +468 -0
  51. sentience/llm_interaction_handler.py +191 -0
  52. sentience/llm_provider.py +765 -66
  53. sentience/llm_provider_utils.py +120 -0
  54. sentience/llm_response_builder.py +153 -0
  55. sentience/models.py +595 -3
  56. sentience/ordinal.py +280 -0
  57. sentience/overlay.py +109 -2
  58. sentience/protocols.py +228 -0
  59. sentience/query.py +67 -5
  60. sentience/read.py +95 -3
  61. sentience/recorder.py +223 -3
  62. sentience/schemas/trace_v1.json +128 -9
  63. sentience/screenshot.py +48 -2
  64. sentience/sentience_methods.py +86 -0
  65. sentience/snapshot.py +599 -55
  66. sentience/snapshot_diff.py +126 -0
  67. sentience/text_search.py +120 -5
  68. sentience/trace_event_builder.py +148 -0
  69. sentience/trace_file_manager.py +197 -0
  70. sentience/trace_indexing/index_schema.py +95 -7
  71. sentience/trace_indexing/indexer.py +105 -48
  72. sentience/tracer_factory.py +120 -9
  73. sentience/tracing.py +172 -8
  74. sentience/utils/__init__.py +40 -0
  75. sentience/utils/browser.py +46 -0
  76. sentience/{utils.py → utils/element.py} +3 -42
  77. sentience/utils/formatting.py +59 -0
  78. sentience/verification.py +618 -0
  79. sentience/visual_agent.py +2058 -0
  80. sentience/wait.py +68 -2
  81. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/METADATA +199 -40
  82. sentienceapi-0.98.0.dist-info/RECORD +92 -0
  83. sentience/extension/test-content.js +0 -4
  84. sentienceapi-0.90.16.dist-info/RECORD +0 -50
  85. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/WHEEL +0 -0
  86. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/entry_points.txt +0 -0
  87. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE +0 -0
  88. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-APACHE +0 -0
  89. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-MIT +0 -0
  90. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/top_level.txt +0 -0
sentience/browser.py CHANGED
@@ -2,21 +2,32 @@
2
2
  Playwright browser harness with extension loading
3
3
  """
4
4
 
5
+ import asyncio
6
+ import logging
5
7
  import os
8
+ import platform
6
9
  import shutil
7
10
  import tempfile
8
11
  import time
9
12
  from pathlib import Path
13
+ from typing import Optional, Union
10
14
  from urllib.parse import urlparse
11
15
 
16
+ from playwright.async_api import BrowserContext as AsyncBrowserContext
17
+ from playwright.async_api import Page as AsyncPage
18
+ from playwright.async_api import Playwright as AsyncPlaywright
19
+ from playwright.async_api import async_playwright
12
20
  from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
13
21
 
14
22
  from sentience._extension_loader import find_extension_path
23
+ from sentience.constants import SENTIENCE_API_URL
15
24
  from sentience.models import ProxyConfig, StorageState, Viewport
16
25
 
26
+ logger = logging.getLogger(__name__)
27
+
17
28
  # Import stealth for bot evasion (optional - graceful fallback if not available)
18
29
  try:
19
- from playwright_stealth import stealth_sync
30
+ from playwright_stealth import stealth_async, stealth_sync
20
31
 
21
32
  STEALTH_AVAILABLE = True
22
33
  except ImportError:
@@ -37,6 +48,7 @@ class SentienceBrowser:
37
48
  record_video_dir: str | Path | None = None,
38
49
  record_video_size: dict[str, int] | None = None,
39
50
  viewport: Viewport | dict[str, int] | None = None,
51
+ device_scale_factor: float | None = None,
40
52
  ):
41
53
  """
42
54
  Initialize Sentience browser
@@ -79,7 +91,7 @@ class SentienceBrowser:
79
91
  # Only set api_url if api_key is provided, otherwise None (free tier)
80
92
  # Defaults to production API if key is present but url is missing
81
93
  if self.api_key and not api_url:
82
- self.api_url = "https://api.sentienceapi.com"
94
+ self.api_url = SENTIENCE_API_URL
83
95
  else:
84
96
  self.api_url = api_url
85
97
 
@@ -109,6 +121,9 @@ class SentienceBrowser:
109
121
  else:
110
122
  self.viewport = viewport
111
123
 
124
+ # Device scale factor for high-DPI emulation
125
+ self.device_scale_factor = device_scale_factor
126
+
112
127
  self.playwright: Playwright | None = None
113
128
  self.context: BrowserContext | None = None
114
129
  self.page: Page | None = None
@@ -135,14 +150,16 @@ class SentienceBrowser:
135
150
 
136
151
  # Validate scheme
137
152
  if parsed.scheme not in ("http", "https", "socks5"):
138
- print(f"⚠️ [Sentience] Unsupported proxy scheme: {parsed.scheme}")
139
- print(" Supported: http, https, socks5")
153
+ logger.warning(
154
+ f"Unsupported proxy scheme: {parsed.scheme}. Supported: http, https, socks5"
155
+ )
140
156
  return None
141
157
 
142
158
  # Validate host and port
143
159
  if not parsed.hostname or not parsed.port:
144
- print("⚠️ [Sentience] Proxy URL must include hostname and port")
145
- print(" Expected format: http://username:password@host:port")
160
+ logger.warning(
161
+ "Proxy URL must include hostname and port. Expected format: http://username:password@host:port"
162
+ )
146
163
  return None
147
164
 
148
165
  # Build server URL
@@ -156,8 +173,9 @@ class SentienceBrowser:
156
173
  )
157
174
 
158
175
  except Exception as e:
159
- print(f"⚠️ [Sentience] Invalid proxy configuration: {e}")
160
- print(" Expected format: http://username:password@host:port")
176
+ logger.warning(
177
+ f"Invalid proxy configuration: {e}. Expected format: http://username:password@host:port"
178
+ )
161
179
  return None
162
180
 
163
181
  def start(self) -> None:
@@ -177,13 +195,41 @@ class SentienceBrowser:
177
195
  f"--disable-extensions-except={self._extension_path}",
178
196
  f"--load-extension={self._extension_path}",
179
197
  "--disable-blink-features=AutomationControlled", # Hides 'navigator.webdriver'
180
- "--no-sandbox",
181
198
  "--disable-infobars",
182
199
  # WebRTC leak protection (prevents real IP exposure when using proxies/VPNs)
183
200
  "--disable-features=WebRtcHideLocalIpsWithMdns",
184
201
  "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
185
202
  ]
186
203
 
204
+ # Only add --no-sandbox on Linux (causes crashes on macOS)
205
+ # macOS sandboxing works fine and the flag actually causes crashes
206
+ if platform.system() == "Linux":
207
+ args.append("--no-sandbox")
208
+
209
+ # Add GPU-disabling flags for macOS to prevent Chrome for Testing crash-on-exit
210
+ # These flags help avoid EXC_BAD_ACCESS crashes during browser shutdown
211
+ if platform.system() == "Darwin": # macOS
212
+ args.extend(
213
+ [
214
+ "--disable-gpu",
215
+ "--disable-software-rasterizer",
216
+ "--disable-dev-shm-usage",
217
+ "--disable-breakpad", # Disable crash reporter to prevent macOS crash dialogs
218
+ "--disable-crash-reporter", # Disable crash reporter UI
219
+ "--disable-crash-handler", # Disable crash handler completely
220
+ "--disable-in-process-stack-traces", # Disable stack trace collection
221
+ "--disable-hang-monitor", # Disable hang detection
222
+ "--disable-background-networking", # Disable background networking
223
+ "--disable-background-timer-throttling", # Disable background throttling
224
+ "--disable-backgrounding-occluded-windows", # Disable backgrounding
225
+ "--disable-renderer-backgrounding", # Disable renderer backgrounding
226
+ "--disable-features=TranslateUI", # Disable translate UI
227
+ "--disable-ipc-flooding-protection", # Disable IPC flooding protection
228
+ "--disable-logging", # Disable logging to reduce stderr noise
229
+ "--log-level=3", # Set log level to fatal only (suppresses warnings)
230
+ ]
231
+ )
232
+
187
233
  # Handle headless mode correctly for extensions
188
234
  # 'headless=True' DOES NOT support extensions in standard Chrome
189
235
  # We must use 'headless="new"' (Chrome 112+) or run visible
@@ -209,14 +255,20 @@ class SentienceBrowser:
209
255
  "viewport": {"width": self.viewport.width, "height": self.viewport.height},
210
256
  # Remove "HeadlessChrome" from User Agent automatically
211
257
  "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
258
+ # Note: Don't set "channel" - let Playwright use its default managed Chromium
259
+ # Setting channel=None doesn't force bundled Chromium and can still pick Chrome for Testing
212
260
  }
213
261
 
262
+ # Add device scale factor if configured
263
+ if self.device_scale_factor is not None:
264
+ launch_params["device_scale_factor"] = self.device_scale_factor
265
+
214
266
  # Add proxy if configured
215
267
  if proxy_config:
216
268
  launch_params["proxy"] = proxy_config.to_playwright_dict()
217
269
  # Ignore HTTPS errors when using proxy (many residential proxies use self-signed certs)
218
270
  launch_params["ignore_https_errors"] = True
219
- print(f"🌐 [Sentience] Using proxy: {proxy_config.server}")
271
+ logger.info(f"Using proxy: {proxy_config.server}")
220
272
 
221
273
  # Add video recording if configured
222
274
  if self.record_video_dir:
@@ -224,9 +276,8 @@ class SentienceBrowser:
224
276
  video_dir.mkdir(parents=True, exist_ok=True)
225
277
  launch_params["record_video_dir"] = str(video_dir)
226
278
  launch_params["record_video_size"] = self.record_video_size
227
- print(f"🎥 [Sentience] Recording video to: {video_dir}")
228
- print(
229
- f" Resolution: {self.record_video_size['width']}x{self.record_video_size['height']}"
279
+ logger.info(
280
+ f"Recording video to: {video_dir} (Resolution: {self.record_video_size['width']}x{self.record_video_size['height']})"
230
281
  )
231
282
 
232
283
  # Launch persistent context (required for extensions)
@@ -332,7 +383,7 @@ class SentienceBrowser:
332
383
  playwright_cookies.append(playwright_cookie)
333
384
 
334
385
  self.context.add_cookies(playwright_cookies)
335
- print(f"✅ [Sentience] Injected {len(state.cookies)} cookie(s)")
386
+ logger.debug(f"Injected {len(state.cookies)} cookie(s)")
336
387
 
337
388
  # Inject LocalStorage (requires navigation to each domain)
338
389
  if state.origins:
@@ -359,11 +410,11 @@ class SentienceBrowser:
359
410
  }""",
360
411
  localStorage_dict,
361
412
  )
362
- print(
363
- f"✅ [Sentience] Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
413
+ logger.debug(
414
+ f"Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
364
415
  )
365
416
  except Exception as e:
366
- print(f"⚠️ [Sentience] Failed to inject localStorage for {origin}: {e}")
417
+ logger.warning(f"Failed to inject localStorage for {origin}: {e}")
367
418
 
368
419
  def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
369
420
  """Poll for window.sentience to be available"""
@@ -424,30 +475,15 @@ class SentienceBrowser:
424
475
  Note: Video files are saved automatically by Playwright when context closes.
425
476
  If multiple pages exist, returns the path to the first page's video.
426
477
  """
427
- temp_video_path = None
428
-
429
- # Get video path before closing (if recording was enabled)
430
- # Note: Playwright saves videos when pages/context close, but we can get the
431
- # expected path before closing. The actual file will be available after close.
432
- if self.record_video_dir:
433
- try:
434
- # Try to get video path from the first page
435
- if self.page and self.page.video:
436
- temp_video_path = self.page.video.path()
437
- # If that fails, check all pages in the context
438
- elif self.context:
439
- for page in self.context.pages:
440
- if page.video:
441
- temp_video_path = page.video.path()
442
- break
443
- except Exception:
444
- # Video path might not be available until after close
445
- # In that case, we'll return None and user can check the directory
446
- pass
478
+ # CRITICAL: Don't access page.video.path() BEFORE closing context
479
+ # This can poke the video subsystem at an awkward time and cause crashes on macOS
480
+ # Instead, we'll locate the video file after context closes
447
481
 
448
482
  # Close context (this triggers video file finalization)
449
483
  if self.context:
450
484
  self.context.close()
485
+ # Small grace period to ensure video file is fully flushed to disk
486
+ time.sleep(0.5)
451
487
 
452
488
  # Close playwright
453
489
  if self.playwright:
@@ -457,8 +493,24 @@ class SentienceBrowser:
457
493
  if self._extension_path and os.path.exists(self._extension_path):
458
494
  shutil.rmtree(self._extension_path)
459
495
 
496
+ # NOW resolve video path after context is closed and video is finalized
497
+ temp_video_path = None
498
+ if self.record_video_dir:
499
+ try:
500
+ # Locate the newest .webm file in record_video_dir
501
+ # This avoids touching page.video during teardown
502
+ video_dir = Path(self.record_video_dir)
503
+ if video_dir.exists():
504
+ webm_files = list(video_dir.glob("*.webm"))
505
+ if webm_files:
506
+ # Get the most recently modified file
507
+ temp_video_path = max(webm_files, key=lambda p: p.stat().st_mtime)
508
+ logger.debug(f"Found video file: {temp_video_path}")
509
+ except Exception as e:
510
+ logger.warning(f"Could not locate video file: {e}")
511
+
460
512
  # Rename/move video if output_path is specified
461
- final_path = temp_video_path
513
+ final_path = str(temp_video_path) if temp_video_path else None
462
514
  if temp_video_path and output_path and os.path.exists(temp_video_path):
463
515
  try:
464
516
  output_path = str(output_path)
@@ -471,7 +523,7 @@ class SentienceBrowser:
471
523
 
472
524
  warnings.warn(f"Failed to rename video file: {e}")
473
525
  # Return original path if rename fails
474
- final_path = temp_video_path
526
+ final_path = str(temp_video_path)
475
527
 
476
528
  return final_path
477
529
 
@@ -574,3 +626,590 @@ class SentienceBrowser:
574
626
  def __exit__(self, exc_type, exc_val, exc_tb):
575
627
  """Context manager exit"""
576
628
  self.close()
629
+
630
+
631
+ class AsyncSentienceBrowser:
632
+ """Async version of SentienceBrowser for use in asyncio contexts."""
633
+
634
+ def __init__(
635
+ self,
636
+ api_key: str | None = None,
637
+ api_url: str | None = None,
638
+ headless: bool | None = None,
639
+ proxy: str | None = None,
640
+ user_data_dir: str | Path | None = None,
641
+ storage_state: str | Path | StorageState | dict | None = None,
642
+ record_video_dir: str | Path | None = None,
643
+ record_video_size: dict[str, int] | None = None,
644
+ viewport: Viewport | dict[str, int] | None = None,
645
+ device_scale_factor: float | None = None,
646
+ executable_path: str | None = None,
647
+ ):
648
+ """
649
+ Initialize Async Sentience browser
650
+
651
+ Args:
652
+ api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
653
+ If None, uses free tier (local extension only)
654
+ api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
655
+ headless: Whether to run in headless mode. If None, defaults to True in CI, False otherwise
656
+ proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
657
+ user_data_dir: Optional path to user data directory for persistent sessions
658
+ storage_state: Optional storage state to inject (cookies + localStorage)
659
+ record_video_dir: Optional directory path to save video recordings
660
+ record_video_size: Optional video resolution as dict with 'width' and 'height' keys
661
+ viewport: Optional viewport size as Viewport object or dict with 'width' and 'height' keys.
662
+ Examples: Viewport(width=1280, height=800) (default)
663
+ Viewport(width=1920, height=1080) (Full HD)
664
+ {"width": 1280, "height": 800} (dict also supported)
665
+ If None, defaults to Viewport(width=1280, height=800).
666
+ device_scale_factor: Optional device scale factor to emulate high-DPI (Retina) screens.
667
+ Examples: 1.0 (default, standard DPI)
668
+ 2.0 (Retina/high-DPI, like MacBook Pro)
669
+ 3.0 (very high DPI)
670
+ If None, defaults to 1.0 (standard DPI).
671
+ executable_path: Optional path to Chromium executable. If provided, forces use of
672
+ this specific browser binary instead of Playwright's managed browser.
673
+ Useful to guarantee Chromium (not Chrome for Testing) on macOS.
674
+ Example: "/path/to/playwright/chromium-1234/chrome-mac/Chromium.app/Contents/MacOS/Chromium"
675
+ """
676
+ self.api_key = api_key
677
+ # Only set api_url if api_key is provided, otherwise None (free tier)
678
+ if self.api_key and not api_url:
679
+ self.api_url = SENTIENCE_API_URL
680
+ else:
681
+ self.api_url = api_url
682
+
683
+ # Determine headless mode
684
+ if headless is None:
685
+ # Default to False for local dev, True for CI
686
+ self.headless = os.environ.get("CI", "").lower() == "true"
687
+ else:
688
+ self.headless = headless
689
+
690
+ # Support proxy from argument or environment variable
691
+ self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")
692
+
693
+ # Auth injection support
694
+ self.user_data_dir = user_data_dir
695
+ self.storage_state = storage_state
696
+
697
+ # Video recording support
698
+ self.record_video_dir = record_video_dir
699
+ self.record_video_size = record_video_size or {"width": 1280, "height": 800}
700
+
701
+ # Viewport configuration - convert dict to Viewport if needed
702
+ if viewport is None:
703
+ self.viewport = Viewport(width=1280, height=800)
704
+ elif isinstance(viewport, dict):
705
+ self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
706
+ else:
707
+ self.viewport = viewport
708
+
709
+ # Device scale factor for high-DPI emulation
710
+ self.device_scale_factor = device_scale_factor
711
+
712
+ # Executable path override (for forcing specific Chromium binary)
713
+ self.executable_path = executable_path
714
+
715
+ self.playwright: AsyncPlaywright | None = None
716
+ self.context: AsyncBrowserContext | None = None
717
+ self.page: AsyncPage | None = None
718
+ self._extension_path: str | None = None
719
+
720
+ def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
721
+ """
722
+ Parse proxy connection string into ProxyConfig.
723
+
724
+ Args:
725
+ proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')
726
+
727
+ Returns:
728
+ ProxyConfig object or None if invalid
729
+ """
730
+ if not proxy_string:
731
+ return None
732
+
733
+ try:
734
+ parsed = urlparse(proxy_string)
735
+
736
+ # Validate scheme
737
+ if parsed.scheme not in ("http", "https", "socks5"):
738
+ logger.warning(
739
+ f"Unsupported proxy scheme: {parsed.scheme}. Supported: http, https, socks5"
740
+ )
741
+ return None
742
+
743
+ # Validate host and port
744
+ if not parsed.hostname or not parsed.port:
745
+ logger.warning(
746
+ "Proxy URL must include hostname and port. Expected format: http://username:password@host:port"
747
+ )
748
+ return None
749
+
750
+ # Build server URL
751
+ server = f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
752
+
753
+ # Create ProxyConfig with optional credentials
754
+ return ProxyConfig(
755
+ server=server,
756
+ username=parsed.username if parsed.username else None,
757
+ password=parsed.password if parsed.password else None,
758
+ )
759
+
760
+ except Exception as e:
761
+ logger.warning(
762
+ f"Invalid proxy configuration: {e}. Expected format: http://username:password@host:port"
763
+ )
764
+ return None
765
+
766
+ async def start(self) -> None:
767
+ """Launch browser with extension loaded (async)"""
768
+ # Get extension source path using shared utility
769
+ extension_source = find_extension_path()
770
+
771
+ # Create temporary extension bundle
772
+ self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")
773
+ shutil.copytree(extension_source, self._extension_path, dirs_exist_ok=True)
774
+
775
+ self.playwright = await async_playwright().start()
776
+
777
+ # Build launch arguments
778
+ args = [
779
+ f"--disable-extensions-except={self._extension_path}",
780
+ f"--load-extension={self._extension_path}",
781
+ "--disable-blink-features=AutomationControlled",
782
+ "--disable-infobars",
783
+ "--disable-features=WebRtcHideLocalIpsWithMdns",
784
+ "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
785
+ ]
786
+
787
+ # Only add --no-sandbox on Linux (causes crashes on macOS)
788
+ # macOS sandboxing works fine and the flag actually causes crashes
789
+ if platform.system() == "Linux":
790
+ args.append("--no-sandbox")
791
+
792
+ # Add GPU-disabling flags for macOS to prevent Chrome for Testing crash-on-exit
793
+ # These flags help avoid EXC_BAD_ACCESS crashes during browser shutdown
794
+ if platform.system() == "Darwin": # macOS
795
+ args.extend(
796
+ [
797
+ "--disable-gpu",
798
+ "--disable-software-rasterizer",
799
+ "--disable-dev-shm-usage",
800
+ "--disable-breakpad", # Disable crash reporter to prevent macOS crash dialogs
801
+ "--disable-crash-reporter", # Disable crash reporter UI
802
+ "--disable-crash-handler", # Disable crash handler completely
803
+ "--disable-in-process-stack-traces", # Disable stack trace collection
804
+ "--disable-hang-monitor", # Disable hang detection
805
+ "--disable-background-networking", # Disable background networking
806
+ "--disable-background-timer-throttling", # Disable background throttling
807
+ "--disable-backgrounding-occluded-windows", # Disable backgrounding
808
+ "--disable-renderer-backgrounding", # Disable renderer backgrounding
809
+ "--disable-features=TranslateUI", # Disable translate UI
810
+ "--disable-ipc-flooding-protection", # Disable IPC flooding protection
811
+ "--disable-logging", # Disable logging to reduce stderr noise
812
+ "--log-level=3", # Set log level to fatal only (suppresses warnings)
813
+ ]
814
+ )
815
+
816
+ if self.headless:
817
+ args.append("--headless=new")
818
+
819
+ # Parse proxy configuration if provided
820
+ proxy_config = self._parse_proxy(self.proxy) if self.proxy else None
821
+
822
+ # Handle User Data Directory
823
+ if self.user_data_dir:
824
+ user_data_dir = str(self.user_data_dir)
825
+ Path(user_data_dir).mkdir(parents=True, exist_ok=True)
826
+ else:
827
+ user_data_dir = ""
828
+
829
+ # Build launch_persistent_context parameters
830
+ launch_params = {
831
+ "user_data_dir": user_data_dir,
832
+ "headless": False,
833
+ "args": args,
834
+ "viewport": {"width": self.viewport.width, "height": self.viewport.height},
835
+ "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
836
+ # Note: Don't set "channel" - let Playwright use its default managed Chromium
837
+ # Setting channel=None doesn't force bundled Chromium and can still pick Chrome for Testing
838
+ }
839
+
840
+ # If executable_path is provided, use it to force specific Chromium binary
841
+ # This guarantees we use Chromium (not Chrome for Testing) on macOS
842
+ if self.executable_path:
843
+ launch_params["executable_path"] = self.executable_path
844
+ logger.info(f"Using explicit executable: {self.executable_path}")
845
+
846
+ # Add device scale factor if configured
847
+ if self.device_scale_factor is not None:
848
+ launch_params["device_scale_factor"] = self.device_scale_factor
849
+
850
+ # Add proxy if configured
851
+ if proxy_config:
852
+ launch_params["proxy"] = proxy_config.to_playwright_dict()
853
+ launch_params["ignore_https_errors"] = True
854
+ logger.info(f"Using proxy: {proxy_config.server}")
855
+
856
+ # Add video recording if configured
857
+ if self.record_video_dir:
858
+ video_dir = Path(self.record_video_dir)
859
+ video_dir.mkdir(parents=True, exist_ok=True)
860
+ launch_params["record_video_dir"] = str(video_dir)
861
+ launch_params["record_video_size"] = self.record_video_size
862
+ logger.info(
863
+ f"Recording video to: {video_dir} (Resolution: {self.record_video_size['width']}x{self.record_video_size['height']})"
864
+ )
865
+
866
+ # Launch persistent context
867
+ self.context = await self.playwright.chromium.launch_persistent_context(**launch_params)
868
+
869
+ self.page = self.context.pages[0] if self.context.pages else await self.context.new_page()
870
+
871
+ # Inject storage state if provided
872
+ if self.storage_state:
873
+ await self._inject_storage_state(self.storage_state)
874
+
875
+ # Apply stealth if available
876
+ if STEALTH_AVAILABLE:
877
+ await stealth_async(self.page)
878
+
879
+ # Wait a moment for extension to initialize
880
+ await asyncio.sleep(0.5)
881
+
882
+ async def goto(self, url: str) -> None:
883
+ """Navigate to a URL and ensure extension is ready (async)"""
884
+ if not self.page:
885
+ raise RuntimeError("Browser not started. Call await start() first.")
886
+
887
+ await self.page.goto(url, wait_until="domcontentloaded")
888
+
889
+ # Wait for extension to be ready
890
+ if not await self._wait_for_extension():
891
+ try:
892
+ diag = await self.page.evaluate(
893
+ """() => ({
894
+ sentience_defined: typeof window.sentience !== 'undefined',
895
+ registry_defined: typeof window.sentience_registry !== 'undefined',
896
+ snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
897
+ extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
898
+ url: window.location.href
899
+ })"""
900
+ )
901
+ except Exception as e:
902
+ diag = f"Failed to get diagnostics: {str(e)}"
903
+
904
+ raise RuntimeError(
905
+ "Extension failed to load after navigation. Make sure:\n"
906
+ "1. Extension is built (cd sentience-chrome && ./build.sh)\n"
907
+ "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n"
908
+ "3. Check browser console for errors (run with headless=False to see console)\n"
909
+ f"4. Extension path: {self._extension_path}\n"
910
+ f"5. Diagnostic info: {diag}"
911
+ )
912
+
913
+ async def _inject_storage_state(self, storage_state: str | Path | StorageState | dict) -> None:
914
+ """Inject storage state (cookies + localStorage) into browser context (async)"""
915
+ import json
916
+
917
+ # Load storage state
918
+ if isinstance(storage_state, (str, Path)):
919
+ with open(storage_state, encoding="utf-8") as f:
920
+ state_dict = json.load(f)
921
+ state = StorageState.from_dict(state_dict)
922
+ elif isinstance(storage_state, StorageState):
923
+ state = storage_state
924
+ elif isinstance(storage_state, dict):
925
+ state = StorageState.from_dict(storage_state)
926
+ else:
927
+ raise ValueError(
928
+ f"Invalid storage_state type: {type(storage_state)}. "
929
+ "Expected str, Path, StorageState, or dict."
930
+ )
931
+
932
+ # Inject cookies
933
+ if state.cookies:
934
+ playwright_cookies = []
935
+ for cookie in state.cookies:
936
+ cookie_dict = cookie.model_dump()
937
+ playwright_cookie = {
938
+ "name": cookie_dict["name"],
939
+ "value": cookie_dict["value"],
940
+ "domain": cookie_dict["domain"],
941
+ "path": cookie_dict["path"],
942
+ }
943
+ if cookie_dict.get("expires"):
944
+ playwright_cookie["expires"] = cookie_dict["expires"]
945
+ if cookie_dict.get("httpOnly"):
946
+ playwright_cookie["httpOnly"] = cookie_dict["httpOnly"]
947
+ if cookie_dict.get("secure"):
948
+ playwright_cookie["secure"] = cookie_dict["secure"]
949
+ if cookie_dict.get("sameSite"):
950
+ playwright_cookie["sameSite"] = cookie_dict["sameSite"]
951
+ playwright_cookies.append(playwright_cookie)
952
+
953
+ await self.context.add_cookies(playwright_cookies)
954
+ logger.debug(f"Injected {len(state.cookies)} cookie(s)")
955
+
956
+ # Inject LocalStorage
957
+ if state.origins:
958
+ for origin_data in state.origins:
959
+ origin = origin_data.origin
960
+ if not origin:
961
+ continue
962
+
963
+ try:
964
+ await self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)
965
+
966
+ if origin_data.localStorage:
967
+ localStorage_dict = {
968
+ item.name: item.value for item in origin_data.localStorage
969
+ }
970
+ await self.page.evaluate(
971
+ """(localStorage_data) => {
972
+ for (const [key, value] of Object.entries(localStorage_data)) {
973
+ localStorage.setItem(key, value);
974
+ }
975
+ }""",
976
+ localStorage_dict,
977
+ )
978
+ logger.debug(
979
+ f"Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
980
+ )
981
+ except Exception as e:
982
+ logger.warning(f"Failed to inject localStorage for {origin}: {e}")
983
+
984
async def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
    """Poll the page until the injected ``window.sentience`` API reports ready (async).

    The page is considered ready once ``window.sentience`` is defined and its
    ``_wasmModule`` property is not ``null``.

    Args:
        timeout_sec: Maximum time, in seconds, to keep polling.

    Returns:
        True as soon as a poll reports readiness; False if the timeout elapses
        first. On timeout, a warning carrying the last observed status is
        emitted (when any status was observed).
    """
    import warnings

    poll_interval = 0.3
    # Use a monotonic clock so wall-clock adjustments (NTP, DST, manual changes)
    # can neither cut the wait short nor extend it.
    deadline = time.monotonic() + timeout_sec
    last_error = None

    while time.monotonic() < deadline:
        try:
            result = await self.page.evaluate(
                """() => {
                    if (typeof window.sentience === 'undefined') {
                        return { ready: false, reason: 'window.sentience undefined' };
                    }
                    if (window.sentience._wasmModule === null) {
                        return { ready: false, reason: 'WASM module not fully loaded' };
                    }
                    return { ready: true };
                }
                """
            )
        except Exception as e:
            # Evaluation can fail transiently (e.g. during navigation); remember
            # the error and keep polling until the deadline.
            last_error = f"Evaluation error: {str(e)}"
        else:
            if isinstance(result, dict):
                if result.get("ready"):
                    return True
                last_error = result.get("reason", "Unknown error")

        # Never sleep past the deadline: wait for the poll interval or for
        # whatever time is left, whichever is shorter.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        await asyncio.sleep(min(poll_interval, remaining))

    if last_error:
        warnings.warn(f"Extension wait timeout. Last status: {last_error}")

    return False
1019
+
1020
async def close(self, output_path: str | Path | None = None) -> tuple[str | None, bool]:
    """
    Close browser and cleanup (async)

    Shutdown order: pause, close the browser context, pause, stop Playwright,
    locate the recorded video (if any), remove the temporary extension
    directory, then optionally move the video to ``output_path``.

    Args:
        output_path: Optional path to rename the video file to

    Returns:
        Tuple of (video_path, shutdown_clean)
        - video_path: Path (as a string) to the video file if recording was
          enabled and a file was found, None otherwise
        - shutdown_clean: True if both the context close and the Playwright stop
          completed without error or timeout, False otherwise

    Note: Video path is resolved AFTER context close to avoid touching video
    subsystem during teardown, which can cause crashes on macOS.
    """
    is_macos = platform.system() == "Darwin"

    # Do not access page.video.path() before closing the context; the video
    # file is located on disk after the context has closed instead.

    # Wait before closing so pending operations (notably video frame writes)
    # can complete. macOS gets a longer pause for the video encoder to flush.
    if is_macos:
        logger.debug("Waiting for video recording to stabilize before closing (macOS)...")
        await asyncio.sleep(2.0)
    else:
        await asyncio.sleep(1.0)

    # Graceful shutdown: close context first, then playwright.
    context_close_success = True
    if self.context:
        try:
            # Generous timeout: video finalization can make context close slow.
            await asyncio.wait_for(self.context.close(), timeout=30.0)
            logger.debug("Context closed successfully")
        except (asyncio.TimeoutError, TimeoutError):
            # asyncio.wait_for raises asyncio.TimeoutError, which is only an
            # alias of the builtin TimeoutError from Python 3.11 onwards.
            logger.warning("Context close timed out, continuing with cleanup...")
            context_close_success = False
        except Exception as e:
            logger.warning(f"Error closing context: {e}")
            context_close_success = False
        finally:
            self.context = None

    # Give the browser a moment to flush video and release resources before
    # the driver is stopped (longer grace period on macOS).
    grace_period = 2.0 if is_macos else 1.0
    await asyncio.sleep(grace_period)

    playwright_stop_success = True
    if self.playwright:
        try:
            await asyncio.wait_for(self.playwright.stop(), timeout=15.0)
            logger.debug("Playwright stopped successfully")
        except (asyncio.TimeoutError, TimeoutError):
            logger.warning("Playwright stop timed out, continuing with cleanup...")
            playwright_stop_success = False
        except Exception as e:
            logger.warning(f"Error stopping playwright: {e}")
            playwright_stop_success = False
        finally:
            self.playwright = None

    # On macOS, wait a bit more so browser processes can terminate; this is
    # meant to help prevent crash dialogs from appearing.
    if is_macos:
        await asyncio.sleep(0.5)

    # NOW resolve video path after context is closed and video is finalized.
    temp_video_path = None
    if self.record_video_dir:
        try:
            # Pick the most recently modified .webm in record_video_dir rather
            # than touching page.video during teardown.
            video_dir = Path(self.record_video_dir)
            if video_dir.exists():
                webm_files = list(video_dir.glob("*.webm"))
                if webm_files:
                    temp_video_path = max(webm_files, key=lambda p: p.stat().st_mtime)
                    logger.debug(f"Found video file: {temp_video_path}")
        except Exception as e:
            logger.warning(f"Could not locate video file: {e}")

    # Removing the temporary extension directory is best-effort: a failure here
    # must not abort the rest of the cleanup or hide the shutdown result.
    if self._extension_path and os.path.exists(self._extension_path):
        try:
            shutil.rmtree(self._extension_path)
        except OSError as e:
            logger.warning(f"Failed to remove extension directory {self._extension_path}: {e}")

    # Clear page reference after closing context
    self.page = None

    # Always hand back a plain string, matching the declared return type.
    final_path = str(temp_video_path) if temp_video_path is not None else None
    if temp_video_path and output_path and os.path.exists(temp_video_path):
        try:
            output_path = str(output_path)
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            shutil.move(temp_video_path, output_path)
            final_path = output_path
        except Exception as e:
            import warnings

            # Keep the original location as the result if the move failed.
            warnings.warn(f"Failed to rename video file: {e}")
            final_path = str(temp_video_path)

    # Log shutdown status (useful for detecting crashes in headless mode)
    shutdown_clean = context_close_success and playwright_stop_success
    if not shutdown_clean:
        logger.warning(
            f"Browser shutdown had issues - may indicate a crash "
            f"(context_close: {context_close_success}, playwright_stop: {playwright_stop_success})"
        )
    else:
        logger.debug("Browser shutdown completed cleanly")

    # Return tuple: (video_path, shutdown_clean)
    # This allows callers to detect crashes even in headless mode
    return (final_path, shutdown_clean)
1143
+
1144
async def __aenter__(self):
    """Enter ``async with``: start the browser, then yield this instance."""
    await self.start()
    # The object bound by ``async with ... as browser`` is the browser itself.
    return self
1148
+
1149
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Leave ``async with``: close the browser.

    The result of ``close()`` is intentionally discarded. Nothing truthy is
    returned, so an exception raised inside the ``async with`` body propagates.
    """
    _ = await self.close()
    return None
1153
+
1154
@classmethod
async def from_existing(
    cls,
    context: AsyncBrowserContext,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already-created Playwright BrowserContext in an AsyncSentienceBrowser.

    The first page already open in the context is reused; when the context
    has no pages, a new one is opened.

    Args:
        context: Existing Playwright BrowserContext
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance configured to use the existing context
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.context = context

    # Reuse an open page when there is one; otherwise create a fresh page.
    open_pages = context.pages
    if open_pages:
        browser.page = open_pages[0]
    else:
        browser.page = await context.new_page()

    # Stealth is optional and only applied when the helper is available.
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed short pause intended to let the extension become ready
    # (no readiness polling is performed here).
    await asyncio.sleep(0.5)

    return browser
1185
+
1186
@classmethod
async def from_page(
    cls,
    page: AsyncPage,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already-created Playwright Page in an AsyncSentienceBrowser.

    The browser context is taken from the page itself (``page.context``).

    Args:
        page: Existing Playwright Page
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance configured to use the existing page
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.page = page
    browser.context = page.context

    # Stealth is optional and only applied when the helper is available.
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed short pause intended to let the extension become ready
    # (no readiness polling is performed here).
    await asyncio.sleep(0.5)

    return browser