sentienceapi-0.95.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentienceapi might be problematic. Click here for more details.
- sentience/__init__.py +253 -0
- sentience/_extension_loader.py +195 -0
- sentience/action_executor.py +215 -0
- sentience/actions.py +1020 -0
- sentience/agent.py +1181 -0
- sentience/agent_config.py +46 -0
- sentience/agent_runtime.py +424 -0
- sentience/asserts/__init__.py +70 -0
- sentience/asserts/expect.py +621 -0
- sentience/asserts/query.py +383 -0
- sentience/async_api.py +108 -0
- sentience/backends/__init__.py +137 -0
- sentience/backends/actions.py +343 -0
- sentience/backends/browser_use_adapter.py +241 -0
- sentience/backends/cdp_backend.py +393 -0
- sentience/backends/exceptions.py +211 -0
- sentience/backends/playwright_backend.py +194 -0
- sentience/backends/protocol.py +216 -0
- sentience/backends/sentience_context.py +469 -0
- sentience/backends/snapshot.py +427 -0
- sentience/base_agent.py +196 -0
- sentience/browser.py +1215 -0
- sentience/browser_evaluator.py +299 -0
- sentience/canonicalization.py +207 -0
- sentience/cli.py +130 -0
- sentience/cloud_tracing.py +807 -0
- sentience/constants.py +6 -0
- sentience/conversational_agent.py +543 -0
- sentience/element_filter.py +136 -0
- sentience/expect.py +188 -0
- sentience/extension/background.js +104 -0
- sentience/extension/content.js +161 -0
- sentience/extension/injected_api.js +914 -0
- sentience/extension/manifest.json +36 -0
- sentience/extension/pkg/sentience_core.d.ts +51 -0
- sentience/extension/pkg/sentience_core.js +323 -0
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
- sentience/extension/release.json +115 -0
- sentience/formatting.py +15 -0
- sentience/generator.py +202 -0
- sentience/inspector.py +367 -0
- sentience/llm_interaction_handler.py +191 -0
- sentience/llm_provider.py +875 -0
- sentience/llm_provider_utils.py +120 -0
- sentience/llm_response_builder.py +153 -0
- sentience/models.py +846 -0
- sentience/ordinal.py +280 -0
- sentience/overlay.py +222 -0
- sentience/protocols.py +228 -0
- sentience/query.py +303 -0
- sentience/read.py +188 -0
- sentience/recorder.py +589 -0
- sentience/schemas/trace_v1.json +335 -0
- sentience/screenshot.py +100 -0
- sentience/sentience_methods.py +86 -0
- sentience/snapshot.py +706 -0
- sentience/snapshot_diff.py +126 -0
- sentience/text_search.py +262 -0
- sentience/trace_event_builder.py +148 -0
- sentience/trace_file_manager.py +197 -0
- sentience/trace_indexing/__init__.py +27 -0
- sentience/trace_indexing/index_schema.py +199 -0
- sentience/trace_indexing/indexer.py +414 -0
- sentience/tracer_factory.py +322 -0
- sentience/tracing.py +449 -0
- sentience/utils/__init__.py +40 -0
- sentience/utils/browser.py +46 -0
- sentience/utils/element.py +257 -0
- sentience/utils/formatting.py +59 -0
- sentience/utils.py +296 -0
- sentience/verification.py +380 -0
- sentience/visual_agent.py +2058 -0
- sentience/wait.py +139 -0
- sentienceapi-0.95.0.dist-info/METADATA +984 -0
- sentienceapi-0.95.0.dist-info/RECORD +82 -0
- sentienceapi-0.95.0.dist-info/WHEEL +5 -0
- sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
- sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
- sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
- sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
- sentienceapi-0.95.0.dist-info/top_level.txt +1 -0
sentience/browser.py
ADDED
|
@@ -0,0 +1,1215 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Playwright browser harness with extension loading
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import platform
|
|
9
|
+
import shutil
|
|
10
|
+
import tempfile
|
|
11
|
+
import time
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional, Union
|
|
14
|
+
from urllib.parse import urlparse
|
|
15
|
+
|
|
16
|
+
from playwright.async_api import BrowserContext as AsyncBrowserContext
|
|
17
|
+
from playwright.async_api import Page as AsyncPage
|
|
18
|
+
from playwright.async_api import Playwright as AsyncPlaywright
|
|
19
|
+
from playwright.async_api import async_playwright
|
|
20
|
+
from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
|
|
21
|
+
|
|
22
|
+
from sentience._extension_loader import find_extension_path
|
|
23
|
+
from sentience.constants import SENTIENCE_API_URL
|
|
24
|
+
from sentience.models import ProxyConfig, StorageState, Viewport
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
# Import stealth for bot evasion (optional - graceful fallback if not available)
|
|
29
|
+
try:
|
|
30
|
+
from playwright_stealth import stealth_async, stealth_sync
|
|
31
|
+
|
|
32
|
+
STEALTH_AVAILABLE = True
|
|
33
|
+
except ImportError:
|
|
34
|
+
STEALTH_AVAILABLE = False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SentienceBrowser:
|
|
38
|
+
"""Main browser session with Sentience extension loaded"""
|
|
39
|
+
|
|
40
|
+
def __init__(
    self,
    api_key: str | None = None,
    api_url: str | None = None,
    headless: bool | None = None,
    proxy: str | None = None,
    user_data_dir: str | Path | None = None,
    storage_state: str | Path | StorageState | dict | None = None,
    record_video_dir: str | Path | None = None,
    record_video_size: dict[str, int] | None = None,
    viewport: Viewport | dict[str, int] | None = None,
    device_scale_factor: float | None = None,
):
    """
    Initialize Sentience browser

    Only stores configuration; nothing is launched until start() is called.

    Args:
        api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
                 If None, uses free tier (local extension only)
        api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
                 If None and api_key is provided, uses default URL
                 If None and no api_key, uses free tier (local extension only)
                 If 'local' or Docker sidecar URL, uses Enterprise tier
        headless: Whether to run in headless mode. If None, defaults to True in CI, False otherwise
                  (CI is detected via the CI environment variable being "true").
        proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
               Supports HTTP, HTTPS, and SOCKS5 proxies
               Falls back to SENTIENCE_PROXY environment variable if not provided
        user_data_dir: Optional path (str or Path) to user data directory for persistent sessions.
                       If None, uses temporary directory (session not persisted).
                       If provided, cookies and localStorage persist across browser restarts.
        storage_state: Optional storage state to inject (cookies + localStorage).
                       Can be:
                       - Path to JSON file (str or Path)
                       - StorageState object
                       - Dictionary with 'cookies' and/or 'origins' keys
                       If provided, browser starts with pre-injected authentication.
        record_video_dir: Optional directory path to save video recordings.
                          If provided, browser will record video of all pages.
                          Videos are saved as .webm files in the specified directory.
                          If None, no video recording is performed.
        record_video_size: Optional video resolution as dict with 'width' and 'height' keys.
                           Examples: {"width": 1280, "height": 800} (default)
                                     {"width": 1920, "height": 1080} (1080p)
                           If None, defaults to 1280x800.
        viewport: Optional viewport size as Viewport object or dict with 'width' and 'height' keys.
                  Examples: Viewport(width=1280, height=800) (default)
                            Viewport(width=1920, height=1080) (Full HD)
                            {"width": 1280, "height": 800} (dict also supported)
                  If None, defaults to Viewport(width=1280, height=800).
        device_scale_factor: Optional device scale factor for high-DPI emulation.
                             If None, the option is not passed to the browser launch
                             and the browser's own default applies.
    """
    self.api_key = api_key
    # A key without an explicit URL means "use the hosted API"; in every other
    # case the caller's value is kept as-is (None => free tier, local extension only).
    self.api_url = SENTIENCE_API_URL if (api_key and not api_url) else api_url

    # Headless defaults to True only when running in CI, False for local development.
    if headless is None:
        self.headless = os.environ.get("CI", "").lower() == "true"
    else:
        self.headless = headless

    # Explicit argument wins over the SENTIENCE_PROXY environment variable.
    self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")

    # Auth injection support
    self.user_data_dir = user_data_dir
    self.storage_state = storage_state

    # Video recording support
    self.record_video_dir = record_video_dir
    self.record_video_size = record_video_size or {"width": 1280, "height": 800}

    # Viewport configuration - normalise dict input to a Viewport object
    if viewport is None:
        self.viewport = Viewport(width=1280, height=800)
    elif isinstance(viewport, dict):
        self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
    else:
        self.viewport = viewport

    # Device scale factor for high-DPI emulation
    self.device_scale_factor = device_scale_factor

    # Runtime handles; populated by start() / from_existing() / from_page()
    self.playwright: Playwright | None = None
    self.context: BrowserContext | None = None
    self.page: Page | None = None
    self._extension_path: str | None = None
|
|
131
|
+
|
|
132
|
+
def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
    """
    Parse proxy connection string into ProxyConfig.

    Args:
        proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')

    Returns:
        ProxyConfig object, or None if the string is empty or invalid.
        Invalid input is never raised to the caller: the problem is logged
        as a warning and None is returned.
    """
    from urllib.parse import unquote

    if not proxy_string:
        return None

    try:
        parsed = urlparse(proxy_string)

        # Validate scheme
        if parsed.scheme not in ("http", "https", "socks5"):
            logger.warning(
                f"Unsupported proxy scheme: {parsed.scheme}. Supported: http, https, socks5"
            )
            return None

        # Validate host and port. Reading .port raises ValueError for a
        # non-numeric or out-of-range port; that is handled by the except below.
        host = parsed.hostname
        port = parsed.port
        if not host or not port:
            logger.warning(
                "Proxy URL must include hostname and port. Expected format: http://username:password@host:port"
            )
            return None

        # urlparse strips the brackets from IPv6 literals; they must be restored
        # or the host's colons become ambiguous with the port separator.
        if ":" in host:
            host = f"[{host}]"
        server = f"{parsed.scheme}://{host}:{port}"

        # Credentials inside a URL are percent-encoded; decode them so the
        # proxy receives the real username/password.
        return ProxyConfig(
            server=server,
            username=unquote(parsed.username) if parsed.username else None,
            password=unquote(parsed.password) if parsed.password else None,
        )

    except Exception as e:
        # Deliberately best-effort: a bad proxy string degrades to "no proxy".
        logger.warning(
            f"Invalid proxy configuration: {e}. Expected format: http://username:password@host:port"
        )
        return None
|
|
180
|
+
|
|
181
|
+
def start(self) -> None:
    """
    Launch browser with extension loaded

    Copies the extension into a fresh temporary directory, starts the
    Playwright driver, opens a persistent Chromium context with the extension
    loaded, then applies optional storage-state injection and stealth.

    If any step fails, everything this call created (context, Playwright
    driver, temporary extension copy) is torn down before the original
    exception is re-raised.
    """
    # Get extension source path using shared utility
    extension_source = find_extension_path()

    # Work from a private temp copy to avoid file locking issues and ensure clean state
    extension_dir = tempfile.mkdtemp(prefix="sentience-ext-")
    self._extension_path = extension_dir

    started_playwright = None
    launched_context = None
    try:
        shutil.copytree(extension_source, extension_dir, dirs_exist_ok=True)

        started_playwright = sync_playwright().start()
        self.playwright = started_playwright

        system = platform.system()

        # Chromium honours only one --disable-features switch, so every feature
        # to disable is collected here and emitted once as a comma-separated list.
        disabled_features = ["WebRtcHideLocalIpsWithMdns"]

        # Build launch arguments
        args = [
            f"--disable-extensions-except={extension_dir}",
            f"--load-extension={extension_dir}",
            "--disable-blink-features=AutomationControlled",  # Hides 'navigator.webdriver'
            "--disable-infobars",
            # WebRTC leak protection (prevents real IP exposure when using proxies/VPNs)
            "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
        ]

        # Only add --no-sandbox on Linux (the flag causes crashes on macOS,
        # where sandboxing works fine)
        if system == "Linux":
            args.append("--no-sandbox")

        # GPU-disabling flags for macOS to prevent Chrome for Testing crash-on-exit
        # (EXC_BAD_ACCESS during browser shutdown)
        if system == "Darwin":
            args.extend(
                [
                    "--disable-gpu",
                    "--disable-software-rasterizer",
                    "--disable-dev-shm-usage",
                    "--disable-breakpad",  # Disable crash reporter to prevent macOS crash dialogs
                    "--disable-crash-reporter",  # Disable crash reporter UI
                    "--disable-crash-handler",  # Disable crash handler completely
                    "--disable-in-process-stack-traces",  # Disable stack trace collection
                    "--disable-hang-monitor",  # Disable hang detection
                    "--disable-background-networking",  # Disable background networking
                    "--disable-background-timer-throttling",  # Disable background throttling
                    "--disable-backgrounding-occluded-windows",  # Disable backgrounding
                    "--disable-renderer-backgrounding",  # Disable renderer backgrounding
                    "--disable-ipc-flooding-protection",  # Disable IPC flooding protection
                    "--disable-logging",  # Disable logging to reduce stderr noise
                    "--log-level=3",  # Set log level to fatal only (suppresses warnings)
                ]
            )
            disabled_features.append("TranslateUI")  # Disable translate UI

        args.append(f"--disable-features={','.join(disabled_features)}")

        # 'headless=True' DOES NOT support extensions in standard Chrome.
        # We must use the new headless mode (Chrome 112+) via args, or run visible.
        if self.headless:
            args.append("--headless=new")

        # Parse proxy configuration if provided
        proxy_config = self._parse_proxy(self.proxy) if self.proxy else None

        # Handle User Data Directory (Persistence)
        if self.user_data_dir:
            user_data_dir = str(self.user_data_dir)
            Path(user_data_dir).mkdir(parents=True, exist_ok=True)
        else:
            user_data_dir = ""  # Ephemeral temp dir (existing behavior)

        # Build launch_persistent_context parameters
        launch_params = {
            "user_data_dir": user_data_dir,
            # IMPORTANT: always False here; headless is driven by --headless=new above
            "headless": False,
            "args": args,
            "viewport": {"width": self.viewport.width, "height": self.viewport.height},
            # Fixed UA so "HeadlessChrome" never appears in the User Agent
            "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            # Note: Don't set "channel" - let Playwright use its default managed Chromium
            # Setting channel=None doesn't force bundled Chromium and can still pick Chrome for Testing
        }

        # Add device scale factor if configured
        if self.device_scale_factor is not None:
            launch_params["device_scale_factor"] = self.device_scale_factor

        # Add proxy if configured
        if proxy_config:
            launch_params["proxy"] = proxy_config.to_playwright_dict()
            # Ignore HTTPS errors when using proxy (many residential proxies use self-signed certs)
            launch_params["ignore_https_errors"] = True
            logger.info(f"Using proxy: {proxy_config.server}")

        # Add video recording if configured
        if self.record_video_dir:
            video_dir = Path(self.record_video_dir)
            video_dir.mkdir(parents=True, exist_ok=True)
            launch_params["record_video_dir"] = str(video_dir)
            launch_params["record_video_size"] = self.record_video_size
            logger.info(
                f"Recording video to: {video_dir} (Resolution: {self.record_video_size['width']}x{self.record_video_size['height']})"
            )

        # Launch persistent context (required for extensions)
        launched_context = started_playwright.chromium.launch_persistent_context(**launch_params)
        self.context = launched_context

        self.page = (
            launched_context.pages[0] if launched_context.pages else launched_context.new_page()
        )

        # Inject storage state if provided (must be after context creation)
        if self.storage_state:
            self._inject_storage_state(self.storage_state)

        # Apply stealth if available
        if STEALTH_AVAILABLE:
            stealth_sync(self.page)

        # Wait a moment for extension to initialize
        time.sleep(0.5)
    except Exception:
        # Roll back only what this call created so a failed launch does not leak
        # a browser process, the Playwright driver, or the temp extension copy.
        if launched_context is not None:
            try:
                launched_context.close()
            except Exception as cleanup_err:
                logger.debug(f"Ignoring error closing context after failed start: {cleanup_err}")
            self.context = None
            self.page = None
        if started_playwright is not None:
            try:
                started_playwright.stop()
            except Exception as cleanup_err:
                logger.debug(f"Ignoring error stopping Playwright after failed start: {cleanup_err}")
            self.playwright = None
        shutil.rmtree(extension_dir, ignore_errors=True)
        self._extension_path = None
        raise
|
|
300
|
+
|
|
301
|
+
def goto(self, url: str) -> None:
    """
    Navigate to a URL and ensure extension is ready

    Args:
        url: Address to load; navigation waits for "domcontentloaded".

    Raises:
        RuntimeError: If the browser has not been started, or if the
            extension is not detected on the page after navigation (the
            message then includes diagnostic details read from the page).
    """
    page = self.page
    if not page:
        raise RuntimeError("Browser not started. Call start() first.")

    page.goto(url, wait_until="domcontentloaded")

    # Happy path: the extension injected its API into the page in time.
    if self._wait_for_extension():
        return

    # Extension never showed up - collect what the page can tell us before failing.
    try:
        diag = page.evaluate(
            """() => ({
                sentience_defined: typeof window.sentience !== 'undefined',
                registry_defined: typeof window.sentience_registry !== 'undefined',
                snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
                extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
                url: window.location.href
            })"""
        )
    except Exception as e:
        diag = f"Failed to get diagnostics: {str(e)}"

    message_lines = [
        "Extension failed to load after navigation. Make sure:",
        "1. Extension is built (cd sentience-chrome && ./build.sh)",
        "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)",
        "3. Check browser console for errors (run with headless=False to see console)",
        f"4. Extension path: {self._extension_path}",
        f"5. Diagnostic info: {diag}",
    ]
    raise RuntimeError("\n".join(message_lines))
|
|
332
|
+
|
|
333
|
+
def _inject_storage_state(
    self, storage_state: str | Path | StorageState | dict
) -> None:  # noqa: C901
    """
    Inject storage state (cookies + localStorage) into browser context.

    Cookies are added to the context in a single call. localStorage can only
    be written from a page on the matching origin, so the current page is
    navigated to each origin that has items to write; origins with nothing
    to write are skipped without navigating. A failure for one origin is
    logged as a warning and does not stop the others.

    Args:
        storage_state: Path to JSON file, StorageState object, or dict containing storage state

    Raises:
        ValueError: If storage_state is not a str, Path, StorageState, or dict.
    """
    import json

    # Normalise every accepted input form to a StorageState object
    if isinstance(storage_state, (str, Path)):
        with open(storage_state, encoding="utf-8") as f:
            state = StorageState.from_dict(json.load(f))
    elif isinstance(storage_state, StorageState):
        state = storage_state
    elif isinstance(storage_state, dict):
        state = StorageState.from_dict(storage_state)
    else:
        raise ValueError(
            f"Invalid storage_state type: {type(storage_state)}. "
            "Expected str, Path, StorageState, or dict."
        )

    # Inject cookies (works globally)
    if state.cookies:
        required_keys = ("name", "value", "domain", "path")
        # Optional fields are forwarded only when they carry a truthy value
        optional_keys = ("expires", "httpOnly", "secure", "sameSite")

        playwright_cookies = []
        for cookie in state.cookies:
            cookie_dict = cookie.model_dump()
            playwright_cookie = {key: cookie_dict[key] for key in required_keys}
            playwright_cookie.update(
                {key: cookie_dict[key] for key in optional_keys if cookie_dict.get(key)}
            )
            playwright_cookies.append(playwright_cookie)

        self.context.add_cookies(playwright_cookies)
        logger.debug(f"Injected {len(state.cookies)} cookie(s)")

    # Inject LocalStorage (requires navigation to each domain)
    for origin_data in state.origins or []:
        origin = origin_data.origin
        if not origin:
            continue

        try:
            items = origin_data.localStorage
            if not items:
                # Nothing to write for this origin; avoid a pointless
                # navigation (which can block for the full 10 s timeout).
                continue

            # Navigate to origin to set localStorage
            self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

            # Convert to dict format for JavaScript
            localStorage_dict = {item.name: item.value for item in items}
            self.page.evaluate(
                """(localStorage_data) => {
                    for (const [key, value] of Object.entries(localStorage_data)) {
                        localStorage.setItem(key, value);
                    }
                }""",
                localStorage_dict,
            )
            logger.debug(f"Injected {len(items)} localStorage item(s) for {origin}")
        except Exception as e:
            logger.warning(f"Failed to inject localStorage for {origin}: {e}")
|
|
418
|
+
|
|
419
|
+
def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
|
|
420
|
+
"""Poll for window.sentience to be available"""
|
|
421
|
+
start_time = time.time()
|
|
422
|
+
last_error = None
|
|
423
|
+
|
|
424
|
+
while time.time() - start_time < timeout_sec:
|
|
425
|
+
try:
|
|
426
|
+
# Check if API exists and WASM is ready (optional check for _wasmModule)
|
|
427
|
+
result = self.page.evaluate(
|
|
428
|
+
"""() => {
|
|
429
|
+
if (typeof window.sentience === 'undefined') {
|
|
430
|
+
return { ready: false, reason: 'window.sentience undefined' };
|
|
431
|
+
}
|
|
432
|
+
// Check if WASM loaded (if exposed) or if basic API works
|
|
433
|
+
// Note: injected_api.js defines window.sentience immediately,
|
|
434
|
+
// but _wasmModule might take a few ms to load.
|
|
435
|
+
if (window.sentience._wasmModule === null) {
|
|
436
|
+
// It's defined but WASM isn't linked yet
|
|
437
|
+
return { ready: false, reason: 'WASM module not fully loaded' };
|
|
438
|
+
}
|
|
439
|
+
// If _wasmModule is not exposed, that's okay - it might be internal
|
|
440
|
+
// Just verify the API structure is correct
|
|
441
|
+
return { ready: true };
|
|
442
|
+
}
|
|
443
|
+
"""
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
if isinstance(result, dict):
|
|
447
|
+
if result.get("ready"):
|
|
448
|
+
return True
|
|
449
|
+
last_error = result.get("reason", "Unknown error")
|
|
450
|
+
except Exception as e:
|
|
451
|
+
# Continue waiting on errors
|
|
452
|
+
last_error = f"Evaluation error: {str(e)}"
|
|
453
|
+
|
|
454
|
+
time.sleep(0.3)
|
|
455
|
+
|
|
456
|
+
# Log the last error for debugging
|
|
457
|
+
if last_error:
|
|
458
|
+
import warnings
|
|
459
|
+
|
|
460
|
+
warnings.warn(f"Extension wait timeout. Last status: {last_error}")
|
|
461
|
+
|
|
462
|
+
return False
|
|
463
|
+
|
|
464
|
+
def close(self, output_path: str | Path | None = None) -> str | None:
|
|
465
|
+
"""
|
|
466
|
+
Close browser and cleanup
|
|
467
|
+
|
|
468
|
+
Args:
|
|
469
|
+
output_path: Optional path to rename the video file to.
|
|
470
|
+
If provided, the recorded video will be moved to this location.
|
|
471
|
+
Useful for giving videos meaningful names instead of random hashes.
|
|
472
|
+
|
|
473
|
+
Returns:
|
|
474
|
+
Path to video file if recording was enabled, None otherwise
|
|
475
|
+
Note: Video files are saved automatically by Playwright when context closes.
|
|
476
|
+
If multiple pages exist, returns the path to the first page's video.
|
|
477
|
+
"""
|
|
478
|
+
# CRITICAL: Don't access page.video.path() BEFORE closing context
|
|
479
|
+
# This can poke the video subsystem at an awkward time and cause crashes on macOS
|
|
480
|
+
# Instead, we'll locate the video file after context closes
|
|
481
|
+
|
|
482
|
+
# Close context (this triggers video file finalization)
|
|
483
|
+
if self.context:
|
|
484
|
+
self.context.close()
|
|
485
|
+
# Small grace period to ensure video file is fully flushed to disk
|
|
486
|
+
time.sleep(0.5)
|
|
487
|
+
|
|
488
|
+
# Close playwright
|
|
489
|
+
if self.playwright:
|
|
490
|
+
self.playwright.stop()
|
|
491
|
+
|
|
492
|
+
# Clean up extension directory
|
|
493
|
+
if self._extension_path and os.path.exists(self._extension_path):
|
|
494
|
+
shutil.rmtree(self._extension_path)
|
|
495
|
+
|
|
496
|
+
# NOW resolve video path after context is closed and video is finalized
|
|
497
|
+
temp_video_path = None
|
|
498
|
+
if self.record_video_dir:
|
|
499
|
+
try:
|
|
500
|
+
# Locate the newest .webm file in record_video_dir
|
|
501
|
+
# This avoids touching page.video during teardown
|
|
502
|
+
video_dir = Path(self.record_video_dir)
|
|
503
|
+
if video_dir.exists():
|
|
504
|
+
webm_files = list(video_dir.glob("*.webm"))
|
|
505
|
+
if webm_files:
|
|
506
|
+
# Get the most recently modified file
|
|
507
|
+
temp_video_path = max(webm_files, key=lambda p: p.stat().st_mtime)
|
|
508
|
+
logger.debug(f"Found video file: {temp_video_path}")
|
|
509
|
+
except Exception as e:
|
|
510
|
+
logger.warning(f"Could not locate video file: {e}")
|
|
511
|
+
|
|
512
|
+
# Rename/move video if output_path is specified
|
|
513
|
+
final_path = str(temp_video_path) if temp_video_path else None
|
|
514
|
+
if temp_video_path and output_path and os.path.exists(temp_video_path):
|
|
515
|
+
try:
|
|
516
|
+
output_path = str(output_path)
|
|
517
|
+
# Ensure parent directory exists
|
|
518
|
+
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
|
519
|
+
shutil.move(temp_video_path, output_path)
|
|
520
|
+
final_path = output_path
|
|
521
|
+
except Exception as e:
|
|
522
|
+
import warnings
|
|
523
|
+
|
|
524
|
+
warnings.warn(f"Failed to rename video file: {e}")
|
|
525
|
+
# Return original path if rename fails
|
|
526
|
+
final_path = str(temp_video_path)
|
|
527
|
+
|
|
528
|
+
return final_path
|
|
529
|
+
|
|
530
|
+
@classmethod
def from_existing(
    cls,
    context: BrowserContext,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "SentienceBrowser":
    """
    Create SentienceBrowser from an existing Playwright BrowserContext.

    Use this when you want to build the browser context yourself and still
    drive it through the Sentience SDK. The context's first page is adopted,
    or a new page is opened if the context has none.

    Args:
        context: Existing Playwright BrowserContext
        api_key: Optional API key for server-side processing
        api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)

    Returns:
        SentienceBrowser instance configured to use the existing context

    Example:
        from playwright.sync_api import sync_playwright
        from sentience import SentienceBrowser, snapshot

        with sync_playwright() as p:
            context = p.chromium.launch_persistent_context(...)
            browser = SentienceBrowser.from_existing(context)
            browser.page.goto("https://example.com")
            snap = snapshot(browser)
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.context = context

    # Adopt the first open page, opening one only when the context is empty
    if context.pages:
        browser.page = context.pages[0]
    else:
        browser.page = context.new_page()

    # Stealth is optional and applied only when playwright_stealth imported
    if STEALTH_AVAILABLE:
        stealth_sync(browser.page)

    # Give the extension (if one is loaded) a moment to initialize
    time.sleep(0.5)

    return browser
|
|
573
|
+
|
|
574
|
+
@classmethod
def from_page(
    cls,
    page: Page,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "SentienceBrowser":
    """
    Create SentienceBrowser from an existing Playwright Page.

    Use this when you already have a page and want to drive it through the
    Sentience SDK. The page's own context is adopted along with it.

    Args:
        page: Existing Playwright Page
        api_key: Optional API key for server-side processing
        api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)

    Returns:
        SentienceBrowser instance configured to use the existing page

    Example:
        from playwright.sync_api import sync_playwright
        from sentience import SentienceBrowser, snapshot

        with sync_playwright() as p:
            browser_instance = p.chromium.launch()
            context = browser_instance.new_context()
            page = context.new_page()
            page.goto("https://example.com")

            browser = SentienceBrowser.from_page(page)
            snap = snapshot(browser)
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.page = page
    browser.context = page.context

    # Stealth is optional and applied only when playwright_stealth imported
    if STEALTH_AVAILABLE:
        stealth_sync(browser.page)

    # Give the extension (if one is loaded) a moment to initialize
    time.sleep(0.5)

    return browser
|
|
620
|
+
|
|
621
|
+
def __enter__(self):
    """Start the browser on entering a ``with`` block and return this instance."""
    self.start()
    return self
|
|
625
|
+
|
|
626
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
    """Close the browser on leaving a ``with`` block; exceptions are not suppressed."""
    # The exception details are unused; close() runs regardless and the
    # implicit None return lets any in-flight exception propagate.
    self.close()
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
class AsyncSentienceBrowser:
|
|
632
|
+
"""Async version of SentienceBrowser for use in asyncio contexts."""
|
|
633
|
+
|
|
634
|
+
def __init__(
    self,
    api_key: str | None = None,
    api_url: str | None = None,
    headless: bool | None = None,
    proxy: str | None = None,
    user_data_dir: str | Path | None = None,
    storage_state: str | Path | StorageState | dict | None = None,
    record_video_dir: str | Path | None = None,
    record_video_size: dict[str, int] | None = None,
    viewport: Viewport | dict[str, int] | None = None,
    device_scale_factor: float | None = None,
    executable_path: str | None = None,
):
    """
    Store the configuration for an async Sentience browser (nothing is launched here).

    Args:
        api_key: API key for server-side processing (Pro/Enterprise tiers).
            When None, the free tier (local extension only) is used.
        api_url: Server URL for API calls. Falls back to SENTIENCE_API_URL
            when an api_key is given without an explicit URL.
        headless: Whether to run headless. When None, it is True only if the
            ``CI`` environment variable equals "true" (case-insensitive).
        proxy: Proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080').
            When empty, the ``SENTIENCE_PROXY`` environment variable is used.
        user_data_dir: Path to a user data directory for persistent sessions.
        storage_state: Storage state to inject (cookies + localStorage).
        record_video_dir: Directory in which video recordings are saved.
        record_video_size: Video resolution as a dict with 'width' and 'height'
            keys; defaults to 1280x800.
        viewport: Viewport size as a Viewport object or a dict with 'width' and
            'height' keys, e.g. Viewport(width=1920, height=1080) or
            {"width": 1280, "height": 800}. Defaults to Viewport(width=1280, height=800).
        device_scale_factor: Device scale factor used to emulate high-DPI
            screens, e.g. 1.0 (standard), 2.0 (Retina), 3.0 (very high DPI).
            None leaves the launch default in place.
        executable_path: Path to a Chromium executable that should be used
            instead of Playwright's managed browser, e.g.
            "/path/to/playwright/chromium-1234/chrome-mac/Chromium.app/Contents/MacOS/Chromium".
    """
    self.api_key = api_key
    # The default endpoint is only filled in when a key is present; without a
    # key (free tier) the URL is kept exactly as passed, usually None.
    self.api_url = SENTIENCE_API_URL if (api_key and not api_url) else api_url

    # Unspecified headless mode: headless in CI, headed for local development.
    self.headless = (
        os.environ.get("CI", "").lower() == "true" if headless is None else headless
    )

    # An explicit proxy argument wins over the environment variable.
    self.proxy = proxy if proxy else os.environ.get("SENTIENCE_PROXY")

    # Auth injection support
    self.user_data_dir = user_data_dir
    self.storage_state = storage_state

    # Video recording support
    self.record_video_dir = record_video_dir
    self.record_video_size = (
        record_video_size if record_video_size else {"width": 1280, "height": 800}
    )

    # Normalize the viewport so the rest of the class always sees a Viewport object.
    if isinstance(viewport, dict):
        viewport = Viewport(width=viewport["width"], height=viewport["height"])
    elif viewport is None:
        viewport = Viewport(width=1280, height=800)
    self.viewport = viewport

    # High-DPI emulation and explicit browser binary override
    self.device_scale_factor = device_scale_factor
    self.executable_path = executable_path

    # Runtime handles, populated by start() or the from_* constructors.
    self.playwright: AsyncPlaywright | None = None
    self.context: AsyncBrowserContext | None = None
    self.page: AsyncPage | None = None
    self._extension_path: str | None = None
|
|
719
|
+
|
|
720
|
+
def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
    """
    Parse a proxy connection string into a ProxyConfig.

    Args:
        proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080').
            Credentials containing reserved characters must be percent-encoded
            in the URL; they are decoded before being stored.

    Returns:
        ProxyConfig object, or None if the string is empty, uses an
        unsupported scheme, lacks a hostname/port, or cannot be parsed.
        Every rejection other than an empty string is logged as a warning.
    """
    from urllib.parse import unquote

    if not proxy_string:
        return None

    try:
        parsed = urlparse(proxy_string)

        # Validate scheme
        if parsed.scheme not in ("http", "https", "socks5"):
            logger.warning(
                f"Unsupported proxy scheme: {parsed.scheme}. Supported: http, https, socks5"
            )
            return None

        # Validate host and port (accessing .port raises ValueError for a
        # non-numeric or out-of-range port; that is handled below)
        host = parsed.hostname
        port = parsed.port
        if not host or not port:
            logger.warning(
                "Proxy URL must include hostname and port. Expected format: http://username:password@host:port"
            )
            return None

        # urlparse strips the brackets from IPv6 literals; they must be put
        # back, otherwise the host/port boundary in the server URL is ambiguous.
        if ":" in host:
            host = f"[{host}]"
        server = f"{parsed.scheme}://{host}:{port}"

        # urlparse leaves userinfo percent-encoded; decode it so the real
        # credentials (e.g. a password containing '@') are what gets stored.
        return ProxyConfig(
            server=server,
            username=unquote(parsed.username) if parsed.username else None,
            password=unquote(parsed.password) if parsed.password else None,
        )

    except Exception as e:
        # Best-effort by design: an unusable proxy string is reported, not raised.
        logger.warning(
            f"Invalid proxy configuration: {e}. Expected format: http://username:password@host:port"
        )
        return None
|
|
765
|
+
|
|
766
|
+
async def start(self) -> None:
    """Launch Chromium with the Sentience extension loaded (async).

    Copies the extension into a temporary directory, starts Playwright and
    opens a persistent browser context with the configured options. It then
    selects the first page (creating one if none exists), injects
    ``storage_state`` if set and applies stealth when available. A final
    0.5 s pause gives the extension a moment to initialize.

    Raises:
        Exception: Whatever the extension copy, Playwright start-up or the
            browser launch raises. Before re-raising, the temporary
            extension directory and the Playwright driver acquired here are
            released, because ``__aexit__`` never runs when ``__aenter__``
            fails.
    """
    # Get extension source path using shared utility
    extension_source = find_extension_path()

    # Create temporary extension bundle
    self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")

    try:
        shutil.copytree(extension_source, self._extension_path, dirs_exist_ok=True)

        self.playwright = await async_playwright().start()

        # All disabled features are collected and emitted as ONE switch:
        # passing --disable-features more than once risks only one of the
        # occurrences being honored by the browser.
        disabled_features = ["WebRtcHideLocalIpsWithMdns"]

        # Build launch arguments
        args = [
            f"--disable-extensions-except={self._extension_path}",
            f"--load-extension={self._extension_path}",
            "--disable-blink-features=AutomationControlled",
            "--disable-infobars",
            "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
        ]

        system_name = platform.system()

        # Only add --no-sandbox on Linux (causes crashes on macOS)
        # macOS sandboxing works fine and the flag actually causes crashes
        if system_name == "Linux":
            args.append("--no-sandbox")

        # Add GPU-disabling flags for macOS to prevent Chrome for Testing crash-on-exit
        # These flags help avoid EXC_BAD_ACCESS crashes during browser shutdown
        if system_name == "Darwin":  # macOS
            args.extend(
                [
                    "--disable-gpu",
                    "--disable-software-rasterizer",
                    "--disable-dev-shm-usage",
                    "--disable-breakpad",  # Disable crash reporter to prevent macOS crash dialogs
                    "--disable-crash-reporter",  # Disable crash reporter UI
                    "--disable-crash-handler",  # Disable crash handler completely
                    "--disable-in-process-stack-traces",  # Disable stack trace collection
                    "--disable-hang-monitor",  # Disable hang detection
                    "--disable-background-networking",  # Disable background networking
                    "--disable-background-timer-throttling",  # Disable background throttling
                    "--disable-backgrounding-occluded-windows",  # Disable backgrounding
                    "--disable-renderer-backgrounding",  # Disable renderer backgrounding
                    "--disable-ipc-flooding-protection",  # Disable IPC flooding protection
                    "--disable-logging",  # Disable logging to reduce stderr noise
                    "--log-level=3",  # Set log level to fatal only (suppresses warnings)
                ]
            )
            disabled_features.append("TranslateUI")  # Disable translate UI

        args.append("--disable-features=" + ",".join(disabled_features))

        # Headless mode is requested through a Chromium flag; Playwright's own
        # "headless" option stays False in launch_params below.
        if self.headless:
            args.append("--headless=new")

        # Parse proxy configuration if provided
        proxy_config = self._parse_proxy(self.proxy) if self.proxy else None

        # Handle User Data Directory (an empty string is passed when none is configured)
        if self.user_data_dir:
            user_data_dir = str(self.user_data_dir)
            Path(user_data_dir).mkdir(parents=True, exist_ok=True)
        else:
            user_data_dir = ""

        # Build launch_persistent_context parameters
        launch_params = {
            "user_data_dir": user_data_dir,
            "headless": False,
            "args": args,
            "viewport": {"width": self.viewport.width, "height": self.viewport.height},
            "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            # Note: Don't set "channel" - let Playwright use its default managed Chromium
            # Setting channel=None doesn't force bundled Chromium and can still pick Chrome for Testing
        }

        # If executable_path is provided, use it to force specific Chromium binary
        # This guarantees we use Chromium (not Chrome for Testing) on macOS
        if self.executable_path:
            launch_params["executable_path"] = self.executable_path
            logger.info(f"Using explicit executable: {self.executable_path}")

        # Add device scale factor if configured
        if self.device_scale_factor is not None:
            launch_params["device_scale_factor"] = self.device_scale_factor

        # Add proxy if configured (HTTPS errors are ignored whenever a proxy is used)
        if proxy_config:
            launch_params["proxy"] = proxy_config.to_playwright_dict()
            launch_params["ignore_https_errors"] = True
            logger.info(f"Using proxy: {proxy_config.server}")

        # Add video recording if configured
        if self.record_video_dir:
            video_dir = Path(self.record_video_dir)
            video_dir.mkdir(parents=True, exist_ok=True)
            launch_params["record_video_dir"] = str(video_dir)
            launch_params["record_video_size"] = self.record_video_size
            logger.info(
                f"Recording video to: {video_dir} (Resolution: {self.record_video_size['width']}x{self.record_video_size['height']})"
            )

        # Launch persistent context
        self.context = await self.playwright.chromium.launch_persistent_context(**launch_params)
    except Exception:
        # Nothing usable exists yet, so release what was acquired and re-raise.
        if self.playwright is not None:
            try:
                await self.playwright.stop()
            except Exception as stop_error:
                logger.warning(f"Error stopping playwright after failed start: {stop_error}")
            self.playwright = None
        shutil.rmtree(self._extension_path, ignore_errors=True)
        self._extension_path = None
        raise

    self.page = self.context.pages[0] if self.context.pages else await self.context.new_page()

    # Inject storage state if provided
    if self.storage_state:
        await self._inject_storage_state(self.storage_state)

    # Apply stealth if available
    if STEALTH_AVAILABLE:
        await stealth_async(self.page)

    # Wait a moment for extension to initialize
    await asyncio.sleep(0.5)
|
|
881
|
+
|
|
882
|
+
async def goto(self, url: str) -> None:
|
|
883
|
+
"""Navigate to a URL and ensure extension is ready (async)"""
|
|
884
|
+
if not self.page:
|
|
885
|
+
raise RuntimeError("Browser not started. Call await start() first.")
|
|
886
|
+
|
|
887
|
+
await self.page.goto(url, wait_until="domcontentloaded")
|
|
888
|
+
|
|
889
|
+
# Wait for extension to be ready
|
|
890
|
+
if not await self._wait_for_extension():
|
|
891
|
+
try:
|
|
892
|
+
diag = await self.page.evaluate(
|
|
893
|
+
"""() => ({
|
|
894
|
+
sentience_defined: typeof window.sentience !== 'undefined',
|
|
895
|
+
registry_defined: typeof window.sentience_registry !== 'undefined',
|
|
896
|
+
snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
|
|
897
|
+
extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
|
|
898
|
+
url: window.location.href
|
|
899
|
+
})"""
|
|
900
|
+
)
|
|
901
|
+
except Exception as e:
|
|
902
|
+
diag = f"Failed to get diagnostics: {str(e)}"
|
|
903
|
+
|
|
904
|
+
raise RuntimeError(
|
|
905
|
+
"Extension failed to load after navigation. Make sure:\n"
|
|
906
|
+
"1. Extension is built (cd sentience-chrome && ./build.sh)\n"
|
|
907
|
+
"2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n"
|
|
908
|
+
"3. Check browser console for errors (run with headless=False to see console)\n"
|
|
909
|
+
f"4. Extension path: {self._extension_path}\n"
|
|
910
|
+
f"5. Diagnostic info: {diag}"
|
|
911
|
+
)
|
|
912
|
+
|
|
913
|
+
async def _inject_storage_state(self, storage_state: str | Path | StorageState | dict) -> None:
    """Load a storage state and apply its cookies and localStorage entries (async).

    Args:
        storage_state: Path (str or Path) of a JSON file, a StorageState
            object, or a dict accepted by ``StorageState.from_dict``.

    Raises:
        ValueError: If ``storage_state`` is none of the supported types.

    Cookies are added to the browser context in one call. For localStorage,
    the page is navigated to each origin in turn (10 s timeout) and the
    items are written there. A failure for one origin is logged as a
    warning and does not stop the remaining origins.
    """
    import json

    # Normalize every accepted input form to a StorageState object.
    if isinstance(storage_state, (str, Path)):
        with open(storage_state, encoding="utf-8") as state_file:
            raw_state = json.load(state_file)
        state = StorageState.from_dict(raw_state)
    elif isinstance(storage_state, StorageState):
        state = storage_state
    elif isinstance(storage_state, dict):
        state = StorageState.from_dict(storage_state)
    else:
        raise ValueError(
            f"Invalid storage_state type: {type(storage_state)}. "
            "Expected str, Path, StorageState, or dict."
        )

    # Cookies: the four core fields are always sent, optional ones only when truthy.
    if state.cookies:
        required_keys = ("name", "value", "domain", "path")
        optional_keys = ("expires", "httpOnly", "secure", "sameSite")
        cookie_payload = []
        for cookie in state.cookies:
            fields = cookie.model_dump()
            entry = {key: fields[key] for key in required_keys}
            entry.update({key: fields[key] for key in optional_keys if fields.get(key)})
            cookie_payload.append(entry)

        await self.context.add_cookies(cookie_payload)
        logger.debug(f"Injected {len(state.cookies)} cookie(s)")

    # localStorage can only be written from a page on the matching origin,
    # so each origin is visited before its items are set.
    for origin_data in state.origins or ():
        origin = origin_data.origin
        if not origin:
            continue

        try:
            await self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

            if not origin_data.localStorage:
                continue

            items_by_name = {entry.name: entry.value for entry in origin_data.localStorage}
            await self.page.evaluate(
                """(localStorage_data) => {
                    for (const [key, value] of Object.entries(localStorage_data)) {
                        localStorage.setItem(key, value);
                    }
                }""",
                items_by_name,
            )
            logger.debug(
                f"Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
            )
        except Exception as e:
            logger.warning(f"Failed to inject localStorage for {origin}: {e}")
|
|
983
|
+
|
|
984
|
+
async def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
|
|
985
|
+
"""Poll for window.sentience to be available (async)"""
|
|
986
|
+
start_time = time.time()
|
|
987
|
+
last_error = None
|
|
988
|
+
|
|
989
|
+
while time.time() - start_time < timeout_sec:
|
|
990
|
+
try:
|
|
991
|
+
result = await self.page.evaluate(
|
|
992
|
+
"""() => {
|
|
993
|
+
if (typeof window.sentience === 'undefined') {
|
|
994
|
+
return { ready: false, reason: 'window.sentience undefined' };
|
|
995
|
+
}
|
|
996
|
+
if (window.sentience._wasmModule === null) {
|
|
997
|
+
return { ready: false, reason: 'WASM module not fully loaded' };
|
|
998
|
+
}
|
|
999
|
+
return { ready: true };
|
|
1000
|
+
}
|
|
1001
|
+
"""
|
|
1002
|
+
)
|
|
1003
|
+
|
|
1004
|
+
if isinstance(result, dict):
|
|
1005
|
+
if result.get("ready"):
|
|
1006
|
+
return True
|
|
1007
|
+
last_error = result.get("reason", "Unknown error")
|
|
1008
|
+
except Exception as e:
|
|
1009
|
+
last_error = f"Evaluation error: {str(e)}"
|
|
1010
|
+
|
|
1011
|
+
await asyncio.sleep(0.3)
|
|
1012
|
+
|
|
1013
|
+
if last_error:
|
|
1014
|
+
import warnings
|
|
1015
|
+
|
|
1016
|
+
warnings.warn(f"Extension wait timeout. Last status: {last_error}")
|
|
1017
|
+
|
|
1018
|
+
return False
|
|
1019
|
+
|
|
1020
|
+
async def close(self, output_path: str | Path | None = None) -> tuple[str | None, bool]:
    """
    Close browser and cleanup (async)

    Args:
        output_path: Optional path to move the recorded video file to

    Returns:
        Tuple of (video_path, shutdown_clean)
        - video_path: Location of the video file if recording was enabled and
          a file was found, None otherwise. After a successful move this is
          ``str(output_path)``; otherwise it is the path object of the file
          found in ``record_video_dir``.
        - shutdown_clean: True if both the context and Playwright shut down
          without error or timeout, False otherwise

    Note: Video path is resolved AFTER context close to avoid touching video
    subsystem during teardown, which can cause crashes on macOS.
    """
    # CRITICAL: Don't access page.video.path() BEFORE closing context
    # This can poke the video subsystem at an awkward time and cause crashes
    # Instead, we'll locate the video file after context closes

    is_macos = platform.system() == "Darwin"

    # CRITICAL: Wait before closing to ensure all operations are complete
    # This is especially important for video recording - we need to ensure
    # all frames are written and the encoder is ready to finalize
    if is_macos:
        # On macOS, give extra time for video encoder to finish writing frames
        # 4K video recording needs more time to flush buffers
        logger.debug("Waiting for video recording to stabilize before closing (macOS)...")
        await asyncio.sleep(2.0)
    else:
        await asyncio.sleep(1.0)

    # Graceful shutdown: close context first, then playwright
    context_close_success = True
    if self.context:
        try:
            # Generous timeout: video finalization can take a while
            await asyncio.wait_for(self.context.close(), timeout=30.0)
            logger.debug("Context closed successfully")
        except asyncio.TimeoutError:
            # asyncio.TimeoutError is what wait_for raises; it is only an
            # alias of the builtin TimeoutError from Python 3.11 on.
            logger.warning("Context close timed out, continuing with cleanup...")
            context_close_success = False
        except Exception as e:
            logger.warning(f"Error closing context: {e}")
            context_close_success = False
        finally:
            self.context = None

    # Give Chrome a moment to fully flush video + release resources
    # This avoids stopping the driver while the browser is still finishing the .webm write/encoder shutdown
    # Longer grace period on macOS to allow more time for process cleanup
    await asyncio.sleep(2.0 if is_macos else 1.0)

    playwright_stop_success = True
    if self.playwright:
        try:
            # Give playwright time to stop gracefully
            await asyncio.wait_for(self.playwright.stop(), timeout=15.0)
            logger.debug("Playwright stopped successfully")
        except asyncio.TimeoutError:
            logger.warning("Playwright stop timed out, continuing with cleanup...")
            playwright_stop_success = False
        except Exception as e:
            logger.warning(f"Error stopping playwright: {e}")
            playwright_stop_success = False
        finally:
            self.playwright = None

    # Additional cleanup: On macOS, wait a bit more to ensure all browser processes are terminated
    # This helps prevent crash dialogs from appearing
    if is_macos:
        await asyncio.sleep(0.5)

    # NOW resolve video path after context is closed and video is finalized
    temp_video_path = None
    if self.record_video_dir:
        try:
            # Locate the newest .webm file in record_video_dir
            # This avoids touching page.video during teardown
            video_dir = Path(self.record_video_dir)
            if video_dir.exists():
                webm_files = list(video_dir.glob("*.webm"))
                if webm_files:
                    # Get the most recently modified file
                    temp_video_path = max(webm_files, key=lambda p: p.stat().st_mtime)
                    logger.debug(f"Found video file: {temp_video_path}")
        except Exception as e:
            logger.warning(f"Could not locate video file: {e}")

    # Removing the temporary extension copy is best-effort: a failure here
    # must not prevent the caller from receiving the result below.
    if self._extension_path:
        shutil.rmtree(self._extension_path, ignore_errors=True)
        self._extension_path = None

    # Clear page reference after closing context
    self.page = None

    final_path = temp_video_path
    if temp_video_path and output_path and os.path.exists(temp_video_path):
        try:
            output_path = str(output_path)
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            shutil.move(temp_video_path, output_path)
            final_path = output_path
        except Exception as e:
            import warnings

            warnings.warn(f"Failed to rename video file: {e}")
            final_path = temp_video_path

    # Log shutdown status (useful for detecting crashes in headless mode)
    shutdown_clean = context_close_success and playwright_stop_success
    if not shutdown_clean:
        logger.warning(
            f"Browser shutdown had issues - may indicate a crash "
            f"(context_close: {context_close_success}, playwright_stop: {playwright_stop_success})"
        )
    else:
        logger.debug("Browser shutdown completed cleanly")

    # Return tuple: (video_path, shutdown_clean)
    # This allows callers to detect crashes even in headless mode
    return (final_path, shutdown_clean)
|
|
1143
|
+
|
|
1144
|
+
async def __aenter__(self):
    """Enter the async context manager: await ``start()`` and return this instance."""
    await self.start()
    return self
|
|
1148
|
+
|
|
1149
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Exit the async context manager by awaiting ``close()``.

    The exception arguments are not inspected and nothing is returned, so an
    exception raised inside the ``async with`` body is not suppressed.
    """
    # The (video_path, shutdown_clean) tuple returned by close() is
    # deliberately discarded here.
    await self.close()
|
|
1153
|
+
|
|
1154
|
+
@classmethod
async def from_existing(
    cls,
    context: AsyncBrowserContext,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already-open Playwright BrowserContext in an AsyncSentienceBrowser.

    The context's first page is reused; a new page is opened only when the
    context has none. ``start()`` is not called.

    Args:
        context: Existing Playwright BrowserContext
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance bound to the given context and its page
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.context = context

    open_pages = context.pages
    if open_pages:
        browser.page = open_pages[0]
    else:
        browser.page = await context.new_page()

    # Stealth patches are applied only when the optional dependency is present.
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed pause intended to let the extension become ready; no readiness
    # check is performed here.
    await asyncio.sleep(0.5)

    return browser
|
|
1185
|
+
|
|
1186
|
+
@classmethod
async def from_page(
    cls,
    page: AsyncPage,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already-open Playwright Page in an AsyncSentienceBrowser.

    The instance adopts the page and the context the page belongs to.
    ``start()`` is not called.

    Args:
        page: Existing Playwright Page
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance bound to the given page
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.page, browser.context = page, page.context

    # Stealth patches are applied only when the optional dependency is present.
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed pause intended to let the extension become ready; no readiness
    # check is performed here.
    await asyncio.sleep(0.5)

    return browser
|