sentienceapi-0.90.17-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentienceapi might be problematic. Click here for more details.
- sentience/__init__.py +153 -0
- sentience/_extension_loader.py +40 -0
- sentience/actions.py +837 -0
- sentience/agent.py +1246 -0
- sentience/agent_config.py +43 -0
- sentience/async_api.py +101 -0
- sentience/base_agent.py +194 -0
- sentience/browser.py +1037 -0
- sentience/cli.py +130 -0
- sentience/cloud_tracing.py +382 -0
- sentience/conversational_agent.py +509 -0
- sentience/expect.py +188 -0
- sentience/extension/background.js +233 -0
- sentience/extension/content.js +298 -0
- sentience/extension/injected_api.js +1473 -0
- sentience/extension/manifest.json +36 -0
- sentience/extension/pkg/sentience_core.d.ts +51 -0
- sentience/extension/pkg/sentience_core.js +529 -0
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
- sentience/extension/release.json +115 -0
- sentience/extension/test-content.js +4 -0
- sentience/formatting.py +59 -0
- sentience/generator.py +202 -0
- sentience/inspector.py +365 -0
- sentience/llm_provider.py +637 -0
- sentience/models.py +412 -0
- sentience/overlay.py +222 -0
- sentience/query.py +303 -0
- sentience/read.py +185 -0
- sentience/recorder.py +589 -0
- sentience/schemas/trace_v1.json +216 -0
- sentience/screenshot.py +100 -0
- sentience/snapshot.py +516 -0
- sentience/text_search.py +290 -0
- sentience/trace_indexing/__init__.py +27 -0
- sentience/trace_indexing/index_schema.py +111 -0
- sentience/trace_indexing/indexer.py +357 -0
- sentience/tracer_factory.py +211 -0
- sentience/tracing.py +285 -0
- sentience/utils.py +296 -0
- sentience/wait.py +137 -0
- sentienceapi-0.90.17.dist-info/METADATA +917 -0
- sentienceapi-0.90.17.dist-info/RECORD +50 -0
- sentienceapi-0.90.17.dist-info/WHEEL +5 -0
- sentienceapi-0.90.17.dist-info/entry_points.txt +2 -0
- sentienceapi-0.90.17.dist-info/licenses/LICENSE +24 -0
- sentienceapi-0.90.17.dist-info/licenses/LICENSE-APACHE +201 -0
- sentienceapi-0.90.17.dist-info/licenses/LICENSE-MIT +21 -0
- sentienceapi-0.90.17.dist-info/top_level.txt +1 -0
sentience/browser.py
ADDED
|
@@ -0,0 +1,1037 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Playwright browser harness with extension loading
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import os
|
|
7
|
+
import shutil
|
|
8
|
+
import tempfile
|
|
9
|
+
import time
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from urllib.parse import urlparse
|
|
12
|
+
|
|
13
|
+
from playwright.async_api import BrowserContext as AsyncBrowserContext
|
|
14
|
+
from playwright.async_api import Page as AsyncPage
|
|
15
|
+
from playwright.async_api import Playwright as AsyncPlaywright
|
|
16
|
+
from playwright.async_api import async_playwright
|
|
17
|
+
from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
|
|
18
|
+
|
|
19
|
+
from sentience._extension_loader import find_extension_path
|
|
20
|
+
from sentience.models import ProxyConfig, StorageState, Viewport
|
|
21
|
+
|
|
22
|
+
# Import stealth for bot evasion (optional - graceful fallback if not available)
|
|
23
|
+
try:
|
|
24
|
+
from playwright_stealth import stealth_async, stealth_sync
|
|
25
|
+
|
|
26
|
+
STEALTH_AVAILABLE = True
|
|
27
|
+
except ImportError:
|
|
28
|
+
STEALTH_AVAILABLE = False
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SentienceBrowser:
|
|
32
|
+
"""Main browser session with Sentience extension loaded"""
|
|
33
|
+
|
|
34
|
+
def __init__(
    self,
    api_key: str | None = None,
    api_url: str | None = None,
    headless: bool | None = None,
    proxy: str | None = None,
    user_data_dir: str | Path | None = None,
    storage_state: str | Path | StorageState | dict | None = None,
    record_video_dir: str | Path | None = None,
    record_video_size: dict[str, int] | None = None,
    viewport: Viewport | dict[str, int] | None = None,
):
    """
    Initialize Sentience browser

    Only stores configuration; no browser is launched until start() is called.

    Args:
        api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
            If None, uses free tier (local extension only)
        api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
            If None and api_key is provided, uses default URL
            If None and no api_key, uses free tier (local extension only)
            If 'local' or Docker sidecar URL, uses Enterprise tier
        headless: Whether to run in headless mode. If None, defaults to True when the
            CI environment variable equals "true" (case-insensitive), False otherwise
        proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
            Supports HTTP, HTTPS, and SOCKS5 proxies
            Falls back to SENTIENCE_PROXY environment variable if not provided
        user_data_dir: Optional path (str or Path) to user data directory for persistent sessions.
            If None, uses temporary directory (session not persisted).
            If provided, cookies and localStorage persist across browser restarts.
        storage_state: Optional storage state to inject (cookies + localStorage).
            Can be:
            - Path to JSON file (str or Path)
            - StorageState object
            - Dictionary with 'cookies' and/or 'origins' keys
            If provided, browser starts with pre-injected authentication.
        record_video_dir: Optional directory path to save video recordings.
            If provided, browser will record video of all pages.
            Videos are saved as .webm files in the specified directory.
            If None, no video recording is performed.
        record_video_size: Optional video resolution as dict with 'width' and 'height' keys.
            Examples: {"width": 1280, "height": 800} (default)
                      {"width": 1920, "height": 1080} (1080p)
            If None, defaults to 1280x800.
        viewport: Optional viewport size as Viewport object or dict with 'width' and 'height' keys.
            Examples: Viewport(width=1280, height=800) (default)
                      Viewport(width=1920, height=1080) (Full HD)
                      {"width": 1280, "height": 800} (dict also supported)
            If None, defaults to Viewport(width=1280, height=800).

    Raises:
        KeyError: If viewport is a dict lacking a 'width' or 'height' key.
    """
    self.api_key = api_key
    # Only set api_url if api_key is provided, otherwise None (free tier)
    # Defaults to production API if key is present but url is missing
    if self.api_key and not api_url:
        self.api_url = "https://api.sentienceapi.com"
    else:
        self.api_url = api_url

    # Determine headless mode
    if headless is None:
        # Default to False for local dev, True for CI
        self.headless = os.environ.get("CI", "").lower() == "true"
    else:
        self.headless = headless

    # Support proxy from argument or environment variable
    self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")

    # Auth injection support
    self.user_data_dir = user_data_dir
    self.storage_state = storage_state

    # Video recording support
    self.record_video_dir = record_video_dir
    self.record_video_size = record_video_size or {"width": 1280, "height": 800}

    # Viewport configuration - convert dict to Viewport if needed
    if viewport is None:
        self.viewport = Viewport(width=1280, height=800)
    elif isinstance(viewport, dict):
        self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
    else:
        self.viewport = viewport

    # Runtime handles, populated by start() / from_existing() / from_page()
    self.playwright: Playwright | None = None
    self.context: BrowserContext | None = None
    self.page: Page | None = None
    self._extension_path: str | None = None
|
|
121
|
+
|
|
122
|
+
def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
    """
    Parse proxy connection string into ProxyConfig.

    Args:
        proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')

    Returns:
        ProxyConfig object, or None if the string is empty or invalid.
        Invalid input is reported on stdout; this method does not raise.
    """
    from urllib.parse import unquote

    if not proxy_string:
        return None

    try:
        parsed = urlparse(proxy_string)

        # Validate scheme
        if parsed.scheme not in ("http", "https", "socks5"):
            print(f"⚠️ [Sentience] Unsupported proxy scheme: {parsed.scheme}")
            print(" Supported: http, https, socks5")
            return None

        # Validate host and port
        # (reading .port raises ValueError for a malformed port; handled below)
        if not parsed.hostname or not parsed.port:
            print("⚠️ [Sentience] Proxy URL must include hostname and port")
            print(" Expected format: http://username:password@host:port")
            return None

        # Build server URL. urlparse strips the brackets from IPv6 literals,
        # so put them back to keep "host:port" unambiguous.
        host = parsed.hostname
        if ":" in host:
            host = f"[{host}]"
        server = f"{parsed.scheme}://{host}:{parsed.port}"

        # Create ProxyConfig with optional credentials.
        # Credentials embedded in a URL are percent-encoded; decode them so
        # characters such as '@' or ':' in a password reach the proxy intact.
        return ProxyConfig(
            server=server,
            username=unquote(parsed.username) if parsed.username else None,
            password=unquote(parsed.password) if parsed.password else None,
        )

    except Exception as e:
        # Deliberately best-effort: a bad proxy string is reported, not raised
        print(f"⚠️ [Sentience] Invalid proxy configuration: {e}")
        print(" Expected format: http://username:password@host:port")
        return None
|
|
167
|
+
|
|
168
|
+
def start(self) -> None:
    """Launch browser with extension loaded

    Copies the extension into a fresh temporary directory, starts Playwright,
    launches a persistent Chromium context with the extension loaded and stores
    the context and its first page on the instance. Proxy, video recording,
    storage-state injection and stealth are applied when configured/available.

    Raises:
        Any exception raised while preparing or launching the browser. Before
        it propagates, close() is called so the temporary extension directory
        and any Playwright handles created so far are released.
    """
    # Get extension source path using shared utility
    extension_source = find_extension_path()

    try:
        # Create temporary extension bundle
        # We copy it to a temp dir to avoid file locking issues and ensure clean state
        self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")
        shutil.copytree(extension_source, self._extension_path, dirs_exist_ok=True)

        self.playwright = sync_playwright().start()

        # Build launch arguments
        args = [
            f"--disable-extensions-except={self._extension_path}",
            f"--load-extension={self._extension_path}",
            "--disable-blink-features=AutomationControlled",  # Hides 'navigator.webdriver'
            "--no-sandbox",
            "--disable-infobars",
            # WebRTC leak protection (prevents real IP exposure when using proxies/VPNs)
            "--disable-features=WebRtcHideLocalIpsWithMdns",
            "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
        ]

        # Handle headless mode correctly for extensions
        # 'headless=True' DOES NOT support extensions in standard Chrome
        # We must use 'headless="new"' (Chrome 112+) or run visible
        if self.headless:
            args.append("--headless=new")  # Use new headless mode via args

        # Parse proxy configuration if provided
        # NOTE: an unparsable proxy yields None here, so the browser launches
        # without a proxy (the parser has already printed a warning).
        proxy_config = self._parse_proxy(self.proxy) if self.proxy else None

        # Handle User Data Directory (Persistence)
        if self.user_data_dir:
            user_data_dir = str(self.user_data_dir)
            Path(user_data_dir).mkdir(parents=True, exist_ok=True)
        else:
            user_data_dir = ""  # Ephemeral temp dir (existing behavior)

        # Build launch_persistent_context parameters
        launch_params = {
            "user_data_dir": user_data_dir,
            "headless": False,  # IMPORTANT: See note above
            "args": args,
            "viewport": {"width": self.viewport.width, "height": self.viewport.height},
            # Remove "HeadlessChrome" from User Agent automatically
            "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        }

        # Add proxy if configured
        if proxy_config:
            launch_params["proxy"] = proxy_config.to_playwright_dict()
            # Ignore HTTPS errors when using proxy (many residential proxies use self-signed certs)
            launch_params["ignore_https_errors"] = True
            print(f"🌐 [Sentience] Using proxy: {proxy_config.server}")

        # Add video recording if configured
        if self.record_video_dir:
            video_dir = Path(self.record_video_dir)
            video_dir.mkdir(parents=True, exist_ok=True)
            launch_params["record_video_dir"] = str(video_dir)
            launch_params["record_video_size"] = self.record_video_size
            print(f"🎥 [Sentience] Recording video to: {video_dir}")
            print(
                f" Resolution: {self.record_video_size['width']}x{self.record_video_size['height']}"
            )

        # Launch persistent context (required for extensions)
        # Note: We pass headless=False to launch_persistent_context because we handle
        # headless mode via the --headless=new arg above. This is a Playwright workaround.
        self.context = self.playwright.chromium.launch_persistent_context(**launch_params)

        self.page = self.context.pages[0] if self.context.pages else self.context.new_page()

        # Inject storage state if provided (must be after context creation)
        if self.storage_state:
            self._inject_storage_state(self.storage_state)

        # Apply stealth if available
        if STEALTH_AVAILABLE:
            stealth_sync(self.page)

        # Wait a moment for extension to initialize
        time.sleep(0.5)
    except BaseException:
        # When start() fails inside __enter__, __exit__ is never invoked, so
        # the temp directory / Playwright driver / context must be released here.
        try:
            self.close()
        except Exception:
            # Keep the launch failure as the error the caller sees
            pass
        raise
|
|
254
|
+
|
|
255
|
+
def goto(self, url: str) -> None:
    """Navigate to a URL and ensure extension is ready

    Args:
        url: Address to open in the current page.

    Raises:
        RuntimeError: If no page is available (start() was not called), or if
            the extension API does not become ready after navigation. In the
            latter case the message carries diagnostics read from the page.
    """
    page = self.page
    if not page:
        raise RuntimeError("Browser not started. Call start() first.")

    page.goto(url, wait_until="domcontentloaded")

    # Happy path: the injected API answered within the polling window
    if self._wait_for_extension():
        return

    # Collect whatever the page can still tell us before reporting the failure
    diagnostics_script = """() => ({
        sentience_defined: typeof window.sentience !== 'undefined',
        registry_defined: typeof window.sentience_registry !== 'undefined',
        snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
        extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
        url: window.location.href
    })"""
    try:
        diagnostics = page.evaluate(diagnostics_script)
    except Exception as exc:
        diagnostics = f"Failed to get diagnostics: {str(exc)}"

    message = (
        "Extension failed to load after navigation. Make sure:\n"
        "1. Extension is built (cd sentience-chrome && ./build.sh)\n"
        "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n"
        "3. Check browser console for errors (run with headless=False to see console)\n"
        f"4. Extension path: {self._extension_path}\n"
        f"5. Diagnostic info: {diagnostics}"
    )
    raise RuntimeError(message)
|
|
286
|
+
|
|
287
|
+
def _inject_storage_state(
    self, storage_state: str | Path | StorageState | dict
) -> None:  # noqa: C901
    """
    Load a storage state and apply its cookies and localStorage to the browser.

    Cookies are added to the context in one call. localStorage entries are
    written by navigating the current page to each origin in turn; a failure
    for one origin is printed and does not stop the remaining origins.

    Args:
        storage_state: Path to JSON file, StorageState object, or dict containing storage state

    Raises:
        ValueError: If storage_state is none of the accepted types.
    """
    import json

    # --- Normalise the input into a StorageState -------------------------
    if isinstance(storage_state, (str, Path)):
        with open(storage_state, encoding="utf-8") as fh:
            raw_state = json.load(fh)
        state = StorageState.from_dict(raw_state)
    elif isinstance(storage_state, StorageState):
        state = storage_state
    elif isinstance(storage_state, dict):
        state = StorageState.from_dict(storage_state)
    else:
        raise ValueError(
            f"Invalid storage_state type: {type(storage_state)}. "
            "Expected str, Path, StorageState, or dict."
        )

    # --- Cookies (context-wide) ------------------------------------------
    if state.cookies:
        required_keys = ("name", "value", "domain", "path")
        optional_keys = ("expires", "httpOnly", "secure", "sameSite")

        playwright_cookies = []
        for cookie in state.cookies:
            dumped = cookie.model_dump()
            converted = {key: dumped[key] for key in required_keys}
            # Optional attributes are forwarded only when they hold a truthy value
            for key in optional_keys:
                if dumped.get(key):
                    converted[key] = dumped[key]
            playwright_cookies.append(converted)

        self.context.add_cookies(playwright_cookies)
        print(f"✅ [Sentience] Injected {len(state.cookies)} cookie(s)")

    # --- localStorage (needs a page on each origin) ----------------------
    for origin_data in state.origins or ():
        origin = origin_data.origin
        if not origin:
            continue

        try:
            self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

            entries = origin_data.localStorage
            if entries:
                payload = {entry.name: entry.value for entry in entries}
                self.page.evaluate(
                    """(localStorage_data) => {
                        for (const [key, value] of Object.entries(localStorage_data)) {
                            localStorage.setItem(key, value);
                        }
                    }""",
                    payload,
                )
                print(
                    f"✅ [Sentience] Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
                )
        except Exception as e:
            print(f"⚠️ [Sentience] Failed to inject localStorage for {origin}: {e}")
|
|
372
|
+
|
|
373
|
+
def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
|
|
374
|
+
"""Poll for window.sentience to be available"""
|
|
375
|
+
start_time = time.time()
|
|
376
|
+
last_error = None
|
|
377
|
+
|
|
378
|
+
while time.time() - start_time < timeout_sec:
|
|
379
|
+
try:
|
|
380
|
+
# Check if API exists and WASM is ready (optional check for _wasmModule)
|
|
381
|
+
result = self.page.evaluate(
|
|
382
|
+
"""() => {
|
|
383
|
+
if (typeof window.sentience === 'undefined') {
|
|
384
|
+
return { ready: false, reason: 'window.sentience undefined' };
|
|
385
|
+
}
|
|
386
|
+
// Check if WASM loaded (if exposed) or if basic API works
|
|
387
|
+
// Note: injected_api.js defines window.sentience immediately,
|
|
388
|
+
// but _wasmModule might take a few ms to load.
|
|
389
|
+
if (window.sentience._wasmModule === null) {
|
|
390
|
+
// It's defined but WASM isn't linked yet
|
|
391
|
+
return { ready: false, reason: 'WASM module not fully loaded' };
|
|
392
|
+
}
|
|
393
|
+
// If _wasmModule is not exposed, that's okay - it might be internal
|
|
394
|
+
// Just verify the API structure is correct
|
|
395
|
+
return { ready: true };
|
|
396
|
+
}
|
|
397
|
+
"""
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
if isinstance(result, dict):
|
|
401
|
+
if result.get("ready"):
|
|
402
|
+
return True
|
|
403
|
+
last_error = result.get("reason", "Unknown error")
|
|
404
|
+
except Exception as e:
|
|
405
|
+
# Continue waiting on errors
|
|
406
|
+
last_error = f"Evaluation error: {str(e)}"
|
|
407
|
+
|
|
408
|
+
time.sleep(0.3)
|
|
409
|
+
|
|
410
|
+
# Log the last error for debugging
|
|
411
|
+
if last_error:
|
|
412
|
+
import warnings
|
|
413
|
+
|
|
414
|
+
warnings.warn(f"Extension wait timeout. Last status: {last_error}")
|
|
415
|
+
|
|
416
|
+
return False
|
|
417
|
+
|
|
418
|
+
def close(self, output_path: str | Path | None = None) -> str | None:
|
|
419
|
+
"""
|
|
420
|
+
Close browser and cleanup
|
|
421
|
+
|
|
422
|
+
Args:
|
|
423
|
+
output_path: Optional path to rename the video file to.
|
|
424
|
+
If provided, the recorded video will be moved to this location.
|
|
425
|
+
Useful for giving videos meaningful names instead of random hashes.
|
|
426
|
+
|
|
427
|
+
Returns:
|
|
428
|
+
Path to video file if recording was enabled, None otherwise
|
|
429
|
+
Note: Video files are saved automatically by Playwright when context closes.
|
|
430
|
+
If multiple pages exist, returns the path to the first page's video.
|
|
431
|
+
"""
|
|
432
|
+
temp_video_path = None
|
|
433
|
+
|
|
434
|
+
# Get video path before closing (if recording was enabled)
|
|
435
|
+
# Note: Playwright saves videos when pages/context close, but we can get the
|
|
436
|
+
# expected path before closing. The actual file will be available after close.
|
|
437
|
+
if self.record_video_dir:
|
|
438
|
+
try:
|
|
439
|
+
# Try to get video path from the first page
|
|
440
|
+
if self.page and self.page.video:
|
|
441
|
+
temp_video_path = self.page.video.path()
|
|
442
|
+
# If that fails, check all pages in the context
|
|
443
|
+
elif self.context:
|
|
444
|
+
for page in self.context.pages:
|
|
445
|
+
if page.video:
|
|
446
|
+
temp_video_path = page.video.path()
|
|
447
|
+
break
|
|
448
|
+
except Exception:
|
|
449
|
+
# Video path might not be available until after close
|
|
450
|
+
# In that case, we'll return None and user can check the directory
|
|
451
|
+
pass
|
|
452
|
+
|
|
453
|
+
# Close context (this triggers video file finalization)
|
|
454
|
+
if self.context:
|
|
455
|
+
self.context.close()
|
|
456
|
+
|
|
457
|
+
# Close playwright
|
|
458
|
+
if self.playwright:
|
|
459
|
+
self.playwright.stop()
|
|
460
|
+
|
|
461
|
+
# Clean up extension directory
|
|
462
|
+
if self._extension_path and os.path.exists(self._extension_path):
|
|
463
|
+
shutil.rmtree(self._extension_path)
|
|
464
|
+
|
|
465
|
+
# Rename/move video if output_path is specified
|
|
466
|
+
final_path = temp_video_path
|
|
467
|
+
if temp_video_path and output_path and os.path.exists(temp_video_path):
|
|
468
|
+
try:
|
|
469
|
+
output_path = str(output_path)
|
|
470
|
+
# Ensure parent directory exists
|
|
471
|
+
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
|
472
|
+
shutil.move(temp_video_path, output_path)
|
|
473
|
+
final_path = output_path
|
|
474
|
+
except Exception as e:
|
|
475
|
+
import warnings
|
|
476
|
+
|
|
477
|
+
warnings.warn(f"Failed to rename video file: {e}")
|
|
478
|
+
# Return original path if rename fails
|
|
479
|
+
final_path = temp_video_path
|
|
480
|
+
|
|
481
|
+
return final_path
|
|
482
|
+
|
|
483
|
+
@classmethod
def from_existing(
    cls,
    context: BrowserContext,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "SentienceBrowser":
    """
    Wrap an already-created Playwright BrowserContext in a SentienceBrowser.

    Use this when you want to launch and configure the browser yourself and
    only hand the resulting context to the Sentience SDK. The context's first
    page is reused; if it has none, a new page is opened.

    Args:
        context: Existing Playwright BrowserContext
        api_key: Optional API key for server-side processing
        api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)

    Returns:
        SentienceBrowser instance configured to use the existing context

    Example:
        from playwright.sync_api import sync_playwright
        from sentience import SentienceBrowser, snapshot

        with sync_playwright() as p:
            context = p.chromium.launch_persistent_context(...)
            browser = SentienceBrowser.from_existing(context)
            browser.page.goto("https://example.com")
            snap = snapshot(browser)
    """
    wrapper = cls(api_key=api_key, api_url=api_url)
    wrapper.context = context

    # Reuse the first open page when there is one
    if context.pages:
        wrapper.page = context.pages[0]
    else:
        wrapper.page = context.new_page()

    # Stealth is optional; only applied when the package imported successfully
    if STEALTH_AVAILABLE:
        stealth_sync(wrapper.page)

    # Short fixed pause to let the extension (if one is loaded) initialise
    time.sleep(0.5)

    return wrapper
|
|
526
|
+
|
|
527
|
+
@classmethod
def from_page(
    cls,
    page: Page,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "SentienceBrowser":
    """
    Wrap an already-created Playwright Page in a SentienceBrowser.

    Use this when you want to create the browser, context and page yourself
    and only hand the page to the Sentience SDK. The page's own context
    becomes the instance's context.

    Args:
        page: Existing Playwright Page
        api_key: Optional API key for server-side processing
        api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)

    Returns:
        SentienceBrowser instance configured to use the existing page

    Example:
        from playwright.sync_api import sync_playwright
        from sentience import SentienceBrowser, snapshot

        with sync_playwright() as p:
            browser_instance = p.chromium.launch()
            context = browser_instance.new_context()
            page = context.new_page()
            page.goto("https://example.com")

            browser = SentienceBrowser.from_page(page)
            snap = snapshot(browser)
    """
    wrapper = cls(api_key=api_key, api_url=api_url)
    wrapper.page = page
    wrapper.context = page.context

    # Stealth is optional; only applied when the package imported successfully
    if STEALTH_AVAILABLE:
        stealth_sync(wrapper.page)

    # Short fixed pause to let the extension (if one is loaded) initialise
    time.sleep(0.5)

    return wrapper
|
|
573
|
+
|
|
574
|
+
def __enter__(self):
|
|
575
|
+
"""Context manager entry"""
|
|
576
|
+
self.start()
|
|
577
|
+
return self
|
|
578
|
+
|
|
579
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
580
|
+
"""Context manager exit"""
|
|
581
|
+
self.close()
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
class AsyncSentienceBrowser:
|
|
585
|
+
"""Async version of SentienceBrowser for use in asyncio contexts."""
|
|
586
|
+
|
|
587
|
+
def __init__(
    self,
    api_key: str | None = None,
    api_url: str | None = None,
    headless: bool | None = None,
    proxy: str | None = None,
    user_data_dir: str | Path | None = None,
    storage_state: str | Path | StorageState | dict | None = None,
    record_video_dir: str | Path | None = None,
    record_video_size: dict[str, int] | None = None,
    viewport: Viewport | dict[str, int] | None = None,
):
    """
    Store the configuration for an async Sentience browser session.

    Nothing is launched here; the Playwright handles stay None until set later.

    Args:
        api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
            If None, uses free tier (local extension only)
        api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
        headless: Whether to run in headless mode. If None, defaults to True in CI, False otherwise
        proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
            Falls back to the SENTIENCE_PROXY environment variable when not given
        user_data_dir: Optional path to user data directory for persistent sessions
        storage_state: Optional storage state to inject (cookies + localStorage)
        record_video_dir: Optional directory path to save video recordings
        record_video_size: Optional video resolution as dict with 'width' and 'height' keys
            (1280x800 when omitted)
        viewport: Optional viewport size as Viewport object or dict with 'width' and 'height' keys.
            Examples: Viewport(width=1280, height=800) (default)
                      Viewport(width=1920, height=1080) (Full HD)
                      {"width": 1280, "height": 800} (dict also supported)
            If None, defaults to Viewport(width=1280, height=800).
    """
    self.api_key = api_key
    # A key without an explicit URL targets the production API; otherwise the
    # caller's value (possibly None, i.e. free tier) is kept as given.
    use_default_url = bool(self.api_key) and not api_url
    self.api_url = "https://api.sentienceapi.com" if use_default_url else api_url

    # Headless unless told otherwise only when the CI env var says "true"
    if headless is not None:
        self.headless = headless
    else:
        self.headless = os.environ.get("CI", "").lower() == "true"

    # Explicit argument wins over the environment variable
    self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")

    # Auth injection support
    self.user_data_dir = user_data_dir
    self.storage_state = storage_state

    # Video recording support
    self.record_video_dir = record_video_dir
    self.record_video_size = record_video_size or {"width": 1280, "height": 800}

    # Viewport: accept a ready-made object, a dict to convert, or fall back to the default
    if isinstance(viewport, dict):
        self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
    elif viewport is None:
        self.viewport = Viewport(width=1280, height=800)
    else:
        self.viewport = viewport

    # Runtime handles, unset until the browser is started
    self.playwright: AsyncPlaywright | None = None
    self.context: AsyncBrowserContext | None = None
    self.page: AsyncPage | None = None
    self._extension_path: str | None = None
|
|
655
|
+
|
|
656
|
+
def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
|
|
657
|
+
"""
|
|
658
|
+
Parse proxy connection string into ProxyConfig.
|
|
659
|
+
|
|
660
|
+
Args:
|
|
661
|
+
proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')
|
|
662
|
+
|
|
663
|
+
Returns:
|
|
664
|
+
ProxyConfig object or None if invalid
|
|
665
|
+
"""
|
|
666
|
+
if not proxy_string:
|
|
667
|
+
return None
|
|
668
|
+
|
|
669
|
+
try:
|
|
670
|
+
parsed = urlparse(proxy_string)
|
|
671
|
+
|
|
672
|
+
# Validate scheme
|
|
673
|
+
if parsed.scheme not in ("http", "https", "socks5"):
|
|
674
|
+
print(f"⚠️ [Sentience] Unsupported proxy scheme: {parsed.scheme}")
|
|
675
|
+
print(" Supported: http, https, socks5")
|
|
676
|
+
return None
|
|
677
|
+
|
|
678
|
+
# Validate host and port
|
|
679
|
+
if not parsed.hostname or not parsed.port:
|
|
680
|
+
print("⚠️ [Sentience] Proxy URL must include hostname and port")
|
|
681
|
+
print(" Expected format: http://username:password@host:port")
|
|
682
|
+
return None
|
|
683
|
+
|
|
684
|
+
# Build server URL
|
|
685
|
+
server = f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
|
|
686
|
+
|
|
687
|
+
# Create ProxyConfig with optional credentials
|
|
688
|
+
return ProxyConfig(
|
|
689
|
+
server=server,
|
|
690
|
+
username=parsed.username if parsed.username else None,
|
|
691
|
+
password=parsed.password if parsed.password else None,
|
|
692
|
+
)
|
|
693
|
+
|
|
694
|
+
except Exception as e:
|
|
695
|
+
print(f"⚠️ [Sentience] Invalid proxy configuration: {e}")
|
|
696
|
+
print(" Expected format: http://username:password@host:port")
|
|
697
|
+
return None
|
|
698
|
+
|
|
699
|
+
async def start(self) -> None:
    """Start Playwright and open a persistent Chromium context with the extension loaded (async)."""
    # Copy the extension sources into a private temp directory and load that copy.
    source_dir = find_extension_path()
    self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")
    shutil.copytree(source_dir, self._extension_path, dirs_exist_ok=True)

    self.playwright = await async_playwright().start()

    # Chromium command-line flags
    chromium_flags = [
        f"--disable-extensions-except={self._extension_path}",
        f"--load-extension={self._extension_path}",
        "--disable-blink-features=AutomationControlled",
        "--no-sandbox",
        "--disable-infobars",
        "--disable-features=WebRtcHideLocalIpsWithMdns",
        "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
    ]
    if self.headless:
        # Headless mode is requested through this flag; the Playwright
        # "headless" option below is always passed as False.
        chromium_flags.append("--headless=new")

    # Proxy string (if any) -> ProxyConfig; None when absent or invalid
    proxy_settings = None
    if self.proxy:
        proxy_settings = self._parse_proxy(self.proxy)

    # Profile directory: created on demand, or "" when none was configured
    profile_dir = ""
    if self.user_data_dir:
        profile_dir = str(self.user_data_dir)
        Path(profile_dir).mkdir(parents=True, exist_ok=True)

    # Keyword arguments for launch_persistent_context
    context_options = {
        "user_data_dir": profile_dir,
        "headless": False,
        "args": chromium_flags,
        "viewport": {"width": self.viewport.width, "height": self.viewport.height},
        "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    }

    if proxy_settings:
        context_options["proxy"] = proxy_settings.to_playwright_dict()
        context_options["ignore_https_errors"] = True
        print(f"🌐 [Sentience] Using proxy: {proxy_settings.server}")

    if self.record_video_dir:
        recordings_dir = Path(self.record_video_dir)
        recordings_dir.mkdir(parents=True, exist_ok=True)
        context_options["record_video_dir"] = str(recordings_dir)
        context_options["record_video_size"] = self.record_video_size
        print(f"🎥 [Sentience] Recording video to: {recordings_dir}")
        print(
            f" Resolution: {self.record_video_size['width']}x{self.record_video_size['height']}"
        )

    self.context = await self.playwright.chromium.launch_persistent_context(**context_options)

    # Reuse the context's first page when one exists, otherwise open one
    if self.context.pages:
        self.page = self.context.pages[0]
    else:
        self.page = await self.context.new_page()

    if self.storage_state:
        await self._inject_storage_state(self.storage_state)

    if STEALTH_AVAILABLE:
        await stealth_async(self.page)

    # Short fixed pause to give the extension time to initialize
    await asyncio.sleep(0.5)
|
|
775
|
+
|
|
776
|
+
async def goto(self, url: str) -> None:
    """
    Navigate the current page to ``url`` and verify the extension responds (async).

    Raises:
        RuntimeError: If the browser has not been started, or if the extension
            is not detected after navigation (the message includes diagnostics).
    """
    if not self.page:
        raise RuntimeError("Browser not started. Call await start() first.")

    await self.page.goto(url, wait_until="domcontentloaded")

    # Nothing more to do when the extension reports ready
    if await self._wait_for_extension():
        return

    # Extension never became ready: collect what the page can tell us
    try:
        diag = await self.page.evaluate(
            """() => ({
                sentience_defined: typeof window.sentience !== 'undefined',
                registry_defined: typeof window.sentience_registry !== 'undefined',
                snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
                extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
                url: window.location.href
            })"""
        )
    except Exception as e:
        diag = f"Failed to get diagnostics: {e!s}"

    message_lines = [
        "Extension failed to load after navigation. Make sure:",
        "1. Extension is built (cd sentience-chrome && ./build.sh)",
        "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)",
        "3. Check browser console for errors (run with headless=False to see console)",
        f"4. Extension path: {self._extension_path}",
        f"5. Diagnostic info: {diag}",
    ]
    raise RuntimeError("\n".join(message_lines))
|
|
806
|
+
|
|
807
|
+
async def _inject_storage_state(self, storage_state: str | Path | StorageState | dict) -> None:
    """
    Apply saved cookies and localStorage entries to the running browser (async).

    Args:
        storage_state: A path (str or Path) to a JSON file, a StorageState
            instance, or a dict accepted by StorageState.from_dict.

    Raises:
        ValueError: If storage_state is none of the supported types.
    """
    import json

    # Normalize every accepted input form to a StorageState
    if isinstance(storage_state, (str, Path)):
        with open(storage_state, encoding="utf-8") as fh:
            raw_state = json.load(fh)
        state = StorageState.from_dict(raw_state)
    elif isinstance(storage_state, StorageState):
        state = storage_state
    elif isinstance(storage_state, dict):
        state = StorageState.from_dict(storage_state)
    else:
        raise ValueError(
            f"Invalid storage_state type: {type(storage_state)}. "
            "Expected str, Path, StorageState, or dict."
        )

    # Cookies: required fields are always copied, optional ones only when truthy
    if state.cookies:
        required_fields = ("name", "value", "domain", "path")
        optional_fields = ("expires", "httpOnly", "secure", "sameSite")
        playwright_cookies = []
        for cookie in state.cookies:
            fields = cookie.model_dump()
            entry = {key: fields[key] for key in required_fields}
            for key in optional_fields:
                if fields.get(key):
                    entry[key] = fields[key]
            playwright_cookies.append(entry)

        await self.context.add_cookies(playwright_cookies)
        print(f"✅ [Sentience] Injected {len(state.cookies)} cookie(s)")

    # localStorage: navigate to each origin, then write its entries there
    if state.origins:
        for origin_data in state.origins:
            origin = origin_data.origin
            if not origin:
                continue

            try:
                await self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

                if not origin_data.localStorage:
                    continue

                entries = {}
                for item in origin_data.localStorage:
                    entries[item.name] = item.value

                await self.page.evaluate(
                    """(localStorage_data) => {
                        for (const [key, value] of Object.entries(localStorage_data)) {
                            localStorage.setItem(key, value);
                        }
                    }""",
                    entries,
                )
                print(
                    f"✅ [Sentience] Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
                )
            except Exception as e:
                # A failing origin is reported and skipped; the others still run
                print(f"⚠️ [Sentience] Failed to inject localStorage for {origin}: {e}")
|
|
877
|
+
|
|
878
|
+
async def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
|
|
879
|
+
"""Poll for window.sentience to be available (async)"""
|
|
880
|
+
start_time = time.time()
|
|
881
|
+
last_error = None
|
|
882
|
+
|
|
883
|
+
while time.time() - start_time < timeout_sec:
|
|
884
|
+
try:
|
|
885
|
+
result = await self.page.evaluate(
|
|
886
|
+
"""() => {
|
|
887
|
+
if (typeof window.sentience === 'undefined') {
|
|
888
|
+
return { ready: false, reason: 'window.sentience undefined' };
|
|
889
|
+
}
|
|
890
|
+
if (window.sentience._wasmModule === null) {
|
|
891
|
+
return { ready: false, reason: 'WASM module not fully loaded' };
|
|
892
|
+
}
|
|
893
|
+
return { ready: true };
|
|
894
|
+
}
|
|
895
|
+
"""
|
|
896
|
+
)
|
|
897
|
+
|
|
898
|
+
if isinstance(result, dict):
|
|
899
|
+
if result.get("ready"):
|
|
900
|
+
return True
|
|
901
|
+
last_error = result.get("reason", "Unknown error")
|
|
902
|
+
except Exception as e:
|
|
903
|
+
last_error = f"Evaluation error: {str(e)}"
|
|
904
|
+
|
|
905
|
+
await asyncio.sleep(0.3)
|
|
906
|
+
|
|
907
|
+
if last_error:
|
|
908
|
+
import warnings
|
|
909
|
+
|
|
910
|
+
warnings.warn(f"Extension wait timeout. Last status: {last_error}")
|
|
911
|
+
|
|
912
|
+
return False
|
|
913
|
+
|
|
914
|
+
async def close(self, output_path: str | Path | None = None) -> str | None:
|
|
915
|
+
"""
|
|
916
|
+
Close browser and cleanup (async)
|
|
917
|
+
|
|
918
|
+
Args:
|
|
919
|
+
output_path: Optional path to rename the video file to
|
|
920
|
+
|
|
921
|
+
Returns:
|
|
922
|
+
Path to video file if recording was enabled, None otherwise
|
|
923
|
+
"""
|
|
924
|
+
temp_video_path = None
|
|
925
|
+
|
|
926
|
+
if self.record_video_dir:
|
|
927
|
+
try:
|
|
928
|
+
if self.page and self.page.video:
|
|
929
|
+
temp_video_path = await self.page.video.path()
|
|
930
|
+
elif self.context:
|
|
931
|
+
for page in self.context.pages:
|
|
932
|
+
if page.video:
|
|
933
|
+
temp_video_path = await page.video.path()
|
|
934
|
+
break
|
|
935
|
+
except Exception:
|
|
936
|
+
pass
|
|
937
|
+
|
|
938
|
+
if self.context:
|
|
939
|
+
await self.context.close()
|
|
940
|
+
self.context = None
|
|
941
|
+
|
|
942
|
+
if self.playwright:
|
|
943
|
+
await self.playwright.stop()
|
|
944
|
+
self.playwright = None
|
|
945
|
+
|
|
946
|
+
if self._extension_path and os.path.exists(self._extension_path):
|
|
947
|
+
shutil.rmtree(self._extension_path)
|
|
948
|
+
|
|
949
|
+
# Clear page reference after closing context
|
|
950
|
+
self.page = None
|
|
951
|
+
|
|
952
|
+
final_path = temp_video_path
|
|
953
|
+
if temp_video_path and output_path and os.path.exists(temp_video_path):
|
|
954
|
+
try:
|
|
955
|
+
output_path = str(output_path)
|
|
956
|
+
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
|
957
|
+
shutil.move(temp_video_path, output_path)
|
|
958
|
+
final_path = output_path
|
|
959
|
+
except Exception as e:
|
|
960
|
+
import warnings
|
|
961
|
+
|
|
962
|
+
warnings.warn(f"Failed to rename video file: {e}")
|
|
963
|
+
final_path = temp_video_path
|
|
964
|
+
|
|
965
|
+
return final_path
|
|
966
|
+
|
|
967
|
+
async def __aenter__(self):
|
|
968
|
+
"""Async context manager entry"""
|
|
969
|
+
await self.start()
|
|
970
|
+
return self
|
|
971
|
+
|
|
972
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
973
|
+
"""Async context manager exit"""
|
|
974
|
+
await self.close()
|
|
975
|
+
|
|
976
|
+
@classmethod
async def from_existing(
    cls,
    context: AsyncBrowserContext,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already-open Playwright BrowserContext in an AsyncSentienceBrowser.

    The instance is constructed with only api_key and api_url; start() is not
    called. The context's first page is adopted, or a new page is opened when
    the context has none.

    Args:
        context: Existing Playwright BrowserContext
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance configured to use the existing context
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.context = context

    open_pages = context.pages
    if open_pages:
        browser.page = open_pages[0]
    else:
        browser.page = await context.new_page()

    # Stealth patches are applied only when the optional dependency is present
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed half-second pause for the extension; readiness is not polled here
    await asyncio.sleep(0.5)

    return browser
|
|
1007
|
+
|
|
1008
|
+
@classmethod
async def from_page(
    cls,
    page: AsyncPage,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already-open Playwright Page in an AsyncSentienceBrowser.

    The instance is constructed with only api_key and api_url; start() is not
    called. The page's own context becomes the instance's context.

    Args:
        page: Existing Playwright Page
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance configured to use the existing page
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.page, browser.context = page, page.context

    # Stealth patches are applied only when the optional dependency is present
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed half-second pause for the extension; readiness is not polled here
    await asyncio.sleep(0.5)

    return browser
|