sentienceapi 0.90.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (50) hide show
  1. sentience/__init__.py +153 -0
  2. sentience/_extension_loader.py +40 -0
  3. sentience/actions.py +837 -0
  4. sentience/agent.py +1246 -0
  5. sentience/agent_config.py +43 -0
  6. sentience/async_api.py +101 -0
  7. sentience/base_agent.py +194 -0
  8. sentience/browser.py +1037 -0
  9. sentience/cli.py +130 -0
  10. sentience/cloud_tracing.py +382 -0
  11. sentience/conversational_agent.py +509 -0
  12. sentience/expect.py +188 -0
  13. sentience/extension/background.js +233 -0
  14. sentience/extension/content.js +298 -0
  15. sentience/extension/injected_api.js +1473 -0
  16. sentience/extension/manifest.json +36 -0
  17. sentience/extension/pkg/sentience_core.d.ts +51 -0
  18. sentience/extension/pkg/sentience_core.js +529 -0
  19. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  20. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  21. sentience/extension/release.json +115 -0
  22. sentience/extension/test-content.js +4 -0
  23. sentience/formatting.py +59 -0
  24. sentience/generator.py +202 -0
  25. sentience/inspector.py +365 -0
  26. sentience/llm_provider.py +637 -0
  27. sentience/models.py +412 -0
  28. sentience/overlay.py +222 -0
  29. sentience/query.py +303 -0
  30. sentience/read.py +185 -0
  31. sentience/recorder.py +589 -0
  32. sentience/schemas/trace_v1.json +216 -0
  33. sentience/screenshot.py +100 -0
  34. sentience/snapshot.py +516 -0
  35. sentience/text_search.py +290 -0
  36. sentience/trace_indexing/__init__.py +27 -0
  37. sentience/trace_indexing/index_schema.py +111 -0
  38. sentience/trace_indexing/indexer.py +357 -0
  39. sentience/tracer_factory.py +211 -0
  40. sentience/tracing.py +285 -0
  41. sentience/utils.py +296 -0
  42. sentience/wait.py +137 -0
  43. sentienceapi-0.90.17.dist-info/METADATA +917 -0
  44. sentienceapi-0.90.17.dist-info/RECORD +50 -0
  45. sentienceapi-0.90.17.dist-info/WHEEL +5 -0
  46. sentienceapi-0.90.17.dist-info/entry_points.txt +2 -0
  47. sentienceapi-0.90.17.dist-info/licenses/LICENSE +24 -0
  48. sentienceapi-0.90.17.dist-info/licenses/LICENSE-APACHE +201 -0
  49. sentienceapi-0.90.17.dist-info/licenses/LICENSE-MIT +21 -0
  50. sentienceapi-0.90.17.dist-info/top_level.txt +1 -0
sentience/browser.py ADDED
@@ -0,0 +1,1037 @@
1
+ """
2
+ Playwright browser harness with extension loading
3
+ """
4
+
5
+ import asyncio
6
+ import os
7
+ import shutil
8
+ import tempfile
9
+ import time
10
+ from pathlib import Path
11
+ from urllib.parse import urlparse
12
+
13
+ from playwright.async_api import BrowserContext as AsyncBrowserContext
14
+ from playwright.async_api import Page as AsyncPage
15
+ from playwright.async_api import Playwright as AsyncPlaywright
16
+ from playwright.async_api import async_playwright
17
+ from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
18
+
19
+ from sentience._extension_loader import find_extension_path
20
+ from sentience.models import ProxyConfig, StorageState, Viewport
21
+
22
+ # Import stealth for bot evasion (optional - graceful fallback if not available)
23
+ try:
24
+ from playwright_stealth import stealth_async, stealth_sync
25
+
26
+ STEALTH_AVAILABLE = True
27
+ except ImportError:
28
+ STEALTH_AVAILABLE = False
29
+
30
+
31
class SentienceBrowser:
    """Synchronous Playwright browser session with the Sentience extension loaded.

    Use ``start()`` / ``close()`` directly, or use the instance as a context
    manager. ``from_existing()`` and ``from_page()`` wrap a context or page the
    caller created.
    """

    _DEFAULT_API_URL = "https://api.sentienceapi.com"
    _DEFAULT_SIZE = {"width": 1280, "height": 800}
    # Fixed desktop UA so that "HeadlessChrome" never appears in the User-Agent.
    _USER_AGENT = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    )

    def __init__(
        self,
        api_key: str | None = None,
        api_url: str | None = None,
        headless: bool | None = None,
        proxy: str | None = None,
        user_data_dir: str | Path | None = None,
        storage_state: str | Path | StorageState | dict | None = None,
        record_video_dir: str | Path | None = None,
        record_video_size: dict[str, int] | None = None,
        viewport: Viewport | dict[str, int] | None = None,
    ):
        """
        Initialize Sentience browser

        Args:
            api_key: Optional API key for server-side processing (Pro/Enterprise tiers).
                If None, uses free tier (local extension only).
            api_url: Server URL for API calls. Defaults to https://api.sentienceapi.com
                when api_key is given and api_url is empty; otherwise stored as passed.
            headless: Whether to run in headless mode. If None, True only when the
                CI environment variable equals "true" (case-insensitive).
            proxy: Optional proxy URL (e.g., 'http://user:pass@proxy.example.com:8080').
                Falls back to the SENTIENCE_PROXY environment variable.
            user_data_dir: Optional profile directory for persistent sessions.
                If None, an ephemeral profile is used.
            storage_state: Optional storage state to inject (cookies + localStorage):
                path to a JSON file, a StorageState object, or a dict.
            record_video_dir: Optional directory for video recordings.
                If None, no video recording is performed.
            record_video_size: Optional dict with 'width' and 'height' keys.
                If None, defaults to 1280x800.
            viewport: Viewport object or dict with 'width' and 'height' keys.
                If None, defaults to Viewport(width=1280, height=800).
        """
        self.api_key = api_key
        # A key without an explicit URL means the hosted API; without a key the
        # URL is kept exactly as given (None selects the free, local-only tier).
        if self.api_key and not api_url:
            self.api_url = self._DEFAULT_API_URL
        else:
            self.api_url = api_url

        # Headless by default only on CI.
        if headless is None:
            self.headless = os.environ.get("CI", "").lower() == "true"
        else:
            self.headless = headless

        # Explicit argument wins over the environment variable.
        self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")

        # Auth injection support
        self.user_data_dir = user_data_dir
        self.storage_state = storage_state

        # Video recording support
        self.record_video_dir = record_video_dir
        self.record_video_size = record_video_size or dict(self._DEFAULT_SIZE)

        # Viewport configuration - convert dict to Viewport if needed
        if viewport is None:
            self.viewport = Viewport(width=1280, height=800)
        elif isinstance(viewport, dict):
            self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
        else:
            self.viewport = viewport

        self.playwright: Playwright | None = None
        self.context: BrowserContext | None = None
        self.page: Page | None = None
        self._extension_path: str | None = None

    def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
        """
        Parse proxy connection string into ProxyConfig.

        Never raises: every invalid input is reported on stdout and yields None.

        Args:
            proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')

        Returns:
            ProxyConfig object, or None if the string is empty or invalid
        """
        from urllib.parse import unquote

        if not proxy_string:
            return None

        try:
            parsed = urlparse(proxy_string)

            if parsed.scheme not in ("http", "https", "socks5"):
                print(f"⚠️ [Sentience] Unsupported proxy scheme: {parsed.scheme}")
                print(" Supported: http, https, socks5")
                return None

            # Accessing .port raises ValueError for a malformed port; the
            # except clause below reports that.
            if not parsed.hostname or not parsed.port:
                print("⚠️ [Sentience] Proxy URL must include hostname and port")
                print(" Expected format: http://username:password@host:port")
                return None

            # urlparse strips the brackets from IPv6 literals; restore them so
            # the host/port boundary stays unambiguous.
            host = parsed.hostname
            if ":" in host:
                host = f"[{host}]"

            # Userinfo is percent-encoded inside a URL; decode it before use.
            return ProxyConfig(
                server=f"{parsed.scheme}://{host}:{parsed.port}",
                username=unquote(parsed.username) if parsed.username else None,
                password=unquote(parsed.password) if parsed.password else None,
            )

        except Exception as e:
            # Deliberate best effort: a bad proxy string must not crash startup.
            print(f"⚠️ [Sentience] Invalid proxy configuration: {e}")
            print(" Expected format: http://username:password@host:port")
            return None

    def start(self) -> None:
        """Launch browser with extension loaded.

        If any step fails, everything created so far (temp extension copy,
        Playwright driver, context) is released before the error is re-raised.
        """
        try:
            self._launch()
        except BaseException:
            try:
                self.close()
            except Exception:
                # Cleanup is best effort; the startup error is what matters.
                pass
            raise

    def _launch(self) -> None:
        """Copy the extension, start Playwright and open the persistent context."""
        extension_source = find_extension_path()

        # Work on a private copy to avoid file locking issues and ensure clean state.
        self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")
        shutil.copytree(extension_source, self._extension_path, dirs_exist_ok=True)

        self.playwright = sync_playwright().start()

        args = [
            f"--disable-extensions-except={self._extension_path}",
            f"--load-extension={self._extension_path}",
            "--disable-blink-features=AutomationControlled",  # Hides 'navigator.webdriver'
            "--no-sandbox",
            "--disable-infobars",
            # WebRTC leak protection (prevents real IP exposure when using proxies/VPNs)
            "--disable-features=WebRtcHideLocalIpsWithMdns",
            "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
        ]

        # Extensions do not load with Playwright's headless=True, so headless
        # mode is requested through Chrome's own flag instead.
        if self.headless:
            args.append("--headless=new")

        proxy_config = self._parse_proxy(self.proxy) if self.proxy else None

        if self.user_data_dir:
            user_data_dir = str(self.user_data_dir)
            Path(user_data_dir).mkdir(parents=True, exist_ok=True)
        else:
            user_data_dir = ""  # Ephemeral temp dir

        launch_params = {
            "user_data_dir": user_data_dir,
            "headless": False,  # IMPORTANT: headless is driven by --headless=new above
            "args": args,
            "viewport": {"width": self.viewport.width, "height": self.viewport.height},
            "user_agent": self._USER_AGENT,
        }

        if proxy_config:
            launch_params["proxy"] = proxy_config.to_playwright_dict()
            # Many residential proxies use self-signed certs.
            launch_params["ignore_https_errors"] = True
            print(f"🌐 [Sentience] Using proxy: {proxy_config.server}")

        if self.record_video_dir:
            video_dir = Path(self.record_video_dir)
            video_dir.mkdir(parents=True, exist_ok=True)
            launch_params["record_video_dir"] = str(video_dir)
            launch_params["record_video_size"] = self.record_video_size
            print(f"🎥 [Sentience] Recording video to: {video_dir}")
            print(
                f" Resolution: {self.record_video_size['width']}x{self.record_video_size['height']}"
            )

        # A persistent context is required for extensions.
        self.context = self.playwright.chromium.launch_persistent_context(**launch_params)
        self.page = self.context.pages[0] if self.context.pages else self.context.new_page()

        # Storage state can only be injected once the context exists.
        if self.storage_state:
            self._inject_storage_state(self.storage_state)

        if STEALTH_AVAILABLE:
            stealth_sync(self.page)

        # Wait a moment for extension to initialize
        time.sleep(0.5)

    def goto(self, url: str) -> None:
        """Navigate to a URL and ensure extension is ready.

        Raises:
            RuntimeError: If the browser was not started, or the extension API
                did not become available after navigation.
        """
        if not self.page:
            raise RuntimeError("Browser not started. Call start() first.")

        self.page.goto(url, wait_until="domcontentloaded")

        if self._wait_for_extension():
            return

        # Gather diagnostic info before failing
        try:
            diag = self.page.evaluate(
                """() => ({
                    sentience_defined: typeof window.sentience !== 'undefined',
                    registry_defined: typeof window.sentience_registry !== 'undefined',
                    snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
                    url: window.location.href
                })"""
            )
        except Exception as e:
            diag = f"Failed to get diagnostics: {str(e)}"

        raise RuntimeError(
            "Extension failed to load after navigation. Make sure:\n"
            "1. Extension is built (cd sentience-chrome && ./build.sh)\n"
            "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n"
            "3. Check browser console for errors (run with headless=False to see console)\n"
            f"4. Extension path: {self._extension_path}\n"
            f"5. Diagnostic info: {diag}"
        )

    def _inject_storage_state(
        self, storage_state: str | Path | StorageState | dict
    ) -> None:  # noqa: C901
        """
        Inject storage state (cookies + localStorage) into browser context.

        Cookies are added to the context in one call. localStorage is written by
        navigating ``self.page`` to each origin in turn; a failure for one origin
        is printed and does not stop the others.

        Args:
            storage_state: Path to JSON file, StorageState object, or dict containing storage state

        Raises:
            ValueError: If storage_state is none of the supported types.
        """
        import json

        if isinstance(storage_state, (str, Path)):
            with open(storage_state, encoding="utf-8") as f:
                state = StorageState.from_dict(json.load(f))
        elif isinstance(storage_state, StorageState):
            state = storage_state
        elif isinstance(storage_state, dict):
            state = StorageState.from_dict(storage_state)
        else:
            raise ValueError(
                f"Invalid storage_state type: {type(storage_state)}. "
                "Expected str, Path, StorageState, or dict."
            )

        # Inject cookies (works globally)
        if state.cookies:
            playwright_cookies = []
            for cookie in state.cookies:
                cookie_dict = cookie.model_dump()
                playwright_cookie = {
                    "name": cookie_dict["name"],
                    "value": cookie_dict["value"],
                    "domain": cookie_dict["domain"],
                    "path": cookie_dict["path"],
                }
                # Optional attributes are forwarded only when set to a truthy value.
                for key in ("expires", "httpOnly", "secure", "sameSite"):
                    if cookie_dict.get(key):
                        playwright_cookie[key] = cookie_dict[key]
                playwright_cookies.append(playwright_cookie)

            self.context.add_cookies(playwright_cookies)
            print(f"✅ [Sentience] Injected {len(state.cookies)} cookie(s)")

        # Inject LocalStorage (requires navigation to each domain)
        if state.origins:
            for origin_data in state.origins:
                origin = origin_data.origin
                if not origin:
                    continue

                try:
                    self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

                    if origin_data.localStorage:
                        localStorage_dict = {
                            item.name: item.value for item in origin_data.localStorage
                        }
                        self.page.evaluate(
                            """(localStorage_data) => {
                                for (const [key, value] of Object.entries(localStorage_data)) {
                                    localStorage.setItem(key, value);
                                }
                            }""",
                            localStorage_dict,
                        )
                        print(
                            f"✅ [Sentience] Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
                        )
                except Exception as e:
                    print(f"⚠️ [Sentience] Failed to inject localStorage for {origin}: {e}")

    def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
        """Poll the page until window.sentience is available.

        Args:
            timeout_sec: Maximum time to keep polling.

        Returns:
            True once the page reports ready; False on timeout (the last
            observed status, if any, is emitted as a warning).
        """
        import warnings

        # Monotonic clock: the deadline must not move with wall-clock changes.
        deadline = time.monotonic() + timeout_sec
        last_error = None

        while time.monotonic() < deadline:
            try:
                result = self.page.evaluate(
                    """() => {
                        if (typeof window.sentience === 'undefined') {
                            return { ready: false, reason: 'window.sentience undefined' };
                        }
                        // Check if WASM loaded (if exposed) or if basic API works
                        // Note: injected_api.js defines window.sentience immediately,
                        // but _wasmModule might take a few ms to load.
                        if (window.sentience._wasmModule === null) {
                            // It's defined but WASM isn't linked yet
                            return { ready: false, reason: 'WASM module not fully loaded' };
                        }
                        // If _wasmModule is not exposed, that's okay - it might be internal
                        // Just verify the API structure is correct
                        return { ready: true };
                    }
                    """
                )

                if isinstance(result, dict):
                    if result.get("ready"):
                        return True
                    last_error = result.get("reason", "Unknown error")
            except Exception as e:
                # Continue waiting on errors
                last_error = f"Evaluation error: {str(e)}"

            time.sleep(0.3)

        if last_error:
            warnings.warn(f"Extension wait timeout. Last status: {last_error}")

        return False

    def close(self, output_path: str | Path | None = None) -> str | None:
        """
        Close browser and cleanup

        Safe to call more than once. The Playwright driver is stopped and the
        temporary extension copy removed even when closing the context raises.

        Args:
            output_path: Optional path to move the recorded video to.

        Returns:
            Path to video file if recording was enabled and a path could be
            determined, None otherwise. If multiple pages exist, the first page
            with a video is used.
        """
        import warnings

        temp_video_path = None

        # The video path has to be read before the context is closed; the file
        # itself is finalized by the close.
        if self.record_video_dir:
            try:
                if self.page and self.page.video:
                    temp_video_path = self.page.video.path()
                elif self.context:
                    for page in self.context.pages:
                        if page.video:
                            temp_video_path = page.video.path()
                            break
            except Exception:
                # Path may not be available; caller can still check the directory.
                pass

        # Detach the handles first so a second close() is a no-op.
        context, driver, ext_dir = self.context, self.playwright, self._extension_path
        self.context = None
        self.page = None
        self.playwright = None
        self._extension_path = None

        try:
            if context:
                context.close()  # triggers video file finalization
        finally:
            try:
                if driver:
                    driver.stop()
            finally:
                if ext_dir and os.path.exists(ext_dir):
                    shutil.rmtree(ext_dir, ignore_errors=True)

        # Rename/move video if output_path is specified
        final_path = temp_video_path
        if temp_video_path and output_path and os.path.exists(temp_video_path):
            try:
                output_path = str(output_path)
                Path(output_path).parent.mkdir(parents=True, exist_ok=True)
                shutil.move(temp_video_path, output_path)
                final_path = output_path
            except Exception as e:
                warnings.warn(f"Failed to rename video file: {e}")
                # Return original path if rename fails
                final_path = temp_video_path

        return final_path

    @classmethod
    def from_existing(
        cls,
        context: BrowserContext,
        api_key: str | None = None,
        api_url: str | None = None,
    ) -> "SentienceBrowser":
        """
        Create SentienceBrowser from an existing Playwright BrowserContext.

        The first page of the context is used, or a new page is opened if the
        context has none.

        Args:
            context: Existing Playwright BrowserContext
            api_key: Optional API key for server-side processing
            api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)

        Returns:
            SentienceBrowser instance configured to use the existing context

        Example:
            from playwright.sync_api import sync_playwright
            from sentience import SentienceBrowser, snapshot

            with sync_playwright() as p:
                context = p.chromium.launch_persistent_context(...)
                browser = SentienceBrowser.from_existing(context)
                browser.page.goto("https://example.com")
                snap = snapshot(browser)
        """
        instance = cls(api_key=api_key, api_url=api_url)
        instance.context = context
        instance.page = context.pages[0] if context.pages else context.new_page()

        if STEALTH_AVAILABLE:
            stealth_sync(instance.page)

        # Give an already-loaded extension a moment to initialize.
        time.sleep(0.5)

        return instance

    @classmethod
    def from_page(
        cls,
        page: Page,
        api_key: str | None = None,
        api_url: str | None = None,
    ) -> "SentienceBrowser":
        """
        Create SentienceBrowser from an existing Playwright Page.

        Args:
            page: Existing Playwright Page
            api_key: Optional API key for server-side processing
            api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)

        Returns:
            SentienceBrowser instance configured to use the existing page

        Example:
            from playwright.sync_api import sync_playwright
            from sentience import SentienceBrowser, snapshot

            with sync_playwright() as p:
                browser_instance = p.chromium.launch()
                context = browser_instance.new_context()
                page = context.new_page()
                page.goto("https://example.com")

                browser = SentienceBrowser.from_page(page)
                snap = snapshot(browser)
        """
        instance = cls(api_key=api_key, api_url=api_url)
        instance.page = page
        instance.context = page.context

        if STEALTH_AVAILABLE:
            stealth_sync(instance.page)

        # Give an already-loaded extension a moment to initialize.
        time.sleep(0.5)

        return instance

    def __enter__(self):
        """Context manager entry: start the browser and return self."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close the browser; exceptions are not suppressed."""
        self.close()
582
+
583
+
584
+ class AsyncSentienceBrowser:
585
+ """Async version of SentienceBrowser for use in asyncio contexts."""
586
+
587
+ def __init__(
588
+ self,
589
+ api_key: str | None = None,
590
+ api_url: str | None = None,
591
+ headless: bool | None = None,
592
+ proxy: str | None = None,
593
+ user_data_dir: str | Path | None = None,
594
+ storage_state: str | Path | StorageState | dict | None = None,
595
+ record_video_dir: str | Path | None = None,
596
+ record_video_size: dict[str, int] | None = None,
597
+ viewport: Viewport | dict[str, int] | None = None,
598
+ ):
599
+ """
600
+ Initialize Async Sentience browser
601
+
602
+ Args:
603
+ api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
604
+ If None, uses free tier (local extension only)
605
+ api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
606
+ headless: Whether to run in headless mode. If None, defaults to True in CI, False otherwise
607
+ proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
608
+ user_data_dir: Optional path to user data directory for persistent sessions
609
+ storage_state: Optional storage state to inject (cookies + localStorage)
610
+ record_video_dir: Optional directory path to save video recordings
611
+ record_video_size: Optional video resolution as dict with 'width' and 'height' keys
612
+ viewport: Optional viewport size as Viewport object or dict with 'width' and 'height' keys.
613
+ Examples: Viewport(width=1280, height=800) (default)
614
+ Viewport(width=1920, height=1080) (Full HD)
615
+ {"width": 1280, "height": 800} (dict also supported)
616
+ If None, defaults to Viewport(width=1280, height=800).
617
+ """
618
+ self.api_key = api_key
619
+ # Only set api_url if api_key is provided, otherwise None (free tier)
620
+ if self.api_key and not api_url:
621
+ self.api_url = "https://api.sentienceapi.com"
622
+ else:
623
+ self.api_url = api_url
624
+
625
+ # Determine headless mode
626
+ if headless is None:
627
+ # Default to False for local dev, True for CI
628
+ self.headless = os.environ.get("CI", "").lower() == "true"
629
+ else:
630
+ self.headless = headless
631
+
632
+ # Support proxy from argument or environment variable
633
+ self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")
634
+
635
+ # Auth injection support
636
+ self.user_data_dir = user_data_dir
637
+ self.storage_state = storage_state
638
+
639
+ # Video recording support
640
+ self.record_video_dir = record_video_dir
641
+ self.record_video_size = record_video_size or {"width": 1280, "height": 800}
642
+
643
+ # Viewport configuration - convert dict to Viewport if needed
644
+ if viewport is None:
645
+ self.viewport = Viewport(width=1280, height=800)
646
+ elif isinstance(viewport, dict):
647
+ self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
648
+ else:
649
+ self.viewport = viewport
650
+
651
+ self.playwright: AsyncPlaywright | None = None
652
+ self.context: AsyncBrowserContext | None = None
653
+ self.page: AsyncPage | None = None
654
+ self._extension_path: str | None = None
655
+
656
+ def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
657
+ """
658
+ Parse proxy connection string into ProxyConfig.
659
+
660
+ Args:
661
+ proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')
662
+
663
+ Returns:
664
+ ProxyConfig object or None if invalid
665
+ """
666
+ if not proxy_string:
667
+ return None
668
+
669
+ try:
670
+ parsed = urlparse(proxy_string)
671
+
672
+ # Validate scheme
673
+ if parsed.scheme not in ("http", "https", "socks5"):
674
+ print(f"⚠️ [Sentience] Unsupported proxy scheme: {parsed.scheme}")
675
+ print(" Supported: http, https, socks5")
676
+ return None
677
+
678
+ # Validate host and port
679
+ if not parsed.hostname or not parsed.port:
680
+ print("⚠️ [Sentience] Proxy URL must include hostname and port")
681
+ print(" Expected format: http://username:password@host:port")
682
+ return None
683
+
684
+ # Build server URL
685
+ server = f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
686
+
687
+ # Create ProxyConfig with optional credentials
688
+ return ProxyConfig(
689
+ server=server,
690
+ username=parsed.username if parsed.username else None,
691
+ password=parsed.password if parsed.password else None,
692
+ )
693
+
694
+ except Exception as e:
695
+ print(f"⚠️ [Sentience] Invalid proxy configuration: {e}")
696
+ print(" Expected format: http://username:password@host:port")
697
+ return None
698
+
699
async def start(self) -> None:
    """Launch browser with extension loaded (async)

    If any step after the extension lookup fails, whatever was already
    created (temp extension copy, Playwright driver, context) is released
    and the instance handles are reset before the error is re-raised.
    """
    extension_source = find_extension_path()

    try:
        # Work on a private copy of the extension bundle.
        self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")
        shutil.copytree(extension_source, self._extension_path, dirs_exist_ok=True)

        self.playwright = await async_playwright().start()

        args = [
            f"--disable-extensions-except={self._extension_path}",
            f"--load-extension={self._extension_path}",
            "--disable-blink-features=AutomationControlled",
            "--no-sandbox",
            "--disable-infobars",
            "--disable-features=WebRtcHideLocalIpsWithMdns",
            "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
        ]

        # Headless is requested through Chrome's own flag; Playwright's
        # "headless" option below stays False.
        if self.headless:
            args.append("--headless=new")

        proxy_config = self._parse_proxy(self.proxy) if self.proxy else None

        if self.user_data_dir:
            user_data_dir = str(self.user_data_dir)
            Path(user_data_dir).mkdir(parents=True, exist_ok=True)
        else:
            user_data_dir = ""

        launch_params = {
            "user_data_dir": user_data_dir,
            "headless": False,
            "args": args,
            "viewport": {"width": self.viewport.width, "height": self.viewport.height},
            "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        }

        if proxy_config:
            launch_params["proxy"] = proxy_config.to_playwright_dict()
            launch_params["ignore_https_errors"] = True
            print(f"🌐 [Sentience] Using proxy: {proxy_config.server}")

        if self.record_video_dir:
            video_dir = Path(self.record_video_dir)
            video_dir.mkdir(parents=True, exist_ok=True)
            launch_params["record_video_dir"] = str(video_dir)
            launch_params["record_video_size"] = self.record_video_size
            print(f"🎥 [Sentience] Recording video to: {video_dir}")
            print(
                f" Resolution: {self.record_video_size['width']}x{self.record_video_size['height']}"
            )

        self.context = await self.playwright.chromium.launch_persistent_context(**launch_params)
        self.page = self.context.pages[0] if self.context.pages else await self.context.new_page()

        # Storage state can only be injected once the context exists.
        if self.storage_state:
            await self._inject_storage_state(self.storage_state)

        if STEALTH_AVAILABLE:
            await stealth_async(self.page)

        # Wait a moment for extension to initialize
        await asyncio.sleep(0.5)
    except BaseException:
        # Undo the partial startup so nothing is left running or on disk.
        context, driver, ext_dir = self.context, self.playwright, self._extension_path
        self.context = None
        self.page = None
        self.playwright = None
        self._extension_path = None
        try:
            if context is not None:
                await context.close()
        except Exception:
            pass  # best effort; the startup error is re-raised below
        try:
            if driver is not None:
                await driver.stop()
        except Exception:
            pass  # best effort; the startup error is re-raised below
        if ext_dir:
            shutil.rmtree(ext_dir, ignore_errors=True)
        raise
775
+
776
async def goto(self, url: str) -> None:
    """
    Load ``url`` in the current page and confirm the extension is usable (async)

    Args:
        url: Address to navigate the current page to

    Raises:
        RuntimeError: If no page is available yet, or if the extension does
            not report ready after navigation. In the second case the message
            includes a best-effort diagnostic snapshot taken from the page.
    """
    if not self.page:
        raise RuntimeError("Browser not started. Call await start() first.")

    await self.page.goto(url, wait_until="domcontentloaded")

    # Happy path: the extension answered within the polling window
    extension_ready = await self._wait_for_extension()
    if extension_ready:
        return

    # Extension never came up: collect whatever state the page will reveal so
    # the error message is actionable. Collecting it must not mask the failure.
    diagnostics_script = """() => ({
        sentience_defined: typeof window.sentience !== 'undefined',
        registry_defined: typeof window.sentience_registry !== 'undefined',
        snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
        extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
        url: window.location.href
    })"""
    try:
        diag = await self.page.evaluate(diagnostics_script)
    except Exception as e:
        diag = f"Failed to get diagnostics: {str(e)}"

    message = (
        "Extension failed to load after navigation. Make sure:\n"
        "1. Extension is built (cd sentience-chrome && ./build.sh)\n"
        "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n"
        "3. Check browser console for errors (run with headless=False to see console)\n"
        f"4. Extension path: {self._extension_path}\n"
        f"5. Diagnostic info: {diag}"
    )
    raise RuntimeError(message)
806
+
807
async def _inject_storage_state(self, storage_state: str | Path | StorageState | dict) -> None:
    """
    Apply saved cookies and localStorage entries to the browser context (async)

    Args:
        storage_state: A path (str or Path) to a JSON file, an already built
            StorageState, or a dict accepted by StorageState.from_dict

    Raises:
        ValueError: If storage_state is none of the accepted types
    """
    import json

    # Normalise every accepted input form into a StorageState instance
    if isinstance(storage_state, (str, Path)):
        with open(storage_state, encoding="utf-8") as handle:
            state = StorageState.from_dict(json.load(handle))
    elif isinstance(storage_state, StorageState):
        state = storage_state
    elif isinstance(storage_state, dict):
        state = StorageState.from_dict(storage_state)
    else:
        raise ValueError(
            f"Invalid storage_state type: {type(storage_state)}. "
            "Expected str, Path, StorageState, or dict."
        )

    # Cookies: the four identifying fields are always sent; the remaining
    # attributes are forwarded only when they carry a truthy value.
    if state.cookies:
        required_fields = ("name", "value", "domain", "path")
        optional_fields = ("expires", "httpOnly", "secure", "sameSite")

        cookies_payload = []
        for cookie in state.cookies:
            raw = cookie.model_dump()
            entry = {field: raw[field] for field in required_fields}
            entry.update({field: raw[field] for field in optional_fields if raw.get(field)})
            cookies_payload.append(entry)

        await self.context.add_cookies(cookies_payload)
        print(f"✅ [Sentience] Injected {len(state.cookies)} cookie(s)")

    # localStorage: the page is navigated to each origin before its items are
    # written. A failure for one origin is reported and the loop moves on.
    for origin_data in state.origins or ():
        origin = origin_data.origin
        if not origin:
            continue

        try:
            await self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

            stored_items = origin_data.localStorage
            if not stored_items:
                continue

            items_by_name = {entry.name: entry.value for entry in stored_items}
            await self.page.evaluate(
                """(localStorage_data) => {
                    for (const [key, value] of Object.entries(localStorage_data)) {
                        localStorage.setItem(key, value);
                    }
                }""",
                items_by_name,
            )
            print(
                f"✅ [Sentience] Injected {len(stored_items)} localStorage item(s) for {origin}"
            )
        except Exception as e:
            print(f"⚠️ [Sentience] Failed to inject localStorage for {origin}: {e}")
877
+
878
+ async def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
879
+ """Poll for window.sentience to be available (async)"""
880
+ start_time = time.time()
881
+ last_error = None
882
+
883
+ while time.time() - start_time < timeout_sec:
884
+ try:
885
+ result = await self.page.evaluate(
886
+ """() => {
887
+ if (typeof window.sentience === 'undefined') {
888
+ return { ready: false, reason: 'window.sentience undefined' };
889
+ }
890
+ if (window.sentience._wasmModule === null) {
891
+ return { ready: false, reason: 'WASM module not fully loaded' };
892
+ }
893
+ return { ready: true };
894
+ }
895
+ """
896
+ )
897
+
898
+ if isinstance(result, dict):
899
+ if result.get("ready"):
900
+ return True
901
+ last_error = result.get("reason", "Unknown error")
902
+ except Exception as e:
903
+ last_error = f"Evaluation error: {str(e)}"
904
+
905
+ await asyncio.sleep(0.3)
906
+
907
+ if last_error:
908
+ import warnings
909
+
910
+ warnings.warn(f"Extension wait timeout. Last status: {last_error}")
911
+
912
+ return False
913
+
914
+ async def close(self, output_path: str | Path | None = None) -> str | None:
915
+ """
916
+ Close browser and cleanup (async)
917
+
918
+ Args:
919
+ output_path: Optional path to rename the video file to
920
+
921
+ Returns:
922
+ Path to video file if recording was enabled, None otherwise
923
+ """
924
+ temp_video_path = None
925
+
926
+ if self.record_video_dir:
927
+ try:
928
+ if self.page and self.page.video:
929
+ temp_video_path = await self.page.video.path()
930
+ elif self.context:
931
+ for page in self.context.pages:
932
+ if page.video:
933
+ temp_video_path = await page.video.path()
934
+ break
935
+ except Exception:
936
+ pass
937
+
938
+ if self.context:
939
+ await self.context.close()
940
+ self.context = None
941
+
942
+ if self.playwright:
943
+ await self.playwright.stop()
944
+ self.playwright = None
945
+
946
+ if self._extension_path and os.path.exists(self._extension_path):
947
+ shutil.rmtree(self._extension_path)
948
+
949
+ # Clear page reference after closing context
950
+ self.page = None
951
+
952
+ final_path = temp_video_path
953
+ if temp_video_path and output_path and os.path.exists(temp_video_path):
954
+ try:
955
+ output_path = str(output_path)
956
+ Path(output_path).parent.mkdir(parents=True, exist_ok=True)
957
+ shutil.move(temp_video_path, output_path)
958
+ final_path = output_path
959
+ except Exception as e:
960
+ import warnings
961
+
962
+ warnings.warn(f"Failed to rename video file: {e}")
963
+ final_path = temp_video_path
964
+
965
+ return final_path
966
+
967
+ async def __aenter__(self):
968
+ """Async context manager entry"""
969
+ await self.start()
970
+ return self
971
+
972
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
973
+ """Async context manager exit"""
974
+ await self.close()
975
+
976
@classmethod
async def from_existing(
    cls,
    context: AsyncBrowserContext,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already running Playwright BrowserContext in an AsyncSentienceBrowser.

    The context's first open page is adopted; if it has none, a new page is
    opened in it.

    Args:
        context: Existing Playwright BrowserContext
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance bound to the given context
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.context = context

    # Reuse the first open page when there is one, otherwise open a fresh page
    open_pages = context.pages
    if open_pages:
        browser.page = open_pages[0]
    else:
        browser.page = await context.new_page()

    # Stealth patches are applied only when the optional dependency is present
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed pause intended to let the extension initialise (no readiness polling here)
    await asyncio.sleep(0.5)

    return browser
1007
+
1008
@classmethod
async def from_page(
    cls,
    page: AsyncPage,
    api_key: str | None = None,
    api_url: str | None = None,
) -> "AsyncSentienceBrowser":
    """
    Wrap an already open Playwright Page in an AsyncSentienceBrowser.

    The page's own context becomes the instance's context.

    Args:
        page: Existing Playwright Page
        api_key: Optional API key for server-side processing
        api_url: Optional API URL

    Returns:
        AsyncSentienceBrowser instance bound to the given page
    """
    browser = cls(api_key=api_key, api_url=api_url)
    browser.page = page
    browser.context = page.context

    # Stealth patches are applied only when the optional dependency is present
    if STEALTH_AVAILABLE:
        await stealth_async(browser.page)

    # Fixed pause intended to let the extension initialise (no readiness polling here)
    await asyncio.sleep(0.5)

    return browser