sentienceapi 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (90)
  1. sentience/__init__.py +120 -6
  2. sentience/_extension_loader.py +156 -1
  3. sentience/action_executor.py +217 -0
  4. sentience/actions.py +758 -30
  5. sentience/agent.py +806 -293
  6. sentience/agent_config.py +3 -0
  7. sentience/agent_runtime.py +840 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +89 -1141
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +372 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +483 -0
  21. sentience/base_agent.py +95 -0
  22. sentience/browser.py +678 -39
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cloud_tracing.py +507 -42
  26. sentience/constants.py +6 -0
  27. sentience/conversational_agent.py +77 -43
  28. sentience/cursor_policy.py +142 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +98 -2
  31. sentience/extension/background.js +56 -185
  32. sentience/extension/content.js +150 -287
  33. sentience/extension/injected_api.js +1088 -1368
  34. sentience/extension/manifest.json +1 -1
  35. sentience/extension/pkg/sentience_core.d.ts +22 -22
  36. sentience/extension/pkg/sentience_core.js +275 -433
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/release.json +47 -47
  39. sentience/failure_artifacts.py +241 -0
  40. sentience/formatting.py +9 -53
  41. sentience/inspector.py +183 -1
  42. sentience/integrations/__init__.py +6 -0
  43. sentience/integrations/langchain/__init__.py +12 -0
  44. sentience/integrations/langchain/context.py +18 -0
  45. sentience/integrations/langchain/core.py +326 -0
  46. sentience/integrations/langchain/tools.py +180 -0
  47. sentience/integrations/models.py +46 -0
  48. sentience/integrations/pydanticai/__init__.py +15 -0
  49. sentience/integrations/pydanticai/deps.py +20 -0
  50. sentience/integrations/pydanticai/toolset.py +468 -0
  51. sentience/llm_interaction_handler.py +191 -0
  52. sentience/llm_provider.py +765 -66
  53. sentience/llm_provider_utils.py +120 -0
  54. sentience/llm_response_builder.py +153 -0
  55. sentience/models.py +595 -3
  56. sentience/ordinal.py +280 -0
  57. sentience/overlay.py +109 -2
  58. sentience/protocols.py +228 -0
  59. sentience/query.py +67 -5
  60. sentience/read.py +95 -3
  61. sentience/recorder.py +223 -3
  62. sentience/schemas/trace_v1.json +128 -9
  63. sentience/screenshot.py +48 -2
  64. sentience/sentience_methods.py +86 -0
  65. sentience/snapshot.py +599 -55
  66. sentience/snapshot_diff.py +126 -0
  67. sentience/text_search.py +120 -5
  68. sentience/trace_event_builder.py +148 -0
  69. sentience/trace_file_manager.py +197 -0
  70. sentience/trace_indexing/index_schema.py +95 -7
  71. sentience/trace_indexing/indexer.py +105 -48
  72. sentience/tracer_factory.py +120 -9
  73. sentience/tracing.py +172 -8
  74. sentience/utils/__init__.py +40 -0
  75. sentience/utils/browser.py +46 -0
  76. sentience/{utils.py → utils/element.py} +3 -42
  77. sentience/utils/formatting.py +59 -0
  78. sentience/verification.py +618 -0
  79. sentience/visual_agent.py +2058 -0
  80. sentience/wait.py +68 -2
  81. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/METADATA +199 -40
  82. sentienceapi-0.98.0.dist-info/RECORD +92 -0
  83. sentience/extension/test-content.js +0 -4
  84. sentienceapi-0.90.16.dist-info/RECORD +0 -50
  85. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/WHEEL +0 -0
  86. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/entry_points.txt +0 -0
  87. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE +0 -0
  88. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-APACHE +0 -0
  89. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-MIT +0 -0
  90. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/top_level.txt +0 -0
sentience/snapshot.py CHANGED
@@ -2,6 +2,7 @@
2
2
  Snapshot functionality - calls window.sentience.snapshot() or server-side API
3
3
  """
4
4
 
5
+ import asyncio
5
6
  import json
6
7
  import os
7
8
  import time
@@ -9,13 +10,216 @@ from typing import Any, Optional
9
10
 
10
11
  import requests
11
12
 
12
- from .browser import SentienceBrowser
13
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
14
+ from .browser_evaluator import BrowserEvaluator
15
+ from .constants import SENTIENCE_API_URL
13
16
  from .models import Snapshot, SnapshotOptions
17
+ from .sentience_methods import SentienceMethod
14
18
 
15
19
  # Maximum payload size for API requests (10MB server limit)
16
20
  MAX_PAYLOAD_BYTES = 10 * 1024 * 1024
17
21
 
18
22
 
23
+ def _is_execution_context_destroyed_error(e: Exception) -> bool:
24
+ """
25
+ Playwright can throw while a navigation is in-flight, invalidating the JS execution context.
26
+
27
+ Common symptoms:
28
+ - "Execution context was destroyed, most likely because of a navigation"
29
+ - "Cannot find context with specified id"
30
+ """
31
+ msg = str(e).lower()
32
+ return (
33
+ "execution context was destroyed" in msg
34
+ or "most likely because of a navigation" in msg
35
+ or "cannot find context with specified id" in msg
36
+ )
37
+
38
+
39
+ async def _page_evaluate_with_nav_retry(
40
+ page: Any,
41
+ expression: str,
42
+ arg: Any = None,
43
+ *,
44
+ retries: int = 2,
45
+ settle_timeout_ms: int = 10000,
46
+ ) -> Any:
47
+ """
48
+ Evaluate JS with a small retry loop if the page is mid-navigation.
49
+
50
+ This prevents flaky crashes when callers snapshot right after triggering a navigation
51
+ (e.g., pressing Enter on Google).
52
+ """
53
+ last_err: Exception | None = None
54
+ for attempt in range(retries + 1):
55
+ try:
56
+ if arg is None:
57
+ return await page.evaluate(expression)
58
+ return await page.evaluate(expression, arg)
59
+ except Exception as e:
60
+ last_err = e
61
+ if not _is_execution_context_destroyed_error(e) or attempt >= retries:
62
+ raise
63
+ try:
64
+ await page.wait_for_load_state("domcontentloaded", timeout=settle_timeout_ms)
65
+ except Exception:
66
+ pass
67
+ await asyncio.sleep(0.25)
68
+ raise last_err if last_err else RuntimeError("Page.evaluate failed")
69
+
70
+
71
+ async def _wait_for_function_with_nav_retry(
72
+ page: Any,
73
+ expression: str,
74
+ *,
75
+ timeout_ms: int,
76
+ retries: int = 2,
77
+ ) -> None:
78
+ last_err: Exception | None = None
79
+ for attempt in range(retries + 1):
80
+ try:
81
+ await page.wait_for_function(expression, timeout=timeout_ms)
82
+ return
83
+ except Exception as e:
84
+ last_err = e
85
+ if not _is_execution_context_destroyed_error(e) or attempt >= retries:
86
+ raise
87
+ try:
88
+ await page.wait_for_load_state("domcontentloaded", timeout=timeout_ms)
89
+ except Exception:
90
+ pass
91
+ await asyncio.sleep(0.25)
92
+ raise last_err if last_err else RuntimeError("wait_for_function failed")
93
+
94
+
95
+ def _build_snapshot_payload(
96
+ raw_result: dict[str, Any],
97
+ options: SnapshotOptions,
98
+ ) -> dict[str, Any]:
99
+ """
100
+ Build payload dict for gateway snapshot API.
101
+
102
+ Shared helper used by both sync and async snapshot implementations.
103
+ """
104
+ diagnostics = raw_result.get("diagnostics") or {}
105
+ client_metrics = None
106
+ try:
107
+ client_metrics = diagnostics.get("metrics")
108
+ except Exception:
109
+ client_metrics = None
110
+
111
+ return {
112
+ "raw_elements": raw_result.get("raw_elements", []),
113
+ "url": raw_result.get("url", ""),
114
+ "viewport": raw_result.get("viewport"),
115
+ "goal": options.goal,
116
+ "options": {
117
+ "limit": options.limit,
118
+ "filter": options.filter.model_dump() if options.filter else None,
119
+ },
120
+ "client_metrics": client_metrics,
121
+ }
122
+
123
+
124
+ def _validate_payload_size(payload_json: str) -> None:
125
+ """
126
+ Validate payload size before sending to gateway.
127
+
128
+ Raises ValueError if payload exceeds server limit.
129
+ """
130
+ payload_size = len(payload_json.encode("utf-8"))
131
+ if payload_size > MAX_PAYLOAD_BYTES:
132
+ raise ValueError(
133
+ f"Payload size ({payload_size / 1024 / 1024:.2f}MB) exceeds server limit "
134
+ f"({MAX_PAYLOAD_BYTES / 1024 / 1024:.0f}MB). "
135
+ f"Try reducing the number of elements on the page or filtering elements."
136
+ )
137
+
138
+
139
+ def _post_snapshot_to_gateway_sync(
140
+ payload: dict[str, Any],
141
+ api_key: str,
142
+ api_url: str = SENTIENCE_API_URL,
143
+ ) -> dict[str, Any]:
144
+ """
145
+ Post snapshot payload to gateway (synchronous).
146
+
147
+ Used by sync snapshot() function.
148
+ """
149
+ payload_json = json.dumps(payload)
150
+ _validate_payload_size(payload_json)
151
+
152
+ headers = {
153
+ "Authorization": f"Bearer {api_key}",
154
+ "Content-Type": "application/json",
155
+ }
156
+
157
+ response = requests.post(
158
+ f"{api_url}/v1/snapshot",
159
+ data=payload_json,
160
+ headers=headers,
161
+ timeout=30,
162
+ )
163
+ response.raise_for_status()
164
+ return response.json()
165
+
166
+
167
+ async def _post_snapshot_to_gateway_async(
168
+ payload: dict[str, Any],
169
+ api_key: str,
170
+ api_url: str = SENTIENCE_API_URL,
171
+ ) -> dict[str, Any]:
172
+ """
173
+ Post snapshot payload to gateway (asynchronous).
174
+
175
+ Used by async backend snapshot() function.
176
+ """
177
+ # Lazy import httpx - only needed for async API calls
178
+ import httpx
179
+
180
+ payload_json = json.dumps(payload)
181
+ _validate_payload_size(payload_json)
182
+
183
+ headers = {
184
+ "Authorization": f"Bearer {api_key}",
185
+ "Content-Type": "application/json",
186
+ }
187
+
188
+ async with httpx.AsyncClient(timeout=30.0) as client:
189
+ response = await client.post(
190
+ f"{api_url}/v1/snapshot",
191
+ content=payload_json,
192
+ headers=headers,
193
+ )
194
+ response.raise_for_status()
195
+ return response.json()
196
+
197
+
198
+ def _merge_api_result_with_local(
199
+ api_result: dict[str, Any],
200
+ raw_result: dict[str, Any],
201
+ ) -> dict[str, Any]:
202
+ """
203
+ Merge API result with local data (screenshot, etc.).
204
+
205
+ Shared helper used by both sync and async snapshot implementations.
206
+ """
207
+ return {
208
+ "status": api_result.get("status", "success"),
209
+ "timestamp": api_result.get("timestamp"),
210
+ "url": api_result.get("url", raw_result.get("url", "")),
211
+ "viewport": api_result.get("viewport", raw_result.get("viewport")),
212
+ "elements": api_result.get("elements", []),
213
+ "screenshot": raw_result.get("screenshot"), # Keep local screenshot
214
+ "screenshot_format": raw_result.get("screenshot_format"),
215
+ "error": api_result.get("error"),
216
+ # Phase 2: Runtime stability/debug info
217
+ "diagnostics": api_result.get("diagnostics", raw_result.get("diagnostics")),
218
+ # Phase 2: Ordinal support - dominant group key from Gateway
219
+ "dominant_group_key": api_result.get("dominant_group_key"),
220
+ }
221
+
222
+
19
223
  def _save_trace_to_file(raw_elements: list[dict[str, Any]], trace_path: str | None = None) -> None:
20
224
  """
21
225
  Save raw_elements to a JSON file for benchmarking/training
@@ -69,14 +273,18 @@ def snapshot(
69
273
  if options is None:
70
274
  options = SnapshotOptions()
71
275
 
276
+ # Resolve API key: options.sentience_api_key takes precedence, then browser.api_key
277
+ # This allows browser-use users to pass api_key via options without SentienceBrowser
278
+ effective_api_key = options.sentience_api_key or browser.api_key
279
+
72
280
  # Determine if we should use server-side API
73
281
  should_use_api = (
74
- options.use_api if options.use_api is not None else (browser.api_key is not None)
282
+ options.use_api if options.use_api is not None else (effective_api_key is not None)
75
283
  )
76
284
 
77
- if should_use_api and browser.api_key:
285
+ if should_use_api and effective_api_key:
78
286
  # Use server-side API (Pro/Enterprise tier)
79
- return _snapshot_via_api(browser, options)
287
+ return _snapshot_via_api(browser, options, effective_api_key)
80
288
  else:
81
289
  # Use local extension (Free tier)
82
290
  return _snapshot_via_extension(browser, options)
@@ -93,20 +301,248 @@ def _snapshot_via_extension(
93
301
  # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
94
302
  # The new architecture loads injected_api.js asynchronously, so window.sentience
95
303
  # may not be immediately available after page load
304
+ BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
305
+
306
+ # Build options dict for extension API (exclude save_trace/trace_path)
307
+ ext_options: dict[str, Any] = {}
308
+ if options.screenshot is not False:
309
+ # Serialize ScreenshotConfig to dict if it's a Pydantic model
310
+ if hasattr(options.screenshot, "model_dump"):
311
+ ext_options["screenshot"] = options.screenshot.model_dump()
312
+ else:
313
+ ext_options["screenshot"] = options.screenshot
314
+ if options.limit != 50:
315
+ ext_options["limit"] = options.limit
316
+ if options.filter is not None:
317
+ ext_options["filter"] = (
318
+ options.filter.model_dump() if hasattr(options.filter, "model_dump") else options.filter
319
+ )
320
+
321
+ # Call extension API
322
+ result = browser.page.evaluate(
323
+ """
324
+ (options) => {
325
+ return window.sentience.snapshot(options);
326
+ }
327
+ """,
328
+ ext_options,
329
+ )
330
+
331
+ # Save trace if requested
332
+ if options.save_trace:
333
+ _save_trace_to_file(result.get("raw_elements", []), options.trace_path)
334
+
335
+ # Validate and parse with Pydantic
336
+ snapshot_obj = Snapshot(**result)
337
+
338
+ # Show visual overlay if requested
339
+ if options.show_overlay:
340
+ # Prefer processed semantic elements for overlay (have bbox/importance/visual_cues).
341
+ # raw_elements may not match the overlay renderer's expected shape.
342
+ elements_for_overlay = result.get("elements") or result.get("raw_elements") or []
343
+ if elements_for_overlay:
344
+ browser.page.evaluate(
345
+ """
346
+ (elements) => {
347
+ if (window.sentience && window.sentience.showOverlay) {
348
+ window.sentience.showOverlay(elements, null);
349
+ }
350
+ }
351
+ """,
352
+ elements_for_overlay,
353
+ )
354
+
355
+ # Show grid overlay if requested
356
+ if options.show_grid:
357
+ # Get all grids (don't filter by grid_id here - we want to show all but highlight the target)
358
+ grids = snapshot_obj.get_grid_bounds(grid_id=None)
359
+ if grids:
360
+ # Convert GridInfo to dict for JavaScript
361
+ grid_dicts = [grid.model_dump() for grid in grids]
362
+ # Pass grid_id as targetGridId to highlight it in red
363
+ target_grid_id = options.grid_id if options.grid_id is not None else None
364
+ browser.page.evaluate(
365
+ """
366
+ (grids, targetGridId) => {
367
+ if (window.sentience && window.sentience.showGrid) {
368
+ window.sentience.showGrid(grids, targetGridId);
369
+ } else {
370
+ console.warn('[SDK] showGrid not available in extension');
371
+ }
372
+ }
373
+ """,
374
+ grid_dicts,
375
+ target_grid_id,
376
+ )
377
+
378
+ return snapshot_obj
379
+
380
+
381
+ def _snapshot_via_api(
382
+ browser: SentienceBrowser,
383
+ options: SnapshotOptions,
384
+ api_key: str,
385
+ ) -> Snapshot:
386
+ """Take snapshot using server-side API (Pro/Enterprise tier)"""
387
+ if not browser.page:
388
+ raise RuntimeError("Browser not started. Call browser.start() first.")
389
+
390
+ # Use browser.api_url if set, otherwise default
391
+ api_url = browser.api_url or SENTIENCE_API_URL
392
+
393
+ # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
394
+ # Even for API mode, we need the extension to collect raw data locally
395
+ BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
396
+
397
+ # Step 1: Get raw data from local extension (always happens locally)
398
+ raw_options: dict[str, Any] = {}
399
+ if options.screenshot is not False:
400
+ raw_options["screenshot"] = options.screenshot
401
+ # Important: also pass limit/filter to extension to keep raw_elements payload bounded.
402
+ # Without this, large pages (e.g. Amazon) can exceed gateway request size limits (HTTP 413).
403
+ if options.limit != 50:
404
+ raw_options["limit"] = options.limit
405
+ if options.filter is not None:
406
+ raw_options["filter"] = (
407
+ options.filter.model_dump() if hasattr(options.filter, "model_dump") else options.filter
408
+ )
409
+
410
+ raw_result = BrowserEvaluator.invoke(browser.page, SentienceMethod.SNAPSHOT, **raw_options)
411
+
412
+ # Save trace if requested (save raw data before API processing)
413
+ if options.save_trace:
414
+ _save_trace_to_file(raw_result.get("raw_elements", []), options.trace_path)
415
+
416
+ # Step 2: Send to server for smart ranking/filtering
417
+ # Use raw_elements (raw data) instead of elements (processed data)
418
+ # Server validates API key and applies proprietary ranking logic
419
+ payload = _build_snapshot_payload(raw_result, options)
420
+
96
421
  try:
97
- browser.page.wait_for_function(
422
+ api_result = _post_snapshot_to_gateway_sync(payload, api_key, api_url)
423
+
424
+ # Merge API result with local data (screenshot, etc.)
425
+ snapshot_data = _merge_api_result_with_local(api_result, raw_result)
426
+
427
+ # Create snapshot object
428
+ snapshot_obj = Snapshot(**snapshot_data)
429
+
430
+ # Show visual overlay if requested (use API-ranked elements)
431
+ if options.show_overlay:
432
+ elements = api_result.get("elements", [])
433
+ if elements:
434
+ browser.page.evaluate(
435
+ """
436
+ (elements) => {
437
+ if (window.sentience && window.sentience.showOverlay) {
438
+ window.sentience.showOverlay(elements, null);
439
+ }
440
+ }
441
+ """,
442
+ elements,
443
+ )
444
+
445
+ # Show grid overlay if requested
446
+ if options.show_grid:
447
+ # Get all grids (don't filter by grid_id here - we want to show all but highlight the target)
448
+ grids = snapshot_obj.get_grid_bounds(grid_id=None)
449
+ if grids:
450
+ grid_dicts = [grid.model_dump() for grid in grids]
451
+ # Pass grid_id as targetGridId to highlight it in red
452
+ target_grid_id = options.grid_id if options.grid_id is not None else None
453
+ browser.page.evaluate(
454
+ """
455
+ (grids, targetGridId) => {
456
+ if (window.sentience && window.sentience.showGrid) {
457
+ window.sentience.showGrid(grids, targetGridId);
458
+ } else {
459
+ console.warn('[SDK] showGrid not available in extension');
460
+ }
461
+ }
462
+ """,
463
+ grid_dicts,
464
+ target_grid_id,
465
+ )
466
+
467
+ return snapshot_obj
468
+ except requests.exceptions.RequestException as e:
469
+ raise RuntimeError(f"API request failed: {e}") from e
470
+
471
+
472
+ # ========== Async Snapshot Functions ==========
473
+
474
+
475
+ async def snapshot_async(
476
+ browser: AsyncSentienceBrowser,
477
+ options: SnapshotOptions | None = None,
478
+ ) -> Snapshot:
479
+ """
480
+ Take a snapshot of the current page (async)
481
+
482
+ Args:
483
+ browser: AsyncSentienceBrowser instance
484
+ options: Snapshot options (screenshot, limit, filter, etc.)
485
+ If None, uses default options.
486
+
487
+ Returns:
488
+ Snapshot object
489
+
490
+ Example:
491
+ # Basic snapshot with defaults
492
+ snap = await snapshot_async(browser)
493
+
494
+ # With options
495
+ snap = await snapshot_async(browser, SnapshotOptions(
496
+ screenshot=True,
497
+ limit=100,
498
+ show_overlay=True
499
+ ))
500
+ """
501
+ # Use default options if none provided
502
+ if options is None:
503
+ options = SnapshotOptions()
504
+
505
+ # Resolve API key: options.sentience_api_key takes precedence, then browser.api_key
506
+ # This allows browser-use users to pass api_key via options without SentienceBrowser
507
+ effective_api_key = options.sentience_api_key or browser.api_key
508
+
509
+ # Determine if we should use server-side API
510
+ should_use_api = (
511
+ options.use_api if options.use_api is not None else (effective_api_key is not None)
512
+ )
513
+
514
+ if should_use_api and effective_api_key:
515
+ # Use server-side API (Pro/Enterprise tier)
516
+ return await _snapshot_via_api_async(browser, options, effective_api_key)
517
+ else:
518
+ # Use local extension (Free tier)
519
+ return await _snapshot_via_extension_async(browser, options)
520
+
521
+
522
+ async def _snapshot_via_extension_async(
523
+ browser: AsyncSentienceBrowser,
524
+ options: SnapshotOptions,
525
+ ) -> Snapshot:
526
+ """Take snapshot using local extension (Free tier) - async"""
527
+ if not browser.page:
528
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
529
+
530
+ # Wait for extension injection to complete
531
+ try:
532
+ await _wait_for_function_with_nav_retry(
533
+ browser.page,
98
534
  "typeof window.sentience !== 'undefined'",
99
- timeout=5000, # 5 second timeout
535
+ timeout_ms=5000,
100
536
  )
101
537
  except Exception as e:
102
- # Gather diagnostics if wait fails
103
538
  try:
104
- diag = browser.page.evaluate(
539
+ diag = await _page_evaluate_with_nav_retry(
540
+ browser.page,
105
541
  """() => ({
106
542
  sentience_defined: typeof window.sentience !== 'undefined',
107
543
  extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
108
544
  url: window.location.href
109
- })"""
545
+ })""",
110
546
  )
111
547
  except Exception:
112
548
  diag = {"error": "Could not gather diagnostics"}
@@ -116,10 +552,14 @@ def _snapshot_via_extension(
116
552
  f"Is the extension loaded? Diagnostics: {diag}"
117
553
  ) from e
118
554
 
119
- # Build options dict for extension API (exclude save_trace/trace_path)
555
+ # Build options dict for extension API
120
556
  ext_options: dict[str, Any] = {}
121
557
  if options.screenshot is not False:
122
- ext_options["screenshot"] = options.screenshot
558
+ # Serialize ScreenshotConfig to dict if it's a Pydantic model
559
+ if hasattr(options.screenshot, "model_dump"):
560
+ ext_options["screenshot"] = options.screenshot.model_dump()
561
+ else:
562
+ ext_options["screenshot"] = options.screenshot
123
563
  if options.limit != 50:
124
564
  ext_options["limit"] = options.limit
125
565
  if options.filter is not None:
@@ -128,7 +568,8 @@ def _snapshot_via_extension(
128
568
  )
129
569
 
130
570
  # Call extension API
131
- result = browser.page.evaluate(
571
+ result = await _page_evaluate_with_nav_retry(
572
+ browser.page,
132
573
  """
133
574
  (options) => {
134
575
  return window.sentience.snapshot(options);
@@ -136,16 +577,33 @@ def _snapshot_via_extension(
136
577
  """,
137
578
  ext_options,
138
579
  )
580
+ if result.get("error"):
581
+ print(f" Snapshot error: {result.get('error')}")
139
582
 
140
583
  # Save trace if requested
141
584
  if options.save_trace:
142
585
  _save_trace_to_file(result.get("raw_elements", []), options.trace_path)
143
586
 
587
+ # Extract screenshot_format from data URL if not provided by extension
588
+ if result.get("screenshot") and not result.get("screenshot_format"):
589
+ screenshot_data_url = result.get("screenshot", "")
590
+ if screenshot_data_url.startswith("data:image/"):
591
+ # Extract format from "data:image/jpeg;base64,..." or "data:image/png;base64,..."
592
+ format_match = screenshot_data_url.split(";")[0].split("/")[-1]
593
+ if format_match in ["jpeg", "jpg", "png"]:
594
+ result["screenshot_format"] = "jpeg" if format_match in ["jpeg", "jpg"] else "png"
595
+
596
+ # Validate and parse with Pydantic
597
+ snapshot_obj = Snapshot(**result)
598
+
144
599
  # Show visual overlay if requested
145
600
  if options.show_overlay:
146
- raw_elements = result.get("raw_elements", [])
147
- if raw_elements:
148
- browser.page.evaluate(
601
+ # Prefer processed semantic elements for overlay (have bbox/importance/visual_cues).
602
+ # raw_elements may not match the overlay renderer's expected shape.
603
+ elements_for_overlay = result.get("elements") or result.get("raw_elements") or []
604
+ if elements_for_overlay:
605
+ await _page_evaluate_with_nav_retry(
606
+ browser.page,
149
607
  """
150
608
  (elements) => {
151
609
  if (window.sentience && window.sentience.showOverlay) {
@@ -153,43 +611,80 @@ def _snapshot_via_extension(
153
611
  }
154
612
  }
155
613
  """,
156
- raw_elements,
614
+ elements_for_overlay,
615
+ )
616
+
617
+ # Show grid overlay if requested
618
+ if options.show_grid:
619
+ # Get all grids (don't filter by grid_id here - we want to show all but highlight the target)
620
+ grids = snapshot_obj.get_grid_bounds(grid_id=None)
621
+ if grids:
622
+ grid_dicts = [grid.model_dump() for grid in grids]
623
+ # Pass grid_id as targetGridId to highlight it in red
624
+ target_grid_id = options.grid_id if options.grid_id is not None else None
625
+ await _page_evaluate_with_nav_retry(
626
+ browser.page,
627
+ """
628
+ (args) => {
629
+ const [grids, targetGridId] = args;
630
+ if (window.sentience && window.sentience.showGrid) {
631
+ window.sentience.showGrid(grids, targetGridId);
632
+ } else {
633
+ console.warn('[SDK] showGrid not available in extension');
634
+ }
635
+ }
636
+ """,
637
+ [grid_dicts, target_grid_id],
157
638
  )
158
639
 
159
- # Validate and parse with Pydantic
160
- snapshot_obj = Snapshot(**result)
161
640
  return snapshot_obj
162
641
 
163
642
 
164
- def _snapshot_via_api(
165
- browser: SentienceBrowser,
643
+ async def _snapshot_via_api_async(
644
+ browser: AsyncSentienceBrowser,
166
645
  options: SnapshotOptions,
646
+ api_key: str,
167
647
  ) -> Snapshot:
168
- """Take snapshot using server-side API (Pro/Enterprise tier)"""
648
+ """Take snapshot using server-side API (Pro/Enterprise tier) - async"""
169
649
  if not browser.page:
170
- raise RuntimeError("Browser not started. Call browser.start() first.")
650
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
171
651
 
172
- if not browser.api_key:
173
- raise ValueError("API key required for server-side processing")
652
+ # Use browser.api_url if set, otherwise default
653
+ api_url = browser.api_url or SENTIENCE_API_URL
174
654
 
175
- if not browser.api_url:
176
- raise ValueError("API URL required for server-side processing")
177
-
178
- # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
179
- # Even for API mode, we need the extension to collect raw data locally
655
+ # Wait for extension injection
180
656
  try:
181
- browser.page.wait_for_function("typeof window.sentience !== 'undefined'", timeout=5000)
657
+ await _wait_for_function_with_nav_retry(
658
+ browser.page,
659
+ "typeof window.sentience !== 'undefined'",
660
+ timeout_ms=5000,
661
+ )
182
662
  except Exception as e:
183
663
  raise RuntimeError(
184
664
  "Sentience extension failed to inject. Cannot collect raw data for API processing."
185
665
  ) from e
186
666
 
187
- # Step 1: Get raw data from local extension (always happens locally)
667
+ # Step 1: Get raw data from local extension (including screenshot)
188
668
  raw_options: dict[str, Any] = {}
669
+ screenshot_requested = False
189
670
  if options.screenshot is not False:
190
- raw_options["screenshot"] = options.screenshot
671
+ screenshot_requested = True
672
+ # Serialize ScreenshotConfig to dict if it's a Pydantic model
673
+ if hasattr(options.screenshot, "model_dump"):
674
+ raw_options["screenshot"] = options.screenshot.model_dump()
675
+ else:
676
+ raw_options["screenshot"] = options.screenshot
677
+ # Important: also pass limit/filter to extension to keep raw_elements payload bounded.
678
+ # Without this, large pages (e.g. Amazon) can exceed gateway request size limits (HTTP 413).
679
+ if options.limit != 50:
680
+ raw_options["limit"] = options.limit
681
+ if options.filter is not None:
682
+ raw_options["filter"] = (
683
+ options.filter.model_dump() if hasattr(options.filter, "model_dump") else options.filter
684
+ )
191
685
 
192
- raw_result = browser.page.evaluate(
686
+ raw_result = await _page_evaluate_with_nav_retry(
687
+ browser.page,
193
688
  """
194
689
  (options) => {
195
690
  return window.sentience.snapshot(options);
@@ -198,25 +693,33 @@ def _snapshot_via_api(
198
693
  raw_options,
199
694
  )
200
695
 
201
- # Save trace if requested (save raw data before API processing)
696
+ # Extract screenshot from raw result (extension captures it, but API doesn't return it)
697
+ screenshot_data_url = raw_result.get("screenshot")
698
+ screenshot_format = None
699
+ if screenshot_data_url:
700
+ # Extract format from data URL
701
+ if screenshot_data_url.startswith("data:image/"):
702
+ format_match = screenshot_data_url.split(";")[0].split("/")[-1]
703
+ if format_match in ["jpeg", "jpg", "png"]:
704
+ screenshot_format = "jpeg" if format_match in ["jpeg", "jpg"] else "png"
705
+
706
+ # Save trace if requested
202
707
  if options.save_trace:
203
708
  _save_trace_to_file(raw_result.get("raw_elements", []), options.trace_path)
204
709
 
205
710
  # Step 2: Send to server for smart ranking/filtering
206
- # Use raw_elements (raw data) instead of elements (processed data)
207
- # Server validates API key and applies proprietary ranking logic
208
711
  payload = {
209
- "raw_elements": raw_result.get("raw_elements", []), # Raw data needed for server processing
712
+ "raw_elements": raw_result.get("raw_elements", []),
210
713
  "url": raw_result.get("url", ""),
211
714
  "viewport": raw_result.get("viewport"),
212
- "goal": options.goal, # Optional goal/task description
715
+ "goal": options.goal,
213
716
  "options": {
214
717
  "limit": options.limit,
215
718
  "filter": options.filter.model_dump() if options.filter else None,
216
719
  },
217
720
  }
218
721
 
219
- # Check payload size before sending (server has 10MB limit)
722
+ # Check payload size
220
723
  payload_json = json.dumps(payload)
221
724
  payload_size = len(payload_json.encode("utf-8"))
222
725
  if payload_size > MAX_PAYLOAD_BYTES:
@@ -227,38 +730,51 @@ def _snapshot_via_api(
227
730
  )
228
731
 
229
732
  headers = {
230
- "Authorization": f"Bearer {browser.api_key}",
733
+ "Authorization": f"Bearer {api_key}",
231
734
  "Content-Type": "application/json",
232
735
  }
233
736
 
234
737
  try:
235
- response = requests.post(
236
- f"{browser.api_url}/v1/snapshot",
237
- data=payload_json, # Reuse already-serialized JSON
238
- headers=headers,
239
- timeout=30,
240
- )
241
- response.raise_for_status()
738
+ # Lazy import httpx - only needed for async API calls
739
+ import httpx
740
+
741
+ async with httpx.AsyncClient(timeout=30.0) as client:
742
+ response = await client.post(
743
+ f"{api_url}/v1/snapshot",
744
+ content=payload_json,
745
+ headers=headers,
746
+ )
747
+ response.raise_for_status()
748
+ api_result = response.json()
242
749
 
243
- api_result = response.json()
750
+ # Extract screenshot format from data URL if not provided
751
+ if screenshot_data_url and not screenshot_format:
752
+ if screenshot_data_url.startswith("data:image/"):
753
+ format_match = screenshot_data_url.split(";")[0].split("/")[-1]
754
+ if format_match in ["jpeg", "jpg", "png"]:
755
+ screenshot_format = "jpeg" if format_match in ["jpeg", "jpg"] else "png"
244
756
 
245
- # Merge API result with local data (screenshot, etc.)
757
+ # Merge API result with local data
246
758
  snapshot_data = {
247
759
  "status": api_result.get("status", "success"),
248
760
  "timestamp": api_result.get("timestamp"),
249
761
  "url": api_result.get("url", raw_result.get("url", "")),
250
762
  "viewport": api_result.get("viewport", raw_result.get("viewport")),
251
763
  "elements": api_result.get("elements", []),
252
- "screenshot": raw_result.get("screenshot"), # Keep local screenshot
253
- "screenshot_format": raw_result.get("screenshot_format"),
764
+ "screenshot": screenshot_data_url, # Use the extracted screenshot
765
+ "screenshot_format": screenshot_format, # Use the extracted format
254
766
  "error": api_result.get("error"),
255
767
  }
256
768
 
257
- # Show visual overlay if requested (use API-ranked elements)
769
+ # Create snapshot object
770
+ snapshot_obj = Snapshot(**snapshot_data)
771
+
772
+ # Show visual overlay if requested
258
773
  if options.show_overlay:
259
774
  elements = api_result.get("elements", [])
260
775
  if elements:
261
- browser.page.evaluate(
776
+ await _page_evaluate_with_nav_retry(
777
+ browser.page,
262
778
  """
263
779
  (elements) => {
264
780
  if (window.sentience && window.sentience.showOverlay) {
@@ -269,6 +785,34 @@ def _snapshot_via_api(
269
785
  elements,
270
786
  )
271
787
 
272
- return Snapshot(**snapshot_data)
273
- except requests.exceptions.RequestException as e:
788
+ # Show grid overlay if requested
789
+ if options.show_grid:
790
+ # Get all grids (don't filter by grid_id here - we want to show all but highlight the target)
791
+ grids = snapshot_obj.get_grid_bounds(grid_id=None)
792
+ if grids:
793
+ grid_dicts = [grid.model_dump() for grid in grids]
794
+ # Pass grid_id as targetGridId to highlight it in red
795
+ target_grid_id = options.grid_id if options.grid_id is not None else None
796
+ await _page_evaluate_with_nav_retry(
797
+ browser.page,
798
+ """
799
+ (args) => {
800
+ const [grids, targetGridId] = args;
801
+ if (window.sentience && window.sentience.showGrid) {
802
+ window.sentience.showGrid(grids, targetGridId);
803
+ } else {
804
+ console.warn('[SDK] showGrid not available in extension');
805
+ }
806
+ }
807
+ """,
808
+ [grid_dicts, target_grid_id],
809
+ )
810
+
811
+ return snapshot_obj
812
+ except ImportError:
813
+ # Fallback to requests if httpx not available (shouldn't happen in async context)
814
+ raise RuntimeError(
815
+ "httpx is required for async API calls. Install it with: pip install httpx"
816
+ )
817
+ except Exception as e:
274
818
  raise RuntimeError(f"API request failed: {e}")