sentienceapi 0.90.16__py3-none-any.whl → 0.92.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (61)
  1. sentience/__init__.py +14 -5
  2. sentience/action_executor.py +215 -0
  3. sentience/actions.py +408 -25
  4. sentience/agent.py +802 -293
  5. sentience/agent_config.py +3 -0
  6. sentience/async_api.py +83 -1142
  7. sentience/base_agent.py +95 -0
  8. sentience/browser.py +484 -1
  9. sentience/browser_evaluator.py +299 -0
  10. sentience/cloud_tracing.py +457 -33
  11. sentience/conversational_agent.py +77 -43
  12. sentience/element_filter.py +136 -0
  13. sentience/expect.py +98 -2
  14. sentience/extension/background.js +56 -185
  15. sentience/extension/content.js +117 -289
  16. sentience/extension/injected_api.js +799 -1374
  17. sentience/extension/manifest.json +1 -1
  18. sentience/extension/pkg/sentience_core.js +190 -396
  19. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  20. sentience/extension/release.json +47 -47
  21. sentience/formatting.py +9 -53
  22. sentience/inspector.py +183 -1
  23. sentience/llm_interaction_handler.py +191 -0
  24. sentience/llm_provider.py +74 -52
  25. sentience/llm_provider_utils.py +120 -0
  26. sentience/llm_response_builder.py +153 -0
  27. sentience/models.py +60 -1
  28. sentience/overlay.py +109 -2
  29. sentience/protocols.py +228 -0
  30. sentience/query.py +1 -1
  31. sentience/read.py +95 -3
  32. sentience/recorder.py +223 -3
  33. sentience/schemas/trace_v1.json +102 -9
  34. sentience/screenshot.py +48 -2
  35. sentience/sentience_methods.py +86 -0
  36. sentience/snapshot.py +291 -38
  37. sentience/snapshot_diff.py +141 -0
  38. sentience/text_search.py +119 -5
  39. sentience/trace_event_builder.py +129 -0
  40. sentience/trace_file_manager.py +197 -0
  41. sentience/trace_indexing/index_schema.py +95 -7
  42. sentience/trace_indexing/indexer.py +117 -14
  43. sentience/tracer_factory.py +119 -6
  44. sentience/tracing.py +172 -8
  45. sentience/utils/__init__.py +40 -0
  46. sentience/utils/browser.py +46 -0
  47. sentience/utils/element.py +257 -0
  48. sentience/utils/formatting.py +59 -0
  49. sentience/utils.py +1 -1
  50. sentience/visual_agent.py +2056 -0
  51. sentience/wait.py +68 -2
  52. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/METADATA +2 -1
  53. sentienceapi-0.92.2.dist-info/RECORD +65 -0
  54. sentience/extension/test-content.js +0 -4
  55. sentienceapi-0.90.16.dist-info/RECORD +0 -50
  56. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/WHEEL +0 -0
  57. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/entry_points.txt +0 -0
  58. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE +0 -0
  59. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-APACHE +0 -0
  60. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-MIT +0 -0
  61. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/top_level.txt +0 -0
sentience/snapshot.py CHANGED
@@ -2,6 +2,7 @@
2
2
  Snapshot functionality - calls window.sentience.snapshot() or server-side API
3
3
  """
4
4
 
5
+ import asyncio
5
6
  import json
6
7
  import os
7
8
  import time
@@ -9,8 +10,10 @@ from typing import Any, Optional
9
10
 
10
11
  import requests
11
12
 
12
- from .browser import SentienceBrowser
13
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
14
+ from .browser_evaluator import BrowserEvaluator
13
15
  from .models import Snapshot, SnapshotOptions
16
+ from .sentience_methods import SentienceMethod
14
17
 
15
18
  # Maximum payload size for API requests (10MB server limit)
16
19
  MAX_PAYLOAD_BYTES = 10 * 1024 * 1024
@@ -93,33 +96,16 @@ def _snapshot_via_extension(
93
96
  # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
94
97
  # The new architecture loads injected_api.js asynchronously, so window.sentience
95
98
  # may not be immediately available after page load
96
- try:
97
- browser.page.wait_for_function(
98
- "typeof window.sentience !== 'undefined'",
99
- timeout=5000, # 5 second timeout
100
- )
101
- except Exception as e:
102
- # Gather diagnostics if wait fails
103
- try:
104
- diag = browser.page.evaluate(
105
- """() => ({
106
- sentience_defined: typeof window.sentience !== 'undefined',
107
- extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
108
- url: window.location.href
109
- })"""
110
- )
111
- except Exception:
112
- diag = {"error": "Could not gather diagnostics"}
113
-
114
- raise RuntimeError(
115
- f"Sentience extension failed to inject window.sentience API. "
116
- f"Is the extension loaded? Diagnostics: {diag}"
117
- ) from e
99
+ BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
118
100
 
119
101
  # Build options dict for extension API (exclude save_trace/trace_path)
120
102
  ext_options: dict[str, Any] = {}
121
103
  if options.screenshot is not False:
122
- ext_options["screenshot"] = options.screenshot
104
+ # Serialize ScreenshotConfig to dict if it's a Pydantic model
105
+ if hasattr(options.screenshot, "model_dump"):
106
+ ext_options["screenshot"] = options.screenshot.model_dump()
107
+ else:
108
+ ext_options["screenshot"] = options.screenshot
123
109
  if options.limit != 50:
124
110
  ext_options["limit"] = options.limit
125
111
  if options.filter is not None:
@@ -177,26 +163,14 @@ def _snapshot_via_api(
177
163
 
178
164
  # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
179
165
  # Even for API mode, we need the extension to collect raw data locally
180
- try:
181
- browser.page.wait_for_function("typeof window.sentience !== 'undefined'", timeout=5000)
182
- except Exception as e:
183
- raise RuntimeError(
184
- "Sentience extension failed to inject. Cannot collect raw data for API processing."
185
- ) from e
166
+ BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
186
167
 
187
168
  # Step 1: Get raw data from local extension (always happens locally)
188
169
  raw_options: dict[str, Any] = {}
189
170
  if options.screenshot is not False:
190
171
  raw_options["screenshot"] = options.screenshot
191
172
 
192
- raw_result = browser.page.evaluate(
193
- """
194
- (options) => {
195
- return window.sentience.snapshot(options);
196
- }
197
- """,
198
- raw_options,
199
- )
173
+ raw_result = BrowserEvaluator.invoke(browser.page, SentienceMethod.SNAPSHOT, **raw_options)
200
174
 
201
175
  # Save trace if requested (save raw data before API processing)
202
176
  if options.save_trace:
@@ -272,3 +246,282 @@ def _snapshot_via_api(
272
246
  return Snapshot(**snapshot_data)
273
247
  except requests.exceptions.RequestException as e:
274
248
  raise RuntimeError(f"API request failed: {e}")
249
+
250
+
251
+ # ========== Async Snapshot Functions ==========
252
+
253
+
254
async def snapshot_async(
    browser: AsyncSentienceBrowser,
    options: SnapshotOptions | None = None,
) -> Snapshot:
    """
    Capture a snapshot of the page currently loaded in ``browser`` (async).

    The call is routed to one of two back ends:

    * the server-side API (Pro/Enterprise tier) when API use is wanted *and*
      the browser carries an API key;
    * the local extension (Free tier) in every other case.

    "Wanted" means ``options.use_api`` when it is set explicitly; when it is
    ``None`` the presence of ``browser.api_key`` decides.

    Args:
        browser: AsyncSentienceBrowser instance
        options: Snapshot options (screenshot, limit, filter, etc.).
            A default ``SnapshotOptions()`` is used when omitted.

    Returns:
        Snapshot object

    Example:
        # Basic snapshot with defaults
        snap = await snapshot_async(browser)

        # With options
        snap = await snapshot_async(browser, SnapshotOptions(
            screenshot=True,
            limit=100,
            show_overlay=True
        ))
    """
    opts = SnapshotOptions() if options is None else options

    # An explicit use_api setting wins; otherwise an API key opts the caller in.
    if opts.use_api is None:
        wants_api = browser.api_key is not None
    else:
        wants_api = opts.use_api

    # Without a key the API cannot be used, whatever use_api says.
    if not (wants_api and browser.api_key):
        return await _snapshot_via_extension_async(browser, opts)

    return await _snapshot_via_api_async(browser, opts)
295
+
296
+
297
async def _snapshot_via_extension_async(
    browser: AsyncSentienceBrowser,
    options: SnapshotOptions,
) -> Snapshot:
    """
    Take a snapshot through the in-page extension API (Free tier) - async.

    Steps, in order: wait for ``window.sentience`` to be injected, call
    ``window.sentience.snapshot(options)``, optionally save the raw elements
    as a trace, optionally draw the overlay, back-fill ``screenshot_format``
    from the screenshot data URL, and validate the result as a ``Snapshot``.

    Args:
        browser: Started AsyncSentienceBrowser (``browser.page`` must be set).
        options: Snapshot options controlling screenshot, limit, filter,
            trace saving and overlay.

    Returns:
        Snapshot built from the extension's result dict.

    Raises:
        RuntimeError: If the browser has no page, or ``window.sentience`` is
            still undefined after 5 seconds.
    """

    def _plain(value: Any) -> Any:
        # Pydantic models are serialised to dicts; anything else passes through.
        return value.model_dump() if hasattr(value, "model_dump") else value

    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    # The extension injects its API asynchronously, so poll for it first.
    try:
        await browser.page.wait_for_function(
            "typeof window.sentience !== 'undefined'",
            timeout=5000,
        )
    except Exception as e:
        # Best-effort diagnostics for the error message; never mask the cause.
        try:
            diag = await browser.page.evaluate(
                """() => ({
                    sentience_defined: typeof window.sentience !== 'undefined',
                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
                    url: window.location.href
                })"""
            )
        except Exception:
            diag = {"error": "Could not gather diagnostics"}

        raise RuntimeError(
            f"Sentience extension failed to inject window.sentience API. "
            f"Is the extension loaded? Diagnostics: {diag}"
        ) from e

    # Only non-default settings are forwarded to the extension.
    ext_options: dict[str, Any] = {}
    if options.screenshot is not False:
        ext_options["screenshot"] = _plain(options.screenshot)
    if options.limit != 50:
        ext_options["limit"] = options.limit
    if options.filter is not None:
        ext_options["filter"] = _plain(options.filter)

    result = await browser.page.evaluate(
        """
        (options) => {
            return window.sentience.snapshot(options);
        }
        """,
        ext_options,
    )
    error = result.get("error")
    if error:
        print(f" Snapshot error: {error}")

    if options.save_trace:
        _save_trace_to_file(result.get("raw_elements", []), options.trace_path)

    if options.show_overlay:
        overlay_elements = result.get("raw_elements", [])
        if overlay_elements:
            await browser.page.evaluate(
                """
                (elements) => {
                    if (window.sentience && window.sentience.showOverlay) {
                        window.sentience.showOverlay(elements, null);
                    }
                }
                """,
                overlay_elements,
            )

    # Derive the format from "data:image/<subtype>;base64,..." when the
    # extension did not report one itself.
    if result.get("screenshot") and not result.get("screenshot_format"):
        data_url = result.get("screenshot", "")
        if data_url.startswith("data:image/"):
            header, _, _ = data_url.partition(";")
            subtype = header.rsplit("/", 1)[-1]
            if subtype in ("jpeg", "jpg"):
                result["screenshot_format"] = "jpeg"
            elif subtype == "png":
                result["screenshot_format"] = "png"

    # Pydantic validates the result dict while building the model.
    return Snapshot(**result)
386
+
387
+
388
async def _snapshot_via_api_async(
    browser: AsyncSentienceBrowser,
    options: SnapshotOptions,
) -> Snapshot:
    """
    Take snapshot using server-side API (Pro/Enterprise tier) - async.

    Raw element data (and the screenshot, if requested) is always collected
    locally through ``window.sentience.snapshot``; the raw elements are then
    POSTed to ``{browser.api_url}/v1/snapshot`` for ranking/filtering and the
    response is merged with the locally captured screenshot.

    Args:
        browser: Started AsyncSentienceBrowser with ``api_key`` and ``api_url``.
        options: Snapshot options (screenshot, limit, filter, goal,
            save_trace/trace_path, show_overlay).

    Returns:
        Snapshot built from the API response plus the local screenshot.

    Raises:
        RuntimeError: If the browser is not started, the extension is not
            injected within 5 seconds, ``httpx`` is not installed, or the
            API round trip / result handling fails.
        ValueError: If the API key or URL is missing, or the serialized
            payload exceeds ``MAX_PAYLOAD_BYTES``.
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    if not browser.api_key:
        raise ValueError("API key required for server-side processing")

    if not browser.api_url:
        raise ValueError("API URL required for server-side processing")

    # Even in API mode the extension must be present to collect raw data.
    try:
        await browser.page.wait_for_function(
            "typeof window.sentience !== 'undefined'", timeout=5000
        )
    except Exception as e:
        raise RuntimeError(
            "Sentience extension failed to inject. Cannot collect raw data for API processing."
        ) from e

    # Step 1: Get raw data from local extension (including screenshot)
    raw_options: dict[str, Any] = {}
    if options.screenshot is not False:
        # Serialize ScreenshotConfig to dict if it's a Pydantic model
        if hasattr(options.screenshot, "model_dump"):
            raw_options["screenshot"] = options.screenshot.model_dump()
        else:
            raw_options["screenshot"] = options.screenshot

    raw_result = await browser.page.evaluate(
        """
        (options) => {
            return window.sentience.snapshot(options);
        }
        """,
        raw_options,
    )

    # The extension captures the screenshot but the API does not return it,
    # so keep the local copy and derive its format from the data URL
    # ("data:image/jpeg;base64,..." or "data:image/png;base64,...").
    screenshot_data_url = raw_result.get("screenshot")
    screenshot_format = None
    if screenshot_data_url and screenshot_data_url.startswith("data:image/"):
        format_match = screenshot_data_url.split(";")[0].split("/")[-1]
        if format_match in ("jpeg", "jpg"):
            screenshot_format = "jpeg"
        elif format_match == "png":
            screenshot_format = "png"

    # Save trace if requested (raw data, before API processing)
    if options.save_trace:
        _save_trace_to_file(raw_result.get("raw_elements", []), options.trace_path)

    # Accept either a Pydantic model or an already-plain value for the
    # filter, matching how the extension path serializes it.
    filter_payload = None
    if options.filter:
        if hasattr(options.filter, "model_dump"):
            filter_payload = options.filter.model_dump()
        else:
            filter_payload = options.filter

    # Step 2: Send to server for smart ranking/filtering
    payload = {
        "raw_elements": raw_result.get("raw_elements", []),
        "url": raw_result.get("url", ""),
        "viewport": raw_result.get("viewport"),
        "goal": options.goal,
        "options": {
            "limit": options.limit,
            "filter": filter_payload,
        },
    }

    # Check payload size before sending; the limit applies to the UTF-8 body.
    payload_json = json.dumps(payload)
    payload_size = len(payload_json.encode("utf-8"))
    if payload_size > MAX_PAYLOAD_BYTES:
        raise ValueError(
            f"Payload size ({payload_size / 1024 / 1024:.2f}MB) exceeds server limit "
            f"({MAX_PAYLOAD_BYTES / 1024 / 1024:.0f}MB). "
            f"Try reducing the number of elements on the page or filtering elements."
        )

    headers = {
        "Authorization": f"Bearer {browser.api_key}",
        "Content-Type": "application/json",
    }

    # Lazy import: httpx is only needed for async API calls. There is no
    # fallback transport, so a missing package is reported to the caller.
    try:
        import httpx
    except ImportError as e:
        raise RuntimeError(
            "httpx is required for async API calls. Install it with: pip install httpx"
        ) from e

    # Everything after the request stays inside the try so callers keep
    # receiving RuntimeError for any failure in this phase.
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{browser.api_url}/v1/snapshot",
                content=payload_json,
                headers=headers,
            )
            response.raise_for_status()
            api_result = response.json()

        # Merge API result with local data
        snapshot_data = {
            "status": api_result.get("status", "success"),
            "timestamp": api_result.get("timestamp"),
            "url": api_result.get("url", raw_result.get("url", "")),
            "viewport": api_result.get("viewport", raw_result.get("viewport")),
            "elements": api_result.get("elements", []),
            "screenshot": screenshot_data_url,  # locally captured screenshot
            "screenshot_format": screenshot_format,  # derived from the data URL
            "error": api_result.get("error"),
        }

        # Show visual overlay if requested
        if options.show_overlay:
            elements = api_result.get("elements", [])
            if elements:
                await browser.page.evaluate(
                    """
                    (elements) => {
                        if (window.sentience && window.sentience.showOverlay) {
                            window.sentience.showOverlay(elements, null);
                        }
                    }
                    """,
                    elements,
                )

        return Snapshot(**snapshot_data)
    except Exception as e:
        raise RuntimeError(f"API request failed: {e}") from e
sentience/snapshot_diff.py ADDED
@@ -0,0 +1,141 @@
1
+ """
2
+ Snapshot comparison utilities for diff_status detection.
3
+
4
+ Implements change detection logic for the Diff Overlay feature.
5
+ """
6
+
7
+ from typing import Literal
8
+
9
+ from .models import Element, Snapshot
10
+
11
+
12
class SnapshotDiff:
    """
    Utility for comparing snapshots and computing diff_status for elements.

    Implements the logic described in DIFF_STATUS_GAP_ANALYSIS.md:
    - ADDED: Element exists in current but not in previous
    - REMOVED: Element existed in previous but not in current
    - MODIFIED: Element exists in both and its content changed
      (text, role, or the is_primary / is_clickable visual cues)
    - MOVED: Element exists in both, content is unchanged, but its bounding
      box changed beyond the threshold (position *or* size)

    Elements are matched between snapshots by their ``id``.
    """

    @staticmethod
    def _has_bbox_changed(el1: Element, el2: Element, threshold: float = 5.0) -> bool:
        """
        Check if element's bounding box has changed significantly.

        Args:
            el1: First element
            el2: Second element
            threshold: Change threshold in pixels (default: 5.0); a
                difference must be strictly greater than this to count

        Returns:
            True if x, y, width or height differs by more than threshold
        """
        deltas = (
            el1.bbox.x - el2.bbox.x,
            el1.bbox.y - el2.bbox.y,
            el1.bbox.width - el2.bbox.width,
            el1.bbox.height - el2.bbox.height,
        )
        return any(abs(delta) > threshold for delta in deltas)

    @staticmethod
    def _has_content_changed(el1: Element, el2: Element) -> bool:
        """
        Check if element's content has changed.

        Args:
            el1: First element
            el2: Second element

        Returns:
            True if text, role, or the is_primary / is_clickable visual
            cues differ
        """
        return (
            el1.text != el2.text
            or el1.role != el2.role
            or el1.visual_cues.is_primary != el2.visual_cues.is_primary
            or el1.visual_cues.is_clickable != el2.visual_cues.is_clickable
        )

    @staticmethod
    def _classify_change(
        current_el: Element,
        previous_el: Element,
        threshold: float = 5.0,
    ) -> Literal["MODIFIED", "MOVED"] | None:
        """
        Classify how an element present in both snapshots has changed.

        A content change always yields MODIFIED, whether or not the bounding
        box changed as well; MOVED is reported only for a bbox-only change.

        Returns:
            "MODIFIED", "MOVED", or None when nothing changed.
        """
        if SnapshotDiff._has_content_changed(current_el, previous_el):
            return "MODIFIED"
        if SnapshotDiff._has_bbox_changed(current_el, previous_el, threshold):
            return "MOVED"
        return None

    @staticmethod
    def _with_status(el: Element, status: str | None) -> Element:
        """Return a copy of ``el`` rebuilt with ``diff_status`` set to ``status``."""
        el_dict = el.model_dump()
        el_dict["diff_status"] = status
        return Element(**el_dict)

    @staticmethod
    def compute_diff_status(
        current: Snapshot,
        previous: Snapshot | None,
        bbox_threshold: float = 5.0,
    ) -> list[Element]:
        """
        Compare current snapshot with previous and set diff_status on elements.

        Args:
            current: Current snapshot
            previous: Previous snapshot (None if this is the first snapshot)
            bbox_threshold: Pixel threshold passed to the bounding-box
                comparison (default: 5.0)

        Returns:
            New Element copies with diff_status set: every current element in
            its original order, followed by the REMOVED elements in the order
            they appeared in the previous snapshot. Unchanged elements carry
            diff_status None.
        """
        # If no previous snapshot, all current elements are ADDED
        if previous is None:
            return [SnapshotDiff._with_status(el, "ADDED") for el in current.elements]

        # Lookup structures keyed by element ID
        previous_by_id = {el.id: el for el in previous.elements}
        current_ids = {el.id for el in current.elements}

        result: list[Element] = []

        # Process current elements
        for el in current.elements:
            if el.id not in previous_by_id:
                status = "ADDED"
            else:
                # No change leaves diff_status as None (frontend expects undefined)
                status = SnapshotDiff._classify_change(
                    el, previous_by_id[el.id], bbox_threshold
                )
            result.append(SnapshotDiff._with_status(el, status))

        # Process removed elements (existed in previous but not in current).
        # Walking the dict, not a set difference, keeps the output order
        # deterministic and tied to the previous snapshot.
        for prev_id, prev_el in previous_by_id.items():
            if prev_id not in current_ids:
                result.append(SnapshotDiff._with_status(prev_el, "REMOVED"))

        return result
sentience/text_search.py CHANGED
@@ -2,7 +2,8 @@
2
2
  Text search utilities - find text and get pixel coordinates
3
3
  """
4
4
 
5
- from .browser import SentienceBrowser
5
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
6
+ from .browser_evaluator import BrowserEvaluator
6
7
  from .models import TextRectSearchResult
7
8
 
8
9
 
@@ -88,18 +89,131 @@ def find_text_rect(
88
89
  # Limit max_results to prevent performance issues
89
90
  max_results = min(max_results, 100)
90
91
 
92
+ # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
93
+ # The new architecture loads injected_api.js asynchronously, so window.sentience
94
+ # may not be immediately available after page load
95
+ BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
96
+
97
+ # Verify findTextRect method exists (for older extension versions that don't have it)
98
+ if not BrowserEvaluator.verify_method_exists(browser.page, SentienceMethod.FIND_TEXT_RECT):
99
+ raise RuntimeError(
100
+ "window.sentience.findTextRect is not available. "
101
+ "Please update the Sentience extension to the latest version."
102
+ )
103
+
104
+ # Call the extension's findTextRect method
105
+ result_dict = browser.page.evaluate(
106
+ """
107
+ (options) => {
108
+ return window.sentience.findTextRect(options);
109
+ }
110
+ """,
111
+ {
112
+ "text": text,
113
+ "caseSensitive": case_sensitive,
114
+ "wholeWord": whole_word,
115
+ "maxResults": max_results,
116
+ },
117
+ )
118
+
119
+ # Parse and validate with Pydantic
120
+ return TextRectSearchResult(**result_dict)
121
+
122
+
123
+ async def find_text_rect_async(
124
+ browser: AsyncSentienceBrowser,
125
+ text: str,
126
+ case_sensitive: bool = False,
127
+ whole_word: bool = False,
128
+ max_results: int = 10,
129
+ ) -> TextRectSearchResult:
130
+ """
131
+ Find all occurrences of text on the page and get their exact pixel coordinates (async).
132
+
133
+ This function searches for text in all visible text nodes on the page and returns
134
+ the bounding rectangles for each match. Useful for:
135
+ - Finding specific UI elements by their text content
136
+ - Locating buttons, links, or labels without element IDs
137
+ - Getting exact coordinates for click automation
138
+ - Highlighting search results visually
139
+
140
+ Args:
141
+ browser: AsyncSentienceBrowser instance
142
+ text: Text to search for (required)
143
+ case_sensitive: If True, search is case-sensitive (default: False)
144
+ whole_word: If True, only match whole words surrounded by whitespace (default: False)
145
+ max_results: Maximum number of matches to return (default: 10, max: 100)
146
+
147
+ Returns:
148
+ TextRectSearchResult with:
149
+ - status: "success" or "error"
150
+ - query: The search text
151
+ - case_sensitive: Whether search was case-sensitive
152
+ - whole_word: Whether whole-word matching was used
153
+ - matches: Number of matches found
154
+ - results: List of TextMatch objects, each containing:
155
+ - text: The matched text
156
+ - rect: Absolute rectangle (with scroll offset)
157
+ - viewport_rect: Viewport-relative rectangle
158
+ - context: Surrounding text (before/after)
159
+ - in_viewport: Whether visible in current viewport
160
+ - viewport: Current viewport dimensions and scroll position
161
+ - error: Error message if status is "error"
162
+
163
+ Examples:
164
+ # Find "Sign In" button
165
+ result = await find_text_rect_async(browser, "Sign In")
166
+ if result.status == "success" and result.results:
167
+ first_match = result.results[0]
168
+ print(f"Found at: ({first_match.rect.x}, {first_match.rect.y})")
169
+ print(f"Size: {first_match.rect.width}x{first_match.rect.height}")
170
+ print(f"In viewport: {first_match.in_viewport}")
171
+
172
+ # Case-sensitive search
173
+ result = await find_text_rect_async(browser, "LOGIN", case_sensitive=True)
174
+
175
+ # Whole word only
176
+ result = await find_text_rect_async(browser, "log", whole_word=True) # Won't match "login"
177
+
178
+ # Find all matches and click the first visible one
179
+ result = await find_text_rect_async(browser, "Buy Now", max_results=5)
180
+ if result.status == "success" and result.results:
181
+ for match in result.results:
182
+ if match.in_viewport:
183
+ # Use click_rect_async from actions module
184
+ from sentience.actions import click_rect_async
185
+ click_result = await click_rect_async(browser, {
186
+ "x": match.rect.x,
187
+ "y": match.rect.y,
188
+ "w": match.rect.width,
189
+ "h": match.rect.height
190
+ })
191
+ break
192
+ """
193
+ if not browser.page:
194
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
195
+
196
+ if not text or not text.strip():
197
+ return TextRectSearchResult(
198
+ status="error",
199
+ error="Text parameter is required and cannot be empty",
200
+ )
201
+
202
+ # Limit max_results to prevent performance issues
203
+ max_results = min(max_results, 100)
204
+
91
205
  # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
92
206
  # The new architecture loads injected_api.js asynchronously, so window.sentience
93
207
  # may not be immediately available after page load
94
208
  try:
95
- browser.page.wait_for_function(
209
+ await browser.page.wait_for_function(
96
210
  "typeof window.sentience !== 'undefined'",
97
211
  timeout=5000, # 5 second timeout
98
212
  )
99
213
  except Exception as e:
100
214
  # Gather diagnostics if wait fails
101
215
  try:
102
- diag = browser.page.evaluate(
216
+ diag = await browser.page.evaluate(
103
217
  """() => ({
104
218
  sentience_defined: typeof window.sentience !== 'undefined',
105
219
  extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
@@ -116,7 +230,7 @@ def find_text_rect(
116
230
 
117
231
  # Verify findTextRect method exists (for older extension versions that don't have it)
118
232
  try:
119
- has_find_text_rect = browser.page.evaluate(
233
+ has_find_text_rect = await browser.page.evaluate(
120
234
  "typeof window.sentience.findTextRect !== 'undefined'"
121
235
  )
122
236
  if not has_find_text_rect:
@@ -130,7 +244,7 @@ def find_text_rect(
130
244
  raise RuntimeError(f"Failed to verify findTextRect availability: {e}") from e
131
245
 
132
246
  # Call the extension's findTextRect method
133
- result_dict = browser.page.evaluate(
247
+ result_dict = await browser.page.evaluate(
134
248
  """
135
249
  (options) => {
136
250
  return window.sentience.findTextRect(options);