sentienceapi 0.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (82) hide show
  1. sentience/__init__.py +253 -0
  2. sentience/_extension_loader.py +195 -0
  3. sentience/action_executor.py +215 -0
  4. sentience/actions.py +1020 -0
  5. sentience/agent.py +1181 -0
  6. sentience/agent_config.py +46 -0
  7. sentience/agent_runtime.py +424 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +108 -0
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +343 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +427 -0
  21. sentience/base_agent.py +196 -0
  22. sentience/browser.py +1215 -0
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cli.py +130 -0
  26. sentience/cloud_tracing.py +807 -0
  27. sentience/constants.py +6 -0
  28. sentience/conversational_agent.py +543 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +188 -0
  31. sentience/extension/background.js +104 -0
  32. sentience/extension/content.js +161 -0
  33. sentience/extension/injected_api.js +914 -0
  34. sentience/extension/manifest.json +36 -0
  35. sentience/extension/pkg/sentience_core.d.ts +51 -0
  36. sentience/extension/pkg/sentience_core.js +323 -0
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  39. sentience/extension/release.json +115 -0
  40. sentience/formatting.py +15 -0
  41. sentience/generator.py +202 -0
  42. sentience/inspector.py +367 -0
  43. sentience/llm_interaction_handler.py +191 -0
  44. sentience/llm_provider.py +875 -0
  45. sentience/llm_provider_utils.py +120 -0
  46. sentience/llm_response_builder.py +153 -0
  47. sentience/models.py +846 -0
  48. sentience/ordinal.py +280 -0
  49. sentience/overlay.py +222 -0
  50. sentience/protocols.py +228 -0
  51. sentience/query.py +303 -0
  52. sentience/read.py +188 -0
  53. sentience/recorder.py +589 -0
  54. sentience/schemas/trace_v1.json +335 -0
  55. sentience/screenshot.py +100 -0
  56. sentience/sentience_methods.py +86 -0
  57. sentience/snapshot.py +706 -0
  58. sentience/snapshot_diff.py +126 -0
  59. sentience/text_search.py +262 -0
  60. sentience/trace_event_builder.py +148 -0
  61. sentience/trace_file_manager.py +197 -0
  62. sentience/trace_indexing/__init__.py +27 -0
  63. sentience/trace_indexing/index_schema.py +199 -0
  64. sentience/trace_indexing/indexer.py +414 -0
  65. sentience/tracer_factory.py +322 -0
  66. sentience/tracing.py +449 -0
  67. sentience/utils/__init__.py +40 -0
  68. sentience/utils/browser.py +46 -0
  69. sentience/utils/element.py +257 -0
  70. sentience/utils/formatting.py +59 -0
  71. sentience/utils.py +296 -0
  72. sentience/verification.py +380 -0
  73. sentience/visual_agent.py +2058 -0
  74. sentience/wait.py +139 -0
  75. sentienceapi-0.95.0.dist-info/METADATA +984 -0
  76. sentienceapi-0.95.0.dist-info/RECORD +82 -0
  77. sentienceapi-0.95.0.dist-info/WHEEL +5 -0
  78. sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
  79. sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
  80. sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
  81. sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
  82. sentienceapi-0.95.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,984 @@
1
+ Metadata-Version: 2.4
2
+ Name: sentienceapi
3
+ Version: 0.95.0
4
+ Summary: Python SDK for Sentience AI Agent Browser Automation
5
+ Author: Sentience Team
6
+ License: MIT OR Apache-2.0
7
+ Project-URL: Homepage, https://github.com/SentienceAPI/sentience-python
8
+ Project-URL: Repository, https://github.com/SentienceAPI/sentience-python
9
+ Project-URL: Issues, https://github.com/SentienceAPI/sentience-python/issues
10
+ Keywords: browser-automation,playwright,ai-agent,web-automation,sentience
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Requires-Python: >=3.11
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ License-File: LICENSE-APACHE
21
+ License-File: LICENSE-MIT
22
+ Requires-Dist: playwright>=1.40.0
23
+ Requires-Dist: pydantic>=2.0.0
24
+ Requires-Dist: jsonschema>=4.0.0
25
+ Requires-Dist: requests>=2.31.0
26
+ Requires-Dist: httpx>=0.25.0
27
+ Requires-Dist: playwright-stealth>=1.0.6
28
+ Requires-Dist: markdownify>=0.11.6
29
+ Provides-Extra: browser-use
30
+ Requires-Dist: browser-use>=0.1.40; extra == "browser-use"
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
33
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # Sentience Python SDK
37
+
38
+ **Semantic geometry grounding for deterministic, debuggable AI web agents with time-travel traces.**
39
+
40
+ ## 📦 Installation
41
+
42
+ ```bash
43
+ # Install from PyPI
44
+ pip install sentienceapi
45
+
46
+ # Install Playwright browsers (required)
47
+ playwright install chromium
48
+
49
+ # For LLM Agent features (optional)
50
+ pip install openai # For OpenAI models
51
+ pip install anthropic # For Claude models
52
+ pip install transformers torch # For local LLMs
53
+ ```
54
+
55
+ **For local development:**
56
+ ```bash
57
+ pip install -e .
58
+ ```
59
+
60
+ ## 🚀 Quick Start: Choose Your Abstraction Level
61
+
62
+ Sentience SDK offers **three abstraction levels** - use what fits your needs:
63
+
64
+ <details>
65
+ <summary><b>🎯 Level 3: Natural Language (Easiest)</b> - For non-technical users</summary>
66
+
67
+ ```python
68
+ from sentience import SentienceBrowser, ConversationalAgent
69
+ from sentience.llm_provider import OpenAIProvider
70
+
71
+ browser = SentienceBrowser()
72
+ llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
73
+ agent = ConversationalAgent(browser, llm)
74
+
75
+ with browser:
76
+ response = agent.execute("Search for magic mouse on google.com")
77
+ print(response)
78
+ # → "I searched for 'magic mouse' and found several results.
79
+ # The top result is from amazon.com selling Magic Mouse 2 for $79."
80
+ ```
81
+
82
+ **Best for:** End users, chatbots, no-code platforms
83
+ **Code required:** 3-5 lines
84
+ **Technical knowledge:** None
85
+
86
+ </details>
87
+
88
+ <details>
89
+ <summary><b>⚙️ Level 2: Technical Commands (Recommended)</b> - For AI developers</summary>
90
+
91
+ ```python
92
+ from sentience import SentienceBrowser, SentienceAgent
93
+ from sentience.llm_provider import OpenAIProvider
94
+
95
+ browser = SentienceBrowser()
96
+ llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
97
+ agent = SentienceAgent(browser, llm)
98
+
99
+ with browser:
100
+ browser.page.goto("https://google.com")
101
+ agent.act("Click the search box")
102
+ agent.act("Type 'magic mouse' into the search field")
103
+ agent.act("Press Enter key")
104
+ ```
105
+
106
+ **Best for:** Building AI agents, automation scripts
107
+ **Code required:** 10-15 lines
108
+ **Technical knowledge:** Medium (Python basics)
109
+
110
+ </details>
111
+
112
+ <details>
113
+ <summary><b>🔧 Level 1: Direct SDK (Most Control)</b> - For production automation</summary>
114
+
115
+ ```python
116
+ from sentience import SentienceBrowser, snapshot, find, click
117
+
118
+ with SentienceBrowser(headless=False) as browser:
119
+ browser.page.goto("https://example.com")
120
+
121
+ # Take snapshot - captures all interactive elements
122
+ snap = snapshot(browser)
123
+ print(f"Found {len(snap.elements)} elements")
124
+
125
+ # Find and click a link using semantic selectors
126
+ link = find(snap, "role=link text~'More information'")
127
+ if link:
128
+ result = click(browser, link.id)
129
+ print(f"Click success: {result.success}")
130
+ ```
131
+
132
+ **Best for:** Maximum control, performance-critical apps
133
+ **Code required:** 20-50 lines
134
+ **Technical knowledge:** High (SDK API, selectors)
135
+
136
+ </details>
137
+
138
+ ---
139
+
140
+ ## 🆕 What's New (2026-01-06)
141
+
142
+ ### Human-like Typing
143
+ Add realistic delays between keystrokes to mimic human typing:
144
+ ```python
145
+ from sentience import type_text
146
+
147
+ # Type instantly (default)
148
+ type_text(browser, element_id, "Hello World")
149
+
150
+ # Type with human-like delay (~10ms between keystrokes)
151
+ type_text(browser, element_id, "Hello World", delay_ms=10)
152
+ ```
153
+
154
+ ### Scroll to Element
155
+ Scroll elements into view with smooth animation:
156
+ ```python
157
+ from sentience import snapshot, find, scroll_to
158
+
159
+ snap = snapshot(browser)
160
+ button = find(snap, 'role=button text~"Submit"')
161
+
162
+ # Scroll element into view with smooth animation
163
+ scroll_to(browser, button.id)
164
+
165
+ # Scroll instantly to top of viewport
166
+ scroll_to(browser, button.id, behavior='instant', block='start')
167
+ ```
168
+
169
+ ---
170
+
171
+ <details>
172
+ <summary><h2>💼 Real-World Example: Amazon Shopping Bot</h2></summary>
173
+
174
+ This example demonstrates navigating Amazon, finding products, and adding items to cart:
175
+
176
+ ```python
177
+ from sentience import SentienceBrowser, snapshot, find, click
178
+ import time
179
+
180
+ with SentienceBrowser(headless=False) as browser:
181
+ # Navigate to Amazon Best Sellers
182
+ browser.goto("https://www.amazon.com/gp/bestsellers/", wait_until="domcontentloaded")
183
+ time.sleep(2) # Wait for dynamic content
184
+
185
+ # Take snapshot and find products
186
+ snap = snapshot(browser)
187
+ print(f"Found {len(snap.elements)} elements")
188
+
189
+ # Find first product in viewport using spatial filtering
190
+ products = [
191
+ el for el in snap.elements
192
+ if el.role == "link"
193
+ and el.visual_cues.is_clickable
194
+ and el.in_viewport
195
+ and not el.is_occluded
196
+ and el.bbox.y < 600 # First row
197
+ ]
198
+
199
+ if products:
200
+ # Sort by position (left to right, top to bottom)
201
+ products.sort(key=lambda e: (e.bbox.y, e.bbox.x))
202
+ first_product = products[0]
203
+
204
+ print(f"Clicking: {first_product.text}")
205
+ result = click(browser, first_product.id)
206
+
207
+ # Wait for product page
208
+ browser.page.wait_for_load_state("networkidle")
209
+ time.sleep(2)
210
+
211
+ # Find and click "Add to Cart" button
212
+ product_snap = snapshot(browser)
213
+ add_to_cart = find(product_snap, "role=button text~'add to cart'")
214
+
215
+ if add_to_cart:
216
+ cart_result = click(browser, add_to_cart.id)
217
+ print(f"Added to cart: {cart_result.success}")
218
+ ```
219
+
220
+ **📖 See the complete tutorial:** [Amazon Shopping Guide](docs/AMAZON_SHOPPING_GUIDE.md)
221
+
222
+ </details>
223
+
224
+ ---
225
+
226
+ ## 📚 Core Features
227
+
228
+ <details>
229
+ <summary><h3>🌐 Browser Control</h3></summary>
230
+
231
+ - **`SentienceBrowser`** - Playwright browser with Sentience extension pre-loaded
232
+ - **`browser.goto(url)`** - Navigate with automatic extension readiness checks
233
+ - Automatic bot evasion and stealth mode
234
+ - Configurable headless/headed mode
235
+
236
+ </details>
237
+
238
+ <details>
239
+ <summary><h3>📸 Snapshot - Intelligent Page Analysis</h3></summary>
240
+
241
+ **`snapshot(browser, options=SnapshotOptions(screenshot=True, show_overlay=False, limit=None, goal=None))`** - Capture page state with AI-ranked elements
242
+
243
+ Features:
244
+ - Returns semantic elements with roles, text, importance scores, and bounding boxes
245
+ - Optional screenshot capture (PNG/JPEG) - set `screenshot=True`
246
+ - Optional visual overlay to see what elements are detected - set `show_overlay=True`
247
+ - Pydantic models for type safety
248
+ - Optional ML reranking when `goal` is provided
249
+ - **`snapshot.save(filepath)`** - Export to JSON
250
+
251
+ **Example:**
252
+ ```python
253
+ from sentience import snapshot, SnapshotOptions
254
+
255
+ # Basic snapshot with defaults (no screenshot, no overlay)
256
+ snap = snapshot(browser)
257
+
258
+ # With screenshot and overlay
259
+ snap = snapshot(browser, SnapshotOptions(
260
+ screenshot=True,
261
+ show_overlay=True,
262
+ limit=100,
263
+ goal="Click the login button" # Optional: enables ML reranking
264
+ ))
265
+
266
+ # Access structured data
267
+ print(f"URL: {snap.url}")
268
+ print(f"Viewport: {snap.viewport.width}x{snap.viewport.height}")
269
+ print(f"Elements: {len(snap.elements)}")
270
+
271
+ # Iterate over elements
272
+ for element in snap.elements:
273
+ print(f"{element.role}: {element.text} (importance: {element.importance})")
274
+
275
+ # Check ML reranking metadata (when goal is provided)
276
+ if element.rerank_index is not None:
277
+ print(f" ML rank: {element.rerank_index} (confidence: {element.ml_probability:.2%})")
278
+ ```
279
+
280
+ </details>
281
+
282
+ <details>
283
+ <summary><h3>🔍 Query Engine - Semantic Element Selection</h3></summary>
284
+
285
+ - **`query(snapshot, selector)`** - Find all matching elements
286
+ - **`find(snapshot, selector)`** - Find single best match (by importance)
287
+ - Powerful query DSL with multiple operators
288
+
289
+ **Query Examples:**
290
+ ```python
291
+ # Find by role and text
292
+ button = find(snap, "role=button text='Sign in'")
293
+
294
+ # Substring match (case-insensitive)
295
+ link = find(snap, "role=link text~'more info'")
296
+
297
+ # Spatial filtering
298
+ top_left = find(snap, "bbox.x<=100 bbox.y<=200")
299
+
300
+ # Multiple conditions (AND logic)
301
+ primary_btn = find(snap, "role=button clickable=true visible=true importance>800")
302
+
303
+ # Prefix/suffix matching
304
+ starts_with = find(snap, "text^='Add'")
305
+ ends_with = find(snap, "text$='Cart'")
306
+
307
+ # Numeric comparisons
308
+ important = query(snap, "importance>=700")
309
+ first_row = query(snap, "bbox.y<600")
310
+ ```
311
+
312
+ **📖 [Complete Query DSL Guide](docs/QUERY_DSL.md)** - All operators, fields, and advanced patterns
313
+
314
+ </details>
315
+
316
+ <details>
317
+ <summary><h3>👆 Actions - Interact with Elements</h3></summary>
318
+
319
+ - **`click(browser, element_id)`** - Click element by ID
320
+ - **`click_rect(browser, rect)`** - Click at center of rectangle (coordinate-based)
321
+ - **`type_text(browser, element_id, text)`** - Type into input fields
322
+ - **`press(browser, key)`** - Press keyboard keys (Enter, Escape, Tab, etc.)
323
+
324
+ All actions return `ActionResult` with success status, timing, and outcome:
325
+
326
+ ```python
327
+ result = click(browser, element.id)
328
+
329
+ print(f"Success: {result.success}")
330
+ print(f"Outcome: {result.outcome}") # "navigated", "dom_updated", "error"
331
+ print(f"Duration: {result.duration_ms}ms")
332
+ print(f"URL changed: {result.url_changed}")
333
+ ```
334
+
335
+ **Coordinate-based clicking:**
336
+ ```python
337
+ from sentience import click_rect
338
+
339
+ # Click at center of rectangle (x, y, width, height)
340
+ click_rect(browser, {"x": 100, "y": 200, "w": 50, "h": 30})
341
+
342
+ # With visual highlight (default: red border for 2 seconds)
343
+ click_rect(browser, {"x": 100, "y": 200, "w": 50, "h": 30}, highlight=True, highlight_duration=2.0)
344
+
345
+ # Using element's bounding box
346
+ snap = snapshot(browser)
347
+ element = find(snap, "role=button")
348
+ if element:
349
+ click_rect(browser, {
350
+ "x": element.bbox.x,
351
+ "y": element.bbox.y,
352
+ "w": element.bbox.width,
353
+ "h": element.bbox.height
354
+ })
355
+ ```
356
+
357
+ </details>
358
+
359
+ <details>
360
+ <summary><h3>⏱️ Wait & Assertions</h3></summary>
361
+
362
+ - **`wait_for(browser, selector, timeout=5.0, interval=None, use_api=None)`** - Wait for element to appear
363
+ - **`expect(browser, selector)`** - Assertion helper with fluent API
364
+
365
+ **Examples:**
366
+ ```python
367
+ # Wait for element (auto-detects optimal interval based on API usage)
368
+ result = wait_for(browser, "role=button text='Submit'", timeout=10.0)
369
+ if result.found:
370
+ print(f"Found after {result.duration_ms}ms")
371
+
372
+ # Use local extension with fast polling (0.25s interval)
373
+ result = wait_for(browser, "role=button", timeout=5.0, use_api=False)
374
+
375
+ # Use remote API with network-friendly polling (1.5s interval)
376
+ result = wait_for(browser, "role=button", timeout=5.0, use_api=True)
377
+
378
+ # Custom interval override
379
+ result = wait_for(browser, "role=button", timeout=5.0, interval=0.5, use_api=False)
380
+
381
+ # Semantic wait conditions
382
+ wait_for(browser, "clickable=true", timeout=5.0) # Wait for clickable element
383
+ wait_for(browser, "importance>100", timeout=5.0) # Wait for important element
384
+ wait_for(browser, "role=link visible=true", timeout=5.0) # Wait for visible link
385
+
386
+ # Assertions
387
+ expect(browser, "role=button text='Submit'").to_exist(timeout=5.0)
388
+ expect(browser, "role=heading").to_be_visible()
389
+ expect(browser, "role=button").to_have_text("Submit")
390
+ expect(browser, "role=link").to_have_count(10)
391
+ ```
392
+
393
+ </details>
394
+
395
+ <details>
396
+ <summary><h3>🎨 Visual Overlay - Debug Element Detection</h3></summary>
397
+
398
+ - **`show_overlay(browser, elements, target_element_id=None)`** - Display visual overlay highlighting elements
399
+ - **`clear_overlay(browser)`** - Clear overlay manually
400
+
401
+ Show color-coded borders around detected elements to debug, validate, and understand what Sentience sees:
402
+
403
+ ```python
404
+ from sentience import show_overlay, clear_overlay
405
+
406
+ # Take snapshot once
407
+ snap = snapshot(browser)
408
+
409
+ # Show overlay anytime without re-snapshotting
410
+ show_overlay(browser, snap) # Auto-clears after 5 seconds
411
+
412
+ # Highlight specific target element in red
413
+ button = find(snap, "role=button text~'Submit'")
414
+ show_overlay(browser, snap, target_element_id=button.id)
415
+
416
+ # Clear manually before 5 seconds
417
+ import time
418
+ time.sleep(2)
419
+ clear_overlay(browser)
420
+ ```
421
+
422
+ **Color Coding:**
423
+ - 🔴 Red: Target element
424
+ - 🔵 Blue: Primary elements (`is_primary=true`)
425
+ - 🟢 Green: Regular interactive elements
426
+
427
+ **Visual Indicators:**
428
+ - Border thickness/opacity scales with importance
429
+ - Semi-transparent fill
430
+ - Importance badges
431
+ - Star icons for primary elements
432
+ - Auto-clear after 5 seconds
433
+
434
+ </details>
435
+
436
+ <details>
437
+ <summary><h3>📄 Content Reading</h3></summary>
438
+
439
+ **`read(browser, format="text|markdown|raw")`** - Extract page content
440
+ - `format="text"` - Plain text extraction
441
+ - `format="markdown"` - High-quality markdown conversion (uses markdownify)
442
+ - `format="raw"` - Cleaned HTML (default)
443
+
444
+ **Example:**
445
+ ```python
446
+ from sentience import read
447
+
448
+ # Get markdown content
449
+ result = read(browser, format="markdown")
450
+ print(result["content"]) # Markdown text
451
+
452
+ # Get plain text
453
+ result = read(browser, format="text")
454
+ print(result["content"]) # Plain text
455
+ ```
456
+
457
+ </details>
458
+
459
+ <details>
460
+ <summary><h3>📷 Screenshots</h3></summary>
461
+
462
+ **`screenshot(browser, format="png|jpeg", quality=80)`** - Standalone screenshot capture
463
+ - Returns base64-encoded data URL
464
+ - PNG or JPEG format
465
+ - Quality control for JPEG (1-100)
466
+
467
+ **Example:**
468
+ ```python
469
+ from sentience import screenshot
470
+ import base64
471
+
472
+ # Capture PNG screenshot
473
+ data_url = screenshot(browser, format="png")
474
+
475
+ # Save to file
476
+ image_data = base64.b64decode(data_url.split(",")[1])
477
+ with open("screenshot.png", "wb") as f:
478
+ f.write(image_data)
479
+
480
+ # JPEG with quality control (smaller file size)
481
+ data_url = screenshot(browser, format="jpeg", quality=85)
482
+ ```
483
+
484
+ </details>
485
+
486
+ <details>
487
+ <summary><h3>🔎 Text Search - Find Elements by Visible Text</h3></summary>
488
+
489
+ **`find_text_rect(browser, text, case_sensitive=False, whole_word=False, max_results=10)`** - Find text on page and get exact pixel coordinates
490
+
491
+ Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.
492
+
493
+ **Example:**
494
+ ```python
495
+ from sentience import SentienceBrowser, find_text_rect, click_rect
496
+
497
+ with SentienceBrowser() as browser:
498
+ browser.page.goto("https://example.com")
499
+
500
+ # Find "Sign In" button
501
+ result = find_text_rect(browser, "Sign In")
502
+ if result.status == "success" and result.results:
503
+ first_match = result.results[0]
504
+ print(f"Found at: ({first_match.rect.x}, {first_match.rect.y})")
505
+ print(f"In viewport: {first_match.in_viewport}")
506
+
507
+ # Click on the found text
508
+ if first_match.in_viewport:
509
+ click_rect(browser, {
510
+ "x": first_match.rect.x,
511
+ "y": first_match.rect.y,
512
+ "w": first_match.rect.width,
513
+ "h": first_match.rect.height
514
+ })
515
+ ```
516
+
517
+ **Advanced Options:**
518
+ ```python
519
+ # Case-sensitive search
520
+ result = find_text_rect(browser, "LOGIN", case_sensitive=True)
521
+
522
+ # Whole word only (won't match "log" as part of "login" or "logout")
523
+ result = find_text_rect(browser, "log", whole_word=True)
524
+
525
+ # Find multiple matches
526
+ result = find_text_rect(browser, "Buy", max_results=10)
527
+ for match in result.results:
528
+ if match.in_viewport:
529
+ print(f"Found '{match.text}' at ({match.rect.x}, {match.rect.y})")
530
+ print(f"Context: ...{match.context.before}[{match.text}]{match.context.after}...")
531
+ ```
532
+
533
+ **Returns:** `TextRectSearchResult` with:
534
+ - **`status`**: "success" or "error"
535
+ - **`results`**: List of `TextMatch` objects with:
536
+ - `text` - The matched text
537
+ - `rect` - Absolute coordinates (with scroll offset)
538
+ - `viewport_rect` - Viewport-relative coordinates
539
+ - `context` - Surrounding text (before/after)
540
+ - `in_viewport` - Whether visible in current viewport
541
+
542
+ **Use Cases:**
543
+ - Find buttons/links by visible text without CSS selectors
544
+ - Get exact pixel coordinates for click automation
545
+ - Verify text visibility and position on page
546
+ - Search dynamic content that changes frequently
547
+
548
+ **Note:** Does not consume API credits (runs locally in browser)
549
+
550
+ **See example:** `examples/find_text_demo.py`
551
+
552
+ </details>
553
+
554
+ ---
555
+
556
+ ## 🔄 Async API
557
+
558
+ For asyncio contexts (FastAPI, async frameworks):
559
+
560
+ ```python
561
+ import asyncio
+ from sentience.async_api import AsyncSentienceBrowser, snapshot_async, click_async, find
562
+
563
+ async def main():
564
+ async with AsyncSentienceBrowser() as browser:
565
+ await browser.goto("https://example.com")
566
+ snap = await snapshot_async(browser)
567
+ button = find(snap, "role=button")
568
+ if button:
569
+ await click_async(browser, button.id)
570
+
571
+ asyncio.run(main())
572
+ ```
573
+
574
+ **See example:** `examples/async_api_demo.py`
575
+
576
+ ---
577
+
578
+ ## 📋 Reference
579
+
580
+ <details>
581
+ <summary><h3>Element Properties</h3></summary>
582
+
583
+ Elements returned by `snapshot()` have the following properties:
584
+
585
+ ```python
586
+ element.id # Unique identifier for interactions
587
+ element.role # ARIA role (button, link, textbox, heading, etc.)
588
+ element.text # Visible text content
589
+ element.importance # AI importance score (0-1000)
590
+ element.bbox # Bounding box (x, y, width, height)
591
+ element.visual_cues # Visual analysis (is_primary, is_clickable, background_color)
592
+ element.in_viewport # Is element visible in current viewport?
593
+ element.is_occluded # Is element covered by other elements?
594
+ element.z_index # CSS stacking order
595
+ ```
596
+
597
+ </details>
598
+
599
+ <details>
600
+ <summary><h3>Query DSL Reference</h3></summary>
601
+
602
+ ### Basic Operators
603
+
604
+ | Operator | Description | Example |
605
+ |----------|-------------|---------|
606
+ | `=` | Exact match | `role=button` |
607
+ | `!=` | Exclusion | `role!=link` |
608
+ | `~` | Substring (case-insensitive) | `text~'sign in'` |
609
+ | `^=` | Prefix match | `text^='Add'` |
610
+ | `$=` | Suffix match | `text$='Cart'` |
611
+ | `>`, `>=` | Greater than / greater than or equal | `importance>500` |
612
+ | `<`, `<=` | Less than / less than or equal | `bbox.y<600` |
613
+
614
+ ### Supported Fields
615
+
616
+ - **Role**: `role=button|link|textbox|heading|...`
617
+ - **Text**: `text`, `text~`, `text^=`, `text$=`
618
+ - **Visibility**: `clickable=true|false`, `visible=true|false`
619
+ - **Importance**: `importance`, `importance>=N`, `importance<N`
620
+ - **Position**: `bbox.x`, `bbox.y`, `bbox.width`, `bbox.height`
621
+ - **Layering**: `z_index`
622
+
623
+ </details>
624
+
625
+ ---
626
+
627
+ ## ⚙️ Configuration
628
+
629
+ <details>
630
+ <summary><h3>Viewport Size</h3></summary>
631
+
632
+ Default viewport is **1280x800** pixels. You can customize it using Playwright's API:
633
+
634
+ ```python
635
+ with SentienceBrowser(headless=False) as browser:
636
+ # Set custom viewport before navigating
637
+ browser.page.set_viewport_size({"width": 1920, "height": 1080})
638
+
639
+ browser.goto("https://example.com")
640
+ ```
641
+
642
+ </details>
643
+
644
+ <details>
645
+ <summary><h3>Headless Mode</h3></summary>
646
+
647
+ ```python
648
+ # Headed mode (default in dev, shows browser window)
649
+ browser = SentienceBrowser(headless=False)
650
+
651
+ # Headless mode (default in CI environments)
652
+ browser = SentienceBrowser(headless=True)
653
+
654
+ # Auto-detect based on environment
655
+ browser = SentienceBrowser() # headless=True if CI=true, else False
656
+ ```
657
+
658
+ </details>
659
+
660
+ <details>
661
+ <summary><h3>🌍 Residential Proxy Support</h3></summary>
662
+
663
+ Use residential proxies to route traffic and protect your IP address. Supports HTTP, HTTPS, and SOCKS5 with automatic SSL certificate handling:
664
+
665
+ ```python
666
+ # Method 1: Direct configuration
667
+ browser = SentienceBrowser(proxy="http://user:pass@proxy.example.com:8080")
668
+
669
+ # Method 2: Environment variable
670
+ # export SENTIENCE_PROXY="http://user:pass@proxy.example.com:8080"
671
+ browser = SentienceBrowser()
672
+
673
+ # Works with agents
674
+ llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
675
+ agent = SentienceAgent(browser, llm)
676
+
677
+ with browser:
678
+ browser.page.goto("https://example.com")
679
+ agent.act("Search for products")
680
+ # All traffic routed through proxy with WebRTC leak protection
681
+ ```
682
+
683
+ **Features:**
684
+ - HTTP, HTTPS, SOCKS5 proxy support
685
+ - Username/password authentication
686
+ - Automatic self-signed SSL certificate handling
687
+ - WebRTC IP leak protection (automatic)
688
+
689
+ See `examples/residential_proxy_agent.py` for complete examples.
690
+
691
+ </details>
692
+
693
+ <details>
694
+ <summary><h3>🔐 Authentication Session Injection</h3></summary>
695
+
696
+ Inject pre-recorded authentication sessions (cookies + localStorage) to start your agent already logged in, bypassing login screens, 2FA, and CAPTCHAs. This saves tokens and reduces costs by eliminating login steps.
697
+
698
+ ```python
699
+ # Workflow 1: Inject pre-recorded session from file
700
+ from sentience import SentienceBrowser, save_storage_state
701
+
702
+ # Save session after manual login
703
+ browser = SentienceBrowser()
704
+ browser.start()
705
+ browser.goto("https://example.com")
706
+ # ... log in manually ...
707
+ save_storage_state(browser.context, "auth.json")
708
+
709
+ # Use saved session in future runs
710
+ browser = SentienceBrowser(storage_state="auth.json")
711
+ browser.start()
712
+ # Agent starts already logged in!
713
+
714
+ # Workflow 2: Persistent sessions (cookies persist across runs)
715
+ browser = SentienceBrowser(user_data_dir="./chrome_profile")
716
+ browser.start()
717
+ # First run: Log in
718
+ # Second run: Already logged in (cookies persist automatically)
719
+ ```
720
+
721
+ **Benefits:**
722
+ - Bypass login screens and CAPTCHAs with valid sessions
723
+ - Save 5-10 agent steps and hundreds of tokens per run
724
+ - Maintain stateful sessions for accessing authenticated pages
725
+ - Act as authenticated users (e.g., "Go to my Orders page")
726
+
727
+ See `examples/auth_injection_agent.py` for complete examples.
728
+
729
+ </details>
730
+
731
+ ---
732
+
733
+ ## 💡 Best Practices
734
+
735
+ <details>
736
+ <summary>Click to expand best practices</summary>
737
+
738
+ ### 1. Wait for Dynamic Content
739
+ ```python
740
+ browser.goto("https://example.com", wait_until="domcontentloaded")
741
+ time.sleep(1) # Extra buffer for AJAX/animations
742
+ ```
743
+
744
+ ### 2. Use Multiple Strategies for Finding Elements
745
+ ```python
746
+ # Try exact match first
747
+ btn = find(snap, "role=button text='Add to Cart'")
748
+
749
+ # Fallback to fuzzy match
750
+ if not btn:
751
+ btn = find(snap, "role=button text~'cart'")
752
+ ```
753
+
754
+ ### 3. Check Element Visibility Before Clicking
755
+ ```python
756
+ if element.in_viewport and not element.is_occluded:
757
+ click(browser, element.id)
758
+ ```
759
+
760
+ ### 4. Handle Navigation
761
+ ```python
762
+ result = click(browser, link_id)
763
+ if result.url_changed:
764
+ browser.page.wait_for_load_state("networkidle")
765
+ ```
766
+
767
+ ### 5. Use Screenshots Sparingly
768
+ ```python
769
+ # Fast - no screenshot (only element data)
770
+ snap = snapshot(browser)
771
+
772
+ # Slower - with screenshot (for debugging/verification)
773
+ snap = snapshot(browser, SnapshotOptions(screenshot=True))
774
+ ```
775
+
776
+ </details>
777
+
778
+ ---
779
+
780
+ ## 🛠️ Troubleshooting
781
+
782
+ <details>
783
+ <summary>Click to expand common issues and solutions</summary>
784
+
785
+ ### "Extension failed to load"
786
+ **Solution:** Build the extension first:
787
+ ```bash
788
+ cd sentience-chrome
789
+ ./build.sh
790
+ ```
791
+
792
+ ### "Element not found"
793
+ **Solutions:**
794
+ - Ensure page is loaded: `browser.page.wait_for_load_state("networkidle")`
795
+ - Use `wait_for()`: `wait_for(browser, "role=button", timeout=10)`
796
+ - Debug elements: `print([el.text for el in snap.elements])`
797
+
798
+ ### Button not clickable
799
+ **Solutions:**
800
+ - Check visibility: `element.in_viewport and not element.is_occluded`
801
+ - Scroll to element: `scroll_to(browser, element.id)` (or, manually, `browser.page.evaluate(f"window.sentience_registry[{element.id}].scrollIntoView()")`)
802
+
803
+ </details>
804
+
805
+ ---
806
+
807
+ ## 🔬 Advanced Features (v0.12.0+)
808
+
809
+ <details>
810
+ <summary><h3>📊 Agent Tracing & Debugging</h3></summary>
811
+
812
+ The SDK now includes built-in tracing infrastructure for debugging and analyzing agent behavior:
813
+
814
+ ```python
815
+ from sentience import SentienceBrowser, SentienceAgent
816
+ from sentience.llm_provider import OpenAIProvider
817
+ from sentience.tracing import Tracer, JsonlTraceSink
818
+ from sentience.agent_config import AgentConfig
819
+
820
+ # Create tracer to record agent execution
821
+ tracer = Tracer(
822
+ run_id="my-agent-run-123",
823
+ sink=JsonlTraceSink("trace.jsonl")
824
+ )
825
+
826
+ # Configure agent behavior
827
+ config = AgentConfig(
828
+ snapshot_limit=50,
829
+ temperature=0.0,
830
+ max_retries=1,
831
+ capture_screenshots=True
832
+ )
833
+
834
+ browser = SentienceBrowser()
835
+ llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
836
+
837
+ # Pass tracer and config to agent
838
+ agent = SentienceAgent(browser, llm, tracer=tracer, config=config)
839
+
840
+ with browser:
841
+     browser.page.goto("https://example.com")
841
+
842
+     # All actions are automatically traced
843
+     agent.act("Click the sign in button")
844
+     agent.act("Type 'user@example.com' into email field")
846
+
847
+ # Trace events saved to trace.jsonl
848
+ # Events: step_start, snapshot, llm_query, action, step_end, error
849
+ ```
850
+
851
+ **Trace Events Captured:**
852
+ - `step_start` - Agent begins executing a goal
853
+ - `snapshot` - Page state captured
854
+ - `llm_query` - LLM decision made (includes tokens, model, response)
855
+ - `action` - Action executed (click, type, press)
856
+ - `step_end` - Step completed successfully
857
+ - `error` - Error occurred during execution
858
+
859
+ **Use Cases:**
860
+ - Debug why an agent failed or got stuck
861
+ - Analyze token usage and costs
862
+ - Replay agent sessions
863
+ - Train custom models from successful runs
864
+ - Monitor production agents
865
+
866
+ </details>
867
+
868
+ <details>
869
+ <summary><h3>🔍 Agent Runtime Verification</h3></summary>
870
+
871
+ `AgentRuntime` provides assertion predicates for runtime verification in agent loops, enabling programmatic verification of browser state during execution.
872
+
873
+ ```python
874
+ from sentience import (
875
+ AgentRuntime, SentienceBrowser,
876
+ url_contains, exists, all_of
877
+ )
878
+ from sentience.tracer_factory import create_tracer
879
+
880
+ browser = SentienceBrowser()
881
+ browser.start()
882
+ tracer = create_tracer(run_id="my-run", upload_trace=False)
883
+ runtime = AgentRuntime(browser, browser.page, tracer)
884
+
885
+ # Navigate and take snapshot
886
+ browser.page.goto("https://example.com")
887
+ runtime.begin_step("Verify page")
888
+ runtime.snapshot()
889
+
890
+ # Run assertions
891
+ runtime.assert_(url_contains("example.com"), "on_correct_domain")
892
+ runtime.assert_(exists("role=heading"), "has_heading")
893
+ runtime.assert_done(exists("text~'Example'"), "task_complete")
894
+
895
+ print(f"Task done: {runtime.is_task_done}")
896
+ ```
897
+
898
+ **See example:** [`examples/agent_runtime_verification.py`](examples/agent_runtime_verification.py)
899
+
900
+ </details>
901
+
902
+ <details>
903
+ <summary><h3>🧰 Snapshot Utilities</h3></summary>
904
+
905
+ New utility functions for working with snapshots:
906
+
907
+ ```python
908
+ from sentience import snapshot
909
+ from sentience.utils import compute_snapshot_digests, canonical_snapshot_strict
910
+ from sentience.formatting import format_snapshot_for_llm
911
+
912
+ snap = snapshot(browser)
913
+
914
+ # Compute snapshot fingerprints (detect page changes)
915
+ digests = compute_snapshot_digests(snap.elements)
916
+ print(f"Strict digest: {digests['strict']}") # Changes when text changes
917
+ print(f"Loose digest: {digests['loose']}") # Only changes when layout changes
918
+
919
+ # Format snapshot for LLM prompts
920
+ llm_context = format_snapshot_for_llm(snap, limit=50)
921
+ print(llm_context)
922
+ # Output: [1] <button> "Sign In" {PRIMARY,CLICKABLE} @ (100,50) (Imp:10)
923
+ ```
924
+
925
+ </details>
926
+
927
+ ---
928
+
929
+ ## 📖 Documentation
930
+
931
+ - **📖 [Amazon Shopping Guide](../docs/AMAZON_SHOPPING_GUIDE.md)** - Complete tutorial with real-world example
932
+ - **📖 [Query DSL Guide](docs/QUERY_DSL.md)** - Advanced query patterns and operators
933
+ - **📄 [API Contract](../spec/SNAPSHOT_V1.md)** - Snapshot API specification
934
+ - **📄 [Type Definitions](../spec/sdk-types.md)** - TypeScript/Python type definitions
935
+
936
+ ---
937
+
938
+ ## 💻 Examples & Testing
939
+
940
+ <details>
941
+ <summary><h3>Examples</h3></summary>
942
+
943
+ See the `examples/` directory for complete working examples:
944
+
945
+ - **`hello.py`** - Extension bridge verification
946
+ - **`basic_agent.py`** - Basic snapshot and element inspection
947
+ - **`query_demo.py`** - Query engine demonstrations
948
+ - **`wait_and_click.py`** - Waiting for elements and performing actions
949
+ - **`read_markdown.py`** - Content extraction and markdown conversion
950
+
951
+ </details>
952
+
953
+ <details>
954
+ <summary><h3>Testing</h3></summary>
955
+
956
+ ```bash
957
+ # Run all tests
958
+ pytest tests/
959
+
960
+ # Run specific test file
961
+ pytest tests/test_snapshot.py
962
+
963
+ # Run with verbose output
964
+ pytest -v tests/
965
+ ```
966
+
967
+ </details>
968
+
969
+ ---
970
+
971
+ ## License & Commercial Use
972
+
973
+ ### Open Source SDK
974
+ The Sentience SDK is dual-licensed under [MIT License](./LICENSE-MIT) and [Apache 2.0](./LICENSE-APACHE). You are free to use, modify, and distribute this SDK in your own projects (including commercial ones) without restriction.
975
+
976
+ ### Commercial Platform
977
+ While the SDK is open source, the **Sentience Cloud Platform** (API, Hosting, Sentience Studio) is a commercial service.
978
+
979
+ **We offer Commercial Licenses for:**
980
+ * **High-Volume Production:** Usage beyond the free tier limits.
981
+ * **SLA & Support:** Guaranteed uptime and dedicated engineering support.
982
+ * **On-Premise / Self-Hosted Gateway:** If you need to run the Sentience Gateway (Rust+ONNX) in your own VPC for compliance (e.g., banking/healthcare), you need an Enterprise License.
983
+
984
+ [Contact Us](mailto:support@sentienceapi.com) for Enterprise inquiries.