sentienceapi 0.90.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sentience/__init__.py +153 -0
- sentience/actions.py +439 -0
- sentience/agent.py +687 -0
- sentience/agent_config.py +43 -0
- sentience/base_agent.py +101 -0
- sentience/browser.py +409 -0
- sentience/cli.py +130 -0
- sentience/cloud_tracing.py +292 -0
- sentience/conversational_agent.py +509 -0
- sentience/expect.py +92 -0
- sentience/extension/background.js +233 -0
- sentience/extension/content.js +298 -0
- sentience/extension/injected_api.js +1473 -0
- sentience/extension/manifest.json +36 -0
- sentience/extension/pkg/sentience_core.d.ts +51 -0
- sentience/extension/pkg/sentience_core.js +529 -0
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
- sentience/extension/release.json +115 -0
- sentience/extension/test-content.js +4 -0
- sentience/formatting.py +59 -0
- sentience/generator.py +202 -0
- sentience/inspector.py +185 -0
- sentience/llm_provider.py +431 -0
- sentience/models.py +406 -0
- sentience/overlay.py +115 -0
- sentience/query.py +303 -0
- sentience/read.py +96 -0
- sentience/recorder.py +369 -0
- sentience/schemas/trace_v1.json +216 -0
- sentience/screenshot.py +54 -0
- sentience/snapshot.py +282 -0
- sentience/text_search.py +150 -0
- sentience/trace_indexing/__init__.py +27 -0
- sentience/trace_indexing/index_schema.py +111 -0
- sentience/trace_indexing/indexer.py +363 -0
- sentience/tracer_factory.py +211 -0
- sentience/tracing.py +285 -0
- sentience/utils.py +296 -0
- sentience/wait.py +73 -0
- sentienceapi-0.90.11.dist-info/METADATA +878 -0
- sentienceapi-0.90.11.dist-info/RECORD +46 -0
- sentienceapi-0.90.11.dist-info/WHEEL +5 -0
- sentienceapi-0.90.11.dist-info/entry_points.txt +2 -0
- sentienceapi-0.90.11.dist-info/licenses/LICENSE.md +43 -0
- sentienceapi-0.90.11.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,878 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sentienceapi
|
|
3
|
+
Version: 0.90.11
|
|
4
|
+
Summary: Python SDK for Sentience AI Agent Browser Automation
|
|
5
|
+
Author: Sentience Team
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/SentienceAPI/sentience-python
|
|
8
|
+
Project-URL: Repository, https://github.com/SentienceAPI/sentience-python
|
|
9
|
+
Project-URL: Issues, https://github.com/SentienceAPI/sentience-python/issues
|
|
10
|
+
Keywords: browser-automation,playwright,ai-agent,web-automation,sentience
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE.md
|
|
19
|
+
Requires-Dist: playwright>=1.40.0
|
|
20
|
+
Requires-Dist: pydantic>=2.0.0
|
|
21
|
+
Requires-Dist: jsonschema>=4.0.0
|
|
22
|
+
Requires-Dist: requests>=2.31.0
|
|
23
|
+
Requires-Dist: playwright-stealth>=1.0.6
|
|
24
|
+
Requires-Dist: markdownify>=0.11.6
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
27
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# Sentience Python SDK
|
|
31
|
+
|
|
32
|
+
The SDK is open under ELv2; the core semantic geometry and reliability logic runs in Sentience-hosted services.
|
|
33
|
+
|
|
34
|
+
## 📦 Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Install from PyPI
|
|
38
|
+
pip install sentienceapi
|
|
39
|
+
|
|
40
|
+
# Install Playwright browsers (required)
|
|
41
|
+
playwright install chromium
|
|
42
|
+
|
|
43
|
+
# For LLM Agent features (optional)
|
|
44
|
+
pip install openai # For OpenAI models
|
|
45
|
+
pip install anthropic # For Claude models
|
|
46
|
+
pip install transformers torch # For local LLMs
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**For local development:**
|
|
50
|
+
```bash
|
|
51
|
+
pip install -e .
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## 🚀 Quick Start: Choose Your Abstraction Level
|
|
55
|
+
|
|
56
|
+
Sentience SDK offers **three abstraction levels** - use what fits your needs:
|
|
57
|
+
|
|
58
|
+
<details>
|
|
59
|
+
<summary><b>🎯 Level 3: Natural Language (Easiest)</b> - For non-technical users</summary>
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from sentience import SentienceBrowser, ConversationalAgent
|
|
63
|
+
from sentience.llm_provider import OpenAIProvider
|
|
64
|
+
|
|
65
|
+
browser = SentienceBrowser()
|
|
66
|
+
llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
|
|
67
|
+
agent = ConversationalAgent(browser, llm)
|
|
68
|
+
|
|
69
|
+
with browser:
|
|
70
|
+
response = agent.execute("Search for magic mouse on google.com")
|
|
71
|
+
print(response)
|
|
72
|
+
# → "I searched for 'magic mouse' and found several results.
|
|
73
|
+
# The top result is from amazon.com selling Magic Mouse 2 for $79."
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Best for:** End users, chatbots, no-code platforms
|
|
77
|
+
**Code required:** 3-5 lines
|
|
78
|
+
**Technical knowledge:** None
|
|
79
|
+
|
|
80
|
+
</details>
|
|
81
|
+
|
|
82
|
+
<details>
|
|
83
|
+
<summary><b>⚙️ Level 2: Technical Commands (Recommended)</b> - For AI developers</summary>
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from sentience import SentienceBrowser, SentienceAgent
|
|
87
|
+
from sentience.llm_provider import OpenAIProvider
|
|
88
|
+
|
|
89
|
+
browser = SentienceBrowser()
|
|
90
|
+
llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
|
|
91
|
+
agent = SentienceAgent(browser, llm)
|
|
92
|
+
|
|
93
|
+
with browser:
|
|
94
|
+
browser.page.goto("https://google.com")
|
|
95
|
+
agent.act("Click the search box")
|
|
96
|
+
agent.act("Type 'magic mouse' into the search field")
|
|
97
|
+
agent.act("Press Enter key")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**Best for:** Building AI agents, automation scripts
|
|
101
|
+
**Code required:** 10-15 lines
|
|
102
|
+
**Technical knowledge:** Medium (Python basics)
|
|
103
|
+
|
|
104
|
+
</details>
|
|
105
|
+
|
|
106
|
+
<details>
|
|
107
|
+
<summary><b>🔧 Level 1: Direct SDK (Most Control)</b> - For production automation</summary>
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from sentience import SentienceBrowser, snapshot, find, click
|
|
111
|
+
|
|
112
|
+
with SentienceBrowser(headless=False) as browser:
|
|
113
|
+
browser.page.goto("https://example.com")
|
|
114
|
+
|
|
115
|
+
# Take snapshot - captures all interactive elements
|
|
116
|
+
snap = snapshot(browser)
|
|
117
|
+
print(f"Found {len(snap.elements)} elements")
|
|
118
|
+
|
|
119
|
+
# Find and click a link using semantic selectors
|
|
120
|
+
link = find(snap, "role=link text~'More information'")
|
|
121
|
+
if link:
|
|
122
|
+
result = click(browser, link.id)
|
|
123
|
+
print(f"Click success: {result.success}")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Best for:** Maximum control, performance-critical apps
|
|
127
|
+
**Code required:** 20-50 lines
|
|
128
|
+
**Technical knowledge:** High (SDK API, selectors)
|
|
129
|
+
|
|
130
|
+
</details>
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
<details>
|
|
135
|
+
<summary><h2>💼 Real-World Example: Amazon Shopping Bot</h2></summary>
|
|
136
|
+
|
|
137
|
+
This example demonstrates navigating Amazon, finding products, and adding items to cart:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from sentience import SentienceBrowser, snapshot, find, click
|
|
141
|
+
import time
|
|
142
|
+
|
|
143
|
+
with SentienceBrowser(headless=False) as browser:
|
|
144
|
+
# Navigate to Amazon Best Sellers
|
|
145
|
+
browser.goto("https://www.amazon.com/gp/bestsellers/", wait_until="domcontentloaded")
|
|
146
|
+
time.sleep(2) # Wait for dynamic content
|
|
147
|
+
|
|
148
|
+
# Take snapshot and find products
|
|
149
|
+
snap = snapshot(browser)
|
|
150
|
+
print(f"Found {len(snap.elements)} elements")
|
|
151
|
+
|
|
152
|
+
# Find first product in viewport using spatial filtering
|
|
153
|
+
products = [
|
|
154
|
+
el for el in snap.elements
|
|
155
|
+
if el.role == "link"
|
|
156
|
+
and el.visual_cues.is_clickable
|
|
157
|
+
and el.in_viewport
|
|
158
|
+
and not el.is_occluded
|
|
159
|
+
and el.bbox.y < 600 # First row
|
|
160
|
+
]
|
|
161
|
+
|
|
162
|
+
if products:
|
|
163
|
+
# Sort by position (left to right, top to bottom)
|
|
164
|
+
products.sort(key=lambda e: (e.bbox.y, e.bbox.x))
|
|
165
|
+
first_product = products[0]
|
|
166
|
+
|
|
167
|
+
print(f"Clicking: {first_product.text}")
|
|
168
|
+
result = click(browser, first_product.id)
|
|
169
|
+
|
|
170
|
+
# Wait for product page
|
|
171
|
+
browser.page.wait_for_load_state("networkidle")
|
|
172
|
+
time.sleep(2)
|
|
173
|
+
|
|
174
|
+
# Find and click "Add to Cart" button
|
|
175
|
+
product_snap = snapshot(browser)
|
|
176
|
+
add_to_cart = find(product_snap, "role=button text~'add to cart'")
|
|
177
|
+
|
|
178
|
+
if add_to_cart:
|
|
179
|
+
cart_result = click(browser, add_to_cart.id)
|
|
180
|
+
print(f"Added to cart: {cart_result.success}")
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
**📖 See the complete tutorial:** [Amazon Shopping Guide](docs/AMAZON_SHOPPING_GUIDE.md)
|
|
184
|
+
|
|
185
|
+
</details>
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## 📚 Core Features
|
|
190
|
+
|
|
191
|
+
<details>
|
|
192
|
+
<summary><h3>🌐 Browser Control</h3></summary>
|
|
193
|
+
|
|
194
|
+
- **`SentienceBrowser`** - Playwright browser with Sentience extension pre-loaded
|
|
195
|
+
- **`browser.goto(url)`** - Navigate with automatic extension readiness checks
|
|
196
|
+
- Automatic bot evasion and stealth mode
|
|
197
|
+
- Configurable headless/headed mode
|
|
198
|
+
|
|
199
|
+
</details>
|
|
200
|
+
|
|
201
|
+
<details>
|
|
202
|
+
<summary><h3>📸 Snapshot - Intelligent Page Analysis</h3></summary>
|
|
203
|
+
|
|
204
|
+
**`snapshot(browser, screenshot=True, show_overlay=False)`** - Capture page state with AI-ranked elements
|
|
205
|
+
|
|
206
|
+
Features:
|
|
207
|
+
- Returns semantic elements with roles, text, importance scores, and bounding boxes
|
|
208
|
+
- Optional screenshot capture (PNG/JPEG)
|
|
209
|
+
- Optional visual overlay to see what elements are detected
|
|
210
|
+
- Pydantic models for type safety
|
|
211
|
+
- **`snapshot.save(filepath)`** - Export to JSON
|
|
212
|
+
|
|
213
|
+
**Example:**
|
|
214
|
+
```python
|
|
215
|
+
snap = snapshot(browser, screenshot=True, show_overlay=True)
|
|
216
|
+
|
|
217
|
+
# Access structured data
|
|
218
|
+
print(f"URL: {snap.url}")
|
|
219
|
+
print(f"Viewport: {snap.viewport.width}x{snap.viewport.height}")
|
|
220
|
+
print(f"Elements: {len(snap.elements)}")
|
|
221
|
+
|
|
222
|
+
# Iterate over elements
|
|
223
|
+
for element in snap.elements:
|
|
224
|
+
print(f"{element.role}: {element.text} (importance: {element.importance})")
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
</details>
|
|
228
|
+
|
|
229
|
+
<details>
|
|
230
|
+
<summary><h3>🔍 Query Engine - Semantic Element Selection</h3></summary>
|
|
231
|
+
|
|
232
|
+
- **`query(snapshot, selector)`** - Find all matching elements
|
|
233
|
+
- **`find(snapshot, selector)`** - Find single best match (by importance)
|
|
234
|
+
- Powerful query DSL with multiple operators
|
|
235
|
+
|
|
236
|
+
**Query Examples:**
|
|
237
|
+
```python
|
|
238
|
+
# Find by role and text
|
|
239
|
+
button = find(snap, "role=button text='Sign in'")
|
|
240
|
+
|
|
241
|
+
# Substring match (case-insensitive)
|
|
242
|
+
link = find(snap, "role=link text~'more info'")
|
|
243
|
+
|
|
244
|
+
# Spatial filtering
|
|
245
|
+
top_left = find(snap, "bbox.x<=100 bbox.y<=200")
|
|
246
|
+
|
|
247
|
+
# Multiple conditions (AND logic)
|
|
248
|
+
primary_btn = find(snap, "role=button clickable=true visible=true importance>800")
|
|
249
|
+
|
|
250
|
+
# Prefix/suffix matching
|
|
251
|
+
starts_with = find(snap, "text^='Add'")
|
|
252
|
+
ends_with = find(snap, "text$='Cart'")
|
|
253
|
+
|
|
254
|
+
# Numeric comparisons
|
|
255
|
+
important = query(snap, "importance>=700")
|
|
256
|
+
first_row = query(snap, "bbox.y<600")
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
**📖 [Complete Query DSL Guide](docs/QUERY_DSL.md)** - All operators, fields, and advanced patterns
|
|
260
|
+
|
|
261
|
+
</details>
|
|
262
|
+
|
|
263
|
+
<details>
|
|
264
|
+
<summary><h3>👆 Actions - Interact with Elements</h3></summary>
|
|
265
|
+
|
|
266
|
+
- **`click(browser, element_id)`** - Click element by ID
|
|
267
|
+
- **`click_rect(browser, rect)`** - Click at center of rectangle (coordinate-based)
|
|
268
|
+
- **`type_text(browser, element_id, text)`** - Type into input fields
|
|
269
|
+
- **`press(browser, key)`** - Press keyboard keys (Enter, Escape, Tab, etc.)
|
|
270
|
+
|
|
271
|
+
All actions return `ActionResult` with success status, timing, and outcome:
|
|
272
|
+
|
|
273
|
+
```python
|
|
274
|
+
result = click(browser, element.id)
|
|
275
|
+
|
|
276
|
+
print(f"Success: {result.success}")
|
|
277
|
+
print(f"Outcome: {result.outcome}") # "navigated", "dom_updated", "error"
|
|
278
|
+
print(f"Duration: {result.duration_ms}ms")
|
|
279
|
+
print(f"URL changed: {result.url_changed}")
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
**Coordinate-based clicking:**
|
|
283
|
+
```python
|
|
284
|
+
from sentience import click_rect
|
|
285
|
+
|
|
286
|
+
# Click at center of rectangle (x, y, width, height)
|
|
287
|
+
click_rect(browser, {"x": 100, "y": 200, "w": 50, "h": 30})
|
|
288
|
+
|
|
289
|
+
# With visual highlight (default: red border for 2 seconds)
|
|
290
|
+
click_rect(browser, {"x": 100, "y": 200, "w": 50, "h": 30}, highlight=True, highlight_duration=2.0)
|
|
291
|
+
|
|
292
|
+
# Using element's bounding box
|
|
293
|
+
snap = snapshot(browser)
|
|
294
|
+
element = find(snap, "role=button")
|
|
295
|
+
if element:
|
|
296
|
+
click_rect(browser, {
|
|
297
|
+
"x": element.bbox.x,
|
|
298
|
+
"y": element.bbox.y,
|
|
299
|
+
"w": element.bbox.width,
|
|
300
|
+
"h": element.bbox.height
|
|
301
|
+
})
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
</details>
|
|
305
|
+
|
|
306
|
+
<details>
|
|
307
|
+
<summary><h3>⏱️ Wait & Assertions</h3></summary>
|
|
308
|
+
|
|
309
|
+
- **`wait_for(browser, selector, timeout=5.0, interval=None, use_api=None)`** - Wait for element to appear
|
|
310
|
+
- **`expect(browser, selector)`** - Assertion helper with fluent API
|
|
311
|
+
|
|
312
|
+
**Examples:**
|
|
313
|
+
```python
|
|
314
|
+
# Wait for element (auto-detects optimal interval based on API usage)
|
|
315
|
+
result = wait_for(browser, "role=button text='Submit'", timeout=10.0)
|
|
316
|
+
if result.found:
|
|
317
|
+
print(f"Found after {result.duration_ms}ms")
|
|
318
|
+
|
|
319
|
+
# Use local extension with fast polling (0.25s interval)
|
|
320
|
+
result = wait_for(browser, "role=button", timeout=5.0, use_api=False)
|
|
321
|
+
|
|
322
|
+
# Use remote API with network-friendly polling (1.5s interval)
|
|
323
|
+
result = wait_for(browser, "role=button", timeout=5.0, use_api=True)
|
|
324
|
+
|
|
325
|
+
# Custom interval override
|
|
326
|
+
result = wait_for(browser, "role=button", timeout=5.0, interval=0.5, use_api=False)
|
|
327
|
+
|
|
328
|
+
# Semantic wait conditions
|
|
329
|
+
wait_for(browser, "clickable=true", timeout=5.0) # Wait for clickable element
|
|
330
|
+
wait_for(browser, "importance>100", timeout=5.0) # Wait for important element
|
|
331
|
+
wait_for(browser, "role=link visible=true", timeout=5.0) # Wait for visible link
|
|
332
|
+
|
|
333
|
+
# Assertions
|
|
334
|
+
expect(browser, "role=button text='Submit'").to_exist(timeout=5.0)
|
|
335
|
+
expect(browser, "role=heading").to_be_visible()
|
|
336
|
+
expect(browser, "role=button").to_have_text("Submit")
|
|
337
|
+
expect(browser, "role=link").to_have_count(10)
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
</details>
|
|
341
|
+
|
|
342
|
+
<details>
|
|
343
|
+
<summary><h3>🎨 Visual Overlay - Debug Element Detection</h3></summary>
|
|
344
|
+
|
|
345
|
+
- **`show_overlay(browser, elements, target_element_id=None)`** - Display visual overlay highlighting elements
|
|
346
|
+
- **`clear_overlay(browser)`** - Clear overlay manually
|
|
347
|
+
|
|
348
|
+
Show color-coded borders around detected elements to debug, validate, and understand what Sentience sees:
|
|
349
|
+
|
|
350
|
+
```python
|
|
351
|
+
from sentience import show_overlay, clear_overlay
|
|
352
|
+
|
|
353
|
+
# Take snapshot once
|
|
354
|
+
snap = snapshot(browser)
|
|
355
|
+
|
|
356
|
+
# Show overlay anytime without re-snapshotting
|
|
357
|
+
show_overlay(browser, snap) # Auto-clears after 5 seconds
|
|
358
|
+
|
|
359
|
+
# Highlight specific target element in red
|
|
360
|
+
button = find(snap, "role=button text~'Submit'")
|
|
361
|
+
show_overlay(browser, snap, target_element_id=button.id)
|
|
362
|
+
|
|
363
|
+
# Clear manually before 5 seconds
|
|
364
|
+
import time
|
|
365
|
+
time.sleep(2)
|
|
366
|
+
clear_overlay(browser)
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
**Color Coding:**
|
|
370
|
+
- 🔴 Red: Target element
|
|
371
|
+
- 🔵 Blue: Primary elements (`is_primary=true`)
|
|
372
|
+
- 🟢 Green: Regular interactive elements
|
|
373
|
+
|
|
374
|
+
**Visual Indicators:**
|
|
375
|
+
- Border thickness/opacity scales with importance
|
|
376
|
+
- Semi-transparent fill
|
|
377
|
+
- Importance badges
|
|
378
|
+
- Star icons for primary elements
|
|
379
|
+
- Auto-clear after 5 seconds
|
|
380
|
+
|
|
381
|
+
</details>
|
|
382
|
+
|
|
383
|
+
<details>
|
|
384
|
+
<summary><h3>📄 Content Reading</h3></summary>
|
|
385
|
+
|
|
386
|
+
**`read(browser, format="text|markdown|raw")`** - Extract page content
|
|
387
|
+
- `format="text"` - Plain text extraction
|
|
388
|
+
- `format="markdown"` - High-quality markdown conversion (uses markdownify)
|
|
389
|
+
- `format="raw"` - Cleaned HTML (default)
|
|
390
|
+
|
|
391
|
+
**Example:**
|
|
392
|
+
```python
|
|
393
|
+
from sentience import read
|
|
394
|
+
|
|
395
|
+
# Get markdown content
|
|
396
|
+
result = read(browser, format="markdown")
|
|
397
|
+
print(result["content"]) # Markdown text
|
|
398
|
+
|
|
399
|
+
# Get plain text
|
|
400
|
+
result = read(browser, format="text")
|
|
401
|
+
print(result["content"]) # Plain text
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
</details>
|
|
405
|
+
|
|
406
|
+
<details>
|
|
407
|
+
<summary><h3>📷 Screenshots</h3></summary>
|
|
408
|
+
|
|
409
|
+
**`screenshot(browser, format="png|jpeg", quality=80)`** - Standalone screenshot capture
|
|
410
|
+
- Returns base64-encoded data URL
|
|
411
|
+
- PNG or JPEG format
|
|
412
|
+
- Quality control for JPEG (1-100)
|
|
413
|
+
|
|
414
|
+
**Example:**
|
|
415
|
+
```python
|
|
416
|
+
from sentience import screenshot
|
|
417
|
+
import base64
|
|
418
|
+
|
|
419
|
+
# Capture PNG screenshot
|
|
420
|
+
data_url = screenshot(browser, format="png")
|
|
421
|
+
|
|
422
|
+
# Save to file
|
|
423
|
+
image_data = base64.b64decode(data_url.split(",")[1])
|
|
424
|
+
with open("screenshot.png", "wb") as f:
|
|
425
|
+
f.write(image_data)
|
|
426
|
+
|
|
427
|
+
# JPEG with quality control (smaller file size)
|
|
428
|
+
data_url = screenshot(browser, format="jpeg", quality=85)
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
</details>
|
|
432
|
+
|
|
433
|
+
<details>
|
|
434
|
+
<summary><h3>🔎 Text Search - Find Elements by Visible Text</h3></summary>
|
|
435
|
+
|
|
436
|
+
**`find_text_rect(browser, text, case_sensitive=False, whole_word=False, max_results=10)`** - Find text on page and get exact pixel coordinates
|
|
437
|
+
|
|
438
|
+
Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.
|
|
439
|
+
|
|
440
|
+
**Example:**
|
|
441
|
+
```python
|
|
442
|
+
from sentience import SentienceBrowser, find_text_rect, click_rect
|
|
443
|
+
|
|
444
|
+
with SentienceBrowser() as browser:
|
|
445
|
+
browser.page.goto("https://example.com")
|
|
446
|
+
|
|
447
|
+
# Find "Sign In" button
|
|
448
|
+
result = find_text_rect(browser, "Sign In")
|
|
449
|
+
if result.status == "success" and result.results:
|
|
450
|
+
first_match = result.results[0]
|
|
451
|
+
print(f"Found at: ({first_match.rect.x}, {first_match.rect.y})")
|
|
452
|
+
print(f"In viewport: {first_match.in_viewport}")
|
|
453
|
+
|
|
454
|
+
# Click on the found text
|
|
455
|
+
if first_match.in_viewport:
|
|
456
|
+
click_rect(browser, {
|
|
457
|
+
"x": first_match.rect.x,
|
|
458
|
+
"y": first_match.rect.y,
|
|
459
|
+
"w": first_match.rect.width,
|
|
460
|
+
"h": first_match.rect.height
|
|
461
|
+
})
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
**Advanced Options:**
|
|
465
|
+
```python
|
|
466
|
+
# Case-sensitive search
|
|
467
|
+
result = find_text_rect(browser, "LOGIN", case_sensitive=True)
|
|
468
|
+
|
|
469
|
+
# Whole word only (won't match "log" as part of "login" or "logout")
|
|
470
|
+
result = find_text_rect(browser, "log", whole_word=True)
|
|
471
|
+
|
|
472
|
+
# Find multiple matches
|
|
473
|
+
result = find_text_rect(browser, "Buy", max_results=10)
|
|
474
|
+
for match in result.results:
|
|
475
|
+
if match.in_viewport:
|
|
476
|
+
print(f"Found '{match.text}' at ({match.rect.x}, {match.rect.y})")
|
|
477
|
+
print(f"Context: ...{match.context.before}[{match.text}]{match.context.after}...")
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
**Returns:** `TextRectSearchResult` with:
|
|
481
|
+
- **`status`**: "success" or "error"
|
|
482
|
+
- **`results`**: List of `TextMatch` objects with:
|
|
483
|
+
- `text` - The matched text
|
|
484
|
+
- `rect` - Absolute coordinates (with scroll offset)
|
|
485
|
+
- `viewport_rect` - Viewport-relative coordinates
|
|
486
|
+
- `context` - Surrounding text (before/after)
|
|
487
|
+
- `in_viewport` - Whether visible in current viewport
|
|
488
|
+
|
|
489
|
+
**Use Cases:**
|
|
490
|
+
- Find buttons/links by visible text without CSS selectors
|
|
491
|
+
- Get exact pixel coordinates for click automation
|
|
492
|
+
- Verify text visibility and position on page
|
|
493
|
+
- Search dynamic content that changes frequently
|
|
494
|
+
|
|
495
|
+
**Note:** Does not consume API credits (runs locally in browser)
|
|
496
|
+
|
|
497
|
+
**See example:** `examples/find_text_demo.py`
|
|
498
|
+
|
|
499
|
+
</details>
|
|
500
|
+
|
|
501
|
+
---
|
|
502
|
+
|
|
503
|
+
## 📋 Reference
|
|
504
|
+
|
|
505
|
+
<details>
|
|
506
|
+
<summary><h3>Element Properties</h3></summary>
|
|
507
|
+
|
|
508
|
+
Elements returned by `snapshot()` have the following properties:
|
|
509
|
+
|
|
510
|
+
```python
|
|
511
|
+
element.id # Unique identifier for interactions
|
|
512
|
+
element.role # ARIA role (button, link, textbox, heading, etc.)
|
|
513
|
+
element.text # Visible text content
|
|
514
|
+
element.importance # AI importance score (0-1000)
|
|
515
|
+
element.bbox # Bounding box (x, y, width, height)
|
|
516
|
+
element.visual_cues # Visual analysis (is_primary, is_clickable, background_color)
|
|
517
|
+
element.in_viewport # Is element visible in current viewport?
|
|
518
|
+
element.is_occluded # Is element covered by other elements?
|
|
519
|
+
element.z_index # CSS stacking order
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
</details>
|
|
523
|
+
|
|
524
|
+
<details>
|
|
525
|
+
<summary><h3>Query DSL Reference</h3></summary>
|
|
526
|
+
|
|
527
|
+
### Basic Operators
|
|
528
|
+
|
|
529
|
+
| Operator | Description | Example |
|
|
530
|
+
|----------|-------------|---------|
|
|
531
|
+
| `=` | Exact match | `role=button` |
|
|
532
|
+
| `!=` | Exclusion | `role!=link` |
|
|
533
|
+
| `~` | Substring (case-insensitive) | `text~'sign in'` |
|
|
534
|
+
| `^=` | Prefix match | `text^='Add'` |
|
|
535
|
+
| `$=` | Suffix match | `text$='Cart'` |
|
|
536
|
+
| `>`, `>=` | Greater than, greater than or equal | `importance>500` |
|
|
537
|
+
| `<`, `<=` | Less than, less than or equal | `bbox.y<600` |
|
|
538
|
+
|
|
539
|
+
### Supported Fields
|
|
540
|
+
|
|
541
|
+
- **Role**: `role=button|link|textbox|heading|...`
|
|
542
|
+
- **Text**: `text`, `text~`, `text^=`, `text$=`
|
|
543
|
+
- **Visibility**: `clickable=true|false`, `visible=true|false`
|
|
544
|
+
- **Importance**: `importance`, `importance>=N`, `importance<N`
|
|
545
|
+
- **Position**: `bbox.x`, `bbox.y`, `bbox.width`, `bbox.height`
|
|
546
|
+
- **Layering**: `z_index`
|
|
547
|
+
|
|
548
|
+
</details>
|
|
549
|
+
|
|
550
|
+
---
|
|
551
|
+
|
|
552
|
+
## ⚙️ Configuration
|
|
553
|
+
|
|
554
|
+
<details>
|
|
555
|
+
<summary><h3>Viewport Size</h3></summary>
|
|
556
|
+
|
|
557
|
+
Default viewport is **1280x800** pixels. You can customize it using Playwright's API:
|
|
558
|
+
|
|
559
|
+
```python
|
|
560
|
+
with SentienceBrowser(headless=False) as browser:
|
|
561
|
+
# Set custom viewport before navigating
|
|
562
|
+
browser.page.set_viewport_size({"width": 1920, "height": 1080})
|
|
563
|
+
|
|
564
|
+
browser.goto("https://example.com")
|
|
565
|
+
```
|
|
566
|
+
|
|
567
|
+
</details>
|
|
568
|
+
|
|
569
|
+
<details>
|
|
570
|
+
<summary><h3>Headless Mode</h3></summary>
|
|
571
|
+
|
|
572
|
+
```python
|
|
573
|
+
# Headed mode (default in dev, shows browser window)
|
|
574
|
+
browser = SentienceBrowser(headless=False)
|
|
575
|
+
|
|
576
|
+
# Headless mode (default in CI environments)
|
|
577
|
+
browser = SentienceBrowser(headless=True)
|
|
578
|
+
|
|
579
|
+
# Auto-detect based on environment
|
|
580
|
+
browser = SentienceBrowser() # headless=True if CI=true, else False
|
|
581
|
+
```
|
|
582
|
+
|
|
583
|
+
</details>
|
|
584
|
+
|
|
585
|
+
<details>
|
|
586
|
+
<summary><h3>🌍 Residential Proxy Support</h3></summary>
|
|
587
|
+
|
|
588
|
+
Use residential proxies to route traffic and protect your IP address. Supports HTTP, HTTPS, and SOCKS5 with automatic SSL certificate handling:
|
|
589
|
+
|
|
590
|
+
```python
|
|
591
|
+
# Method 1: Direct configuration
|
|
592
|
+
browser = SentienceBrowser(proxy="http://user:pass@proxy.example.com:8080")
|
|
593
|
+
|
|
594
|
+
# Method 2: Environment variable
|
|
595
|
+
# export SENTIENCE_PROXY="http://user:pass@proxy.example.com:8080"
|
|
596
|
+
browser = SentienceBrowser()
|
|
597
|
+
|
|
598
|
+
# Works with agents
|
|
599
|
+
llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
|
|
600
|
+
agent = SentienceAgent(browser, llm)
|
|
601
|
+
|
|
602
|
+
with browser:
|
|
603
|
+
browser.page.goto("https://example.com")
|
|
604
|
+
agent.act("Search for products")
|
|
605
|
+
# All traffic routed through proxy with WebRTC leak protection
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
**Features:**
|
|
609
|
+
- HTTP, HTTPS, SOCKS5 proxy support
|
|
610
|
+
- Username/password authentication
|
|
611
|
+
- Automatic self-signed SSL certificate handling
|
|
612
|
+
- WebRTC IP leak protection (automatic)
|
|
613
|
+
|
|
614
|
+
See `examples/residential_proxy_agent.py` for complete examples.
|
|
615
|
+
|
|
616
|
+
</details>
|
|
617
|
+
|
|
618
|
+
<details>
|
|
619
|
+
<summary><h3>🔐 Authentication Session Injection</h3></summary>
|
|
620
|
+
|
|
621
|
+
Inject pre-recorded authentication sessions (cookies + localStorage) to start your agent already logged in, bypassing login screens, 2FA, and CAPTCHAs. This saves tokens and reduces costs by eliminating login steps.
|
|
622
|
+
|
|
623
|
+
```python
|
|
624
|
+
# Workflow 1: Inject pre-recorded session from file
|
|
625
|
+
from sentience import SentienceBrowser, save_storage_state
|
|
626
|
+
|
|
627
|
+
# Save session after manual login
|
|
628
|
+
browser = SentienceBrowser()
|
|
629
|
+
browser.start()
|
|
630
|
+
browser.goto("https://example.com")
|
|
631
|
+
# ... log in manually ...
|
|
632
|
+
save_storage_state(browser.context, "auth.json")
|
|
633
|
+
|
|
634
|
+
# Use saved session in future runs
|
|
635
|
+
browser = SentienceBrowser(storage_state="auth.json")
|
|
636
|
+
browser.start()
|
|
637
|
+
# Agent starts already logged in!
|
|
638
|
+
|
|
639
|
+
# Workflow 2: Persistent sessions (cookies persist across runs)
|
|
640
|
+
browser = SentienceBrowser(user_data_dir="./chrome_profile")
|
|
641
|
+
browser.start()
|
|
642
|
+
# First run: Log in
|
|
643
|
+
# Second run: Already logged in (cookies persist automatically)
|
|
644
|
+
```
|
|
645
|
+
|
|
646
|
+
**Benefits:**
|
|
647
|
+
- Bypass login screens and CAPTCHAs with valid sessions
|
|
648
|
+
- Save 5-10 agent steps and hundreds of tokens per run
|
|
649
|
+
- Maintain stateful sessions for accessing authenticated pages
|
|
650
|
+
- Act as authenticated users (e.g., "Go to my Orders page")
|
|
651
|
+
|
|
652
|
+
See `examples/auth_injection_agent.py` for complete examples.
|
|
653
|
+
|
|
654
|
+
</details>
|
|
655
|
+
|
|
656
|
+
---
|
|
657
|
+
|
|
658
|
+
## 💡 Best Practices
|
|
659
|
+
|
|
660
|
+
<details>
|
|
661
|
+
<summary>Click to expand best practices</summary>
|
|
662
|
+
|
|
663
|
+
### 1. Wait for Dynamic Content
|
|
664
|
+
```python
|
|
665
|
+
browser.goto("https://example.com", wait_until="domcontentloaded")
|
|
666
|
+
time.sleep(1) # Extra buffer for AJAX/animations
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
### 2. Use Multiple Strategies for Finding Elements
|
|
670
|
+
```python
|
|
671
|
+
# Try exact match first
|
|
672
|
+
btn = find(snap, "role=button text='Add to Cart'")
|
|
673
|
+
|
|
674
|
+
# Fallback to fuzzy match
|
|
675
|
+
if not btn:
|
|
676
|
+
btn = find(snap, "role=button text~'cart'")
|
|
677
|
+
```
|
|
678
|
+
|
|
679
|
+
### 3. Check Element Visibility Before Clicking
|
|
680
|
+
```python
|
|
681
|
+
if element.in_viewport and not element.is_occluded:
|
|
682
|
+
click(browser, element.id)
|
|
683
|
+
```
|
|
684
|
+
|
|
685
|
+
### 4. Handle Navigation
|
|
686
|
+
```python
|
|
687
|
+
result = click(browser, link_id)
|
|
688
|
+
if result.url_changed:
|
|
689
|
+
browser.page.wait_for_load_state("networkidle")
|
|
690
|
+
```
|
|
691
|
+
|
|
692
|
+
### 5. Use Screenshots Sparingly
|
|
693
|
+
```python
|
|
694
|
+
# Fast - no screenshot (only element data)
|
|
695
|
+
snap = snapshot(browser)
|
|
696
|
+
|
|
697
|
+
# Slower - with screenshot (for debugging/verification)
|
|
698
|
+
snap = snapshot(browser, screenshot=True)
|
|
699
|
+
```
|
|
700
|
+
|
|
701
|
+
</details>
|
|
702
|
+
|
|
703
|
+
---
|
|
704
|
+
|
|
705
|
+
## 🛠️ Troubleshooting
|
|
706
|
+
|
|
707
|
+
<details>
|
|
708
|
+
<summary>Click to expand common issues and solutions</summary>
|
|
709
|
+
|
|
710
|
+
### "Extension failed to load"
|
|
711
|
+
**Solution:** Build the extension first:
|
|
712
|
+
```bash
|
|
713
|
+
cd sentience-chrome
|
|
714
|
+
./build.sh
|
|
715
|
+
```
|
|
716
|
+
|
|
717
|
+
### "Element not found"
|
|
718
|
+
**Solutions:**
|
|
719
|
+
- Ensure page is loaded: `browser.page.wait_for_load_state("networkidle")`
|
|
720
|
+
- Use `wait_for()`: `wait_for(browser, "role=button", timeout=10)`
|
|
721
|
+
- Debug elements: `print([el.text for el in snap.elements])`
|
|
722
|
+
|
|
723
|
+
### Button not clickable
|
|
724
|
+
**Solutions:**
|
|
725
|
+
- Check visibility: `element.in_viewport and not element.is_occluded`
|
|
726
|
+
- Scroll to element: `browser.page.evaluate(f"window.sentience_registry[{element.id}].scrollIntoView()")`
|
|
727
|
+
|
|
728
|
+
</details>
|
|
729
|
+
|
|
730
|
+
---
|
|
731
|
+
|
|
732
|
+
## 🔬 Advanced Features (v0.12.0+)
|
|
733
|
+
|
|
734
|
+
<details>
|
|
735
|
+
<summary><h3>📊 Agent Tracing & Debugging</h3></summary>
|
|
736
|
+
|
|
737
|
+
The SDK now includes built-in tracing infrastructure for debugging and analyzing agent behavior:
|
|
738
|
+
|
|
739
|
+
```python
|
|
740
|
+
from sentience import SentienceBrowser, SentienceAgent
|
|
741
|
+
from sentience.llm_provider import OpenAIProvider
|
|
742
|
+
from sentience.tracing import Tracer, JsonlTraceSink
|
|
743
|
+
from sentience.agent_config import AgentConfig
|
|
744
|
+
|
|
745
|
+
# Create tracer to record agent execution
|
|
746
|
+
tracer = Tracer(
|
|
747
|
+
run_id="my-agent-run-123",
|
|
748
|
+
sink=JsonlTraceSink("trace.jsonl")
|
|
749
|
+
)
|
|
750
|
+
|
|
751
|
+
# Configure agent behavior
|
|
752
|
+
config = AgentConfig(
|
|
753
|
+
snapshot_limit=50,
|
|
754
|
+
temperature=0.0,
|
|
755
|
+
max_retries=1,
|
|
756
|
+
capture_screenshots=True
|
|
757
|
+
)
|
|
758
|
+
|
|
759
|
+
browser = SentienceBrowser()
|
|
760
|
+
llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
|
|
761
|
+
|
|
762
|
+
# Pass tracer and config to agent
|
|
763
|
+
agent = SentienceAgent(browser, llm, tracer=tracer, config=config)
|
|
764
|
+
|
|
765
|
+
with browser:
|
|
766
|
+
    browser.page.goto("https://example.com")
|
|
767
|
+
|
|
768
|
+
# All actions are automatically traced
|
|
769
|
+
    agent.act("Click the sign in button")
|
|
770
|
+
    agent.act("Type 'user@example.com' into email field")
|
|
771
|
+
|
|
772
|
+
# Trace events saved to trace.jsonl
|
|
773
|
+
# Events: step_start, snapshot, llm_query, action, step_end, error
|
|
774
|
+
```
|
|
775
|
+
|
|
776
|
+
**Trace Events Captured:**
|
|
777
|
+
- `step_start` - Agent begins executing a goal
|
|
778
|
+
- `snapshot` - Page state captured
|
|
779
|
+
- `llm_query` - LLM decision made (includes tokens, model, response)
|
|
780
|
+
- `action` - Action executed (click, type, press)
|
|
781
|
+
- `step_end` - Step completed successfully
|
|
782
|
+
- `error` - Error occurred during execution
|
|
783
|
+
|
|
784
|
+
**Use Cases:**
|
|
785
|
+
- Debug why agent failed or got stuck
|
|
786
|
+
- Analyze token usage and costs
|
|
787
|
+
- Replay agent sessions
|
|
788
|
+
- Train custom models from successful runs
|
|
789
|
+
- Monitor production agents
|
|
790
|
+
|
|
791
|
+
</details>
|
|
792
|
+
|
|
793
|
+
<details>
|
|
794
|
+
<summary><h3>🧰 Snapshot Utilities</h3></summary>
|
|
795
|
+
|
|
796
|
+
New utility functions for working with snapshots:
|
|
797
|
+
|
|
798
|
+
```python
|
|
799
|
+
from sentience import snapshot
|
|
800
|
+
from sentience.utils import compute_snapshot_digests, canonical_snapshot_strict
|
|
801
|
+
from sentience.formatting import format_snapshot_for_llm
|
|
802
|
+
|
|
803
|
+
snap = snapshot(browser)
|
|
804
|
+
|
|
805
|
+
# Compute snapshot fingerprints (detect page changes)
|
|
806
|
+
digests = compute_snapshot_digests(snap.elements)
|
|
807
|
+
print(f"Strict digest: {digests['strict']}") # Changes when text changes
|
|
808
|
+
print(f"Loose digest: {digests['loose']}") # Only changes when layout changes
|
|
809
|
+
|
|
810
|
+
# Format snapshot for LLM prompts
|
|
811
|
+
llm_context = format_snapshot_for_llm(snap, limit=50)
|
|
812
|
+
print(llm_context)
|
|
813
|
+
# Output: [1] <button> "Sign In" {PRIMARY,CLICKABLE} @ (100,50) (Imp:10)
|
|
814
|
+
```
|
|
815
|
+
|
|
816
|
+
</details>
|
|
817
|
+
|
|
818
|
+
---
|
|
819
|
+
|
|
820
|
+
## 📖 Documentation
|
|
821
|
+
|
|
822
|
+
- **📖 [Amazon Shopping Guide](../docs/AMAZON_SHOPPING_GUIDE.md)** - Complete tutorial with real-world example
|
|
823
|
+
- **📖 [Query DSL Guide](docs/QUERY_DSL.md)** - Advanced query patterns and operators
|
|
824
|
+
- **📄 [API Contract](../spec/SNAPSHOT_V1.md)** - Snapshot API specification
|
|
825
|
+
- **📄 [Type Definitions](../spec/sdk-types.md)** - TypeScript/Python type definitions
|
|
826
|
+
|
|
827
|
+
---
|
|
828
|
+
|
|
829
|
+
## 💻 Examples & Testing
|
|
830
|
+
|
|
831
|
+
<details>
|
|
832
|
+
<summary><h3>Examples</h3></summary>
|
|
833
|
+
|
|
834
|
+
See the `examples/` directory for complete working examples:
|
|
835
|
+
|
|
836
|
+
- **`hello.py`** - Extension bridge verification
|
|
837
|
+
- **`basic_agent.py`** - Basic snapshot and element inspection
|
|
838
|
+
- **`query_demo.py`** - Query engine demonstrations
|
|
839
|
+
- **`wait_and_click.py`** - Waiting for elements and performing actions
|
|
840
|
+
- **`read_markdown.py`** - Content extraction and markdown conversion
|
|
841
|
+
|
|
842
|
+
</details>
|
|
843
|
+
|
|
844
|
+
<details>
|
|
845
|
+
<summary><h3>Testing</h3></summary>
|
|
846
|
+
|
|
847
|
+
```bash
|
|
848
|
+
# Run all tests
|
|
849
|
+
pytest tests/
|
|
850
|
+
|
|
851
|
+
# Run specific test file
|
|
852
|
+
pytest tests/test_snapshot.py
|
|
853
|
+
|
|
854
|
+
# Run with verbose output
|
|
855
|
+
pytest -v tests/
|
|
856
|
+
```
|
|
857
|
+
|
|
858
|
+
</details>
|
|
859
|
+
|
|
860
|
+
---
|
|
861
|
+
|
|
862
|
+
## 📜 License
|
|
863
|
+
|
|
864
|
+
This SDK is licensed under the **Elastic License 2.0 (ELv2)**.
|
|
865
|
+
|
|
866
|
+
The Elastic License 2.0 allows you to use, modify, and distribute this SDK for internal, research, and non-competitive purposes. It **does not permit offering this SDK or a derivative as a hosted or managed service**, nor using it to build a competing product or service.
|
|
867
|
+
|
|
868
|
+
### Important Notes
|
|
869
|
+
|
|
870
|
+
- This SDK is a **client-side library** that communicates with proprietary Sentience services and browser components.
|
|
871
|
+
|
|
872
|
+
- The Sentience backend services (including semantic geometry grounding, ranking, visual cues, and trace processing) are **not open source** and are governed by Sentience's Terms of Service.
|
|
873
|
+
|
|
874
|
+
- Use of this SDK does **not** grant rights to operate, replicate, or reimplement Sentience's hosted services.
|
|
875
|
+
|
|
876
|
+
For commercial usage, hosted offerings, or enterprise deployments, please contact Sentience to obtain a commercial license.
|
|
877
|
+
|
|
878
|
+
See the full license text in [`LICENSE`](./LICENSE.md).
|