sentienceapi 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentienceapi might be problematic. Click here for more details.
- sentience/__init__.py +120 -6
- sentience/_extension_loader.py +156 -1
- sentience/action_executor.py +217 -0
- sentience/actions.py +758 -30
- sentience/agent.py +806 -293
- sentience/agent_config.py +3 -0
- sentience/agent_runtime.py +840 -0
- sentience/asserts/__init__.py +70 -0
- sentience/asserts/expect.py +621 -0
- sentience/asserts/query.py +383 -0
- sentience/async_api.py +89 -1141
- sentience/backends/__init__.py +137 -0
- sentience/backends/actions.py +372 -0
- sentience/backends/browser_use_adapter.py +241 -0
- sentience/backends/cdp_backend.py +393 -0
- sentience/backends/exceptions.py +211 -0
- sentience/backends/playwright_backend.py +194 -0
- sentience/backends/protocol.py +216 -0
- sentience/backends/sentience_context.py +469 -0
- sentience/backends/snapshot.py +483 -0
- sentience/base_agent.py +95 -0
- sentience/browser.py +678 -39
- sentience/browser_evaluator.py +299 -0
- sentience/canonicalization.py +207 -0
- sentience/cloud_tracing.py +507 -42
- sentience/constants.py +6 -0
- sentience/conversational_agent.py +77 -43
- sentience/cursor_policy.py +142 -0
- sentience/element_filter.py +136 -0
- sentience/expect.py +98 -2
- sentience/extension/background.js +56 -185
- sentience/extension/content.js +150 -287
- sentience/extension/injected_api.js +1088 -1368
- sentience/extension/manifest.json +1 -1
- sentience/extension/pkg/sentience_core.d.ts +22 -22
- sentience/extension/pkg/sentience_core.js +275 -433
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/release.json +47 -47
- sentience/failure_artifacts.py +241 -0
- sentience/formatting.py +9 -53
- sentience/inspector.py +183 -1
- sentience/integrations/__init__.py +6 -0
- sentience/integrations/langchain/__init__.py +12 -0
- sentience/integrations/langchain/context.py +18 -0
- sentience/integrations/langchain/core.py +326 -0
- sentience/integrations/langchain/tools.py +180 -0
- sentience/integrations/models.py +46 -0
- sentience/integrations/pydanticai/__init__.py +15 -0
- sentience/integrations/pydanticai/deps.py +20 -0
- sentience/integrations/pydanticai/toolset.py +468 -0
- sentience/llm_interaction_handler.py +191 -0
- sentience/llm_provider.py +765 -66
- sentience/llm_provider_utils.py +120 -0
- sentience/llm_response_builder.py +153 -0
- sentience/models.py +595 -3
- sentience/ordinal.py +280 -0
- sentience/overlay.py +109 -2
- sentience/protocols.py +228 -0
- sentience/query.py +67 -5
- sentience/read.py +95 -3
- sentience/recorder.py +223 -3
- sentience/schemas/trace_v1.json +128 -9
- sentience/screenshot.py +48 -2
- sentience/sentience_methods.py +86 -0
- sentience/snapshot.py +599 -55
- sentience/snapshot_diff.py +126 -0
- sentience/text_search.py +120 -5
- sentience/trace_event_builder.py +148 -0
- sentience/trace_file_manager.py +197 -0
- sentience/trace_indexing/index_schema.py +95 -7
- sentience/trace_indexing/indexer.py +105 -48
- sentience/tracer_factory.py +120 -9
- sentience/tracing.py +172 -8
- sentience/utils/__init__.py +40 -0
- sentience/utils/browser.py +46 -0
- sentience/{utils.py → utils/element.py} +3 -42
- sentience/utils/formatting.py +59 -0
- sentience/verification.py +618 -0
- sentience/visual_agent.py +2058 -0
- sentience/wait.py +68 -2
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/METADATA +199 -40
- sentienceapi-0.98.0.dist-info/RECORD +92 -0
- sentience/extension/test-content.js +0 -4
- sentienceapi-0.90.16.dist-info/RECORD +0 -50
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/WHEEL +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/entry_points.txt +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-APACHE +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-MIT +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/top_level.txt +0 -0
sentience/models.py
CHANGED
|
@@ -2,7 +2,10 @@
|
|
|
2
2
|
Pydantic models for Sentience SDK - matches spec/snapshot.schema.json
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any, Literal
|
|
6
9
|
|
|
7
10
|
from pydantic import BaseModel, Field
|
|
8
11
|
|
|
@@ -28,6 +31,7 @@ class VisualCues(BaseModel):
|
|
|
28
31
|
|
|
29
32
|
is_primary: bool
|
|
30
33
|
background_color_name: str | None = None
|
|
34
|
+
fallback_background_color_name: str | None = None
|
|
31
35
|
is_clickable: bool
|
|
32
36
|
|
|
33
37
|
|
|
@@ -50,6 +54,96 @@ class Element(BaseModel):
|
|
|
50
54
|
ml_probability: float | None = None # Confidence score from ONNX model (0.0 - 1.0)
|
|
51
55
|
ml_score: float | None = None # Raw logit score (optional, for debugging)
|
|
52
56
|
|
|
57
|
+
# Diff status for frontend Diff Overlay feature
|
|
58
|
+
diff_status: Literal["ADDED", "REMOVED", "MODIFIED", "MOVED"] | None = None
|
|
59
|
+
|
|
60
|
+
# Phase 1: Ordinal support fields for position-based selection
|
|
61
|
+
center_x: float | None = None # X coordinate of element center (viewport coords)
|
|
62
|
+
center_y: float | None = None # Y coordinate of element center (viewport coords)
|
|
63
|
+
doc_y: float | None = None # Y coordinate in document (center_y + scroll_y)
|
|
64
|
+
group_key: str | None = None # Geometric bucket key for ordinal grouping
|
|
65
|
+
group_index: int | None = None # Position within group (0-indexed, sorted by doc_y)
|
|
66
|
+
|
|
67
|
+
# Hyperlink URL (for link elements)
|
|
68
|
+
href: str | None = None
|
|
69
|
+
|
|
70
|
+
# Nearby static text (best-effort, usually only for top-ranked elements)
|
|
71
|
+
nearby_text: str | None = None
|
|
72
|
+
|
|
73
|
+
# ===== v1 state-aware assertion fields (optional) =====
|
|
74
|
+
# Best-effort accessible name/label for controls (distinct from visible text)
|
|
75
|
+
name: str | None = None
|
|
76
|
+
# Current value for inputs/textarea/select (PII-aware: may be omitted/redacted)
|
|
77
|
+
value: str | None = None
|
|
78
|
+
# Input type (e.g., "text", "email", "password")
|
|
79
|
+
input_type: str | None = None
|
|
80
|
+
# Whether value was redacted for privacy
|
|
81
|
+
value_redacted: bool | None = None
|
|
82
|
+
# Normalized boolean states (best-effort)
|
|
83
|
+
checked: bool | None = None
|
|
84
|
+
disabled: bool | None = None
|
|
85
|
+
expanded: bool | None = None
|
|
86
|
+
# Raw ARIA state strings (tri-state / debugging)
|
|
87
|
+
aria_checked: str | None = None
|
|
88
|
+
aria_disabled: str | None = None
|
|
89
|
+
aria_expanded: str | None = None
|
|
90
|
+
|
|
91
|
+
# Phase 3.2: Pre-computed dominant group membership (uses fuzzy matching)
|
|
92
|
+
# This field is computed by the gateway so downstream consumers don't need to
|
|
93
|
+
# implement fuzzy matching logic themselves.
|
|
94
|
+
in_dominant_group: bool | None = None
|
|
95
|
+
|
|
96
|
+
# Layout-derived metadata (internal-only in v0, not exposed in API responses)
|
|
97
|
+
# Per ChatGPT feedback: explicitly optional to prevent users assuming layout is always present
|
|
98
|
+
# Note: This field is marked with skip_serializing_if in Rust, so it won't appear in API responses
|
|
99
|
+
layout: LayoutHints | None = None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class GridPosition(BaseModel):
    """Row/column coordinates of an element inside a detected grid or list."""

    # 0-based row index
    row_index: int
    # 0-based column index
    col_index: int
    # ID of the row cluster (for distinguishing separate grids)
    cluster_id: int
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class LayoutHints(BaseModel):
    """Layout-derived metadata attached to a single element (internal-only in v0)."""

    # Grid this element belongs to; maps to GridInfo.grid_id so that several
    # grids on one page (main feed, sidebar lists, nav links) can be told apart.
    grid_id: int | None = None

    # Row/column position of the element inside that grid.
    grid_pos: GridPosition | None = None

    # Inferred index of the parent in the elements array.
    parent_index: int | None = None

    # Indices of child elements. Optional on purpose: container elements can
    # have hundreds of children, which would bloat the payload.
    children_indices: list[int] | None = None

    # Confidence score for the grid position assignment (0.0-1.0).
    grid_confidence: float = 0.0

    # Confidence score for the parent-child containment (0.0-1.0).
    parent_confidence: float = 0.0

    # Optional page region; a strong signal for ordinality and dominant-group
    # detection when it is present.
    region: Literal["header", "nav", "main", "aside", "footer"] | None = None

    # Confidence score for the region assignment (0.0-1.0).
    region_confidence: float = 0.0
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class GridInfo(BaseModel):
    """Bounding box plus summary metadata for one detected grid."""

    # The grid ID (matches grid_id in LayoutHints)
    grid_id: int
    # Bounding box: x, y, width, height (document coordinates)
    bbox: BBox
    # Number of rows in the grid
    row_count: int
    # Number of columns in the grid
    col_count: int
    # Total number of items in the grid
    item_count: int
    # Confidence score (currently 1.0)
    confidence: float = 1.0
    # Optional inferred label (e.g., "product_grid", "search_results", "navigation")
    label: str | None = None
    # Whether this grid is the dominant group (main content area)
    is_dominant: bool = False
|
|
146
|
+
|
|
53
147
|
|
|
54
148
|
class Snapshot(BaseModel):
|
|
55
149
|
"""Snapshot response from extension"""
|
|
@@ -63,14 +157,436 @@ class Snapshot(BaseModel):
|
|
|
63
157
|
screenshot_format: Literal["png", "jpeg"] | None = None
|
|
64
158
|
error: str | None = None
|
|
65
159
|
requires_license: bool | None = None
|
|
160
|
+
# Phase 2: Dominant group key for ordinal selection
|
|
161
|
+
dominant_group_key: str | None = None # The most common group_key (main content group)
|
|
162
|
+
# Phase 2: Runtime stability/debug info (confidence/reasons/metrics)
|
|
163
|
+
diagnostics: SnapshotDiagnostics | None = None
|
|
66
164
|
|
|
67
165
|
def save(self, filepath: str) -> None:
    """
    Write this snapshot to ``filepath`` as JSON.

    The file is opened for writing as UTF-8 and receives the result of
    ``model_dump()`` serialized with a two-space indent.

    Args:
        filepath: Destination path; an existing file is overwritten.
    """
    import json

    with open(filepath, mode="w", encoding="utf-8") as out_file:
        json.dump(self.model_dump(), out_file, indent=2)
|
|
73
171
|
|
|
172
|
+
def get_grid_bounds(self, grid_id: int | None = None) -> list[GridInfo]:
    """
    Get grid coordinates (bounding boxes) for detected grids.

    Elements are grouped by ``layout.grid_id``. For every group the enclosing
    bounding box, the number of distinct row/column indices, the item count
    and a best-effort label are computed.

    The dominant grid is selected across *all* detected grids before the
    ``grid_id`` filter is applied, so ``is_dominant`` for a given grid is the
    same whether that grid is requested on its own or as part of the full
    list. At most one grid is marked dominant.

    Args:
        grid_id: Optional grid ID to filter by. If None, returns all grids.

    Returns:
        List of GridInfo objects, one per returned grid, sorted by grid_id.
        Empty when no element carries the requested ``grid_id``.
    """
    from collections import defaultdict

    # Group elements by grid_id. A group only exists once an element has been
    # appended to it, so no group is ever empty.
    grid_elements: dict[int, list[Element]] = defaultdict(list)
    for elem in self.elements:
        if elem.layout and elem.layout.grid_id is not None:
            grid_elements[elem.layout.grid_id].append(elem)

    if grid_id is not None and grid_id not in grid_elements:
        return []

    # Per grid: (elements flagged in_dominant_group, total elements).
    # Built in ascending grid_id order so ties below resolve to the lowest id.
    dominant_counts: dict[int, tuple[int, int]] = {
        gid: (
            sum(1 for elem in members if elem.in_dominant_group is True),
            len(members),
        )
        for gid, members in sorted(grid_elements.items())
    }

    # Pick the dominant grid: highest absolute count, ties broken by the
    # highest share of dominant-group elements.
    dominant_gid: int | None = None
    best_count = max((count for count, _ in dominant_counts.values()), default=0)
    if best_count > 0:
        tied = [gid for gid, (count, _) in dominant_counts.items() if count == best_count]
        candidate = max(
            tied,
            key=lambda gid: dominant_counts[gid][0] / dominant_counts[gid][1],
        )
        count, total = dominant_counts[candidate]
        # Only mark as dominant if it has >= 3 dominant-group elements or they
        # make up at least half of the grid.
        if count >= 3 or count / total >= 0.5:
            dominant_gid = candidate

    selected_ids = sorted(grid_elements) if grid_id is None else [grid_id]

    grid_infos: list[GridInfo] = []
    for gid in selected_ids:
        members = grid_elements[gid]

        # Bounding box enclosing every element of the grid.
        min_x = min(elem.bbox.x for elem in members)
        min_y = min(elem.bbox.y for elem in members)
        max_x = max(elem.bbox.x + elem.bbox.width for elem in members)
        max_y = max(elem.bbox.y + elem.bbox.height for elem in members)

        # Distinct row/column indices among elements that carry a grid position.
        positions = [elem.layout.grid_pos for elem in members if elem.layout.grid_pos]
        row_indices = {pos.row_index for pos in positions}
        col_indices = {pos.col_index for pos in positions}

        grid_infos.append(
            GridInfo(
                grid_id=gid,
                bbox=BBox(
                    x=min_x,
                    y=min_y,
                    width=max_x - min_x,
                    height=max_y - min_y,
                ),
                row_count=len(row_indices),
                col_count=len(col_indices),
                item_count=len(members),
                confidence=1.0,
                # The label heuristic lives in one place, on SnapshotDiagnostics.
                label=SnapshotDiagnostics._infer_grid_label(members),
                is_dominant=gid == dominant_gid,
            )
        )

    return grid_infos
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class SnapshotDiagnosticsMetrics(BaseModel):
    """Optional numeric/state metrics carried inside SnapshotDiagnostics."""

    # Every metric is optional and defaults to None.
    ready_state: str | None = None
    quiet_ms: float | None = None
    node_count: int | None = None
    interactive_count: int | None = None
    raw_elements_count: int | None = None
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class SnapshotDiagnostics(BaseModel):
|
|
294
|
+
"""Runtime stability/debug information (reserved for diagnostics, not ML metadata)."""
|
|
295
|
+
|
|
296
|
+
confidence: float | None = None
|
|
297
|
+
reasons: list[str] = []
|
|
298
|
+
metrics: SnapshotDiagnosticsMetrics | None = None
|
|
299
|
+
|
|
300
|
+
def get_grid_bounds(self, grid_id: int | None = None) -> list[GridInfo]:
    """
    Unsupported on diagnostics objects; use ``Snapshot.get_grid_bounds()``.

    Grid bounds are derived from a snapshot's elements. This model declares
    only ``confidence``, ``reasons`` and ``metrics``, so there is nothing to
    compute bounds from here. The method is kept so that existing callers
    still get an ``AttributeError``, now with a message that points at the
    working API.

    Args:
        grid_id: Accepted for signature compatibility; not used.

    Raises:
        AttributeError: Always.
    """
    raise AttributeError(
        "SnapshotDiagnostics has no 'elements'; "
        "call Snapshot.get_grid_bounds() on the snapshot instead"
    )
|
|
427
|
+
|
|
428
|
+
@staticmethod
def _infer_grid_label(elements: list[Element]) -> str | None:
    """
    Infer a semantic label for a grid from its first few elements (best-effort heuristic).

    Only the first five elements are inspected. Their stripped, lower-cased
    texts, their hrefs and their roles are run through a fixed sequence of
    checks; the first check that succeeds determines the label:

    1. ``"product_grid"``: price, shopping-action or rating text, backed by
       either a product-style URL or at least two sampled items that each
       show a price.
    2. ``"article_feed"``: relative timestamps, dates together with bylines,
       or a reading-time phrase.
    3. ``"search_results"``: search keywords or ellipses, with at least 70%
       of the sample being links.
    4. ``"navigation"``: short texts (average under 15 characters) that are
       either homogeneous in length or contain nav keywords, with every
       sampled element being a link.
    5. ``"button_grid"``: every sampled element has role ``"button"``.
    6. ``"link_list"``: at least 80% of the sample are links.

    Args:
        elements: Elements of one grid; ``text``, ``href`` and ``role`` are read.

    Returns:
        One of the labels above, or None when ``elements`` is empty, none of
        the sampled elements has text, or no check matches.
    """
    import re

    if not elements:
        return None

    # Fingerprint the grid from its first 5 items only.
    sample_elements = elements[:5]
    element_texts = [(elem.text or "").strip() for elem in sample_elements if elem.text]

    if not element_texts:
        return None

    all_text = " ".join(text.lower() for text in element_texts)
    hrefs = [elem.href for elem in sample_elements if elem.href]
    # An element counts as a link if its role says so or it carries an href.
    link_count = sum(1 for elem in sample_elements if elem.role == "link" or elem.href)

    # =========================================================================
    # 1. PRODUCT GRID: Currency symbols, action verbs, ratings
    # =========================================================================
    # Currency patterns: $, €, £, ¥ followed by digits, or prices like "19.99"
    currency_re = re.compile(r"[\$€£¥]\s*\d+|\d+\.\d{2}")
    has_currency = currency_re.search(all_text) is not None

    product_action_verbs = [
        "add to cart",
        "buy now",
        "shop now",
        "purchase",
        "out of stock",
        "in stock",
    ]
    has_product_actions = any(verb in all_text for verb in product_action_verbs)

    # Ratings pattern: "4.5 stars", "(120 reviews)", "4.5/5"
    has_rating = (
        re.search(r"\d+\.?\d*\s*(stars?|reviews?|/5|/10)", all_text, re.IGNORECASE) is not None
    )

    product_url_patterns = ["/product/", "/item/", "/dp/", "/p/", "/products/"]
    has_product_urls = any(
        pattern in href.lower() for href in hrefs for pattern in product_url_patterns
    )

    # Count items that show *a* price. Matching each text against the pattern
    # (rather than looking for the first matched price string in every text)
    # lets items with different prices each count.
    priced_items = sum(1 for text in element_texts if currency_re.search(text))

    if (has_currency or has_product_actions or has_rating) and (
        has_product_urls or priced_items >= 2
    ):
        return "product_grid"

    # =========================================================================
    # 2. ARTICLE/NEWS FEED: Timestamps, bylines, reading time
    # =========================================================================
    # "2 hours ago", "3 days ago", "1 second ago", and the short form "2 ago"
    timestamp_patterns = [
        r"\d+\s+(hour|day|minute|second)s?\s+ago",
        r"\d+\s+ago",
    ]
    has_timestamps = any(
        re.search(pattern, all_text, re.IGNORECASE) for pattern in timestamp_patterns
    )

    # Date patterns: "Aug 21, 2024", "2024-01-13", "1/15/2024"
    date_patterns = [
        r"\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2},?\s+\d{4}",
        r"\d{4}-\d{2}-\d{2}",
        r"\d{1,2}/\d{1,2}/\d{4}",
    ]
    has_dates = any(re.search(pattern, all_text, re.IGNORECASE) for pattern in date_patterns)

    # Bylines: "By [Name]", "Author:", "Written by"
    byline_patterns = ["by ", "author:", "written by", "posted by"]
    has_bylines = any(pattern in all_text for pattern in byline_patterns)

    # Reading time: "5 min read", "10 min"
    reading_time_pattern = re.search(r"\d+\s*(min|minute)s?\s*(read)?", all_text, re.IGNORECASE)

    if has_timestamps or (has_dates and has_bylines) or reading_time_pattern:
        return "article_feed"

    # =========================================================================
    # 3. SEARCH RESULTS: Snippets, metadata, ellipses
    # =========================================================================
    search_keywords = ["result", "search", "found", "showing", "results 1-", "sponsored"]
    has_search_metadata = any(keyword in all_text for keyword in search_keywords)

    # Snippet indicator: an ellipsis anywhere in the sampled text.
    has_ellipses = "..." in all_text

    is_mostly_links = link_count >= len(sample_elements) * 0.7  # 70%+ are links

    if (has_search_metadata or has_ellipses) and is_mostly_links:
        return "search_results"

    # =========================================================================
    # 4. NAVIGATION: Short length, homogeneity, common nav terms
    # =========================================================================
    text_lengths = [len(text) for text in element_texts]
    avg_length = sum(text_lengths) / len(text_lengths)
    # Low variance = homogeneous (typical of navigation); 0 for a single text.
    variance = sum((length - avg_length) ** 2 for length in text_lengths) / len(text_lengths)

    nav_keywords = [
        "home",
        "about",
        "contact",
        "menu",
        "login",
        "sign in",
        "profile",
        "settings",
    ]
    has_nav_keywords = any(keyword in all_text for keyword in nav_keywords)

    # Navigation: short average length AND (low variance OR nav keywords),
    # and every sampled element must be a link.
    if (
        avg_length < 15
        and (variance < 20 or has_nav_keywords)
        and link_count == len(sample_elements)
    ):
        return "navigation"

    # =========================================================================
    # 5. BUTTON GRID: All buttons
    # =========================================================================
    if all(elem.role == "button" for elem in sample_elements):
        return "button_grid"

    # =========================================================================
    # 6. LINK LIST: Mostly links but not navigation
    # =========================================================================
    if link_count >= len(sample_elements) * 0.8:  # 80%+ are links
        return "link_list"

    # Unknown/unclear
    return None
|
|
589
|
+
|
|
74
590
|
|
|
75
591
|
class ActionResult(BaseModel):
|
|
76
592
|
"""Result of an action (click, type, press)"""
|
|
@@ -81,6 +597,8 @@ class ActionResult(BaseModel):
|
|
|
81
597
|
url_changed: bool | None = None
|
|
82
598
|
snapshot_after: Snapshot | None = None
|
|
83
599
|
error: dict | None = None
|
|
600
|
+
# Optional action metadata (e.g., human-like cursor movement path)
|
|
601
|
+
cursor: dict[str, Any] | None = None
|
|
84
602
|
|
|
85
603
|
|
|
86
604
|
class WaitResult(BaseModel):
|
|
@@ -114,6 +632,16 @@ class SnapshotOptions(BaseModel):
|
|
|
114
632
|
"""
|
|
115
633
|
Configuration for snapshot calls.
|
|
116
634
|
Matches TypeScript SnapshotOptions interface from sdk-ts/src/snapshot.ts
|
|
635
|
+
|
|
636
|
+
For browser-use integration (where you don't have a SentienceBrowser),
|
|
637
|
+
you can pass sentience_api_key directly in options:
|
|
638
|
+
|
|
639
|
+
from sentience.models import SnapshotOptions
|
|
640
|
+
options = SnapshotOptions(
|
|
641
|
+
sentience_api_key="sk_pro_xxxxx",
|
|
642
|
+
use_api=True,
|
|
643
|
+
goal="Find the login button"
|
|
644
|
+
)
|
|
117
645
|
"""
|
|
118
646
|
|
|
119
647
|
screenshot: bool | ScreenshotConfig = False # Union type: boolean or config
|
|
@@ -124,6 +652,13 @@ class SnapshotOptions(BaseModel):
|
|
|
124
652
|
trace_path: str | None = None # Path to save trace (default: "trace_{timestamp}.json")
|
|
125
653
|
goal: str | None = None # Optional goal/task description for the snapshot
|
|
126
654
|
show_overlay: bool = False # Show visual overlay highlighting elements in browser
|
|
655
|
+
show_grid: bool = False # Show visual overlay highlighting detected grids
|
|
656
|
+
grid_id: int | None = (
|
|
657
|
+
None # Optional grid ID to show specific grid (only used if show_grid=True)
|
|
658
|
+
)
|
|
659
|
+
|
|
660
|
+
# API credentials (for browser-use integration without SentienceBrowser)
|
|
661
|
+
sentience_api_key: str | None = None # Sentience API key for Pro/Enterprise features
|
|
127
662
|
|
|
128
663
|
class Config:
|
|
129
664
|
arbitrary_types_allowed = True
|
|
@@ -146,6 +681,8 @@ class AgentActionResult(BaseModel):
|
|
|
146
681
|
url_changed: bool | None = None
|
|
147
682
|
error: str | None = None
|
|
148
683
|
message: str | None = None # For FINISH action
|
|
684
|
+
# Optional: action metadata (e.g., human-like cursor movement path)
|
|
685
|
+
cursor: dict[str, Any] | None = None
|
|
149
686
|
|
|
150
687
|
def __getitem__(self, key):
|
|
151
688
|
"""
|
|
@@ -288,7 +825,7 @@ class StorageState(BaseModel):
|
|
|
288
825
|
)
|
|
289
826
|
|
|
290
827
|
@classmethod
|
|
291
|
-
def from_dict(cls, data: dict) ->
|
|
828
|
+
def from_dict(cls, data: dict) -> StorageState:
|
|
292
829
|
"""
|
|
293
830
|
Create StorageState from dictionary (e.g., loaded from JSON).
|
|
294
831
|
|
|
@@ -410,3 +947,58 @@ class TextRectSearchResult(BaseModel):
|
|
|
410
947
|
)
|
|
411
948
|
viewport: Viewport | None = Field(None, description="Current viewport dimensions")
|
|
412
949
|
error: str | None = Field(None, description="Error message if status is 'error'")
|
|
950
|
+
|
|
951
|
+
|
|
952
|
+
class ReadResult(BaseModel):
    """Outcome of a read() or read_async() call."""

    # "success" or "error"
    status: Literal["success", "error"]
    url: str
    # Output format of `content`: "raw", "text" or "markdown"
    format: Literal["raw", "text", "markdown"]
    content: str
    length: int
    # Optional error text; defaults to None
    error: str | None = None
|
|
961
|
+
|
|
962
|
+
|
|
963
|
+
class TraceStats(BaseModel):
    """Execution statistics reported when a trace completes."""

    total_steps: int
    total_events: int
    # Optional; defaults to None
    duration_ms: int | None = None
    # One of "success", "failure", "partial", "unknown"
    final_status: Literal["success", "failure", "partial", "unknown"]
    # Optional start/end markers stored as strings; both default to None
    started_at: str | None = None
    ended_at: str | None = None
|
|
972
|
+
|
|
973
|
+
|
|
974
|
+
class StepExecutionResult(BaseModel):
    """Outcome of one step executed by ConversationalAgent."""

    success: bool
    action: str
    # Flexible data field for step-specific results
    data: dict[str, Any]
    # Optional error text; defaults to None
    error: str | None = None
|
|
981
|
+
|
|
982
|
+
|
|
983
|
+
class ExtractionResult(BaseModel):
    """Outcome of extracting information from a page."""

    found: bool
    # Extracted data fields
    data: dict[str, Any]
    # Brief description of what was found
    summary: str
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
@dataclass
class ScreenshotMetadata:
    """
    Record describing one stored screenshot.

    Used by CloudTraceSink to track screenshots before upload. No field has a
    default, so every one of them must be supplied on construction.
    """

    sequence: int
    # Image format: "png" or "jpeg"
    format: Literal["png", "jpeg"]
    size_bytes: int
    # May be None, but must still be passed explicitly
    step_id: str | None
    filepath: str
|