sentienceapi-0.95.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (82) hide show
  1. sentience/__init__.py +253 -0
  2. sentience/_extension_loader.py +195 -0
  3. sentience/action_executor.py +215 -0
  4. sentience/actions.py +1020 -0
  5. sentience/agent.py +1181 -0
  6. sentience/agent_config.py +46 -0
  7. sentience/agent_runtime.py +424 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +108 -0
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +343 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +427 -0
  21. sentience/base_agent.py +196 -0
  22. sentience/browser.py +1215 -0
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cli.py +130 -0
  26. sentience/cloud_tracing.py +807 -0
  27. sentience/constants.py +6 -0
  28. sentience/conversational_agent.py +543 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +188 -0
  31. sentience/extension/background.js +104 -0
  32. sentience/extension/content.js +161 -0
  33. sentience/extension/injected_api.js +914 -0
  34. sentience/extension/manifest.json +36 -0
  35. sentience/extension/pkg/sentience_core.d.ts +51 -0
  36. sentience/extension/pkg/sentience_core.js +323 -0
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  39. sentience/extension/release.json +115 -0
  40. sentience/formatting.py +15 -0
  41. sentience/generator.py +202 -0
  42. sentience/inspector.py +367 -0
  43. sentience/llm_interaction_handler.py +191 -0
  44. sentience/llm_provider.py +875 -0
  45. sentience/llm_provider_utils.py +120 -0
  46. sentience/llm_response_builder.py +153 -0
  47. sentience/models.py +846 -0
  48. sentience/ordinal.py +280 -0
  49. sentience/overlay.py +222 -0
  50. sentience/protocols.py +228 -0
  51. sentience/query.py +303 -0
  52. sentience/read.py +188 -0
  53. sentience/recorder.py +589 -0
  54. sentience/schemas/trace_v1.json +335 -0
  55. sentience/screenshot.py +100 -0
  56. sentience/sentience_methods.py +86 -0
  57. sentience/snapshot.py +706 -0
  58. sentience/snapshot_diff.py +126 -0
  59. sentience/text_search.py +262 -0
  60. sentience/trace_event_builder.py +148 -0
  61. sentience/trace_file_manager.py +197 -0
  62. sentience/trace_indexing/__init__.py +27 -0
  63. sentience/trace_indexing/index_schema.py +199 -0
  64. sentience/trace_indexing/indexer.py +414 -0
  65. sentience/tracer_factory.py +322 -0
  66. sentience/tracing.py +449 -0
  67. sentience/utils/__init__.py +40 -0
  68. sentience/utils/browser.py +46 -0
  69. sentience/utils/element.py +257 -0
  70. sentience/utils/formatting.py +59 -0
  71. sentience/utils.py +296 -0
  72. sentience/verification.py +380 -0
  73. sentience/visual_agent.py +2058 -0
  74. sentience/wait.py +139 -0
  75. sentienceapi-0.95.0.dist-info/METADATA +984 -0
  76. sentienceapi-0.95.0.dist-info/RECORD +82 -0
  77. sentienceapi-0.95.0.dist-info/WHEEL +5 -0
  78. sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
  79. sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
  80. sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
  81. sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
  82. sentienceapi-0.95.0.dist-info/top_level.txt +1 -0
sentience/models.py ADDED
@@ -0,0 +1,846 @@
1
+ """
2
+ Pydantic models for Sentience SDK - matches spec/snapshot.schema.json
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Literal
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+
11
class BBox(BaseModel):
    """Rectangle described by its origin (x, y) and its size (width, height)."""

    # Origin of the rectangle
    x: float
    y: float
    # Extent of the rectangle
    width: float
    height: float
18
+
19
+
20
class Viewport(BaseModel):
    """Width and height of the viewport."""

    width: float
    height: float
25
+
26
+
27
class VisualCues(BaseModel):
    """Cues produced by visual analysis of an element."""

    is_primary: bool
    # Optional; defaults to None when no color name is supplied
    background_color_name: str | None = None
    is_clickable: bool
33
+
34
+
35
class Element(BaseModel):
    """A single element reported in a snapshot."""

    id: int
    role: str
    text: str | None = None
    importance: int
    bbox: BBox
    visual_cues: VisualCues
    in_viewport: bool = True
    is_occluded: bool = False
    z_index: int = 0

    # --- ML reranking metadata (optional: may be absent or null) ---
    # 0-based rank after ML reranking
    rerank_index: int | None = None
    # 0-based position the element would have had without ML
    heuristic_index: int | None = None
    # Confidence score from the ONNX model (0.0 - 1.0)
    ml_probability: float | None = None
    # Raw logit score (optional, for debugging)
    ml_score: float | None = None

    # --- Diff status, consumed by the frontend Diff Overlay feature ---
    diff_status: Literal["ADDED", "REMOVED", "MODIFIED", "MOVED"] | None = None

    # --- Phase 1: ordinal support fields for position-based selection ---
    # X coordinate of the element center (viewport coords)
    center_x: float | None = None
    # Y coordinate of the element center (viewport coords)
    center_y: float | None = None
    # Y coordinate in the document (center_y + scroll_y)
    doc_y: float | None = None
    # Geometric bucket key used for ordinal grouping
    group_key: str | None = None
    # Position within the group (0-indexed, sorted by doc_y)
    group_index: int | None = None

    # --- Hyperlink URL (for link elements) ---
    href: str | None = None

    # --- Phase 3.2: pre-computed dominant group membership ---
    # Computed by the gateway (using fuzzy matching) so that downstream
    # consumers do not have to implement the fuzzy matching themselves.
    in_dominant_group: bool | None = None

    # --- Layout-derived metadata (internal-only in v0, not exposed in API responses) ---
    # Explicitly optional so users do not assume layout is always present.
    # Note: the field is marked with skip_serializing_if in Rust, so it won't
    # appear in API responses.
    layout: "LayoutHints | None" = None
76
+
77
+
78
class GridPosition(BaseModel):
    """Location of an element inside a detected grid/list."""

    # 0-based row index
    row_index: int
    # 0-based column index
    col_index: int
    # ID of the row cluster (for distinguishing separate grids)
    cluster_id: int
84
+
85
+
86
class LayoutHints(BaseModel):
    """Metadata derived from page layout for one element (internal-only in v0)."""

    # Grid ID (maps to GridInfo.grid_id). Distinguishes multiple grids on the
    # same page, e.g. main feed + sidebar lists + nav links.
    grid_id: int | None = None

    # Position inside that grid (row_index, col_index)
    grid_pos: GridPosition | None = None

    # Inferred parent index in the elements array
    parent_index: int | None = None

    # Indices of child elements. Optional to avoid payload bloat: container
    # elements can have hundreds of children, so large arrays need not be
    # serialized.
    children_indices: list[int] | None = None

    # Confidence score for the grid position assignment (0.0-1.0)
    grid_confidence: float = 0.0

    # Confidence score for parent-child containment (0.0-1.0)
    parent_confidence: float = 0.0

    # Optional page region (header/nav/main/aside/footer); a useful signal for
    # ordinality, dominant-group and region detection.
    region: Literal["header", "nav", "main", "aside", "footer"] | None = None

    # Confidence score for the region assignment (0.0-1.0)
    region_confidence: float = 0.0
107
+
108
+
109
class GridInfo(BaseModel):
    """Bounding box plus summary metadata describing one detected grid."""

    # The grid ID (matches grid_id in LayoutHints)
    grid_id: int
    # Bounding box: x, y, width, height (document coordinates)
    bbox: BBox
    # Number of rows in the grid
    row_count: int
    # Number of columns in the grid
    col_count: int
    # Total number of items in the grid
    item_count: int
    # Confidence score (currently 1.0)
    confidence: float = 1.0
    # Optional inferred label (e.g., "product_grid", "search_results", "navigation")
    label: str | None = None
    # Whether this grid is the dominant group (main content area)
    is_dominant: bool = False
122
+
123
+
124
class Snapshot(BaseModel):
    """Snapshot response from extension"""

    status: Literal["success", "error"]
    timestamp: str | None = None
    url: str
    viewport: Viewport | None = None
    elements: list[Element]
    screenshot: str | None = None
    screenshot_format: Literal["png", "jpeg"] | None = None
    error: str | None = None
    requires_license: bool | None = None
    # Phase 2: Dominant group key for ordinal selection
    dominant_group_key: str | None = None  # The most common group_key (main content group)

    def save(self, filepath: str) -> None:
        """Save snapshot as JSON file.

        Args:
            filepath: Destination path; the file is written as UTF-8 with a
                2-space indent.
        """
        import json

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(self.model_dump(), f, indent=2)

    def get_grid_bounds(self, grid_id: int | None = None) -> list[GridInfo]:
        """
        Get grid coordinates (bounding boxes) for detected grids.

        Groups elements by grid_id and computes the overall bounding box,
        row/column counts, and item count for each grid.

        The dominant grid is always chosen among ALL grids of the snapshot,
        so ``is_dominant`` is the same whether or not ``grid_id`` is used to
        filter the result.

        Args:
            grid_id: Optional grid ID to filter by. If None, returns all grids.

        Returns:
            List of GridInfo objects, one per detected grid, sorted by grid_id.
            An empty list is returned when ``grid_id`` is given but no element
            belongs to that grid. Each GridInfo contains:
            - grid_id: The grid identifier
            - bbox: Bounding box (x, y, width, height) in document coordinates
            - row_count: Number of rows in the grid
            - col_count: Number of columns in the grid
            - item_count: Total number of items in the grid
            - confidence: Confidence score (currently 1.0)
            - label: Optional inferred label (e.g., "product_grid", "search_results", "navigation")
              Note: Label inference is best-effort and may not always be accurate
            - is_dominant: True for at most one grid (see above)

        Example:
            >>> snapshot = browser.snapshot()
            >>> # Get all grids
            >>> all_grids = snapshot.get_grid_bounds()
            >>> # Get specific grid
            >>> main_grid = snapshot.get_grid_bounds(grid_id=0)
            >>> if main_grid:
            ...     print(f"Grid 0: {main_grid[0].item_count} items at ({main_grid[0].bbox.x}, {main_grid[0].bbox.y})")
        """
        from collections import defaultdict

        # Group elements by grid_id
        grid_elements: dict[int, list[Element]] = defaultdict(list)
        for elem in self.elements:
            if elem.layout and elem.layout.grid_id is not None:
                grid_elements[elem.layout.grid_id].append(elem)

        # Decide dominance BEFORE filtering: a grid is dominant relative to
        # every other grid on the page, not relative to the requested subset.
        # Built in ascending grid_id order so ties resolve to the lowest id.
        dominant_counts = {
            gid: (
                sum(1 for elem in members if elem.in_dominant_group is True),
                len(members),
            )
            for gid, members in sorted(grid_elements.items())
        }
        dominant_gid = Snapshot._pick_dominant_grid(dominant_counts)

        # Filter by grid_id if specified
        if grid_id is not None:
            if grid_id not in grid_elements:
                return []
            selected = {grid_id: grid_elements[grid_id]}
        else:
            selected = grid_elements

        grid_infos: list[GridInfo] = []
        for gid, members in sorted(selected.items()):
            # Bounding box enclosing every member element
            min_x = min(elem.bbox.x for elem in members)
            min_y = min(elem.bbox.y for elem in members)
            max_x = max(elem.bbox.x + elem.bbox.width for elem in members)
            max_y = max(elem.bbox.y + elem.bbox.height for elem in members)

            # Distinct row/column indices among members that carry a grid position
            row_indices = set()
            col_indices = set()
            for elem in members:
                if elem.layout and elem.layout.grid_pos:
                    row_indices.add(elem.layout.grid_pos.row_index)
                    col_indices.add(elem.layout.grid_pos.col_index)

            grid_infos.append(
                GridInfo(
                    grid_id=gid,
                    bbox=BBox(
                        x=min_x,
                        y=min_y,
                        width=max_x - min_x,
                        height=max_y - min_y,
                    ),
                    row_count=len(row_indices),
                    col_count=len(col_indices),
                    item_count=len(members),
                    confidence=1.0,
                    # Best-effort heuristic label
                    label=Snapshot._infer_grid_label(members),
                    is_dominant=gid == dominant_gid,
                )
            )

        return grid_infos

    @staticmethod
    def _pick_dominant_grid(counts: dict[int, tuple[int, int]]) -> int | None:
        """
        Choose the dominant grid from per-grid (dominant_count, total_count) pairs.

        The grid with the highest number of dominant-group elements wins; ties
        are broken by the highest share of dominant-group elements, then by
        insertion order of ``counts``. The winner is only accepted when it has
        at least 3 dominant-group elements or at least 50% of its elements are
        in the dominant group.

        Returns:
            The winning grid ID, or None when no grid qualifies.
        """
        if not counts:
            return None

        best = max(count for count, _ in counts.values())
        if best <= 0:
            return None

        candidates = [gid for gid, (count, _) in counts.items() if count == best]
        # Stable sort: equal shares keep their original relative order.
        candidates.sort(
            key=lambda gid: counts[gid][0] / counts[gid][1] if counts[gid][1] > 0 else 0,
            reverse=True,
        )

        winner = candidates[0]
        dominant_count, total_count = counts[winner]
        if dominant_count >= 3 or (total_count > 0 and dominant_count / total_count >= 0.5):
            return winner
        return None

    @staticmethod
    def _infer_grid_label(elements: list["Element"]) -> str | None:
        """
        Infer grid label from element patterns using text fingerprinting (best-effort heuristic).

        Only the first 5 elements are inspected. Checks run in this order and
        the first match wins: "product_grid", "article_feed", "search_results",
        "navigation", "button_grid", "link_list".

        Args:
            elements: Elements belonging to one grid.

        Returns:
            One of the labels above, or None when there are no elements, none
            of the sampled elements has non-blank text, or no pattern matches.
            This is a simple heuristic and may not always be accurate.
        """
        import re

        if not elements:
            return None

        # Sample first 5 items for fingerprinting
        sample_elements = elements[:5]
        # Whitespace-only text counts as "no text": it carries no signal and
        # would otherwise distort the length statistics used further down.
        element_texts = [
            text for text in ((elem.text or "").strip() for elem in sample_elements) if text
        ]
        if not element_texts:
            return None

        all_text = " ".join(text.lower() for text in element_texts)
        hrefs = [elem.href for elem in sample_elements if elem.href]
        link_count = sum(1 for elem in sample_elements if elem.role == "link" or elem.href)

        # =========================================================================
        # 1. PRODUCT GRID: Currency symbols, action verbs, ratings
        # =========================================================================
        # Currency patterns: $, €, £, ¥ followed by digits, or prices like "19.99"
        currency_regex = r"[\$€£¥]\s*\d+|\d+\.\d{2}"
        has_currency = re.search(currency_regex, all_text) is not None
        # Count how many sampled texts contain a price of their own. Each text
        # is matched independently so items with different prices all count.
        priced_text_count = sum(1 for text in element_texts if re.search(currency_regex, text))

        product_action_verbs = [
            "add to cart",
            "buy now",
            "shop now",
            "purchase",
            "out of stock",
            "in stock",
        ]
        has_product_actions = any(verb in all_text for verb in product_action_verbs)

        # Ratings pattern: "4.5 stars", "(120 reviews)", "4.5/5"
        has_rating = (
            re.search(r"\d+\.?\d*\s*(stars?|reviews?|/5|/10)", all_text, re.IGNORECASE)
            is not None
        )

        # Product URL patterns
        product_url_patterns = ["/product/", "/item/", "/dp/", "/p/", "/products/"]
        has_product_urls = any(
            pattern in href.lower() for href in hrefs for pattern in product_url_patterns
        )

        if (has_currency or has_product_actions or has_rating) and (
            has_product_urls or priced_text_count >= 2
        ):
            return "product_grid"

        # =========================================================================
        # 2. ARTICLE/NEWS FEED: Timestamps, bylines, reading time
        # =========================================================================
        # "2 hours ago", "3 days ago", "1 second ago", and the short form "2 ago"
        timestamp_patterns = [
            r"\d+\s+(hour|day|minute|second)s?\s+ago",
            r"\d+\s+ago",
        ]
        has_timestamps = any(
            re.search(pattern, all_text, re.IGNORECASE) for pattern in timestamp_patterns
        )

        # Date patterns: "Aug 21, 2024", "2024-01-13", "1/15/2024"
        # (IGNORECASE is required: all_text has been lowercased)
        date_patterns = [
            r"\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2},?\s+\d{4}",
            r"\d{4}-\d{2}-\d{2}",
            r"\d{1,2}/\d{1,2}/\d{4}",
        ]
        has_dates = any(re.search(pattern, all_text, re.IGNORECASE) for pattern in date_patterns)

        # Bylines: "By [Name]", "Author:", "Written by"
        byline_patterns = ["by ", "author:", "written by", "posted by"]
        has_bylines = any(pattern in all_text for pattern in byline_patterns)

        # Reading time: "5 min read", "10 min"
        has_reading_time = (
            re.search(r"\d+\s*(min|minute)s?\s*(read)?", all_text, re.IGNORECASE) is not None
        )

        if has_timestamps or (has_dates and has_bylines) or has_reading_time:
            return "article_feed"

        # =========================================================================
        # 3. SEARCH RESULTS: Snippets, metadata, ellipses
        # =========================================================================
        search_keywords = ["result", "search", "found", "showing", "results 1-", "sponsored"]
        has_search_metadata = any(keyword in all_text for keyword in search_keywords)

        # Snippet indicator: an ellipsis anywhere in the sampled text
        has_ellipses = "..." in all_text

        # Search results are typically mostly links (70%+ of the sample)
        is_mostly_links = link_count >= len(sample_elements) * 0.7

        if (has_search_metadata or has_ellipses) and is_mostly_links:
            return "search_results"

        # =========================================================================
        # 4. NAVIGATION: Short length, homogeneity, common nav terms
        # =========================================================================
        text_lengths = [len(text) for text in element_texts]
        avg_length = sum(text_lengths) / len(text_lengths)
        # Low variance = homogeneous (typical of navigation)
        variance = (
            sum((length - avg_length) ** 2 for length in text_lengths) / len(text_lengths)
            if len(text_lengths) > 1
            else 0
        )

        nav_keywords = [
            "home",
            "about",
            "contact",
            "menu",
            "login",
            "sign in",
            "profile",
            "settings",
        ]
        has_nav_keywords = any(keyword in all_text for keyword in nav_keywords)

        # Navigation: short average length (< 15 chars) AND (low variance OR nav
        # keywords), and every sampled element is a link
        if avg_length < 15 and (variance < 20 or has_nav_keywords):
            if all(elem.role == "link" or elem.href for elem in sample_elements):
                return "navigation"

        # =========================================================================
        # 5. BUTTON GRID: All buttons
        # =========================================================================
        if all(elem.role == "button" for elem in sample_elements):
            return "button_grid"

        # =========================================================================
        # 6. LINK LIST: Mostly links (80%+) but not navigation
        # =========================================================================
        if link_count >= len(sample_elements) * 0.8:
            return "link_list"

        # Unknown/unclear
        return None
435
+
436
+
437
class ActionResult(BaseModel):
    """Outcome reported for one action (click, type, press)."""

    success: bool
    duration_ms: int
    # Every field below is optional and defaults to None
    outcome: Literal["navigated", "dom_updated", "no_change", "error"] | None = None
    url_changed: bool | None = None
    snapshot_after: Snapshot | None = None
    error: dict | None = None
446
+
447
+
448
class WaitResult(BaseModel):
    """Outcome of a wait_for operation."""

    found: bool
    # Optional; defaults to None
    element: Element | None = None
    duration_ms: int
    timeout: bool
455
+
456
+
457
+ # ========== Agent Layer Models ==========
458
+
459
+
460
class ScreenshotConfig(BaseModel):
    """Configuration of the screenshot image format."""

    format: Literal["png", "jpeg"] = "png"
    # Only for JPEG; validated to the range 1-100
    quality: int | None = Field(default=None, ge=1, le=100)
465
+
466
+
467
class SnapshotFilter(BaseModel):
    """Options for filtering the elements of a snapshot."""

    # Validated to be >= 0 when provided
    min_area: int | None = Field(default=None, ge=0)
    allowed_roles: list[str] | None = None
    min_z_index: int | None = None
473
+
474
+
475
class SnapshotOptions(BaseModel):
    """
    Configuration for snapshot calls.
    Matches TypeScript SnapshotOptions interface from sdk-ts/src/snapshot.ts

    For browser-use integration (where you don't have a SentienceBrowser),
    you can pass sentience_api_key directly in options:

        from sentience.models import SnapshotOptions
        options = SnapshotOptions(
            sentience_api_key="sk_pro_xxxxx",
            use_api=True,
            goal="Find the login button"
        )
    """

    # Pydantic v2 style configuration. The class-based `Config` form is
    # deprecated in Pydantic v2 and emits a deprecation warning; the setting
    # itself (arbitrary_types_allowed) is unchanged.
    model_config = {"arbitrary_types_allowed": True}

    screenshot: bool | ScreenshotConfig = False  # Union type: boolean or config
    limit: int = Field(50, ge=1, le=500)  # Validated to the range 1-500
    filter: SnapshotFilter | None = None
    use_api: bool | None = None  # Force API vs extension
    save_trace: bool = False  # Save raw_elements to JSON for benchmarking/training
    trace_path: str | None = None  # Path to save trace (default: "trace_{timestamp}.json")
    goal: str | None = None  # Optional goal/task description for the snapshot
    show_overlay: bool = False  # Show visual overlay highlighting elements in browser
    show_grid: bool = False  # Show visual overlay highlighting detected grids
    # Optional grid ID to show specific grid (only used if show_grid=True)
    grid_id: int | None = None

    # API credentials (for browser-use integration without SentienceBrowser)
    sentience_api_key: str | None = None  # Sentience API key for Pro/Enterprise features
509
+
510
+
511
class AgentActionResult(BaseModel):
    """Result of a single agent action (from agent.act())"""

    success: bool
    action: Literal["click", "type", "press", "finish", "error"]
    goal: str
    duration_ms: int
    attempt: int

    # Optional fields based on action type
    element_id: int | None = None
    text: str | None = None
    key: str | None = None
    outcome: Literal["navigated", "dom_updated", "no_change", "error"] | None = None
    url_changed: bool | None = None
    error: str | None = None
    message: str | None = None  # For FINISH action

    def __getitem__(self, key: str) -> Any:
        """
        Support dict-style access for backward compatibility.
        This allows existing code using result["success"] to continue working.

        Every call emits a DeprecationWarning pointing at the caller.

        Args:
            key: Name of the attribute to read.

        Returns:
            The value of the attribute named ``key``.

        Raises:
            KeyError: If no attribute named ``key`` exists. This mirrors the
                dict behaviour this shim stands in for, so code written
                against the old dict result (``except KeyError``) keeps working.
        """
        import warnings

        warnings.warn(
            f"Dict-style access result['{key}'] is deprecated. Use result.{key} instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        try:
            return getattr(self, key)
        except AttributeError:
            # Subscript access must fail like a mapping, not like attribute access.
            raise KeyError(key) from None
542
+
543
+
544
class ActionTokenUsage(BaseModel):
    """Token counts recorded for one action."""

    goal: str
    # Token counters
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    model: str
552
+
553
+
554
class TokenStats(BaseModel):
    """Aggregated token usage of an agent session."""

    # Session-wide totals
    total_prompt_tokens: int
    total_completion_tokens: int
    total_tokens: int
    # Per-action usage entries
    by_action: list[ActionTokenUsage]
561
+
562
+
563
class ActionHistory(BaseModel):
    """One entry in the history of an agent execution."""

    goal: str
    # The raw action string from LLM
    action: str
    # Will be AgentActionResult but stored as dict for flexibility
    result: dict
    success: bool
    attempt: int
    duration_ms: int
572
+
573
+
574
class ProxyConfig(BaseModel):
    """
    Proxy settings used for browser networking.

    Supports HTTP, HTTPS, and SOCKS5 proxies with optional authentication.
    """

    server: str = Field(
        ...,
        description="Proxy server URL including scheme and port (e.g., 'http://proxy.example.com:8080')",
    )
    username: str | None = Field(
        default=None,
        description="Username for proxy authentication (optional)",
    )
    password: str | None = Field(
        default=None,
        description="Password for proxy authentication (optional)",
    )

    def to_playwright_dict(self) -> dict:
        """
        Build the proxy mapping in Playwright's format.

        Credentials are included only when BOTH username and password are
        truthy; otherwise just the server is returned.

        Returns:
            Dict compatible with Playwright's proxy parameter
        """
        has_credentials = bool(self.username and self.password)
        if not has_credentials:
            return {"server": self.server}
        return {
            "server": self.server,
            "username": self.username,
            "password": self.password,
        }
606
+
607
+
608
+ # ========== Storage State Models (Auth Injection) ==========
609
+
610
+
611
class Cookie(BaseModel):
    """
    A cookie to be injected through storage state.

    Matches Playwright's cookie format for storage_state.
    """

    # Required fields
    name: str = Field(..., description="Cookie name")
    value: str = Field(..., description="Cookie value")
    domain: str = Field(..., description="Cookie domain (e.g., '.example.com')")

    # Optional fields with defaults
    path: str = Field(default="/", description="Cookie path")
    expires: float | None = Field(default=None, description="Expiration timestamp (Unix epoch)")
    httpOnly: bool = Field(default=False, description="HTTP-only flag")
    secure: bool = Field(default=False, description="Secure (HTTPS-only) flag")
    sameSite: Literal["Strict", "Lax", "None"] = Field(
        default="Lax",
        description="SameSite attribute",
    )
628
+
629
+
630
class LocalStorageItem(BaseModel):
    """
    One localStorage entry belonging to a specific origin.

    Playwright stores localStorage as an array of {name, value} objects.
    """

    name: str = Field(..., description="LocalStorage key")
    value: str = Field(..., description="LocalStorage value")
639
+
640
+
641
class OriginStorage(BaseModel):
    """
    localStorage contents of one origin.

    Represents localStorage data for a single domain.
    """

    origin: str = Field(..., description="Origin URL (e.g., 'https://example.com')")
    # Defaults to a fresh empty list per instance
    localStorage: list[LocalStorageItem] = Field(
        default_factory=list,
        description="LocalStorage items for this origin",
    )
652
+
653
+
654
class StorageState(BaseModel):
    """
    Complete browser storage state (cookies + localStorage).

    This is the format used by Playwright's storage_state() method.
    Can be saved to/loaded from JSON files for session injection.
    """

    cookies: list[Cookie] = Field(
        default_factory=list, description="Cookies to inject (global scope)"
    )
    origins: list[OriginStorage] = Field(
        default_factory=list, description="LocalStorage data per origin"
    )

    @classmethod
    def from_dict(cls, data: dict) -> "StorageState":
        """
        Create StorageState from dictionary (e.g., loaded from JSON).

        A missing key and an explicit ``None`` (JSON ``null``) are treated the
        same way for 'cookies', 'origins' and an origin's 'localStorage':
        all of them yield an empty list.

        Args:
            data: Dictionary with 'cookies' and/or 'origins' keys. An origin's
                'localStorage' may be a list of {name, value} dicts or a plain
                {key: value} dict.

        Returns:
            StorageState instance
        """
        # `or []` covers both a missing key and an explicit null value.
        cookies = [
            Cookie(**cookie) if isinstance(cookie, dict) else cookie
            for cookie in data.get("cookies") or []
        ]

        origins = []
        for origin_data in data.get("origins") or []:
            if not isinstance(origin_data, dict):
                # Not a raw dict (e.g. already an OriginStorage): let the
                # model constructor below validate it as-is.
                origins.append(origin_data)
                continue

            # localStorage may arrive as an array of {name, value} or as a dict
            raw_items = origin_data.get("localStorage") or []
            if isinstance(raw_items, dict):
                local_storage_items = [
                    LocalStorageItem(name=name, value=value) for name, value in raw_items.items()
                ]
            else:
                local_storage_items = [
                    LocalStorageItem(**item) if isinstance(item, dict) else item
                    for item in raw_items
                ]

            origins.append(
                OriginStorage(
                    origin=origin_data.get("origin", ""),
                    localStorage=local_storage_items,
                )
            )

        return cls(cookies=cookies, origins=origins)

    def to_playwright_dict(self) -> dict:
        """
        Convert to Playwright-compatible dictionary format.

        Returns:
            Dictionary compatible with Playwright's storage_state parameter
        """
        # NOTE(review): model_dump() emits `expires: None` for cookies created
        # without an expiry; confirm the consumer accepts a null `expires`.
        return {
            "cookies": [cookie.model_dump() for cookie in self.cookies],
            "origins": [
                {
                    "origin": origin.origin,
                    "localStorage": [item.model_dump() for item in origin.localStorage],
                }
                for origin in self.origins
            ],
        }
727
+
728
+
729
+ # ========== Text Search Models (findTextRect) ==========
730
+
731
+
732
class TextRect(BaseModel):
    """
    Rectangle locating one occurrence of text, in absolute (page) coordinates.

    The same rectangle is described twice: as origin + size
    (x, y, width, height) and as edges (left, top, right, bottom).
    """

    # Origin + size form
    x: float = Field(..., description="Absolute X coordinate (page coordinate with scroll offset)")
    y: float = Field(..., description="Absolute Y coordinate (page coordinate with scroll offset)")
    width: float = Field(..., description="Rectangle width in pixels")
    height: float = Field(..., description="Rectangle height in pixels")

    # Edge form
    left: float = Field(..., description="Absolute left position (same as x)")
    top: float = Field(..., description="Absolute top position (same as y)")
    right: float = Field(..., description="Absolute right position (x + width)")
    bottom: float = Field(..., description="Absolute bottom position (y + height)")
746
+
747
+
748
class ViewportRect(BaseModel):
    """Rectangle in viewport-relative coordinates (no scroll offset applied)."""

    x: float = Field(..., description="Viewport-relative X coordinate")
    y: float = Field(..., description="Viewport-relative Y coordinate")
    width: float = Field(..., description="Rectangle width in pixels")
    height: float = Field(..., description="Rectangle height in pixels")
755
+
756
+
757
class TextContext(BaseModel):
    """Text found immediately before and after a match."""

    before: str = Field(..., description="Text before the match (up to 20 chars)")
    after: str = Field(..., description="Text after the match (up to 20 chars)")
762
+
763
+
764
class TextMatch(BaseModel):
    """One text match together with its rectangles and surrounding context."""

    text: str = Field(..., description="The matched text")
    rect: TextRect = Field(..., description="Absolute rectangle coordinates (with scroll offset)")
    viewport_rect: ViewportRect = Field(
        ...,
        description="Viewport-relative rectangle (without scroll offset)",
    )
    context: TextContext = Field(..., description="Surrounding text context")
    in_viewport: bool = Field(..., description="Whether the match is currently visible in viewport")
774
+
775
+
776
class TextRectSearchResult(BaseModel):
    """
    Result of a findTextRect operation.

    Returns all occurrences of text on the page with their exact pixel coordinates.
    Only ``status`` is required; every other field defaults to None.
    """

    status: Literal["success", "error"]
    query: str | None = Field(default=None, description="The search text that was queried")
    case_sensitive: bool | None = Field(
        default=None, description="Whether search was case-sensitive"
    )
    whole_word: bool | None = Field(
        default=None, description="Whether whole-word matching was used"
    )
    matches: int | None = Field(default=None, description="Number of matches found")
    results: list[TextMatch] | None = Field(
        default=None,
        description="List of text matches with coordinates",
    )
    viewport: Viewport | None = Field(default=None, description="Current viewport dimensions")
    error: str | None = Field(default=None, description="Error message if status is 'error'")
792
+
793
+
794
class ReadResult(BaseModel):
    """Outcome of a read() or read_async() operation."""

    status: Literal["success", "error"]
    url: str
    format: Literal["raw", "text", "markdown"]
    content: str
    length: int
    # Optional; defaults to None
    error: str | None = None
803
+
804
+
805
class TraceStats(BaseModel):
    """Execution statistics recorded when a trace completes."""

    total_steps: int
    total_events: int
    duration_ms: int | None = None
    final_status: Literal["success", "failure", "partial", "unknown"]
    # Optional; both default to None
    started_at: str | None = None
    ended_at: str | None = None
814
+
815
+
816
class StepExecutionResult(BaseModel):
    """Outcome of executing one step in ConversationalAgent."""

    success: bool
    action: str
    # Flexible data field for step-specific results
    data: dict[str, Any]
    error: str | None = None
823
+
824
+
825
class ExtractionResult(BaseModel):
    """Outcome of extracting information from a page."""

    found: bool
    # Extracted data fields
    data: dict[str, Any]
    # Brief description of what was found
    summary: str
830
+ summary: str # Brief description of what was found
831
+
832
+
833
+ @dataclass
834
+ class ScreenshotMetadata:
835
+ """
836
+ Metadata for a stored screenshot.
837
+
838
+ Used by CloudTraceSink to track screenshots before upload.
839
+ All fields are required for type safety.
840
+ """
841
+
842
+ sequence: int
843
+ format: Literal["png", "jpeg"]
844
+ size_bytes: int
845
+ step_id: str | None
846
+ filepath: str