sentienceapi 0.90.16__py3-none-any.whl → 0.92.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (61) hide show
  1. sentience/__init__.py +14 -5
  2. sentience/action_executor.py +215 -0
  3. sentience/actions.py +408 -25
  4. sentience/agent.py +802 -293
  5. sentience/agent_config.py +3 -0
  6. sentience/async_api.py +83 -1142
  7. sentience/base_agent.py +95 -0
  8. sentience/browser.py +484 -1
  9. sentience/browser_evaluator.py +299 -0
  10. sentience/cloud_tracing.py +457 -33
  11. sentience/conversational_agent.py +77 -43
  12. sentience/element_filter.py +136 -0
  13. sentience/expect.py +98 -2
  14. sentience/extension/background.js +56 -185
  15. sentience/extension/content.js +117 -289
  16. sentience/extension/injected_api.js +799 -1374
  17. sentience/extension/manifest.json +1 -1
  18. sentience/extension/pkg/sentience_core.js +190 -396
  19. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  20. sentience/extension/release.json +47 -47
  21. sentience/formatting.py +9 -53
  22. sentience/inspector.py +183 -1
  23. sentience/llm_interaction_handler.py +191 -0
  24. sentience/llm_provider.py +74 -52
  25. sentience/llm_provider_utils.py +120 -0
  26. sentience/llm_response_builder.py +153 -0
  27. sentience/models.py +60 -1
  28. sentience/overlay.py +109 -2
  29. sentience/protocols.py +228 -0
  30. sentience/query.py +1 -1
  31. sentience/read.py +95 -3
  32. sentience/recorder.py +223 -3
  33. sentience/schemas/trace_v1.json +102 -9
  34. sentience/screenshot.py +48 -2
  35. sentience/sentience_methods.py +86 -0
  36. sentience/snapshot.py +291 -38
  37. sentience/snapshot_diff.py +141 -0
  38. sentience/text_search.py +119 -5
  39. sentience/trace_event_builder.py +129 -0
  40. sentience/trace_file_manager.py +197 -0
  41. sentience/trace_indexing/index_schema.py +95 -7
  42. sentience/trace_indexing/indexer.py +117 -14
  43. sentience/tracer_factory.py +119 -6
  44. sentience/tracing.py +172 -8
  45. sentience/utils/__init__.py +40 -0
  46. sentience/utils/browser.py +46 -0
  47. sentience/utils/element.py +257 -0
  48. sentience/utils/formatting.py +59 -0
  49. sentience/utils.py +1 -1
  50. sentience/visual_agent.py +2056 -0
  51. sentience/wait.py +68 -2
  52. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/METADATA +2 -1
  53. sentienceapi-0.92.2.dist-info/RECORD +65 -0
  54. sentience/extension/test-content.js +0 -4
  55. sentienceapi-0.90.16.dist-info/RECORD +0 -50
  56. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/WHEEL +0 -0
  57. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/entry_points.txt +0 -0
  58. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE +0 -0
  59. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-APACHE +0 -0
  60. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-MIT +0 -0
  61. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/top_level.txt +0 -0
sentience/recorder.py CHANGED
@@ -4,11 +4,11 @@ Recorder - captures user actions into a trace
4
4
 
5
5
  import json
6
6
  from datetime import datetime
7
- from typing import Any
7
+ from typing import Any, Optional
8
8
 
9
- from .browser import SentienceBrowser
9
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
10
10
  from .models import Element, Snapshot
11
- from .snapshot import snapshot
11
+ from .snapshot import snapshot, snapshot_async
12
12
 
13
13
 
14
14
  class TraceStep:
@@ -367,3 +367,223 @@ def record(browser: SentienceBrowser, capture_snapshots: bool = False) -> Record
367
367
  Recorder instance
368
368
  """
369
369
  return Recorder(browser, capture_snapshots=capture_snapshots)
370
+
371
+
372
class RecorderAsync:
    """Recorder for capturing user actions (async)

    While recording is active, navigation, click, type and key-press events
    reported through the ``record_*`` methods are appended to a ``Trace``.
    Events reported while the recorder is inactive are silently ignored.

    Usable as an async context manager: entering starts recording, leaving
    stops it.
    """

    # Maximum number of characters of free text embedded in a ``text~"..."``
    # selector term.
    _MAX_SELECTOR_TEXT_LENGTH = 50

    def __init__(self, browser: AsyncSentienceBrowser, capture_snapshots: bool = False):
        """
        Args:
            browser: AsyncSentienceBrowser instance whose page is recorded
            capture_snapshots: Whether to attach a snapshot to each click step
        """
        self.browser = browser
        self.capture_snapshots = capture_snapshots
        self.trace: Trace | None = None
        self._active = False
        self._mask_patterns: list[str] = []  # Patterns to mask (e.g., "password", "email")

    async def start(self) -> None:
        """Start recording

        Raises:
            RuntimeError: If the browser has no page yet
        """
        if not self.browser.page:
            raise RuntimeError("Browser not started. Call await browser.start() first.")

        self._active = True
        start_url = self.browser.page.url
        self.trace = Trace(start_url)

        # Set up event listeners in the browser
        self._setup_listeners()

    def stop(self) -> None:
        """Stop recording"""
        self._active = False
        self._cleanup_listeners()

    def add_mask_pattern(self, pattern: str) -> None:
        """Add a pattern to mask in recorded text (e.g., "password", "email")

        Patterns are stored lower-cased so matching is case-insensitive.
        """
        self._mask_patterns.append(pattern.lower())

    def _should_mask(self, text: str) -> bool:
        """Check if text should be masked (contains any registered pattern)"""
        text_lower = text.lower()
        return any(pattern in text_lower for pattern in self._mask_patterns)

    def _setup_listeners(self) -> None:
        """Set up event listeners to capture actions"""
        # Note: We'll capture actions through the SDK methods rather than DOM events
        # This is cleaner and more reliable
        pass

    def _cleanup_listeners(self) -> None:
        """Clean up event listeners"""
        pass

    @staticmethod
    def _quote_selector_value(value: str, max_length: int | None = None) -> str:
        """Escape a value for embedding inside a double-quoted selector term.

        The value is truncated to ``max_length`` characters first (when given)
        and only then are double quotes backslash-escaped, so truncation can
        never cut an escape sequence in half and leave a dangling backslash.
        """
        if max_length is not None:
            value = value[:max_length]
        return value.replace('"', '\\"')

    async def _attribute_selector_part(self, element_id: int) -> str | None:
        """Build a selector term from the element's name/aria-label/placeholder.

        Returns None when the element is not in the page registry, has none of
        these attributes, or the page lookup fails (best effort).
        """
        try:
            # The id is passed as an argument rather than spliced into the
            # script text, so it can never be interpreted as JavaScript.
            attrs = await self.browser.page.evaluate(
                """
                (id) => {
                    const el = window.sentience_registry[id];
                    if (!el) return null;
                    return {
                        name: el.name || null,
                        ariaLabel: el.getAttribute('aria-label') || null,
                        placeholder: el.placeholder || null
                    };
                }
                """,
                element_id,
            )

            if not attrs:
                return None
            if attrs.get("name"):
                # Name is an exact match, so it is escaped but not truncated
                return f'name="{self._quote_selector_value(str(attrs["name"]))}"'
            for key in ("ariaLabel", "placeholder"):
                if attrs.get(key):
                    label = self._quote_selector_value(
                        str(attrs[key]), self._MAX_SELECTOR_TEXT_LENGTH
                    )
                    return f'text~"{label}"'
        except Exception:
            # Deliberate best effort: a failed DOM lookup only means the
            # selector is built without an attribute term.
            pass
        return None

    async def _infer_selector(self, element_id: int) -> str | None:  # noqa: C901
        """
        Infer a semantic selector for an element (async)

        Uses heuristics to build a robust selector:
        - role=... text~"..."
        - If text empty: use name/aria-label/placeholder
        - Include clickable=true when relevant
        - Validate against snapshot (should match at least 1 element)

        Returns:
            The selector string, or None when the element is unknown, no
            selector term could be built, the selector matches nothing, or
            any step raises.
        """
        try:
            # Take a snapshot to get element info
            snap = await snapshot_async(self.browser)

            # Find the element in the snapshot
            element = next((item for item in snap.elements if item.id == element_id), None)
            if not element:
                return None

            # Build candidate selector
            parts = []

            if element.role and element.role != "generic":
                parts.append(f"role={element.role}")

            if element.text:
                # Use contains match for text, limited in length
                text_part = self._quote_selector_value(
                    element.text, self._MAX_SELECTOR_TEXT_LENGTH
                )
                parts.append(f'text~"{text_part}"')
            else:
                # Fall back to name/aria-label/placeholder from the DOM
                attribute_part = await self._attribute_selector_part(element_id)
                if attribute_part:
                    parts.append(attribute_part)

            if element.visual_cues.is_clickable:
                parts.append("clickable=true")

            if not parts:
                return None

            selector = " ".join(parts)

            # Validate selector against the snapshot. An ambiguous selector
            # (several matches) is still returned for now; a selector that
            # matches nothing is dropped so the caller falls back to element_id.
            if any(self._match_element(item, selector) for item in snap.elements):
                return selector
            return None

        except Exception:
            return None

    def _match_element(self, element: Element, selector: str) -> bool:
        """Simple selector matching (basic implementation)

        Returns False when the selector cannot be parsed or matching raises.
        """
        # This is a simplified version - in production, use the full query engine
        from .query import match_element, parse_selector

        try:
            query_dict = parse_selector(selector)
            return match_element(element, query_dict)
        except Exception:
            return False

    def record_navigation(self, url: str) -> None:
        """Record a navigation event"""
        if self._active and self.trace:
            self.trace.add_navigation(url)

    async def record_click(self, element_id: int, selector: str | None = None) -> None:
        """Record a click event with smart selector inference (async)"""
        if not (self._active and self.trace):
            return

        # If no selector provided, try to infer one
        if selector is None:
            selector = await self._infer_selector(element_id)

        if not self.capture_snapshots:
            self.trace.add_click(element_id, selector)
            return

        # Optionally capture snapshot
        try:
            snap = await snapshot_async(self.browser)
            step = TraceStep(
                ts=int((datetime.now() - self.trace._start_time).total_seconds() * 1000),
                type="click",
                element_id=element_id,
                selector=selector,
                snapshot=snap,
            )
            self.trace.add_step(step)
        except Exception:
            # If snapshot fails, just record without it
            self.trace.add_click(element_id, selector)

    async def record_type(self, element_id: int, text: str, selector: str | None = None) -> None:
        """Record a type event with smart selector inference (async)"""
        if not (self._active and self.trace):
            return

        # If no selector provided, try to infer one
        if selector is None:
            selector = await self._infer_selector(element_id)

        mask = self._should_mask(text)
        self.trace.add_type(element_id, text, selector, mask=mask)

    def record_press(self, key: str) -> None:
        """Record a key press event"""
        if self._active and self.trace:
            self.trace.add_press(key)

    def save(self, filepath: str) -> None:
        """Save trace to file

        Raises:
            RuntimeError: If recording was never started
        """
        if not self.trace:
            raise RuntimeError("No trace to save. Start recording first.")
        self.trace.save(filepath)

    async def __aenter__(self):
        """Context manager entry"""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        self.stop()
576
+
577
+
578
def record_async(browser: AsyncSentienceBrowser, capture_snapshots: bool = False) -> RecorderAsync:
    """
    Build an async recorder bound to the given browser.

    Args:
        browser: AsyncSentienceBrowser instance to record against
        capture_snapshots: Forwarded to the recorder; whether to capture
            snapshots at each step

    Returns:
        A new RecorderAsync instance (recording is not started here)
    """
    recorder = RecorderAsync(browser, capture_snapshots=capture_snapshots)
    return recorder
@@ -13,7 +13,7 @@
13
13
  },
14
14
  "type": {
15
15
  "type": "string",
16
- "enum": ["run_start", "step_start", "snapshot_taken", "llm_called", "action_executed", "verification", "recovery", "step_end", "run_end", "error"],
16
+ "enum": ["run_start", "step_start", "snapshot", "snapshot_taken", "llm_called", "llm_response", "action", "action_executed", "verification", "recovery", "step_end", "run_end", "error"],
17
17
  "description": "Event type"
18
18
  },
19
19
  "ts": {
@@ -64,15 +64,61 @@
64
64
  }
65
65
  },
66
66
  {
67
- "description": "snapshot_taken data",
68
- "required": ["step_id", "snapshot_digest"],
67
+ "description": "snapshot or snapshot_taken data",
69
68
  "properties": {
70
- "step_id": {"type": "string"},
69
+ "step_id": {"type": ["string", "null"]},
71
70
  "snapshot_id": {"type": ["string", "null"]},
72
71
  "snapshot_digest": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},
73
72
  "snapshot_digest_loose": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},
74
73
  "url": {"type": ["string", "null"]},
75
- "element_count": {"type": "integer"}
74
+ "element_count": {"type": "integer"},
75
+ "timestamp": {"type": ["string", "null"]},
76
+ "elements": {
77
+ "type": "array",
78
+ "items": {
79
+ "type": "object",
80
+ "properties": {
81
+ "id": {"type": "integer"},
82
+ "role": {"type": "string"},
83
+ "text": {"type": ["string", "null"]},
84
+ "importance": {"type": "number"},
85
+ "importance_score": {"type": "number"},
86
+ "bbox": {
87
+ "type": "object",
88
+ "properties": {
89
+ "x": {"type": "number"},
90
+ "y": {"type": "number"},
91
+ "width": {"type": "number"},
92
+ "height": {"type": "number"}
93
+ },
94
+ "required": ["x", "y", "width", "height"]
95
+ },
96
+ "visual_cues": {
97
+ "type": "object",
98
+ "properties": {
99
+ "is_primary": {"type": "boolean"},
100
+ "is_clickable": {"type": "boolean"},
101
+ "background_color_name": {"type": ["string", "null"]}
102
+ }
103
+ },
104
+ "in_viewport": {"type": "boolean"},
105
+ "is_occluded": {"type": "boolean"},
106
+ "z_index": {"type": "integer"},
107
+ "rerank_index": {"type": ["integer", "null"]},
108
+ "heuristic_index": {"type": ["integer", "null"]},
109
+ "ml_probability": {"type": ["number", "null"]},
110
+ "ml_score": {"type": ["number", "null"]},
111
+ "diff_status": {
112
+ "type": ["string", "null"],
113
+ "enum": ["ADDED", "REMOVED", "MODIFIED", "MOVED", null],
114
+ "description": "Diff status for Diff Overlay feature. ADDED: new element, REMOVED: element was removed, MODIFIED: element changed, MOVED: element position changed, null: no change"
115
+ }
116
+ },
117
+ "required": ["id", "role", "importance", "bbox", "visual_cues"]
118
+ }
119
+ },
120
+ "screenshot_base64": {"type": ["string", "null"]},
121
+ "screenshot_format": {"type": ["string", "null"], "enum": ["png", "jpeg", null]}
76
122
  }
77
123
  },
78
124
  {
@@ -119,7 +165,15 @@
119
165
  "required": ["response_text", "response_hash"],
120
166
  "properties": {
121
167
  "response_text": {"type": "string"},
122
- "response_hash": {"type": "string"}
168
+ "response_hash": {"type": "string"},
169
+ "usage": {
170
+ "type": "object",
171
+ "properties": {
172
+ "prompt_tokens": {"type": "integer"},
173
+ "completion_tokens": {"type": "integer"},
174
+ "total_tokens": {"type": "integer"}
175
+ }
176
+ }
123
177
  }
124
178
  },
125
179
  "action": {
@@ -145,7 +199,17 @@
145
199
  "text": {"type": "string"},
146
200
  "key": {"type": "string"},
147
201
  "url_changed": {"type": ["boolean", "null"]},
148
- "duration_ms": {"type": "integer"}
202
+ "duration_ms": {"type": "integer"},
203
+ "error": {"type": ["string", "null"]},
204
+ "bounding_box": {
205
+ "type": "object",
206
+ "properties": {
207
+ "x": {"type": "number"},
208
+ "y": {"type": "number"},
209
+ "width": {"type": "number"},
210
+ "height": {"type": "number"}
211
+ }
212
+ }
149
213
  }
150
214
  },
151
215
  "post": {
@@ -162,7 +226,31 @@
162
226
  "properties": {
163
227
  "policy": {"type": "string"},
164
228
  "passed": {"type": "boolean"},
165
- "signals": {"type": "object"}
229
+ "signals": {
230
+ "type": "object",
231
+ "properties": {
232
+ "url_changed": {"type": "boolean"},
233
+ "error": {"type": ["string", "null"]},
234
+ "elements_found": {
235
+ "type": "array",
236
+ "items": {
237
+ "type": "object",
238
+ "properties": {
239
+ "label": {"type": "string"},
240
+ "bounding_box": {
241
+ "type": "object",
242
+ "properties": {
243
+ "x": {"type": "number"},
244
+ "y": {"type": "number"},
245
+ "width": {"type": "number"},
246
+ "height": {"type": "number"}
247
+ }
248
+ }
249
+ }
250
+ }
251
+ }
252
+ }
253
+ }
166
254
  }
167
255
  },
168
256
  "recovery": {
@@ -198,7 +286,12 @@
198
286
  "description": "run_end data",
199
287
  "required": ["steps"],
200
288
  "properties": {
201
- "steps": {"type": "integer"}
289
+ "steps": {"type": "integer"},
290
+ "status": {
291
+ "type": "string",
292
+ "enum": ["success", "failure", "partial", "unknown"],
293
+ "description": "Final execution status"
294
+ }
202
295
  }
203
296
  },
204
297
  {
sentience/screenshot.py CHANGED
@@ -2,9 +2,10 @@
2
2
  Screenshot functionality - standalone screenshot capture
3
3
  """
4
4
 
5
- from typing import Any, Literal
5
+ import base64
6
+ from typing import Any, Literal, Optional
6
7
 
7
- from .browser import SentienceBrowser
8
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
8
9
 
9
10
 
10
11
  def screenshot(
@@ -52,3 +53,48 @@ def screenshot(
52
53
  # Return as data URL
53
54
  mime_type = "image/png" if format == "png" else "image/jpeg"
54
55
  return f"data:{mime_type};base64,{base64_data}"
56
+
57
+
58
async def screenshot_async(
    browser: AsyncSentienceBrowser,
    format: Literal["png", "jpeg"] = "png",
    quality: int | None = None,
) -> str:
    """
    Take a screenshot of the browser's current page and return it as a data URL (async)

    Args:
        browser: AsyncSentienceBrowser instance
        format: Image format - "png" or "jpeg"
        quality: JPEG quality (1-100); ignored unless format is "jpeg"

    Returns:
        Data URL holding the base64-encoded image (e.g., "data:image/png;base64,...")

    Raises:
        RuntimeError: If the browser has no page (not started)
        ValueError: If a JPEG quality outside 1-100 is given
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    # Quality only matters (and is only validated) for JPEG output
    use_quality = format == "jpeg" and quality is not None
    if use_quality and not (1 <= quality <= 100):
        raise ValueError("Quality must be between 1 and 100 for JPEG format")

    capture_kwargs: dict[str, Any] = {"type": format}
    if use_quality:
        capture_kwargs["quality"] = quality

    # The page call yields raw image bytes; base64-encode them ourselves
    raw_image = await browser.page.screenshot(**capture_kwargs)
    encoded = base64.b64encode(raw_image).decode("utf-8")

    if format == "png":
        mime_type = "image/png"
    else:
        mime_type = "image/jpeg"
    return f"data:{mime_type};base64,{encoded}"
@@ -0,0 +1,86 @@
1
+ """
2
+ Enums for Sentience API methods and agent actions.
3
+
4
+ This module provides type-safe enums for:
5
+ 1. window.sentience API methods (extension-level)
6
+ 2. Agent action types (high-level automation commands)
7
+ """
8
+
9
+ from enum import Enum
10
+
11
+
12
class SentienceMethod(str, Enum):
    """
    Names of the methods exposed on the window.sentience object.

    Each member's value is the method name as it appears on the
    window.sentience object injected by the Chrome extension.
    """

    # --- Core snapshot and element discovery ---
    # Take a snapshot of the current page with element geometry and metadata.
    SNAPSHOT = "snapshot"

    # --- Element interaction ---
    # Click an element by its ID from the snapshot registry.
    CLICK = "click"

    # --- Content extraction ---
    # Read page content as raw HTML, text, or markdown.
    READ = "read"
    # Find exact pixel coordinates of text occurrences on the page.
    FIND_TEXT_RECT = "findTextRect"

    # --- Visual overlay ---
    # Show visual overlay highlighting elements with importance scores.
    SHOW_OVERLAY = "showOverlay"
    # Clear the visual overlay.
    CLEAR_OVERLAY = "clearOverlay"

    # --- Developer tools ---
    # Start recording mode for golden set collection (developer tool).
    START_RECORDING = "startRecording"

    def __str__(self) -> str:
        """Render the member as its bare method name."""
        method_name = self.value
        return method_name
49
+
50
+
51
class AgentAction(str, Enum):
    """
    High-level action types an agent can execute.

    An action may be carried out through one or more window.sentience
    methods or through Playwright APIs directly.
    """

    # --- Element interaction ---
    # Click an element by ID. Uses window.sentience.click() or Playwright mouse.click().
    CLICK = "click"
    # Type text into an input element. Uses Playwright keyboard.type() directly.
    TYPE = "type"
    # Press a keyboard key (Enter, Escape, Tab, etc.). Uses Playwright keyboard.press().
    PRESS = "press"

    # --- Navigation ---
    # Navigate to a URL. Uses Playwright page.goto() directly.
    NAVIGATE = "navigate"
    # Scroll the page or an element. Uses Playwright page.mouse.wheel() or element.scrollIntoView().
    SCROLL = "scroll"

    # --- Completion ---
    # Signal that the agent task is complete. No browser action, just status update.
    FINISH = "finish"

    # --- Wait/verification ---
    # Wait for a condition or duration. Uses Playwright wait_for_* methods.
    WAIT = "wait"

    def __str__(self) -> str:
        """Render the member as its bare action name."""
        action_name = self.value
        return action_name