PyPI - sentienceapi - Versions diffs - 0.90.16__py3-none-any.whl → 0.92.2__py3-none-any.whl - Mend

sentienceapi 0.90.16__py3-none-any.whl → 0.92.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sentienceapi might be problematic. Click here for more details.

Files changed (61) hide show

sentience/__init__.py +14 -5
sentience/action_executor.py +215 -0
sentience/actions.py +408 -25
sentience/agent.py +802 -293
sentience/agent_config.py +3 -0
sentience/async_api.py +83 -1142
sentience/base_agent.py +95 -0
sentience/browser.py +484 -1
sentience/browser_evaluator.py +299 -0
sentience/cloud_tracing.py +457 -33
sentience/conversational_agent.py +77 -43
sentience/element_filter.py +136 -0
sentience/expect.py +98 -2
sentience/extension/background.js +56 -185
sentience/extension/content.js +117 -289
sentience/extension/injected_api.js +799 -1374
sentience/extension/manifest.json +1 -1
sentience/extension/pkg/sentience_core.js +190 -396
sentience/extension/pkg/sentience_core_bg.wasm +0 -0
sentience/extension/release.json +47 -47
sentience/formatting.py +9 -53
sentience/inspector.py +183 -1
sentience/llm_interaction_handler.py +191 -0
sentience/llm_provider.py +74 -52
sentience/llm_provider_utils.py +120 -0
sentience/llm_response_builder.py +153 -0
sentience/models.py +60 -1
sentience/overlay.py +109 -2
sentience/protocols.py +228 -0
sentience/query.py +1 -1
sentience/read.py +95 -3
sentience/recorder.py +223 -3
sentience/schemas/trace_v1.json +102 -9
sentience/screenshot.py +48 -2
sentience/sentience_methods.py +86 -0
sentience/snapshot.py +291 -38
sentience/snapshot_diff.py +141 -0
sentience/text_search.py +119 -5
sentience/trace_event_builder.py +129 -0
sentience/trace_file_manager.py +197 -0
sentience/trace_indexing/index_schema.py +95 -7
sentience/trace_indexing/indexer.py +117 -14
sentience/tracer_factory.py +119 -6
sentience/tracing.py +172 -8
sentience/utils/__init__.py +40 -0
sentience/utils/browser.py +46 -0
sentience/utils/element.py +257 -0
sentience/utils/formatting.py +59 -0
sentience/utils.py +1 -1
sentience/visual_agent.py +2056 -0
sentience/wait.py +68 -2
{sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/METADATA +2 -1
sentienceapi-0.92.2.dist-info/RECORD +65 -0
sentience/extension/test-content.js +0 -4
sentienceapi-0.90.16.dist-info/RECORD +0 -50
{sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/WHEEL +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/entry_points.txt +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-APACHE +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-MIT +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/top_level.txt +0 -0

sentience/recorder.py CHANGED Viewed

@@ -4,11 +4,11 @@ Recorder - captures user actions into a trace
 import json
 from datetime import datetime
-from typing import Any
+from typing import Any, Optional
-from .browser import SentienceBrowser
+from .browser import AsyncSentienceBrowser, SentienceBrowser
 from .models import Element, Snapshot
-from .snapshot import snapshot
+from .snapshot import snapshot, snapshot_async
 class TraceStep:
@@ -367,3 +367,223 @@ def record(browser: SentienceBrowser, capture_snapshots: bool = False) -> Record
         Recorder instance
     """
     return Recorder(browser, capture_snapshots=capture_snapshots)
+class RecorderAsync:
+    """Recorder for capturing user actions (async)"""
+    def __init__(self, browser: AsyncSentienceBrowser, capture_snapshots: bool = False):
+        self.browser = browser
+        self.capture_snapshots = capture_snapshots
+        self.trace: Trace | None = None
+        self._active = False
+        self._mask_patterns: list[str] = []  # Patterns to mask (e.g., "password", "email")
+    async def start(self) -> None:
+        """Start recording"""
+        if not self.browser.page:
+            raise RuntimeError("Browser not started. Call await browser.start() first.")
+        self._active = True
+        start_url = self.browser.page.url
+        self.trace = Trace(start_url)
+        # Set up event listeners in the browser
+        self._setup_listeners()
+    def stop(self) -> None:
+        """Stop recording"""
+        self._active = False
+        self._cleanup_listeners()
+    def add_mask_pattern(self, pattern: str) -> None:
+        """Add a pattern to mask in recorded text (e.g., "password", "email")"""
+        self._mask_patterns.append(pattern.lower())
+    def _should_mask(self, text: str) -> bool:
+        """Check if text should be masked"""
+        text_lower = text.lower()
+        return any(pattern in text_lower for pattern in self._mask_patterns)
+    def _setup_listeners(self) -> None:
+        """Set up event listeners to capture actions"""
+        # Note: We'll capture actions through the SDK methods rather than DOM events
+        # This is cleaner and more reliable
+        pass
+    def _cleanup_listeners(self) -> None:
+        """Clean up event listeners"""
+        pass
+    async def _infer_selector(self, element_id: int) -> str | None:  # noqa: C901
+        """
+        Infer a semantic selector for an element (async)
+        Uses heuristics to build a robust selector:
+        - role=... text~"..."
+        - If text empty: use name/aria-label/placeholder
+        - Include clickable=true when relevant
+        - Validate against snapshot (should match 1 element)
+        """
+        try:
+            # Take a snapshot to get element info
+            snap = await snapshot_async(self.browser)
+            # Find the element in the snapshot
+            element = None
+            for el in snap.elements:
+                if el.id == element_id:
+                    element = el
+                    break
+            if not element:
+                return None
+            # Build candidate selector
+            parts = []
+            # Add role
+            if element.role and element.role != "generic":
+                parts.append(f"role={element.role}")
+            # Add text if available
+            if element.text:
+                # Use contains match for text
+                text_part = element.text.replace('"', '\\"')[:50]  # Limit length
+                parts.append(f'text~"{text_part}"')
+            else:
+                # Try to get name/aria-label/placeholder from DOM
+                try:
+                    el = await self.browser.page.evaluate(
+                        f"""
+                        () => {{
+                            const el = window.sentience_registry[{element_id}];
+                            if (!el) return null;
+                            return {{
+                                name: el.name || null,
+                                ariaLabel: el.getAttribute('aria-label') || null,
+                                placeholder: el.placeholder || null
+                            }};
+                        }}
+                    """
+                    )
+                    if el:
+                        if el.get("name"):
+                            parts.append(f'name="{el["name"]}"')
+                        elif el.get("ariaLabel"):
+                            parts.append(f'text~"{el["ariaLabel"]}"')
+                        elif el.get("placeholder"):
+                            parts.append(f'text~"{el["placeholder"]}"')
+                except Exception:
+                    pass
+            # Add clickable if relevant
+            if element.visual_cues.is_clickable:
+                parts.append("clickable=true")
+            if not parts:
+                return None
+            selector = " ".join(parts)
+            # Validate selector - should match exactly 1 element
+            matches = [el for el in snap.elements if self._match_element(el, selector)]
+            if len(matches) == 1:
+                return selector
+            elif len(matches) > 1:
+                # Add more constraints (importance threshold, near-center)
+                # For now, just return the selector with a note
+                return selector
+            else:
+                # Selector doesn't match - return None (will use element_id)
+                return None
+        except Exception:
+            return None
+    def _match_element(self, element: Element, selector: str) -> bool:
+        """Simple selector matching (basic implementation)"""
+        # This is a simplified version - in production, use the full query engine
+        from .query import match_element, parse_selector
+        try:
+            query_dict = parse_selector(selector)
+            return match_element(element, query_dict)
+        except Exception:
+            return False
+    def record_navigation(self, url: str) -> None:
+        """Record a navigation event"""
+        if self._active and self.trace:
+            self.trace.add_navigation(url)
+    async def record_click(self, element_id: int, selector: str | None = None) -> None:
+        """Record a click event with smart selector inference (async)"""
+        if self._active and self.trace:
+            # If no selector provided, try to infer one
+            if selector is None:
+                selector = await self._infer_selector(element_id)
+            # Optionally capture snapshot
+            if self.capture_snapshots:
+                try:
+                    snap = await snapshot_async(self.browser)
+                    step = TraceStep(
+                        ts=int((datetime.now() - self.trace._start_time).total_seconds() * 1000),
+                        type="click",
+                        element_id=element_id,
+                        selector=selector,
+                        snapshot=snap,
+                    )
+                    self.trace.add_step(step)
+                except Exception:
+                    # If snapshot fails, just record without it
+                    self.trace.add_click(element_id, selector)
+            else:
+                self.trace.add_click(element_id, selector)
+    async def record_type(self, element_id: int, text: str, selector: str | None = None) -> None:
+        """Record a type event with smart selector inference (async)"""
+        if self._active and self.trace:
+            # If no selector provided, try to infer one
+            if selector is None:
+                selector = await self._infer_selector(element_id)
+            mask = self._should_mask(text)
+            self.trace.add_type(element_id, text, selector, mask=mask)
+    def record_press(self, key: str) -> None:
+        """Record a key press event"""
+        if self._active and self.trace:
+            self.trace.add_press(key)
+    def save(self, filepath: str) -> None:
+        """Save trace to file"""
+        if not self.trace:
+            raise RuntimeError("No trace to save. Start recording first.")
+        self.trace.save(filepath)
+    async def __aenter__(self):
+        """Context manager entry"""
+        await self.start()
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit"""
+        self.stop()
+def record_async(browser: AsyncSentienceBrowser, capture_snapshots: bool = False) -> RecorderAsync:
+    """
+    Create a recorder instance (async)
+    Args:
+        browser: AsyncSentienceBrowser instance
+        capture_snapshots: Whether to capture snapshots at each step
+    Returns:
+        RecorderAsync instance
+    """
+    return RecorderAsync(browser, capture_snapshots=capture_snapshots)

sentience/schemas/trace_v1.json CHANGED Viewed

@@ -13,7 +13,7 @@
     },
     "type": {
       "type": "string",
-      "enum": ["run_start", "step_start", "snapshot_taken", "llm_called", "action_executed", "verification", "recovery", "step_end", "run_end", "error"],
+      "enum": ["run_start", "step_start", "snapshot", "snapshot_taken", "llm_called", "llm_response", "action", "action_executed", "verification", "recovery", "step_end", "run_end", "error"],
       "description": "Event type"
     },
     "ts": {
@@ -64,15 +64,61 @@
           }
         },
         {
-          "description": "snapshot_taken data",
-          "required": ["step_id", "snapshot_digest"],
+          "description": "snapshot or snapshot_taken data",
           "properties": {
-            "step_id": {"type": "string"},
+            "step_id": {"type": ["string", "null"]},
             "snapshot_id": {"type": ["string", "null"]},
             "snapshot_digest": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},
             "snapshot_digest_loose": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},
             "url": {"type": ["string", "null"]},
-            "element_count": {"type": "integer"}
+            "element_count": {"type": "integer"},
+            "timestamp": {"type": ["string", "null"]},
+            "elements": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "id": {"type": "integer"},
+                  "role": {"type": "string"},
+                  "text": {"type": ["string", "null"]},
+                  "importance": {"type": "number"},
+                  "importance_score": {"type": "number"},
+                  "bbox": {
+                    "type": "object",
+                    "properties": {
+                      "x": {"type": "number"},
+                      "y": {"type": "number"},
+                      "width": {"type": "number"},
+                      "height": {"type": "number"}
+                    },
+                    "required": ["x", "y", "width", "height"]
+                  },
+                  "visual_cues": {
+                    "type": "object",
+                    "properties": {
+                      "is_primary": {"type": "boolean"},
+                      "is_clickable": {"type": "boolean"},
+                      "background_color_name": {"type": ["string", "null"]}
+                    }
+                  },
+                  "in_viewport": {"type": "boolean"},
+                  "is_occluded": {"type": "boolean"},
+                  "z_index": {"type": "integer"},
+                  "rerank_index": {"type": ["integer", "null"]},
+                  "heuristic_index": {"type": ["integer", "null"]},
+                  "ml_probability": {"type": ["number", "null"]},
+                  "ml_score": {"type": ["number", "null"]},
+                  "diff_status": {
+                    "type": ["string", "null"],
+                    "enum": ["ADDED", "REMOVED", "MODIFIED", "MOVED", null],
+                    "description": "Diff status for Diff Overlay feature. ADDED: new element, REMOVED: element was removed, MODIFIED: element changed, MOVED: element position changed, null: no change"
+                  }
+                },
+                "required": ["id", "role", "importance", "bbox", "visual_cues"]
+              }
+            },
+            "screenshot_base64": {"type": ["string", "null"]},
+            "screenshot_format": {"type": ["string", "null"], "enum": ["png", "jpeg", null]}
           }
         },
         {
@@ -119,7 +165,15 @@
               "required": ["response_text", "response_hash"],
               "properties": {
                 "response_text": {"type": "string"},
-                "response_hash": {"type": "string"}
+                "response_hash": {"type": "string"},
+                "usage": {
+                  "type": "object",
+                  "properties": {
+                    "prompt_tokens": {"type": "integer"},
+                    "completion_tokens": {"type": "integer"},
+                    "total_tokens": {"type": "integer"}
+                  }
+                }
               }
             },
             "action": {
@@ -145,7 +199,17 @@
                 "text": {"type": "string"},
                 "key": {"type": "string"},
                 "url_changed": {"type": ["boolean", "null"]},
-                "duration_ms": {"type": "integer"}
+                "duration_ms": {"type": "integer"},
+                "error": {"type": ["string", "null"]},
+                "bounding_box": {
+                  "type": "object",
+                  "properties": {
+                    "x": {"type": "number"},
+                    "y": {"type": "number"},
+                    "width": {"type": "number"},
+                    "height": {"type": "number"}
+                  }
+                }
               }
             },
             "post": {
@@ -162,7 +226,31 @@
               "properties": {
                 "policy": {"type": "string"},
                 "passed": {"type": "boolean"},
-                "signals": {"type": "object"}
+                "signals": {
+                  "type": "object",
+                  "properties": {
+                    "url_changed": {"type": "boolean"},
+                    "error": {"type": ["string", "null"]},
+                    "elements_found": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "label": {"type": "string"},
+                          "bounding_box": {
+                            "type": "object",
+                            "properties": {
+                              "x": {"type": "number"},
+                              "y": {"type": "number"},
+                              "width": {"type": "number"},
+                              "height": {"type": "number"}
+                            }
+                          }
+                        }
+                      }
+                    }
+                  }
+                }
               }
             },
             "recovery": {
@@ -198,7 +286,12 @@
           "description": "run_end data",
           "required": ["steps"],
           "properties": {
-            "steps": {"type": "integer"}
+            "steps": {"type": "integer"},
+            "status": {
+              "type": "string",
+              "enum": ["success", "failure", "partial", "unknown"],
+              "description": "Final execution status"
+            }
           }
         },
         {

sentience/screenshot.py CHANGED Viewed

@@ -2,9 +2,10 @@
 Screenshot functionality - standalone screenshot capture
 """
-from typing import Any, Literal
+import base64
+from typing import Any, Literal, Optional
-from .browser import SentienceBrowser
+from .browser import AsyncSentienceBrowser, SentienceBrowser
 def screenshot(
@@ -52,3 +53,48 @@ def screenshot(
     # Return as data URL
     mime_type = "image/png" if format == "png" else "image/jpeg"
     return f"data:{mime_type};base64,{base64_data}"
+async def screenshot_async(
+    browser: AsyncSentienceBrowser,
+    format: Literal["png", "jpeg"] = "png",
+    quality: int | None = None,
+) -> str:
+    """
+    Capture screenshot of current page (async)
+    Args:
+        browser: AsyncSentienceBrowser instance
+        format: Image format - "png" or "jpeg"
+        quality: JPEG quality (1-100), only used for JPEG format
+    Returns:
+        Base64-encoded screenshot data URL (e.g., "data:image/png;base64,...")
+    Raises:
+        RuntimeError: If browser not started
+        ValueError: If quality is invalid for JPEG
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+    if format == "jpeg" and quality is not None:
+        if not (1 <= quality <= 100):
+            raise ValueError("Quality must be between 1 and 100 for JPEG format")
+    # Use Playwright's screenshot with base64 encoding
+    screenshot_options: dict[str, Any] = {
+        "type": format,
+    }
+    if format == "jpeg" and quality is not None:
+        screenshot_options["quality"] = quality
+    # Capture screenshot as base64
+    # Playwright returns bytes when encoding is not specified, so we encode manually
+    image_bytes = await browser.page.screenshot(**screenshot_options)
+    base64_data = base64.b64encode(image_bytes).decode("utf-8")
+    # Return as data URL
+    mime_type = "image/png" if format == "png" else "image/jpeg"
+    return f"data:{mime_type};base64,{base64_data}"

sentience/sentience_methods.py ADDED Viewed

@@ -0,0 +1,86 @@
+"""
+Enums for Sentience API methods and agent actions.
+This module provides type-safe enums for:
+1. window.sentience API methods (extension-level)
+2. Agent action types (high-level automation commands)
+"""
+from enum import Enum
+class SentienceMethod(str, Enum):
+    """
+    Enum for window.sentience API methods.
+    These are the actual methods available on the window.sentience object
+    injected by the Chrome extension.
+    """
+    # Core snapshot and element discovery
+    SNAPSHOT = "snapshot"
+    """Take a snapshot of the current page with element geometry and metadata."""
+    # Element interaction
+    CLICK = "click"
+    """Click an element by its ID from the snapshot registry."""
+    # Content extraction
+    READ = "read"
+    """Read page content as raw HTML, text, or markdown."""
+    FIND_TEXT_RECT = "findTextRect"
+    """Find exact pixel coordinates of text occurrences on the page."""
+    # Visual overlay
+    SHOW_OVERLAY = "showOverlay"
+    """Show visual overlay highlighting elements with importance scores."""
+    CLEAR_OVERLAY = "clearOverlay"
+    """Clear the visual overlay."""
+    # Developer tools
+    START_RECORDING = "startRecording"
+    """Start recording mode for golden set collection (developer tool)."""
+    def __str__(self) -> str:
+        """Return the method name as a string."""
+        return self.value
+class AgentAction(str, Enum):
+    """
+    Enum for high-level agent action types.
+    These are the action commands that agents can execute. They may use
+    one or more window.sentience methods or Playwright APIs directly.
+    """
+    # Element interaction
+    CLICK = "click"
+    """Click an element by ID. Uses window.sentience.click() or Playwright mouse.click()."""
+    TYPE = "type"
+    """Type text into an input element. Uses Playwright keyboard.type() directly."""
+    PRESS = "press"
+    """Press a keyboard key (Enter, Escape, Tab, etc.). Uses Playwright keyboard.press()."""
+    # Navigation
+    NAVIGATE = "navigate"
+    """Navigate to a URL. Uses Playwright page.goto() directly."""
+    SCROLL = "scroll"
+    """Scroll the page or an element. Uses Playwright page.mouse.wheel() or element.scrollIntoView()."""
+    # Completion
+    FINISH = "finish"
+    """Signal that the agent task is complete. No browser action, just status update."""
+    # Wait/verification
+    WAIT = "wait"
+    """Wait for a condition or duration. Uses Playwright wait_for_* methods."""
+    def __str__(self) -> str:
+        """Return the action name as a string."""
+        return self.value

sentienceapi 0.90.16__py3-none-any.whl → 0.92.2__py3-none-any.whl

Potentially problematic release.

sentienceapi 0.90.16__py3-none-any.whl → 0.92.2__py3-none-any.whl