PyPI - sentienceapi - Versions diffs - 0.90.17__py3-none-any.whl - Mend

sentienceapi 0.90.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sentienceapi might be problematic. Click here for more details.

Files changed (50)

sentience/__init__.py +153 -0
sentience/_extension_loader.py +40 -0
sentience/actions.py +837 -0
sentience/agent.py +1246 -0
sentience/agent_config.py +43 -0
sentience/async_api.py +101 -0
sentience/base_agent.py +194 -0
sentience/browser.py +1037 -0
sentience/cli.py +130 -0
sentience/cloud_tracing.py +382 -0
sentience/conversational_agent.py +509 -0
sentience/expect.py +188 -0
sentience/extension/background.js +233 -0
sentience/extension/content.js +298 -0
sentience/extension/injected_api.js +1473 -0
sentience/extension/manifest.json +36 -0
sentience/extension/pkg/sentience_core.d.ts +51 -0
sentience/extension/pkg/sentience_core.js +529 -0
sentience/extension/pkg/sentience_core_bg.wasm +0 -0
sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
sentience/extension/release.json +115 -0
sentience/extension/test-content.js +4 -0
sentience/formatting.py +59 -0
sentience/generator.py +202 -0
sentience/inspector.py +365 -0
sentience/llm_provider.py +637 -0
sentience/models.py +412 -0
sentience/overlay.py +222 -0
sentience/query.py +303 -0
sentience/read.py +185 -0
sentience/recorder.py +589 -0
sentience/schemas/trace_v1.json +216 -0
sentience/screenshot.py +100 -0
sentience/snapshot.py +516 -0
sentience/text_search.py +290 -0
sentience/trace_indexing/__init__.py +27 -0
sentience/trace_indexing/index_schema.py +111 -0
sentience/trace_indexing/indexer.py +357 -0
sentience/tracer_factory.py +211 -0
sentience/tracing.py +285 -0
sentience/utils.py +296 -0
sentience/wait.py +137 -0
sentienceapi-0.90.17.dist-info/METADATA +917 -0
sentienceapi-0.90.17.dist-info/RECORD +50 -0
sentienceapi-0.90.17.dist-info/WHEEL +5 -0
sentienceapi-0.90.17.dist-info/entry_points.txt +2 -0
sentienceapi-0.90.17.dist-info/licenses/LICENSE +24 -0
sentienceapi-0.90.17.dist-info/licenses/LICENSE-APACHE +201 -0
sentienceapi-0.90.17.dist-info/licenses/LICENSE-MIT +21 -0
sentienceapi-0.90.17.dist-info/top_level.txt +1 -0

sentience/models.py ADDED Viewed

@@ -0,0 +1,412 @@
+"""
+Pydantic models for Sentience SDK - matches spec/snapshot.schema.json
+"""
+from typing import Literal, Optional
+from pydantic import BaseModel, Field
+class BBox(BaseModel):
+    """Bounding box coordinates"""
+    # All four fields are required (no defaults are declared).
+    # NOTE(review): units and coordinate origin are not stated in this file - presumably pixels; confirm.
+    x: float
+    y: float
+    width: float
+    height: float
+class Viewport(BaseModel):
+    """Viewport dimensions"""
+    # Both fields are required. Used as the type of Snapshot.viewport and
+    # TextRectSearchResult.viewport in this module.
+    width: float
+    height: float
+class VisualCues(BaseModel):
+    """Visual analysis cues"""
+    is_primary: bool  # required
+    background_color_name: str | None = None  # optional; defaults to None
+    is_clickable: bool  # required
+class Element(BaseModel):
+    """Element from snapshot"""
+    # id, role, importance, bbox and visual_cues declare no default, so they are required.
+    id: int
+    role: str
+    text: str | None = None  # optional; defaults to None
+    importance: int
+    bbox: BBox
+    visual_cues: VisualCues
+    # Visibility/stacking fields; these defaults apply when the input omits them.
+    in_viewport: bool = True
+    is_occluded: bool = False
+    z_index: int = 0
+    # ML reranking metadata (optional - can be absent or null)
+    rerank_index: int | None = None  # 0-based, The rank after ML reranking
+    heuristic_index: int | None = None  # 0-based, Where it would have been without ML
+    ml_probability: float | None = None  # Confidence score from ONNX model (0.0 - 1.0)
+    ml_score: float | None = None  # Raw logit score (optional, for debugging)
+class Snapshot(BaseModel):
+    """Snapshot response from extension"""
+    status: Literal["success", "error"]
+    timestamp: str | None = None
+    url: str
+    viewport: Viewport | None = None
+    elements: list[Element]
+    screenshot: str | None = None
+    screenshot_format: Literal["png", "jpeg"] | None = None
+    error: str | None = None
+    requires_license: bool | None = None
+    def save(self, filepath: str) -> None:
+        """Save snapshot as JSON file"""
+        import json
+        with open(filepath, "w") as f:
+            json.dump(self.model_dump(), f, indent=2)
+class ActionResult(BaseModel):
+    """Result of an action (click, type, press)"""
+    success: bool  # required
+    duration_ms: int  # required
+    # The remaining fields are optional and default to None.
+    outcome: Literal["navigated", "dom_updated", "no_change", "error"] | None = None
+    url_changed: bool | None = None
+    snapshot_after: Snapshot | None = None
+    # NOTE(review): the keys of this dict are not specified in this file - confirm against the producer.
+    error: dict | None = None
+class WaitResult(BaseModel):
+    """Result of wait_for operation"""
+    found: bool  # required
+    element: Element | None = None  # optional; defaults to None
+    duration_ms: int  # required
+    timeout: bool  # required
+# ========== Agent Layer Models ==========
+class ScreenshotConfig(BaseModel):
+    """Screenshot format configuration"""
+    format: Literal["png", "jpeg"] = "png"  # defaults to "png"
+    # The 1-100 bound is enforced by Field; nothing in this model rejects a
+    # quality value when format is "png".
+    quality: int | None = Field(None, ge=1, le=100)  # Only for JPEG (1-100)
+class SnapshotFilter(BaseModel):
+    """Filter options for snapshot elements"""
+    # Every criterion is optional and defaults to None.
+    min_area: int | None = Field(None, ge=0)  # must be >= 0 when provided
+    allowed_roles: list[str] | None = None
+    min_z_index: int | None = None
+class SnapshotOptions(BaseModel):
+    """
+    Configuration for snapshot calls.
+    Matches TypeScript SnapshotOptions interface from sdk-ts/src/snapshot.ts
+    """
+    screenshot: bool | ScreenshotConfig = False  # Union type: boolean or config
+    limit: int = Field(50, ge=1, le=500)
+    filter: SnapshotFilter | None = None
+    use_api: bool | None = None  # Force API vs extension
+    save_trace: bool = False  # Save raw_elements to JSON for benchmarking/training
+    trace_path: str | None = None  # Path to save trace (default: "trace_{timestamp}.json")
+    goal: str | None = None  # Optional goal/task description for the snapshot
+    show_overlay: bool = False  # Show visual overlay highlighting elements in browser
+    class Config:
+        arbitrary_types_allowed = True
+class AgentActionResult(BaseModel):
+    """Result of a single agent action (from agent.act())"""
+    success: bool
+    action: Literal["click", "type", "press", "finish", "error"]
+    goal: str
+    duration_ms: int
+    attempt: int
+    # Optional fields based on action type
+    element_id: int | None = None
+    text: str | None = None
+    key: str | None = None
+    outcome: Literal["navigated", "dom_updated", "no_change", "error"] | None = None
+    url_changed: bool | None = None
+    error: str | None = None
+    message: str | None = None  # For FINISH action
+    def __getitem__(self, key):
+        """
+        Support dict-style access for backward compatibility.
+        This allows existing code using result["success"] to continue working.
+        """
+        import warnings
+        warnings.warn(
+            f"Dict-style access result['{key}'] is deprecated. Use result.{key} instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return getattr(self, key)
+class ActionTokenUsage(BaseModel):
+    """Token usage for a single action"""
+    # All fields are required. The model does not check that
+    # total_tokens equals prompt_tokens + completion_tokens.
+    goal: str
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+    model: str
+class TokenStats(BaseModel):
+    """Token usage statistics for an agent session"""
+    # All fields are required. The totals are stored as given; nothing in this
+    # model recomputes them from by_action.
+    total_prompt_tokens: int
+    total_completion_tokens: int
+    total_tokens: int
+    by_action: list[ActionTokenUsage]
+class ActionHistory(BaseModel):
+    """Single history entry from agent execution"""
+    # All fields are required.
+    goal: str
+    action: str  # The raw action string from LLM
+    result: dict  # Will be AgentActionResult but stored as dict for flexibility
+    success: bool
+    attempt: int
+    duration_ms: int
+class ProxyConfig(BaseModel):
+    """
+    Proxy configuration for browser networking.
+    Supports HTTP, HTTPS, and SOCKS5 proxies with optional authentication.
+    """
+    server: str = Field(
+        ...,
+        description="Proxy server URL including scheme and port (e.g., 'http://proxy.example.com:8080')",
+    )
+    username: str | None = Field(
+        None,
+        description="Username for proxy authentication (optional)",
+    )
+    password: str | None = Field(
+        None,
+        description="Password for proxy authentication (optional)",
+    )
+    def to_playwright_dict(self) -> dict:
+        """
+        Convert to Playwright proxy configuration format.
+        Returns:
+            Dict compatible with Playwright's proxy parameter
+        """
+        config = {"server": self.server}
+        if self.username and self.password:
+            config["username"] = self.username
+            config["password"] = self.password
+        return config
+# ========== Storage State Models (Auth Injection) ==========
+class Cookie(BaseModel):
+    """
+    Cookie definition for storage state injection.
+    Matches Playwright's cookie format for storage_state.
+    """
+    # Only name, value and domain are required; the rest have defaults.
+    # The camelCase names (httpOnly, sameSite) follow the Playwright cookie format
+    # named in the docstring, so they must not be renamed to snake_case.
+    name: str = Field(..., description="Cookie name")
+    value: str = Field(..., description="Cookie value")
+    domain: str = Field(..., description="Cookie domain (e.g., '.example.com')")
+    path: str = Field(default="/", description="Cookie path")
+    expires: float | None = Field(None, description="Expiration timestamp (Unix epoch)")
+    httpOnly: bool = Field(default=False, description="HTTP-only flag")
+    secure: bool = Field(default=False, description="Secure (HTTPS-only) flag")
+    sameSite: Literal["Strict", "Lax", "None"] = Field(
+        default="Lax", description="SameSite attribute"
+    )
+class LocalStorageItem(BaseModel):
+    """
+    LocalStorage item for a specific origin.
+    Playwright stores localStorage as an array of {name, value} objects.
+    """
+    # Both fields are required and typed as str.
+    name: str = Field(..., description="LocalStorage key")
+    value: str = Field(..., description="LocalStorage value")
+class OriginStorage(BaseModel):
+    """
+    Storage state for a specific origin (localStorage).
+    Represents localStorage data for a single domain.
+    """
+    origin: str = Field(..., description="Origin URL (e.g., 'https://example.com')")
+    # default_factory=list gives each instance its own empty list when the field is omitted.
+    localStorage: list[LocalStorageItem] = Field(
+        default_factory=list, description="LocalStorage items for this origin"
+    )
+class StorageState(BaseModel):
+    """
+    Complete browser storage state (cookies + localStorage).
+    This is the format used by Playwright's storage_state() method.
+    Can be saved to/loaded from JSON files for session injection.
+    """
+    cookies: list[Cookie] = Field(
+        default_factory=list, description="Cookies to inject (global scope)"
+    )
+    origins: list[OriginStorage] = Field(
+        default_factory=list, description="LocalStorage data per origin"
+    )
+    @classmethod
+    def from_dict(cls, data: dict) -> "StorageState":
+        """
+        Create StorageState from dictionary (e.g., loaded from JSON).
+        Args:
+            data: Dictionary with 'cookies' and/or 'origins' keys
+        Returns:
+            StorageState instance
+        """
+        cookies = [
+            Cookie(**cookie) if isinstance(cookie, dict) else cookie
+            for cookie in data.get("cookies", [])
+        ]
+        origins = []
+        for origin_data in data.get("origins", []):
+            if isinstance(origin_data, dict):
+                # Handle localStorage as array of {name, value} or as dict
+                localStorage_data = origin_data.get("localStorage", [])
+                if isinstance(localStorage_data, dict):
+                    # Convert dict to list of LocalStorageItem
+                    localStorage_items = [
+                        LocalStorageItem(name=k, value=v) for k, v in localStorage_data.items()
+                    ]
+                else:
+                    # Already a list
+                    localStorage_items = [
+                        LocalStorageItem(**item) if isinstance(item, dict) else item
+                        for item in localStorage_data
+                    ]
+                origins.append(
+                    OriginStorage(
+                        origin=origin_data.get("origin", ""),
+                        localStorage=localStorage_items,
+                    )
+                )
+            else:
+                origins.append(origin_data)
+        return cls(cookies=cookies, origins=origins)
+    def to_playwright_dict(self) -> dict:
+        """
+        Convert to Playwright-compatible dictionary format.
+        Returns:
+            Dictionary compatible with Playwright's storage_state parameter
+        """
+        return {
+            "cookies": [cookie.model_dump() for cookie in self.cookies],
+            "origins": [
+                {
+                    "origin": origin.origin,
+                    "localStorage": [item.model_dump() for item in origin.localStorage],
+                }
+                for origin in self.origins
+            ],
+        }
+# ========== Text Search Models (findTextRect) ==========
+class TextRect(BaseModel):
+    """
+    Rectangle coordinates for text occurrence.
+    Includes both absolute (page) and viewport-relative coordinates.
+    """
+    # All eight fields are required. The relationships named in the descriptions
+    # (left == x, right == x + width, ...) are not enforced by this model; the
+    # values are stored exactly as supplied.
+    x: float = Field(..., description="Absolute X coordinate (page coordinate with scroll offset)")
+    y: float = Field(..., description="Absolute Y coordinate (page coordinate with scroll offset)")
+    width: float = Field(..., description="Rectangle width in pixels")
+    height: float = Field(..., description="Rectangle height in pixels")
+    left: float = Field(..., description="Absolute left position (same as x)")
+    top: float = Field(..., description="Absolute top position (same as y)")
+    right: float = Field(..., description="Absolute right position (x + width)")
+    bottom: float = Field(..., description="Absolute bottom position (y + height)")
+class ViewportRect(BaseModel):
+    """Viewport-relative rectangle coordinates (without scroll offset)"""
+    # All four fields are required.
+    x: float = Field(..., description="Viewport-relative X coordinate")
+    y: float = Field(..., description="Viewport-relative Y coordinate")
+    width: float = Field(..., description="Rectangle width in pixels")
+    height: float = Field(..., description="Rectangle height in pixels")
+class TextContext(BaseModel):
+    """Context text surrounding a match"""
+    # Both fields are required. The 20-character limit in the descriptions is
+    # documentation only; no length constraint is declared on the fields.
+    before: str = Field(..., description="Text before the match (up to 20 chars)")
+    after: str = Field(..., description="Text after the match (up to 20 chars)")
+class TextMatch(BaseModel):
+    """A single text match with its rectangle and context"""
+    # All fields are required.
+    text: str = Field(..., description="The matched text")
+    rect: TextRect = Field(..., description="Absolute rectangle coordinates (with scroll offset)")
+    viewport_rect: ViewportRect = Field(
+        ..., description="Viewport-relative rectangle (without scroll offset)"
+    )
+    context: TextContext = Field(..., description="Surrounding text context")
+    in_viewport: bool = Field(..., description="Whether the match is currently visible in viewport")
+class TextRectSearchResult(BaseModel):
+    """
+    Result of findTextRect operation.
+    Returns all occurrences of text on the page with their exact pixel coordinates.
+    """
+    # Only status is required; every other field defaults to None.
+    status: Literal["success", "error"]
+    query: str | None = Field(None, description="The search text that was queried")
+    case_sensitive: bool | None = Field(None, description="Whether search was case-sensitive")
+    whole_word: bool | None = Field(None, description="Whether whole-word matching was used")
+    # `matches` is the count; the match objects themselves are in `results`.
+    matches: int | None = Field(None, description="Number of matches found")
+    results: list[TextMatch] | None = Field(
+        None, description="List of text matches with coordinates"
+    )
+    viewport: Viewport | None = Field(None, description="Current viewport dimensions")
+    error: str | None = Field(None, description="Error message if status is 'error'")

sentience/overlay.py ADDED Viewed

@@ -0,0 +1,222 @@
+"""
+Visual overlay utilities - show/clear element highlights in browser
+"""
+from typing import Any
+from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .models import Element, Snapshot
+def show_overlay(
+    browser: SentienceBrowser,
+    elements: list[Element] | list[dict[str, Any]] | Snapshot,
+    target_element_id: int | None = None,
+) -> None:
+    """
+    Display visual overlay highlighting elements in the browser
+    This function shows a Shadow DOM overlay with color-coded borders around
+    detected elements. Useful for debugging, learning, and validating element detection.
+    Args:
+        browser: SentienceBrowser instance
+        elements: Can be:
+            - List of Element objects (from snapshot.elements)
+            - List of raw element dicts (from snapshot result or API response)
+            - Snapshot object (will use snapshot.elements)
+        target_element_id: Optional ID of element to highlight in red (default: None)
+    Color Coding:
+        - Red: Target element (when target_element_id is specified)
+        - Blue: Primary elements (is_primary=true)
+        - Green: Regular interactive elements
+    Visual Indicators:
+        - Border thickness and opacity scale with importance score
+        - Semi-transparent fill for better visibility
+        - Importance badges showing scores
+        - Star icon for primary elements
+        - Target emoji for the target element
+    Auto-clear: Overlay automatically disappears after 5 seconds
+    Example:
+        # Show overlay from snapshot
+        snap = snapshot(browser)
+        show_overlay(browser, snap)
+        # Show overlay with custom elements
+        elements = [{"id": 1, "bbox": {"x": 100, "y": 100, "width": 200, "height": 50}, ...}]
+        show_overlay(browser, elements)
+        # Show overlay with target element highlighted in red
+        show_overlay(browser, snap, target_element_id=42)
+        # Clear overlay manually before 5 seconds
+        clear_overlay(browser)
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call browser.start() first.")
+    # Handle different input types
+    if isinstance(elements, Snapshot):
+        # Extract elements from Snapshot object
+        elements_list = [el.model_dump() for el in elements.elements]
+    elif isinstance(elements, list) and len(elements) > 0:
+        # Check if it's a list of Element objects or dicts
+        if hasattr(elements[0], "model_dump"):
+            # List of Element objects
+            elements_list = [el.model_dump() for el in elements]
+        else:
+            # Already a list of dicts
+            elements_list = elements
+    else:
+        raise ValueError("elements must be a Snapshot, list of Element objects, or list of dicts")
+    # Call extension API
+    browser.page.evaluate(
+        """
+        (args) => {
+            if (window.sentience && window.sentience.showOverlay) {
+                window.sentience.showOverlay(args.elements, args.targetId);
+            } else {
+                console.warn('[Sentience SDK] showOverlay not available - is extension loaded?');
+            }
+        }
+        """,
+        {"elements": elements_list, "targetId": target_element_id},
+    )
+def clear_overlay(browser: SentienceBrowser) -> None:
+    """
+    Clear the visual overlay manually (before 5-second auto-clear)
+    Args:
+        browser: SentienceBrowser instance
+    Example:
+        show_overlay(browser, snap)
+        # ... inspect overlay ...
+        clear_overlay(browser)  # Remove immediately
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call browser.start() first.")
+    browser.page.evaluate(
+        """
+        () => {
+            if (window.sentience && window.sentience.clearOverlay) {
+                window.sentience.clearOverlay();
+            }
+        }
+        """
+    )
+async def show_overlay_async(
+    browser: AsyncSentienceBrowser,
+    elements: list[Element] | list[dict[str, Any]] | Snapshot,
+    target_element_id: int | None = None,
+) -> None:
+    """
+    Display visual overlay highlighting elements in the browser (async)
+    This function shows a Shadow DOM overlay with color-coded borders around
+    detected elements. Useful for debugging, learning, and validating element detection.
+    Args:
+        browser: AsyncSentienceBrowser instance
+        elements: Can be:
+            - List of Element objects (from snapshot.elements)
+            - List of raw element dicts (from snapshot result or API response)
+            - Snapshot object (will use snapshot.elements)
+        target_element_id: Optional ID of element to highlight in red (default: None)
+    Color Coding:
+        - Red: Target element (when target_element_id is specified)
+        - Blue: Primary elements (is_primary=true)
+        - Green: Regular interactive elements
+    Visual Indicators:
+        - Border thickness and opacity scale with importance score
+        - Semi-transparent fill for better visibility
+        - Importance badges showing scores
+        - Star icon for primary elements
+        - Target emoji for the target element
+    Auto-clear: Overlay automatically disappears after 5 seconds
+    Example:
+        # Show overlay from snapshot
+        snap = await snapshot_async(browser)
+        await show_overlay_async(browser, snap)
+        # Show overlay with custom elements
+        elements = [{"id": 1, "bbox": {"x": 100, "y": 100, "width": 200, "height": 50}, ...}]
+        await show_overlay_async(browser, elements)
+        # Show overlay with target element highlighted in red
+        await show_overlay_async(browser, snap, target_element_id=42)
+        # Clear overlay manually before 5 seconds
+        await clear_overlay_async(browser)
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+    # Handle different input types
+    if isinstance(elements, Snapshot):
+        # Extract elements from Snapshot object
+        elements_list = [el.model_dump() for el in elements.elements]
+    elif isinstance(elements, list) and len(elements) > 0:
+        # Check if it's a list of Element objects or dicts
+        if hasattr(elements[0], "model_dump"):
+            # List of Element objects
+            elements_list = [el.model_dump() for el in elements]
+        else:
+            # Already a list of dicts
+            elements_list = elements
+    else:
+        raise ValueError("elements must be a Snapshot, list of Element objects, or list of dicts")
+    # Call extension API
+    await browser.page.evaluate(
+        """
+        (args) => {
+            if (window.sentience && window.sentience.showOverlay) {
+                window.sentience.showOverlay(args.elements, args.targetId);
+            } else {
+                console.warn('[Sentience SDK] showOverlay not available - is extension loaded?');
+            }
+        }
+        """,
+        {"elements": elements_list, "targetId": target_element_id},
+    )
+async def clear_overlay_async(browser: AsyncSentienceBrowser) -> None:
+    """
+    Clear the visual overlay manually (before 5-second auto-clear) (async)
+    Args:
+        browser: AsyncSentienceBrowser instance
+    Example:
+        await show_overlay_async(browser, snap)
+        # ... inspect overlay ...
+        await clear_overlay_async(browser)  # Remove immediately
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+    await browser.page.evaluate(
+        """
+        () => {
+            if (window.sentience && window.sentience.clearOverlay) {
+                window.sentience.clearOverlay();
+            }
+        }
+        """
+    )