PyPI - sentienceapi - Versions diffs - 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl - Mend

sentienceapi 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sentienceapi might be problematic. Click here for more details.

Files changed (90) hide show

sentience/__init__.py +120 -6
sentience/_extension_loader.py +156 -1
sentience/action_executor.py +217 -0
sentience/actions.py +758 -30
sentience/agent.py +806 -293
sentience/agent_config.py +3 -0
sentience/agent_runtime.py +840 -0
sentience/asserts/__init__.py +70 -0
sentience/asserts/expect.py +621 -0
sentience/asserts/query.py +383 -0
sentience/async_api.py +89 -1141
sentience/backends/__init__.py +137 -0
sentience/backends/actions.py +372 -0
sentience/backends/browser_use_adapter.py +241 -0
sentience/backends/cdp_backend.py +393 -0
sentience/backends/exceptions.py +211 -0
sentience/backends/playwright_backend.py +194 -0
sentience/backends/protocol.py +216 -0
sentience/backends/sentience_context.py +469 -0
sentience/backends/snapshot.py +483 -0
sentience/base_agent.py +95 -0
sentience/browser.py +678 -39
sentience/browser_evaluator.py +299 -0
sentience/canonicalization.py +207 -0
sentience/cloud_tracing.py +507 -42
sentience/constants.py +6 -0
sentience/conversational_agent.py +77 -43
sentience/cursor_policy.py +142 -0
sentience/element_filter.py +136 -0
sentience/expect.py +98 -2
sentience/extension/background.js +56 -185
sentience/extension/content.js +150 -287
sentience/extension/injected_api.js +1088 -1368
sentience/extension/manifest.json +1 -1
sentience/extension/pkg/sentience_core.d.ts +22 -22
sentience/extension/pkg/sentience_core.js +275 -433
sentience/extension/pkg/sentience_core_bg.wasm +0 -0
sentience/extension/release.json +47 -47
sentience/failure_artifacts.py +241 -0
sentience/formatting.py +9 -53
sentience/inspector.py +183 -1
sentience/integrations/__init__.py +6 -0
sentience/integrations/langchain/__init__.py +12 -0
sentience/integrations/langchain/context.py +18 -0
sentience/integrations/langchain/core.py +326 -0
sentience/integrations/langchain/tools.py +180 -0
sentience/integrations/models.py +46 -0
sentience/integrations/pydanticai/__init__.py +15 -0
sentience/integrations/pydanticai/deps.py +20 -0
sentience/integrations/pydanticai/toolset.py +468 -0
sentience/llm_interaction_handler.py +191 -0
sentience/llm_provider.py +765 -66
sentience/llm_provider_utils.py +120 -0
sentience/llm_response_builder.py +153 -0
sentience/models.py +595 -3
sentience/ordinal.py +280 -0
sentience/overlay.py +109 -2
sentience/protocols.py +228 -0
sentience/query.py +67 -5
sentience/read.py +95 -3
sentience/recorder.py +223 -3
sentience/schemas/trace_v1.json +128 -9
sentience/screenshot.py +48 -2
sentience/sentience_methods.py +86 -0
sentience/snapshot.py +599 -55
sentience/snapshot_diff.py +126 -0
sentience/text_search.py +120 -5
sentience/trace_event_builder.py +148 -0
sentience/trace_file_manager.py +197 -0
sentience/trace_indexing/index_schema.py +95 -7
sentience/trace_indexing/indexer.py +105 -48
sentience/tracer_factory.py +120 -9
sentience/tracing.py +172 -8
sentience/utils/__init__.py +40 -0
sentience/utils/browser.py +46 -0
sentience/{utils.py → utils/element.py} +3 -42
sentience/utils/formatting.py +59 -0
sentience/verification.py +618 -0
sentience/visual_agent.py +2058 -0
sentience/wait.py +68 -2
{sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/METADATA +199 -40
sentienceapi-0.98.0.dist-info/RECORD +92 -0
sentience/extension/test-content.js +0 -4
sentienceapi-0.90.16.dist-info/RECORD +0 -50
{sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/WHEEL +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/entry_points.txt +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-APACHE +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-MIT +0 -0
{sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/top_level.txt +0 -0

sentience/ordinal.py ADDED Viewed

@@ -0,0 +1,280 @@
+"""
+Phase 3: Ordinal Intent Detection for Semantic Search
+This module provides functions to detect ordinal intent in natural language goals
+and select elements based on their position within groups.
+Ordinal operators supported:
+- Position-based: "first", "second", "third", "1st", "2nd", "3rd", etc.
+- Relative: "top", "bottom", "last", "next", "previous"
+- Numeric: "#1", "#2", "number 1", "item 3"
+Example usage:
+    from sentience.ordinal import detect_ordinal_intent, select_by_ordinal
+    intent = detect_ordinal_intent("click the first search result")
+    # OrdinalIntent(detected=True, kind='nth', n=1, k=None)
+    element = select_by_ordinal(elements, dominant_group_key, intent)
+"""
+import re
+from dataclasses import dataclass
+from typing import Literal
+from sentience.models import Element
+@dataclass
+class OrdinalIntent:
+    """
+    Detected ordinal intent from a goal string.
+
+    Instances produced by detect_ordinal_intent() set ``n`` only together with
+    ``kind="nth"`` and ``k`` only together with ``kind="top_k"``; every
+    optional field otherwise keeps its default of None.
+    """
+    # True when an ordinal pattern matched the goal, False when none did.
+    detected: bool
+    # Category of the ordinal reference; stays None when nothing was detected.
+    kind: Literal["first", "last", "nth", "top_k", "next", "previous"] | None = None
+    n: int | None = None  # For "nth" kind: 1-indexed position (1=first, 2=second)
+    k: int | None = None  # For "top_k" kind: number of items
+# Ordinal word to number mapping (values are 1-indexed positions).
+# Within this module only the spelled-out keys ("first" .. "tenth") are looked
+# up: detect_ordinal_intent() converts digit+suffix forms such as "3rd" with
+# int() on the captured digits instead of consulting this table.
+ORDINAL_WORDS = {
+    "first": 1,
+    "second": 2,
+    "third": 3,
+    "fourth": 4,
+    "fifth": 5,
+    "sixth": 6,
+    "seventh": 7,
+    "eighth": 8,
+    "ninth": 9,
+    "tenth": 10,
+    "1st": 1,
+    "2nd": 2,
+    "3rd": 3,
+    "4th": 4,
+    "5th": 5,
+    "6th": 6,
+    "7th": 7,
+    "8th": 8,
+    "9th": 9,
+    "10th": 10,
+}
+# Patterns for detecting ordinal intent.
+# Each entry is a (regex, pattern_type) pair. detect_ordinal_intent() tries the
+# entries in list order and returns on the first regex that matches, so earlier
+# entries take precedence when a goal contains several ordinal cues.
+ORDINAL_PATTERNS = [
+    # Spelled-out ordinals: "first", "second", ... "tenth"
+    (
+        r"\b(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth)\b",
+        "ordinal_word",
+    ),
+    # Digits followed by an ordinal suffix: "1st", "2nd", "3rd", etc.
+    # The suffix is not validated against the digits, so "1th" matches too.
+    (r"\b(\d+)(st|nd|rd|th)\b", "ordinal_suffix"),
+    # Hash-prefixed numbers: "#1", "#2", etc.
+    (r"#(\d+)\b", "hash_number"),
+    # Labeled numbers: "number 1", "item 3", "result 5" (whitespace optional)
+    (r"\b(?:number|item|result|option|choice)\s*(\d+)\b", "labeled_number"),
+    # Bare "top" (implies first/best); the lookahead leaves "top 3" to the
+    # top_k pattern below
+    (r"\btop\b(?!\s*\d)", "top"),
+    # "top 3", "top 5": captures how many items are wanted
+    (r"\btop\s+(\d+)\b", "top_k"),
+    # "last", "final", "bottom"
+    (r"\b(last|final|bottom)\b", "last"),
+    # "next", "following"
+    (r"\b(next|following)\b", "next"),
+    # "previous", "preceding", "prior"
+    (r"\b(previous|preceding|prior)\b", "previous"),
+]
+def detect_ordinal_intent(goal: str) -> OrdinalIntent:
+    """
+    Detect ordinal intent from a goal string.
+    Args:
+        goal: Natural language goal (e.g., "click the first search result")
+    Returns:
+        OrdinalIntent with detected=True if ordinal intent found, False otherwise.
+    Examples:
+        >>> detect_ordinal_intent("click the first item")
+        OrdinalIntent(detected=True, kind='nth', n=1)
+        >>> detect_ordinal_intent("select the 3rd option")
+        OrdinalIntent(detected=True, kind='nth', n=3)
+        >>> detect_ordinal_intent("show top 5 results")
+        OrdinalIntent(detected=True, kind='top_k', k=5)
+        >>> detect_ordinal_intent("click the last button")
+        OrdinalIntent(detected=True, kind='last')
+        >>> detect_ordinal_intent("find the submit button")
+        OrdinalIntent(detected=False)
+    """
+    goal_lower = goal.lower()
+    for pattern, pattern_type in ORDINAL_PATTERNS:
+        match = re.search(pattern, goal_lower, re.IGNORECASE)
+        if match:
+            if pattern_type == "ordinal_word":
+                word = match.group(1).lower()
+                n = ORDINAL_WORDS.get(word)
+                if n:
+                    return OrdinalIntent(detected=True, kind="nth", n=n)
+            elif pattern_type == "ordinal_suffix":
+                n = int(match.group(1))
+                return OrdinalIntent(detected=True, kind="nth", n=n)
+            elif pattern_type == "hash_number":
+                n = int(match.group(1))
+                return OrdinalIntent(detected=True, kind="nth", n=n)
+            elif pattern_type == "labeled_number":
+                n = int(match.group(1))
+                return OrdinalIntent(detected=True, kind="nth", n=n)
+            elif pattern_type == "top":
+                # "top" without a number means "first/best"
+                return OrdinalIntent(detected=True, kind="first")
+            elif pattern_type == "top_k":
+                k = int(match.group(1))
+                return OrdinalIntent(detected=True, kind="top_k", k=k)
+            elif pattern_type == "last":
+                return OrdinalIntent(detected=True, kind="last")
+            elif pattern_type == "next":
+                return OrdinalIntent(detected=True, kind="next")
+            elif pattern_type == "previous":
+                return OrdinalIntent(detected=True, kind="previous")
+    return OrdinalIntent(detected=False)
+def select_by_ordinal(
+    elements: list[Element],
+    dominant_group_key: str | None,
+    intent: OrdinalIntent,
+    current_element_id: int | None = None,
+) -> Element | list[Element] | None:
+    """
+    Select element(s) from a list based on ordinal intent.
+    Uses the dominant_group_key to filter to the "main content" group,
+    then selects by group_index based on the ordinal intent.
+    Args:
+        elements: List of elements with group_key and group_index populated
+        dominant_group_key: The most common group key (main content group)
+        intent: Detected ordinal intent
+        current_element_id: Current element ID (for next/previous navigation)
+    Returns:
+        Single Element for nth/first/last, list of Elements for top_k,
+        or None if no matching element found.
+    Examples:
+        >>> intent = OrdinalIntent(detected=True, kind='nth', n=1)
+        >>> element = select_by_ordinal(elements, "x5-w2-h1", intent)
+        # Returns element with group_key="x5-w2-h1" and group_index=0
+    """
+    if not intent.detected:
+        return None
+    # Filter to dominant group if available
+    if dominant_group_key:
+        group_elements = [e for e in elements if e.group_key == dominant_group_key]
+    else:
+        # Fallback: use all elements with group_index
+        group_elements = [e for e in elements if e.group_index is not None]
+    if not group_elements:
+        return None
+    # Sort by group_index to ensure correct ordering
+    group_elements.sort(key=lambda e: e.group_index if e.group_index is not None else 0)
+    if intent.kind == "first" or (intent.kind == "nth" and intent.n == 1):
+        # First element (group_index=0)
+        return group_elements[0] if group_elements else None
+    elif intent.kind == "nth" and intent.n is not None:
+        # Nth element (1-indexed, so n=2 means group_index=1)
+        target_index = intent.n - 1
+        if 0 <= target_index < len(group_elements):
+            return group_elements[target_index]
+        return None
+    elif intent.kind == "last":
+        # Last element
+        return group_elements[-1] if group_elements else None
+    elif intent.kind == "top_k" and intent.k is not None:
+        # Top K elements
+        return group_elements[: intent.k]
+    elif intent.kind == "next" and current_element_id is not None:
+        # Next element after current
+        for i, elem in enumerate(group_elements):
+            if elem.id == current_element_id and i + 1 < len(group_elements):
+                return group_elements[i + 1]
+        return None
+    elif intent.kind == "previous" and current_element_id is not None:
+        # Previous element before current
+        for i, elem in enumerate(group_elements):
+            if elem.id == current_element_id and i > 0:
+                return group_elements[i - 1]
+        return None
+    return None
+def boost_ordinal_elements(
+    elements: list[Element],
+    dominant_group_key: str | None,
+    intent: OrdinalIntent,
+    boost_factor: int = 10000,
+) -> list[Element]:
+    """
+    Boost the importance of elements matching ordinal intent.
+    This is useful for integrating ordinal selection with existing
+    importance-based ranking. Elements matching the ordinal intent
+    get a significant importance boost.
+    Args:
+        elements: List of elements (not modified)
+        dominant_group_key: The most common group key
+        intent: Detected ordinal intent
+        boost_factor: Amount to add to importance (default: 10000)
+    Returns:
+        A new list with copies of elements, with boosted importance for matches.
+    """
+    if not intent.detected or not dominant_group_key:
+        return [e.model_copy() for e in elements]
+    target = select_by_ordinal(elements, dominant_group_key, intent)
+    if target is None:
+        return [e.model_copy() for e in elements]
+    # Handle single element or list
+    if isinstance(target, list):
+        target_ids = {e.id for e in target}
+    else:
+        target_ids = {target.id}
+    # Create copies and boost matching elements
+    result = []
+    for elem in elements:
+        copy = elem.model_copy()
+        if copy.id in target_ids:
+            copy.importance = (copy.importance or 0) + boost_factor
+        result.append(copy)
+    return result

sentience/overlay.py CHANGED Viewed

@@ -2,9 +2,9 @@
 Visual overlay utilities - show/clear element highlights in browser
 """
-from typing import Any
+from typing import Any, Optional
-from .browser import SentienceBrowser
+from .browser import AsyncSentienceBrowser, SentienceBrowser
 from .models import Element, Snapshot
@@ -113,3 +113,110 @@ def clear_overlay(browser: SentienceBrowser) -> None:
         }
         """
     )
+async def show_overlay_async(
+    browser: AsyncSentienceBrowser,
+    elements: list[Element] | list[dict[str, Any]] | Snapshot,
+    target_element_id: int | None = None,
+) -> None:
+    """
+    Display visual overlay highlighting elements in the browser (async)
+    This function shows a Shadow DOM overlay with color-coded borders around
+    detected elements. Useful for debugging, learning, and validating element detection.
+    Args:
+        browser: AsyncSentienceBrowser instance
+        elements: Can be:
+            - List of Element objects (from snapshot.elements)
+            - List of raw element dicts (from snapshot result or API response)
+            - Snapshot object (will use snapshot.elements)
+        target_element_id: Optional ID of element to highlight in red (default: None)
+    Color Coding:
+        - Red: Target element (when target_element_id is specified)
+        - Blue: Primary elements (is_primary=true)
+        - Green: Regular interactive elements
+    Visual Indicators:
+        - Border thickness and opacity scale with importance score
+        - Semi-transparent fill for better visibility
+        - Importance badges showing scores
+        - Star icon for primary elements
+        - Target emoji for the target element
+    Auto-clear: Overlay automatically disappears after 5 seconds
+    Example:
+        # Show overlay from snapshot
+        snap = await snapshot_async(browser)
+        await show_overlay_async(browser, snap)
+        # Show overlay with custom elements
+        elements = [{"id": 1, "bbox": {"x": 100, "y": 100, "width": 200, "height": 50}, ...}]
+        await show_overlay_async(browser, elements)
+        # Show overlay with target element highlighted in red
+        await show_overlay_async(browser, snap, target_element_id=42)
+        # Clear overlay manually before 5 seconds
+        await clear_overlay_async(browser)
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+    # Handle different input types
+    if isinstance(elements, Snapshot):
+        # Extract elements from Snapshot object
+        elements_list = [el.model_dump() for el in elements.elements]
+    elif isinstance(elements, list) and len(elements) > 0:
+        # Check if it's a list of Element objects or dicts
+        if hasattr(elements[0], "model_dump"):
+            # List of Element objects
+            elements_list = [el.model_dump() for el in elements]
+        else:
+            # Already a list of dicts
+            elements_list = elements
+    else:
+        raise ValueError("elements must be a Snapshot, list of Element objects, or list of dicts")
+    # Call extension API
+    await browser.page.evaluate(
+        """
+        (args) => {
+            if (window.sentience && window.sentience.showOverlay) {
+                window.sentience.showOverlay(args.elements, args.targetId);
+            } else {
+                console.warn('[Sentience SDK] showOverlay not available - is extension loaded?');
+            }
+        }
+        """,
+        {"elements": elements_list, "targetId": target_element_id},
+    )
+async def clear_overlay_async(browser: AsyncSentienceBrowser) -> None:
+    """
+    Clear the visual overlay manually (before 5-second auto-clear) (async)
+    Args:
+        browser: AsyncSentienceBrowser instance
+    Example:
+        await show_overlay_async(browser, snap)
+        # ... inspect overlay ...
+        await clear_overlay_async(browser)  # Remove immediately
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+    await browser.page.evaluate(
+        """
+        () => {
+            if (window.sentience && window.sentience.clearOverlay) {
+                window.sentience.clearOverlay();
+            }
+        }
+        """
+    )

sentience/protocols.py ADDED Viewed

@@ -0,0 +1,228 @@
+"""
+Protocol definitions for testability and dependency injection.
+These protocols define the minimal interface required by agent classes,
+enabling better testability through mocking while maintaining type safety.
+"""
+from typing import TYPE_CHECKING, Any, Optional, Protocol, runtime_checkable
+if TYPE_CHECKING:
+    from playwright.async_api import Page as AsyncPage
+    from playwright.sync_api import Page
+    from .models import Snapshot
+@runtime_checkable
+class PageProtocol(Protocol):
+    """
+    Protocol for Playwright Page operations used by agents.
+
+    This protocol defines the minimal interface required from Playwright's Page object.
+    Agents use this interface to interact with the browser page.
+
+    Note: the class is decorated with @runtime_checkable, so isinstance()
+    checks against it only confirm that these members exist; signatures and
+    return types are not verified at runtime.
+    """
+    @property
+    def url(self) -> str:
+        """Current page URL."""
+        ...
+    def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any:
+        """
+        Evaluate JavaScript in the page context.
+
+        Args:
+            script: JavaScript code to evaluate
+            *args: Arguments to pass to the script
+            **kwargs: Keyword arguments to pass to the script
+
+        Returns:
+            Result of the JavaScript evaluation
+        """
+        ...
+    def goto(self, url: str, **kwargs: Any) -> Any | None:
+        """
+        Navigate to a URL.
+
+        Args:
+            url: URL to navigate to
+            **kwargs: Additional navigation options
+
+        Returns:
+            Response object or None
+        """
+        ...
+    def wait_for_timeout(self, timeout: int) -> None:
+        """
+        Wait for a specified timeout.
+
+        Args:
+            timeout: Timeout in milliseconds
+        """
+        ...
+    def wait_for_load_state(self, state: str = "load", timeout: int | None = None) -> None:
+        """
+        Wait for page load state.
+
+        Args:
+            state: Load state to wait for (e.g., "load", "domcontentloaded", "networkidle")
+            timeout: Optional timeout in milliseconds
+        """
+        ...
+@runtime_checkable
+class BrowserProtocol(Protocol):
+    """
+    Protocol for browser operations used by agents.
+
+    This protocol defines the minimal interface required from SentienceBrowser.
+    Agents use this interface to interact with the browser and take snapshots.
+
+    Note: SentienceBrowser naturally implements this protocol, so no changes
+    are required to existing code. This protocol enables better testability
+    through mocking.
+
+    Note: the class is decorated with @runtime_checkable, so isinstance()
+    checks against it only confirm that these members exist; signatures and
+    return types are not verified at runtime.
+    """
+    @property
+    def page(self) -> PageProtocol | None:
+        """
+        Current Playwright Page object.
+
+        Returns:
+            Page object if browser is started, None otherwise
+        """
+        ...
+    def start(self) -> None:
+        """Start the browser session."""
+        ...
+    def close(self, output_path: str | None = None) -> str | None:
+        """
+        Close the browser session.
+
+        Args:
+            output_path: Optional path to save browser state/output
+
+        Returns:
+            Path to saved output or None
+        """
+        ...
+    def goto(self, url: str) -> None:
+        """
+        Navigate to a URL.
+
+        Args:
+            url: URL to navigate to
+        """
+        ...
+@runtime_checkable
+class AsyncPageProtocol(Protocol):
+    """
+    Protocol for async Playwright Page operations.
+
+    Similar to PageProtocol but for async operations: every method is a
+    coroutine function, while ``url`` remains a plain synchronous property.
+
+    Note: the class is decorated with @runtime_checkable, so isinstance()
+    checks against it only confirm that these members exist; they do not
+    verify that the methods are actually coroutines.
+    """
+    @property
+    def url(self) -> str:
+        """Current page URL."""
+        ...
+    async def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any:
+        """
+        Evaluate JavaScript in the page context (async).
+
+        Args:
+            script: JavaScript code to evaluate
+            *args: Arguments to pass to the script
+            **kwargs: Keyword arguments to pass to the script
+
+        Returns:
+            Result of the JavaScript evaluation
+        """
+        ...
+    async def goto(self, url: str, **kwargs: Any) -> Any | None:
+        """
+        Navigate to a URL (async).
+
+        Args:
+            url: URL to navigate to
+            **kwargs: Additional navigation options
+
+        Returns:
+            Response object or None
+        """
+        ...
+    async def wait_for_timeout(self, timeout: int) -> None:
+        """
+        Wait for a specified timeout (async).
+
+        Args:
+            timeout: Timeout in milliseconds
+        """
+        ...
+    async def wait_for_load_state(self, state: str = "load", timeout: int | None = None) -> None:
+        """
+        Wait for page load state (async).
+
+        Args:
+            state: Load state to wait for (e.g., "load", "domcontentloaded", "networkidle")
+            timeout: Optional timeout in milliseconds
+        """
+        ...
+@runtime_checkable
+class AsyncBrowserProtocol(Protocol):
+    """
+    Protocol for async browser operations.
+
+    Similar to BrowserProtocol but for async operations: start, close and goto
+    are coroutine functions, while ``page`` remains a synchronous property.
+
+    Note: the class is decorated with @runtime_checkable, so isinstance()
+    checks against it only confirm that these members exist; they do not
+    verify that the methods are actually coroutines.
+    """
+    @property
+    def page(self) -> AsyncPageProtocol | None:
+        """
+        Current Playwright AsyncPage object.
+
+        Returns:
+            AsyncPage object if browser is started, None otherwise
+        """
+        ...
+    async def start(self) -> None:
+        """Start the browser session (async)."""
+        ...
+    async def close(self, output_path: str | None = None) -> str | None:
+        """
+        Close the browser session (async).
+
+        Args:
+            output_path: Optional path to save browser state/output
+
+        Returns:
+            Path to saved output or None
+        """
+        ...
+    async def goto(self, url: str) -> None:
+        """
+        Navigate to a URL (async).
+
+        Args:
+            url: URL to navigate to
+        """
+        ...

sentienceapi 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl

Potentially problematic release.

sentienceapi 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl