PyPI - sentienceapi - Versions diffs - 0.95.0__py3-none-any.whl - Mend

sentienceapi 0.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sentienceapi might be problematic. Click here for more details.

Files changed (82) hide show

sentience/__init__.py +253 -0
sentience/_extension_loader.py +195 -0
sentience/action_executor.py +215 -0
sentience/actions.py +1020 -0
sentience/agent.py +1181 -0
sentience/agent_config.py +46 -0
sentience/agent_runtime.py +424 -0
sentience/asserts/__init__.py +70 -0
sentience/asserts/expect.py +621 -0
sentience/asserts/query.py +383 -0
sentience/async_api.py +108 -0
sentience/backends/__init__.py +137 -0
sentience/backends/actions.py +343 -0
sentience/backends/browser_use_adapter.py +241 -0
sentience/backends/cdp_backend.py +393 -0
sentience/backends/exceptions.py +211 -0
sentience/backends/playwright_backend.py +194 -0
sentience/backends/protocol.py +216 -0
sentience/backends/sentience_context.py +469 -0
sentience/backends/snapshot.py +427 -0
sentience/base_agent.py +196 -0
sentience/browser.py +1215 -0
sentience/browser_evaluator.py +299 -0
sentience/canonicalization.py +207 -0
sentience/cli.py +130 -0
sentience/cloud_tracing.py +807 -0
sentience/constants.py +6 -0
sentience/conversational_agent.py +543 -0
sentience/element_filter.py +136 -0
sentience/expect.py +188 -0
sentience/extension/background.js +104 -0
sentience/extension/content.js +161 -0
sentience/extension/injected_api.js +914 -0
sentience/extension/manifest.json +36 -0
sentience/extension/pkg/sentience_core.d.ts +51 -0
sentience/extension/pkg/sentience_core.js +323 -0
sentience/extension/pkg/sentience_core_bg.wasm +0 -0
sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
sentience/extension/release.json +115 -0
sentience/formatting.py +15 -0
sentience/generator.py +202 -0
sentience/inspector.py +367 -0
sentience/llm_interaction_handler.py +191 -0
sentience/llm_provider.py +875 -0
sentience/llm_provider_utils.py +120 -0
sentience/llm_response_builder.py +153 -0
sentience/models.py +846 -0
sentience/ordinal.py +280 -0
sentience/overlay.py +222 -0
sentience/protocols.py +228 -0
sentience/query.py +303 -0
sentience/read.py +188 -0
sentience/recorder.py +589 -0
sentience/schemas/trace_v1.json +335 -0
sentience/screenshot.py +100 -0
sentience/sentience_methods.py +86 -0
sentience/snapshot.py +706 -0
sentience/snapshot_diff.py +126 -0
sentience/text_search.py +262 -0
sentience/trace_event_builder.py +148 -0
sentience/trace_file_manager.py +197 -0
sentience/trace_indexing/__init__.py +27 -0
sentience/trace_indexing/index_schema.py +199 -0
sentience/trace_indexing/indexer.py +414 -0
sentience/tracer_factory.py +322 -0
sentience/tracing.py +449 -0
sentience/utils/__init__.py +40 -0
sentience/utils/browser.py +46 -0
sentience/utils/element.py +257 -0
sentience/utils/formatting.py +59 -0
sentience/utils.py +296 -0
sentience/verification.py +380 -0
sentience/visual_agent.py +2058 -0
sentience/wait.py +139 -0
sentienceapi-0.95.0.dist-info/METADATA +984 -0
sentienceapi-0.95.0.dist-info/RECORD +82 -0
sentienceapi-0.95.0.dist-info/WHEEL +5 -0
sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
sentienceapi-0.95.0.dist-info/top_level.txt +1 -0

sentience/snapshot_diff.py ADDED Viewed

@@ -0,0 +1,126 @@
+"""
+Snapshot comparison utilities for diff_status detection.
+Implements change detection logic for the Diff Overlay feature.
+Uses shared canonicalization helpers from canonicalization.py to ensure
+consistent comparison behavior with trace_indexing/indexer.py.
+"""
+from .canonicalization import bbox_changed, content_changed
+from .models import Element, Snapshot
+class SnapshotDiff:
+    """
+    Utility for comparing snapshots and computing diff_status for elements.
+    Implements the logic described in DIFF_STATUS_GAP_ANALYSIS.md:
+    - ADDED: Element exists in current but not in previous
+    - REMOVED: Element existed in previous but not in current
+    - MODIFIED: Element exists in both but has changed
+    - MOVED: Element exists in both but position changed
+    Uses canonicalized comparisons (normalized text, rounded bbox) to reduce
+    noise from insignificant changes like sub-pixel rendering differences
+    or whitespace variations.
+    """
+    @staticmethod
+    def _element_to_dict(el: Element) -> dict:
+        """Convert Element model to dict for canonicalization helpers."""
+        return {
+            "id": el.id,
+            "role": el.role,
+            "text": el.text,
+            "bbox": {
+                "x": el.bbox.x,
+                "y": el.bbox.y,
+                "width": el.bbox.width,
+                "height": el.bbox.height,
+            },
+            "visual_cues": {
+                "is_primary": el.visual_cues.is_primary,
+                "is_clickable": el.visual_cues.is_clickable,
+            },
+        }
+    @staticmethod
+    def compute_diff_status(
+        current: Snapshot,
+        previous: Snapshot | None,
+    ) -> list[Element]:
+        """
+        Compare current snapshot with previous and set diff_status on elements.
+        Uses canonicalized comparisons:
+        - Text is normalized (trimmed, collapsed whitespace, lowercased)
+        - Bbox is rounded to 2px grid to ignore sub-pixel differences
+        Args:
+            current: Current snapshot
+            previous: Previous snapshot (None if this is the first snapshot)
+        Returns:
+            List of elements with diff_status set (includes REMOVED elements from previous)
+        """
+        # If no previous snapshot, all current elements are ADDED
+        if previous is None:
+            result = []
+            for el in current.elements:
+                # Create a copy with diff_status set
+                el_dict = el.model_dump()
+                el_dict["diff_status"] = "ADDED"
+                result.append(Element(**el_dict))
+            return result
+        # Build lookup maps by element ID
+        current_by_id = {el.id: el for el in current.elements}
+        previous_by_id = {el.id: el for el in previous.elements}
+        current_ids = set(current_by_id.keys())
+        previous_ids = set(previous_by_id.keys())
+        result: list[Element] = []
+        # Process current elements
+        for el in current.elements:
+            el_dict = el.model_dump()
+            if el.id not in previous_ids:
+                # Element is new - mark as ADDED
+                el_dict["diff_status"] = "ADDED"
+            else:
+                # Element existed before - check for changes using canonicalized comparisons
+                prev_el = previous_by_id[el.id]
+                # Convert to dicts for canonicalization helpers
+                el_data = SnapshotDiff._element_to_dict(el)
+                prev_el_data = SnapshotDiff._element_to_dict(prev_el)
+                has_bbox_changed = bbox_changed(el_data["bbox"], prev_el_data["bbox"])
+                has_content_changed = content_changed(el_data, prev_el_data)
+                if has_bbox_changed and has_content_changed:
+                    # Both position and content changed - mark as MODIFIED
+                    el_dict["diff_status"] = "MODIFIED"
+                elif has_bbox_changed:
+                    # Only position changed - mark as MOVED
+                    el_dict["diff_status"] = "MOVED"
+                elif has_content_changed:
+                    # Only content changed - mark as MODIFIED
+                    el_dict["diff_status"] = "MODIFIED"
+                else:
+                    # No change - don't set diff_status (frontend expects undefined)
+                    el_dict["diff_status"] = None
+            result.append(Element(**el_dict))
+        # Process removed elements (existed in previous but not in current)
+        for prev_id in previous_ids - current_ids:
+            prev_el = previous_by_id[prev_id]
+            el_dict = prev_el.model_dump()
+            el_dict["diff_status"] = "REMOVED"
+            result.append(Element(**el_dict))
+        return result

sentience/text_search.py ADDED Viewed

@@ -0,0 +1,262 @@
+"""
+Text search utilities - find text and get pixel coordinates
+"""
+from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .browser_evaluator import BrowserEvaluator
+from .models import TextRectSearchResult
+def find_text_rect(
+    browser: SentienceBrowser,
+    text: str,
+    case_sensitive: bool = False,
+    whole_word: bool = False,
+    max_results: int = 10,
+) -> TextRectSearchResult:
+    """
+    Find all occurrences of text on the page and get their exact pixel coordinates.
+    This function searches for text in all visible text nodes on the page and returns
+    the bounding rectangles for each match. Useful for:
+    - Finding specific UI elements by their text content
+    - Locating buttons, links, or labels without element IDs
+    - Getting exact coordinates for click automation
+    - Highlighting search results visually
+    Args:
+        browser: SentienceBrowser instance
+        text: Text to search for (required)
+        case_sensitive: If True, search is case-sensitive (default: False)
+        whole_word: If True, only match whole words surrounded by whitespace (default: False)
+        max_results: Maximum number of matches to return (default: 10, max: 100)
+    Returns:
+        TextRectSearchResult with:
+            - status: "success" or "error"
+            - query: The search text
+            - case_sensitive: Whether search was case-sensitive
+            - whole_word: Whether whole-word matching was used
+            - matches: Number of matches found
+            - results: List of TextMatch objects, each containing:
+                - text: The matched text
+                - rect: Absolute rectangle (with scroll offset)
+                - viewport_rect: Viewport-relative rectangle
+                - context: Surrounding text (before/after)
+                - in_viewport: Whether visible in current viewport
+            - viewport: Current viewport dimensions and scroll position
+            - error: Error message if status is "error"
+    Examples:
+        # Find "Sign In" button
+        result = find_text_rect(browser, "Sign In")
+        if result.status == "success" and result.results:
+            first_match = result.results[0]
+            print(f"Found at: ({first_match.rect.x}, {first_match.rect.y})")
+            print(f"Size: {first_match.rect.width}x{first_match.rect.height}")
+            print(f"In viewport: {first_match.in_viewport}")
+        # Case-sensitive search
+        result = find_text_rect(browser, "LOGIN", case_sensitive=True)
+        # Whole word only
+        result = find_text_rect(browser, "log", whole_word=True)  # Won't match "login"
+        # Find all matches and click the first visible one
+        result = find_text_rect(browser, "Buy Now", max_results=5)
+        if result.status == "success" and result.results:
+            for match in result.results:
+                if match.in_viewport:
+                    # Use click_rect from actions module
+                    from sentience import click_rect
+                    click_result = click_rect(browser, {
+                        "x": match.rect.x,
+                        "y": match.rect.y,
+                        "w": match.rect.width,
+                        "h": match.rect.height
+                    })
+                    break
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call browser.start() first.")
+    if not text or not text.strip():
+        return TextRectSearchResult(
+            status="error",
+            error="Text parameter is required and cannot be empty",
+        )
+    # Limit max_results to prevent performance issues
+    max_results = min(max_results, 100)
+    # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
+    # The new architecture loads injected_api.js asynchronously, so window.sentience
+    # may not be immediately available after page load
+    BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
+    # Verify findTextRect method exists (for older extension versions that don't have it)
+    if not BrowserEvaluator.verify_method_exists(browser.page, SentienceMethod.FIND_TEXT_RECT):
+        raise RuntimeError(
+            "window.sentience.findTextRect is not available. "
+            "Please update the Sentience extension to the latest version."
+        )
+    # Call the extension's findTextRect method
+    result_dict = browser.page.evaluate(
+        """
+        (options) => {
+            return window.sentience.findTextRect(options);
+        }
+        """,
+        {
+            "text": text,
+            "caseSensitive": case_sensitive,
+            "wholeWord": whole_word,
+            "maxResults": max_results,
+        },
+    )
+    # Parse and validate with Pydantic
+    return TextRectSearchResult(**result_dict)
+async def find_text_rect_async(
+    browser: AsyncSentienceBrowser,
+    text: str,
+    case_sensitive: bool = False,
+    whole_word: bool = False,
+    max_results: int = 10,
+) -> TextRectSearchResult:
+    """
+    Find all occurrences of text on the page and get their exact pixel coordinates (async).
+    This function searches for text in all visible text nodes on the page and returns
+    the bounding rectangles for each match. Useful for:
+    - Finding specific UI elements by their text content
+    - Locating buttons, links, or labels without element IDs
+    - Getting exact coordinates for click automation
+    - Highlighting search results visually
+    Args:
+        browser: AsyncSentienceBrowser instance
+        text: Text to search for (required)
+        case_sensitive: If True, search is case-sensitive (default: False)
+        whole_word: If True, only match whole words surrounded by whitespace (default: False)
+        max_results: Maximum number of matches to return (default: 10, max: 100)
+    Returns:
+        TextRectSearchResult with:
+            - status: "success" or "error"
+            - query: The search text
+            - case_sensitive: Whether search was case-sensitive
+            - whole_word: Whether whole-word matching was used
+            - matches: Number of matches found
+            - results: List of TextMatch objects, each containing:
+                - text: The matched text
+                - rect: Absolute rectangle (with scroll offset)
+                - viewport_rect: Viewport-relative rectangle
+                - context: Surrounding text (before/after)
+                - in_viewport: Whether visible in current viewport
+            - viewport: Current viewport dimensions and scroll position
+            - error: Error message if status is "error"
+    Examples:
+        # Find "Sign In" button
+        result = await find_text_rect_async(browser, "Sign In")
+        if result.status == "success" and result.results:
+            first_match = result.results[0]
+            print(f"Found at: ({first_match.rect.x}, {first_match.rect.y})")
+            print(f"Size: {first_match.rect.width}x{first_match.rect.height}")
+            print(f"In viewport: {first_match.in_viewport}")
+        # Case-sensitive search
+        result = await find_text_rect_async(browser, "LOGIN", case_sensitive=True)
+        # Whole word only
+        result = await find_text_rect_async(browser, "log", whole_word=True)  # Won't match "login"
+        # Find all matches and click the first visible one
+        result = await find_text_rect_async(browser, "Buy Now", max_results=5)
+        if result.status == "success" and result.results:
+            for match in result.results:
+                if match.in_viewport:
+                    # Use click_rect_async from actions module
+                    from sentience.actions import click_rect_async
+                    click_result = await click_rect_async(browser, {
+                        "x": match.rect.x,
+                        "y": match.rect.y,
+                        "w": match.rect.width,
+                        "h": match.rect.height
+                    })
+                    break
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+    if not text or not text.strip():
+        return TextRectSearchResult(
+            status="error",
+            error="Text parameter is required and cannot be empty",
+        )
+    # Limit max_results to prevent performance issues
+    max_results = min(max_results, 100)
+    # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
+    # The new architecture loads injected_api.js asynchronously, so window.sentience
+    # may not be immediately available after page load
+    try:
+        await browser.page.wait_for_function(
+            "typeof window.sentience !== 'undefined'",
+            timeout=5000,  # 5 second timeout
+        )
+    except Exception as e:
+        # Gather diagnostics if wait fails
+        try:
+            diag = await browser.page.evaluate(
+                """() => ({
+                    sentience_defined: typeof window.sentience !== 'undefined',
+                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
+                    url: window.location.href
+                })"""
+            )
+        except Exception:
+            diag = {"error": "Could not gather diagnostics"}
+        raise RuntimeError(
+            f"Sentience extension failed to inject window.sentience API. "
+            f"Is the extension loaded? Diagnostics: {diag}"
+        ) from e
+    # Verify findTextRect method exists (for older extension versions that don't have it)
+    try:
+        has_find_text_rect = await browser.page.evaluate(
+            "typeof window.sentience.findTextRect !== 'undefined'"
+        )
+        if not has_find_text_rect:
+            raise RuntimeError(
+                "window.sentience.findTextRect is not available. "
+                "Please update the Sentience extension to the latest version."
+            )
+    except RuntimeError:
+        raise
+    except Exception as e:
+        raise RuntimeError(f"Failed to verify findTextRect availability: {e}") from e
+    # Call the extension's findTextRect method
+    result_dict = await browser.page.evaluate(
+        """
+        (options) => {
+            return window.sentience.findTextRect(options);
+        }
+        """,
+        {
+            "text": text,
+            "caseSensitive": case_sensitive,
+            "wholeWord": whole_word,
+            "maxResults": max_results,
+        },
+    )
+    # Parse and validate with Pydantic
+    return TextRectSearchResult(**result_dict)

sentience/trace_event_builder.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""
+Trace event building utilities for agent-based tracing.
+This module provides centralized trace event building logic to reduce duplication
+across agent implementations.
+"""
+from typing import Any, Optional
+from .models import AgentActionResult, Element, Snapshot
+class TraceEventBuilder:
+    """
+    Helper for building trace events with consistent structure.
+    Provides static methods for building common trace event types:
+    - snapshot_taken events
+    - step_end events
+    """
+    @staticmethod
+    def build_snapshot_event(
+        snapshot: Snapshot,
+        include_all_elements: bool = True,
+    ) -> dict[str, Any]:
+        """
+        Build snapshot_taken trace event data.
+        Args:
+            snapshot: Snapshot to build event from
+            include_all_elements: If True, include all elements (for DOM tree display).
+                                 If False, use filtered elements only.
+        Returns:
+            Dictionary with snapshot event data
+        """
+        # Normalize importance values to importance_score (0-1 range) per snapshot
+        # Min-max normalization: (value - min) / (max - min)
+        importance_values = [el.importance for el in snapshot.elements]
+        if importance_values:
+            min_importance = min(importance_values)
+            max_importance = max(importance_values)
+            importance_range = max_importance - min_importance
+        else:
+            min_importance = 0
+            max_importance = 0
+            importance_range = 0
+        # Include ALL elements with full data for DOM tree display
+        # Add importance_score field normalized to [0, 1]
+        elements_data = []
+        for el in snapshot.elements:
+            el_dict = el.model_dump()
+            # Compute normalized importance_score
+            if importance_range > 0:
+                importance_score = (el.importance - min_importance) / importance_range
+            else:
+                # If all elements have same importance, set to 0.5
+                importance_score = 0.5
+            el_dict["importance_score"] = importance_score
+            elements_data.append(el_dict)
+        return {
+            "url": snapshot.url,
+            "element_count": len(snapshot.elements),
+            "timestamp": snapshot.timestamp,
+            "elements": elements_data,  # Full element data for DOM tree
+        }
+    @staticmethod
+    def build_step_end_event(
+        step_id: str,
+        step_index: int,
+        goal: str,
+        attempt: int,
+        pre_url: str,
+        post_url: str,
+        snapshot_digest: str | None,
+        llm_data: dict[str, Any],
+        exec_data: dict[str, Any],
+        verify_data: dict[str, Any],
+        pre_elements: list[dict[str, Any]] | None = None,
+        assertions: list[dict[str, Any]] | None = None,
+    ) -> dict[str, Any]:
+        """
+        Build step_end trace event data.
+        Args:
+            step_id: Unique step identifier
+            step_index: Step index (0-based)
+            goal: User's goal for this step
+            attempt: Attempt number (0-based)
+            pre_url: URL before action execution
+            post_url: URL after action execution
+            snapshot_digest: Digest of snapshot before action
+            llm_data: LLM interaction data
+            exec_data: Action execution data
+            verify_data: Verification data
+            pre_elements: Optional list of elements from pre-snapshot (with diff_status)
+            assertions: Optional list of assertion results from AgentRuntime
+        Returns:
+            Dictionary with step_end event data
+        """
+        pre_data: dict[str, Any] = {
+            "url": pre_url,
+            "snapshot_digest": snapshot_digest,
+        }
+        # Add elements to pre field if provided (for diff overlay support)
+        if pre_elements is not None:
+            pre_data["elements"] = pre_elements
+        # Build verify data with assertions if provided
+        final_verify_data = verify_data.copy()
+        if assertions:
+            # Ensure signals dict exists
+            if "signals" not in final_verify_data:
+                final_verify_data["signals"] = {}
+            # Add assertions to signals
+            final_verify_data["signals"]["assertions"] = assertions
+            # Check for task completion (assertions marked as required that passed)
+            for a in assertions:
+                if a.get("passed") and a.get("required"):
+                    final_verify_data["signals"]["task_done"] = True
+                    final_verify_data["signals"]["task_done_label"] = a.get("label")
+                    break
+        return {
+            "v": 1,
+            "step_id": step_id,
+            "step_index": step_index,
+            "goal": goal,
+            "attempt": attempt,
+            "pre": pre_data,
+            "llm": llm_data,
+            "exec": exec_data,
+            "post": {
+                "url": post_url,
+            },
+            "verify": final_verify_data,
+        }