PyPI - sentienceapi - Versions diffs - 0.90.12__py3-none-any.whl → 0.92.2__py3-none-any.whl - Mend

sentienceapi 0.90.12__py3-none-any.whl → 0.92.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sentienceapi might be problematic. Click here for more details.

Files changed (63)

sentience/__init__.py +14 -5
sentience/_extension_loader.py +40 -0
sentience/action_executor.py +215 -0
sentience/actions.py +408 -25
sentience/agent.py +804 -310
sentience/agent_config.py +3 -0
sentience/async_api.py +101 -0
sentience/base_agent.py +95 -0
sentience/browser.py +594 -25
sentience/browser_evaluator.py +299 -0
sentience/cloud_tracing.py +458 -36
sentience/conversational_agent.py +79 -45
sentience/element_filter.py +136 -0
sentience/expect.py +98 -2
sentience/extension/background.js +56 -185
sentience/extension/content.js +117 -289
sentience/extension/injected_api.js +799 -1374
sentience/extension/manifest.json +1 -1
sentience/extension/pkg/sentience_core.js +190 -396
sentience/extension/pkg/sentience_core_bg.wasm +0 -0
sentience/extension/release.json +47 -47
sentience/formatting.py +9 -53
sentience/inspector.py +183 -1
sentience/llm_interaction_handler.py +191 -0
sentience/llm_provider.py +256 -28
sentience/llm_provider_utils.py +120 -0
sentience/llm_response_builder.py +153 -0
sentience/models.py +66 -1
sentience/overlay.py +109 -2
sentience/protocols.py +228 -0
sentience/query.py +1 -1
sentience/read.py +95 -3
sentience/recorder.py +223 -3
sentience/schemas/trace_v1.json +102 -9
sentience/screenshot.py +48 -2
sentience/sentience_methods.py +86 -0
sentience/snapshot.py +309 -64
sentience/snapshot_diff.py +141 -0
sentience/text_search.py +119 -5
sentience/trace_event_builder.py +129 -0
sentience/trace_file_manager.py +197 -0
sentience/trace_indexing/index_schema.py +95 -7
sentience/trace_indexing/indexer.py +117 -14
sentience/tracer_factory.py +119 -6
sentience/tracing.py +172 -8
sentience/utils/__init__.py +40 -0
sentience/utils/browser.py +46 -0
sentience/utils/element.py +257 -0
sentience/utils/formatting.py +59 -0
sentience/utils.py +1 -1
sentience/visual_agent.py +2056 -0
sentience/wait.py +70 -4
{sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/METADATA +61 -22
sentienceapi-0.92.2.dist-info/RECORD +65 -0
sentienceapi-0.92.2.dist-info/licenses/LICENSE +24 -0
sentienceapi-0.92.2.dist-info/licenses/LICENSE-APACHE +201 -0
sentienceapi-0.92.2.dist-info/licenses/LICENSE-MIT +21 -0
sentience/extension/test-content.js +0 -4
sentienceapi-0.90.12.dist-info/RECORD +0 -46
sentienceapi-0.90.12.dist-info/licenses/LICENSE.md +0 -43
{sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/WHEEL +0 -0
{sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/entry_points.txt +0 -0
{sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/top_level.txt +0 -0

sentience/snapshot.py CHANGED Viewed

@@ -2,15 +2,18 @@
 Snapshot functionality - calls window.sentience.snapshot() or server-side API
 """
+import asyncio
 import json
 import os
 import time
-from typing import Any
+from typing import Any, Optional
 import requests
-from .browser import SentienceBrowser
+from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .browser_evaluator import BrowserEvaluator
 from .models import Snapshot, SnapshotOptions
+from .sentience_methods import SentienceMethod
 # Maximum payload size for API requests (10MB server limit)
 MAX_PAYLOAD_BYTES = 10 * 1024 * 1024
@@ -41,41 +44,33 @@ def _save_trace_to_file(raw_elements: list[dict[str, Any]], trace_path: str | No
 def snapshot(
     browser: SentienceBrowser,
-    screenshot: bool | None = None,
-    limit: int | None = None,
-    filter: dict[str, Any] | None = None,
-    use_api: bool | None = None,
-    save_trace: bool = False,
-    trace_path: str | None = None,
-    show_overlay: bool = False,
+    options: SnapshotOptions | None = None,
 ) -> Snapshot:
     """
     Take a snapshot of the current page
     Args:
         browser: SentienceBrowser instance
-        screenshot: Whether to capture screenshot (bool or dict with format/quality)
-        limit: Limit number of elements returned
-        filter: Filter options (min_area, allowed_roles, min_z_index)
-        use_api: Force use of server-side API if True, local extension if False.
-                 If None, uses API if api_key is set, otherwise uses local extension.
-        save_trace: Whether to save raw_elements to JSON for benchmarking/training
-        trace_path: Path to save trace file. If None, uses "trace_{timestamp}.json"
-        show_overlay: Show visual overlay highlighting elements in browser
+        options: Snapshot options (screenshot, limit, filter, etc.)
+                If None, uses default options.
     Returns:
         Snapshot object
+    Example:
+        # Basic snapshot with defaults
+        snap = snapshot(browser)
+        # With options
+        snap = snapshot(browser, SnapshotOptions(
+            screenshot=True,
+            limit=100,
+            show_overlay=True
+        ))
     """
-    # Build SnapshotOptions from individual parameters
-    options = SnapshotOptions(
-        screenshot=screenshot if screenshot is not None else False,
-        limit=limit if limit is not None else 50,
-        filter=filter,
-        use_api=use_api,
-        save_trace=save_trace,
-        trace_path=trace_path,
-        show_overlay=show_overlay,
-    )
+    # Use default options if none provided
+    if options is None:
+        options = SnapshotOptions()
     # Determine if we should use server-side API
     should_use_api = (
@@ -101,33 +96,16 @@ def _snapshot_via_extension(
     # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
     # The new architecture loads injected_api.js asynchronously, so window.sentience
     # may not be immediately available after page load
-    try:
-        browser.page.wait_for_function(
-            "typeof window.sentience !== 'undefined'",
-            timeout=5000,  # 5 second timeout
-        )
-    except Exception as e:
-        # Gather diagnostics if wait fails
-        try:
-            diag = browser.page.evaluate(
-                """() => ({
-                    sentience_defined: typeof window.sentience !== 'undefined',
-                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
-                    url: window.location.href
-                })"""
-            )
-        except Exception:
-            diag = {"error": "Could not gather diagnostics"}
-        raise RuntimeError(
-            f"Sentience extension failed to inject window.sentience API. "
-            f"Is the extension loaded? Diagnostics: {diag}"
-        ) from e
+    BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
     # Build options dict for extension API (exclude save_trace/trace_path)
     ext_options: dict[str, Any] = {}
     if options.screenshot is not False:
-        ext_options["screenshot"] = options.screenshot
+        # Serialize ScreenshotConfig to dict if it's a Pydantic model
+        if hasattr(options.screenshot, "model_dump"):
+            ext_options["screenshot"] = options.screenshot.model_dump()
+        else:
+            ext_options["screenshot"] = options.screenshot
     if options.limit != 50:
         ext_options["limit"] = options.limit
     if options.filter is not None:
@@ -185,26 +163,14 @@ def _snapshot_via_api(
     # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
     # Even for API mode, we need the extension to collect raw data locally
-    try:
-        browser.page.wait_for_function("typeof window.sentience !== 'undefined'", timeout=5000)
-    except Exception as e:
-        raise RuntimeError(
-            "Sentience extension failed to inject. Cannot collect raw data for API processing."
-        ) from e
+    BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
     # Step 1: Get raw data from local extension (always happens locally)
     raw_options: dict[str, Any] = {}
     if options.screenshot is not False:
         raw_options["screenshot"] = options.screenshot
-    raw_result = browser.page.evaluate(
-        """
-        (options) => {
-            return window.sentience.snapshot(options);
-        }
-        """,
-        raw_options,
-    )
+    raw_result = BrowserEvaluator.invoke(browser.page, SentienceMethod.SNAPSHOT, **raw_options)
     # Save trace if requested (save raw data before API processing)
     if options.save_trace:
@@ -280,3 +246,282 @@ def _snapshot_via_api(
         return Snapshot(**snapshot_data)
     except requests.exceptions.RequestException as e:
         raise RuntimeError(f"API request failed: {e}")
+# ========== Async Snapshot Functions ==========
+async def snapshot_async(
+    browser: AsyncSentienceBrowser,
+    options: SnapshotOptions | None = None,
+) -> Snapshot:
+    """
+    Take a snapshot of the current page (async)
+    Args:
+        browser: AsyncSentienceBrowser instance
+        options: Snapshot options (screenshot, limit, filter, etc.)
+                If None, uses default options.
+    Returns:
+        Snapshot object
+    Example:
+        # Basic snapshot with defaults
+        snap = await snapshot_async(browser)
+        # With options
+        snap = await snapshot_async(browser, SnapshotOptions(
+            screenshot=True,
+            limit=100,
+            show_overlay=True
+        ))
+    """
+    # Use default options if none provided
+    if options is None:
+        options = SnapshotOptions()
+    # Determine if we should use server-side API
+    should_use_api = (
+        options.use_api if options.use_api is not None else (browser.api_key is not None)
+    )
+    if should_use_api and browser.api_key:
+        # Use server-side API (Pro/Enterprise tier)
+        return await _snapshot_via_api_async(browser, options)
+    else:
+        # Use local extension (Free tier)
+        return await _snapshot_via_extension_async(browser, options)
+async def _snapshot_via_extension_async(
+    browser: AsyncSentienceBrowser,
+    options: SnapshotOptions,
+) -> Snapshot:
+    """Take snapshot using local extension (Free tier) - async"""
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+    # Wait for extension injection to complete
+    try:
+        await browser.page.wait_for_function(
+            "typeof window.sentience !== 'undefined'",
+            timeout=5000,
+        )
+    except Exception as e:
+        try:
+            diag = await browser.page.evaluate(
+                """() => ({
+                    sentience_defined: typeof window.sentience !== 'undefined',
+                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
+                    url: window.location.href
+                })"""
+            )
+        except Exception:
+            diag = {"error": "Could not gather diagnostics"}
+        raise RuntimeError(
+            f"Sentience extension failed to inject window.sentience API. "
+            f"Is the extension loaded? Diagnostics: {diag}"
+        ) from e
+    # Build options dict for extension API
+    ext_options: dict[str, Any] = {}
+    if options.screenshot is not False:
+        # Serialize ScreenshotConfig to dict if it's a Pydantic model
+        if hasattr(options.screenshot, "model_dump"):
+            ext_options["screenshot"] = options.screenshot.model_dump()
+        else:
+            ext_options["screenshot"] = options.screenshot
+    if options.limit != 50:
+        ext_options["limit"] = options.limit
+    if options.filter is not None:
+        ext_options["filter"] = (
+            options.filter.model_dump() if hasattr(options.filter, "model_dump") else options.filter
+        )
+    # Call extension API
+    result = await browser.page.evaluate(
+        """
+        (options) => {
+            return window.sentience.snapshot(options);
+        }
+        """,
+        ext_options,
+    )
+    if result.get("error"):
+        print(f"      Snapshot error: {result.get('error')}")
+    # Save trace if requested
+    if options.save_trace:
+        _save_trace_to_file(result.get("raw_elements", []), options.trace_path)
+    # Show visual overlay if requested
+    if options.show_overlay:
+        raw_elements = result.get("raw_elements", [])
+        if raw_elements:
+            await browser.page.evaluate(
+                """
+                (elements) => {
+                    if (window.sentience && window.sentience.showOverlay) {
+                        window.sentience.showOverlay(elements, null);
+                    }
+                }
+                """,
+                raw_elements,
+            )
+    # Extract screenshot_format from data URL if not provided by extension
+    if result.get("screenshot") and not result.get("screenshot_format"):
+        screenshot_data_url = result.get("screenshot", "")
+        if screenshot_data_url.startswith("data:image/"):
+            # Extract format from "data:image/jpeg;base64,..." or "data:image/png;base64,..."
+            format_match = screenshot_data_url.split(";")[0].split("/")[-1]
+            if format_match in ["jpeg", "jpg", "png"]:
+                result["screenshot_format"] = "jpeg" if format_match in ["jpeg", "jpg"] else "png"
+    # Validate and parse with Pydantic
+    snapshot_obj = Snapshot(**result)
+    return snapshot_obj
+async def _snapshot_via_api_async(
+    browser: AsyncSentienceBrowser,
+    options: SnapshotOptions,
+) -> Snapshot:
+    """Take snapshot using server-side API (Pro/Enterprise tier) - async"""
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+    if not browser.api_key:
+        raise ValueError("API key required for server-side processing")
+    if not browser.api_url:
+        raise ValueError("API URL required for server-side processing")
+    # Wait for extension injection
+    try:
+        await browser.page.wait_for_function(
+            "typeof window.sentience !== 'undefined'", timeout=5000
+        )
+    except Exception as e:
+        raise RuntimeError(
+            "Sentience extension failed to inject. Cannot collect raw data for API processing."
+        ) from e
+    # Step 1: Get raw data from local extension (including screenshot)
+    raw_options: dict[str, Any] = {}
+    screenshot_requested = False
+    if options.screenshot is not False:
+        screenshot_requested = True
+        # Serialize ScreenshotConfig to dict if it's a Pydantic model
+        if hasattr(options.screenshot, "model_dump"):
+            raw_options["screenshot"] = options.screenshot.model_dump()
+        else:
+            raw_options["screenshot"] = options.screenshot
+    raw_result = await browser.page.evaluate(
+        """
+        (options) => {
+            return window.sentience.snapshot(options);
+        }
+        """,
+        raw_options,
+    )
+    # Extract screenshot from raw result (extension captures it, but API doesn't return it)
+    screenshot_data_url = raw_result.get("screenshot")
+    screenshot_format = None
+    if screenshot_data_url:
+        # Extract format from data URL
+        if screenshot_data_url.startswith("data:image/"):
+            format_match = screenshot_data_url.split(";")[0].split("/")[-1]
+            if format_match in ["jpeg", "jpg", "png"]:
+                screenshot_format = "jpeg" if format_match in ["jpeg", "jpg"] else "png"
+    # Save trace if requested
+    if options.save_trace:
+        _save_trace_to_file(raw_result.get("raw_elements", []), options.trace_path)
+    # Step 2: Send to server for smart ranking/filtering
+    payload = {
+        "raw_elements": raw_result.get("raw_elements", []),
+        "url": raw_result.get("url", ""),
+        "viewport": raw_result.get("viewport"),
+        "goal": options.goal,
+        "options": {
+            "limit": options.limit,
+            "filter": options.filter.model_dump() if options.filter else None,
+        },
+    }
+    # Check payload size
+    payload_json = json.dumps(payload)
+    payload_size = len(payload_json.encode("utf-8"))
+    if payload_size > MAX_PAYLOAD_BYTES:
+        raise ValueError(
+            f"Payload size ({payload_size / 1024 / 1024:.2f}MB) exceeds server limit "
+            f"({MAX_PAYLOAD_BYTES / 1024 / 1024:.0f}MB). "
+            f"Try reducing the number of elements on the page or filtering elements."
+        )
+    headers = {
+        "Authorization": f"Bearer {browser.api_key}",
+        "Content-Type": "application/json",
+    }
+    try:
+        # Lazy import httpx - only needed for async API calls
+        import httpx
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(
+                f"{browser.api_url}/v1/snapshot",
+                content=payload_json,
+                headers=headers,
+            )
+            response.raise_for_status()
+            api_result = response.json()
+        # Extract screenshot format from data URL if not provided
+        if screenshot_data_url and not screenshot_format:
+            if screenshot_data_url.startswith("data:image/"):
+                format_match = screenshot_data_url.split(";")[0].split("/")[-1]
+                if format_match in ["jpeg", "jpg", "png"]:
+                    screenshot_format = "jpeg" if format_match in ["jpeg", "jpg"] else "png"
+        # Merge API result with local data
+        snapshot_data = {
+            "status": api_result.get("status", "success"),
+            "timestamp": api_result.get("timestamp"),
+            "url": api_result.get("url", raw_result.get("url", "")),
+            "viewport": api_result.get("viewport", raw_result.get("viewport")),
+            "elements": api_result.get("elements", []),
+            "screenshot": screenshot_data_url,  # Use the extracted screenshot
+            "screenshot_format": screenshot_format,  # Use the extracted format
+            "error": api_result.get("error"),
+        }
+        # Show visual overlay if requested
+        if options.show_overlay:
+            elements = api_result.get("elements", [])
+            if elements:
+                await browser.page.evaluate(
+                    """
+                    (elements) => {
+                        if (window.sentience && window.sentience.showOverlay) {
+                            window.sentience.showOverlay(elements, null);
+                        }
+                    }
+                    """,
+                    elements,
+                )
+        return Snapshot(**snapshot_data)
+    except ImportError:
+        # Fallback to requests if httpx not available (shouldn't happen in async context)
+        raise RuntimeError(
+            "httpx is required for async API calls. Install it with: pip install httpx"
+        )
+    except Exception as e:
+        raise RuntimeError(f"API request failed: {e}")

sentience/snapshot_diff.py ADDED Viewed

@@ -0,0 +1,141 @@
+"""
+Snapshot comparison utilities for diff_status detection.
+Implements change detection logic for the Diff Overlay feature.
+"""
+from typing import Literal
+from .models import Element, Snapshot
+class SnapshotDiff:
+    """
+    Utility for comparing snapshots and computing diff_status for elements.
+    Implements the logic described in DIFF_STATUS_GAP_ANALYSIS.md:
+    - ADDED: Element exists in current but not in previous
+    - REMOVED: Element existed in previous but not in current
+    - MODIFIED: Element exists in both but has changed
+    - MOVED: Element exists in both but position changed
+    """
+    @staticmethod
+    def _has_bbox_changed(el1: Element, el2: Element, threshold: float = 5.0) -> bool:
+        """
+        Check if element's bounding box has changed significantly.
+        Args:
+            el1: First element
+            el2: Second element
+            threshold: Position change threshold in pixels (default: 5.0)
+        Returns:
+            True if position or size changed beyond threshold
+        """
+        return (
+            abs(el1.bbox.x - el2.bbox.x) > threshold
+            or abs(el1.bbox.y - el2.bbox.y) > threshold
+            or abs(el1.bbox.width - el2.bbox.width) > threshold
+            or abs(el1.bbox.height - el2.bbox.height) > threshold
+        )
+    @staticmethod
+    def _has_content_changed(el1: Element, el2: Element) -> bool:
+        """
+        Check if element's content has changed.
+        Args:
+            el1: First element
+            el2: Second element
+        Returns:
+            True if text, role, or visual properties changed
+        """
+        # Compare text content
+        if el1.text != el2.text:
+            return True
+        # Compare role
+        if el1.role != el2.role:
+            return True
+        # Compare visual cues
+        if el1.visual_cues.is_primary != el2.visual_cues.is_primary:
+            return True
+        if el1.visual_cues.is_clickable != el2.visual_cues.is_clickable:
+            return True
+        return False
+    @staticmethod
+    def compute_diff_status(
+        current: Snapshot,
+        previous: Snapshot | None,
+    ) -> list[Element]:
+        """
+        Compare current snapshot with previous and set diff_status on elements.
+        Args:
+            current: Current snapshot
+            previous: Previous snapshot (None if this is the first snapshot)
+        Returns:
+            List of elements with diff_status set (includes REMOVED elements from previous)
+        """
+        # If no previous snapshot, all current elements are ADDED
+        if previous is None:
+            result = []
+            for el in current.elements:
+                # Create a copy with diff_status set
+                el_dict = el.model_dump()
+                el_dict["diff_status"] = "ADDED"
+                result.append(Element(**el_dict))
+            return result
+        # Build lookup maps by element ID
+        current_by_id = {el.id: el for el in current.elements}
+        previous_by_id = {el.id: el for el in previous.elements}
+        current_ids = set(current_by_id.keys())
+        previous_ids = set(previous_by_id.keys())
+        result: list[Element] = []
+        # Process current elements
+        for el in current.elements:
+            el_dict = el.model_dump()
+            if el.id not in previous_ids:
+                # Element is new - mark as ADDED
+                el_dict["diff_status"] = "ADDED"
+            else:
+                # Element existed before - check for changes
+                prev_el = previous_by_id[el.id]
+                bbox_changed = SnapshotDiff._has_bbox_changed(el, prev_el)
+                content_changed = SnapshotDiff._has_content_changed(el, prev_el)
+                if bbox_changed and content_changed:
+                    # Both position and content changed - mark as MODIFIED
+                    el_dict["diff_status"] = "MODIFIED"
+                elif bbox_changed:
+                    # Only position changed - mark as MOVED
+                    el_dict["diff_status"] = "MOVED"
+                elif content_changed:
+                    # Only content changed - mark as MODIFIED
+                    el_dict["diff_status"] = "MODIFIED"
+                else:
+                    # No change - don't set diff_status (frontend expects undefined)
+                    el_dict["diff_status"] = None
+            result.append(Element(**el_dict))
+        # Process removed elements (existed in previous but not in current)
+        for prev_id in previous_ids - current_ids:
+            prev_el = previous_by_id[prev_id]
+            el_dict = prev_el.model_dump()
+            el_dict["diff_status"] = "REMOVED"
+            result.append(Element(**el_dict))
+        return result

sentienceapi 0.90.12__py3-none-any.whl → 0.92.2__py3-none-any.whl

Potentially problematic release.

sentienceapi 0.90.12__py3-none-any.whl → 0.92.2__py3-none-any.whl