PyPI - sentienceapi - Versions diffs - 0.90.12__py3-none-any.whl → 0.92.2__py3-none-any.whl - Mend

sentienceapi 0.90.12__py3-none-any.whl → 0.92.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sentienceapi might be problematic. Click here for more details.

Files changed (63)

sentience/__init__.py +14 -5
sentience/_extension_loader.py +40 -0
sentience/action_executor.py +215 -0
sentience/actions.py +408 -25
sentience/agent.py +804 -310
sentience/agent_config.py +3 -0
sentience/async_api.py +101 -0
sentience/base_agent.py +95 -0
sentience/browser.py +594 -25
sentience/browser_evaluator.py +299 -0
sentience/cloud_tracing.py +458 -36
sentience/conversational_agent.py +79 -45
sentience/element_filter.py +136 -0
sentience/expect.py +98 -2
sentience/extension/background.js +56 -185
sentience/extension/content.js +117 -289
sentience/extension/injected_api.js +799 -1374
sentience/extension/manifest.json +1 -1
sentience/extension/pkg/sentience_core.js +190 -396
sentience/extension/pkg/sentience_core_bg.wasm +0 -0
sentience/extension/release.json +47 -47
sentience/formatting.py +9 -53
sentience/inspector.py +183 -1
sentience/llm_interaction_handler.py +191 -0
sentience/llm_provider.py +256 -28
sentience/llm_provider_utils.py +120 -0
sentience/llm_response_builder.py +153 -0
sentience/models.py +66 -1
sentience/overlay.py +109 -2
sentience/protocols.py +228 -0
sentience/query.py +1 -1
sentience/read.py +95 -3
sentience/recorder.py +223 -3
sentience/schemas/trace_v1.json +102 -9
sentience/screenshot.py +48 -2
sentience/sentience_methods.py +86 -0
sentience/snapshot.py +309 -64
sentience/snapshot_diff.py +141 -0
sentience/text_search.py +119 -5
sentience/trace_event_builder.py +129 -0
sentience/trace_file_manager.py +197 -0
sentience/trace_indexing/index_schema.py +95 -7
sentience/trace_indexing/indexer.py +117 -14
sentience/tracer_factory.py +119 -6
sentience/tracing.py +172 -8
sentience/utils/__init__.py +40 -0
sentience/utils/browser.py +46 -0
sentience/utils/element.py +257 -0
sentience/utils/formatting.py +59 -0
sentience/utils.py +1 -1
sentience/visual_agent.py +2056 -0
sentience/wait.py +70 -4
{sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/METADATA +61 -22
sentienceapi-0.92.2.dist-info/RECORD +65 -0
sentienceapi-0.92.2.dist-info/licenses/LICENSE +24 -0
sentienceapi-0.92.2.dist-info/licenses/LICENSE-APACHE +201 -0
sentienceapi-0.92.2.dist-info/licenses/LICENSE-MIT +21 -0
sentience/extension/test-content.js +0 -4
sentienceapi-0.90.12.dist-info/RECORD +0 -46
sentienceapi-0.90.12.dist-info/licenses/LICENSE.md +0 -43
{sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/WHEEL +0 -0
{sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/entry_points.txt +0 -0
{sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/top_level.txt +0 -0

sentience/__init__.py CHANGED Viewed

@@ -3,7 +3,7 @@ Sentience Python SDK - AI Agent Browser Automation
 """
 from .actions import click, click_rect, press, type_text
-from .agent import SentienceAgent
+from .agent import SentienceAgent, SentienceAgentAsync
 from .agent_config import AgentConfig
 # Agent Layer (Phase 1 & 2)
@@ -14,9 +14,6 @@ from .browser import SentienceBrowser
 from .cloud_tracing import CloudTraceSink, SentienceLogger
 from .conversational_agent import ConversationalAgent
 from .expect import expect
-# Formatting (v0.12.0+)
-from .formatting import format_snapshot_for_llm
 from .generator import ScriptGenerator, generate
 from .inspector import Inspector, inspect
 from .llm_provider import (
@@ -55,12 +52,14 @@ from .query import find, query
 from .read import read
 from .recorder import Recorder, Trace, TraceStep, record
 from .screenshot import screenshot
+from .sentience_methods import AgentAction, SentienceMethod
 from .snapshot import snapshot
 from .text_search import find_text_rect
 from .tracer_factory import SENTIENCE_API_URL, create_tracer
 from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
 # Utilities (v0.12.0+)
+# Import from utils package (re-exports from submodules for backward compatibility)
 from .utils import (
     canonical_snapshot_loose,
     canonical_snapshot_strict,
@@ -68,9 +67,13 @@ from .utils import (
     save_storage_state,
     sha256_digest,
 )
+# Formatting (v0.12.0+)
+from .utils.formatting import format_snapshot_for_llm
+from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
 from .wait import wait_for
-__version__ = "0.90.12"
+__version__ = "0.92.2"
 __all__ = [
     # Core SDK
@@ -117,6 +120,9 @@ __all__ = [
     "AnthropicProvider",
     "LocalLLMProvider",
     "SentienceAgent",
+    "SentienceAgentAsync",
+    "SentienceVisualAgent",
+    "SentienceVisualAgentAsync",
     "ConversationalAgent",
     # Agent Layer Models
     "AgentActionResult",
@@ -150,4 +156,7 @@ __all__ = [
     "format_snapshot_for_llm",
     # Agent Config (v0.12.0+)
     "AgentConfig",
+    # Enums
+    "SentienceMethod",
+    "AgentAction",
 ]

sentience/_extension_loader.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""
+Shared extension loading logic for sync and async implementations
+"""
+from pathlib import Path
+def find_extension_path() -> Path:
+    """
+    Find Sentience extension directory (shared logic for sync and async).
+    Checks multiple locations:
+    1. sentience/extension/ (installed package)
+    2. ../sentience-chrome (development/monorepo)
+    Returns:
+        Path to extension directory
+    Raises:
+        FileNotFoundError: If extension not found in any location
+    """
+    # 1. Try relative to this file (installed package structure)
+    # sentience/_extension_loader.py -> sentience/extension/
+    package_ext_path = Path(__file__).parent / "extension"
+    # 2. Try development root (if running from source repo)
+    # sentience/_extension_loader.py -> ../sentience-chrome
+    dev_ext_path = Path(__file__).parent.parent.parent / "sentience-chrome"
+    if package_ext_path.exists() and (package_ext_path / "manifest.json").exists():
+        return package_ext_path
+    elif dev_ext_path.exists() and (dev_ext_path / "manifest.json").exists():
+        return dev_ext_path
+    else:
+        raise FileNotFoundError(
+            f"Extension not found. Checked:\n"
+            f"1. {package_ext_path}\n"
+            f"2. {dev_ext_path}\n"
+            "Make sure the extension is built and 'sentience/extension' directory exists."
+        )

sentience/action_executor.py ADDED Viewed

@@ -0,0 +1,215 @@
+"""
+Action Executor for Sentience Agent.
+Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
+This separates action execution concerns from LLM interaction.
+"""
+import re
+from typing import Any, Union
+from .actions import click, click_async, press, press_async, type_text, type_text_async
+from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .models import Snapshot
+from .protocols import AsyncBrowserProtocol, BrowserProtocol
+class ActionExecutor:
+    """
+    Executes actions and handles parsing of action command strings.
+    This class encapsulates all action execution logic, making it easier to:
+    - Test action execution independently
+    - Add new action types in one place
+    - Handle action parsing errors consistently
+    """
+    def __init__(
+        self,
+        browser: SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol,
+    ):
+        """
+        Initialize action executor.
+        Args:
+            browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
+                    (for testing, can use mock objects that implement BrowserProtocol)
+        """
+        self.browser = browser
+        # Check if browser is async - support both concrete types and protocols
+        # Check concrete types first (most reliable)
+        if isinstance(browser, AsyncSentienceBrowser):
+            self._is_async = True
+        elif isinstance(browser, SentienceBrowser):
+            self._is_async = False
+        else:
+            # For protocol-based browsers, check if methods are actually async
+            # This is more reliable than isinstance checks which can match both protocols
+            import inspect
+            start_method = getattr(browser, "start", None)
+            if start_method and inspect.iscoroutinefunction(start_method):
+                self._is_async = True
+            elif isinstance(browser, BrowserProtocol):
+                # If it implements BrowserProtocol and start is not async, it's sync
+                self._is_async = False
+            else:
+                # Default to sync for unknown types
+                self._is_async = False
+    def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
+        """
+        Parse action string and execute SDK call (synchronous).
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+            snap: Current snapshot (for context, currently unused but kept for API consistency)
+        Returns:
+            Execution result dictionary with keys:
+            - success: bool
+            - action: str (e.g., "click", "type", "press", "finish")
+            - element_id: Optional[int] (for click/type actions)
+            - text: Optional[str] (for type actions)
+            - key: Optional[str] (for press actions)
+            - outcome: Optional[str] (action outcome)
+            - url_changed: Optional[bool] (for click actions)
+            - error: Optional[str] (if action failed)
+            - message: Optional[str] (for finish action)
+        Raises:
+            ValueError: If action format is unknown
+            RuntimeError: If called on async browser (use execute_async instead)
+        """
+        if self._is_async:
+            raise RuntimeError(
+                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
+            )
+        # Parse CLICK(42)
+        if match := re.match(r"CLICK\s*\(\s*(\d+)\s*\)", action_str, re.IGNORECASE):
+            element_id = int(match.group(1))
+            result = click(self.browser, element_id)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "click",
+                "element_id": element_id,
+                "outcome": result.outcome,
+                "url_changed": result.url_changed,
+            }
+        # Parse TYPE(42, "hello world")
+        elif match := re.match(
+            r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
+            action_str,
+            re.IGNORECASE,
+        ):
+            element_id = int(match.group(1))
+            text = match.group(2)
+            result = type_text(self.browser, element_id, text)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "type",
+                "element_id": element_id,
+                "text": text,
+                "outcome": result.outcome,
+            }
+        # Parse PRESS("Enter")
+        elif match := re.match(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', action_str, re.IGNORECASE):
+            key = match.group(1)
+            result = press(self.browser, key)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "press",
+                "key": key,
+                "outcome": result.outcome,
+            }
+        # Parse FINISH()
+        elif re.match(r"FINISH\s*\(\s*\)", action_str, re.IGNORECASE):
+            return {
+                "success": True,
+                "action": "finish",
+                "message": "Task marked as complete",
+            }
+        else:
+            raise ValueError(
+                f"Unknown action format: {action_str}\n"
+                f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
+            )
+    async def execute_async(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
+        """
+        Parse action string and execute SDK call (asynchronous).
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+            snap: Current snapshot (for context, currently unused but kept for API consistency)
+        Returns:
+            Execution result dictionary (same format as execute())
+        Raises:
+            ValueError: If action format is unknown
+            RuntimeError: If called on sync browser (use execute() instead)
+        """
+        if not self._is_async:
+            raise RuntimeError(
+                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
+            )
+        # Parse CLICK(42)
+        if match := re.match(r"CLICK\s*\(\s*(\d+)\s*\)", action_str, re.IGNORECASE):
+            element_id = int(match.group(1))
+            result = await click_async(self.browser, element_id)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "click",
+                "element_id": element_id,
+                "outcome": result.outcome,
+                "url_changed": result.url_changed,
+            }
+        # Parse TYPE(42, "hello world")
+        elif match := re.match(
+            r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
+            action_str,
+            re.IGNORECASE,
+        ):
+            element_id = int(match.group(1))
+            text = match.group(2)
+            result = await type_text_async(self.browser, element_id, text)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "type",
+                "element_id": element_id,
+                "text": text,
+                "outcome": result.outcome,
+            }
+        # Parse PRESS("Enter")
+        elif match := re.match(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', action_str, re.IGNORECASE):
+            key = match.group(1)
+            result = await press_async(self.browser, key)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "press",
+                "key": key,
+                "outcome": result.outcome,
+            }
+        # Parse FINISH()
+        elif re.match(r"FINISH\s*\(\s*\)", action_str, re.IGNORECASE):
+            return {
+                "success": True,
+                "action": "finish",
+                "message": "Task marked as complete",
+            }
+        else:
+            raise ValueError(
+                f"Unknown action format: {action_str}\n"
+                f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
+            )

sentienceapi 0.90.12__py3-none-any.whl → 0.92.2__py3-none-any.whl

Potentially problematic release.

sentienceapi 0.90.12__py3-none-any.whl → 0.92.2__py3-none-any.whl