sentienceapi-0.90.17-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sentienceapi might be problematic.

Files changed (50)
  1. sentience/__init__.py +153 -0
  2. sentience/_extension_loader.py +40 -0
  3. sentience/actions.py +837 -0
  4. sentience/agent.py +1246 -0
  5. sentience/agent_config.py +43 -0
  6. sentience/async_api.py +101 -0
  7. sentience/base_agent.py +194 -0
  8. sentience/browser.py +1037 -0
  9. sentience/cli.py +130 -0
  10. sentience/cloud_tracing.py +382 -0
  11. sentience/conversational_agent.py +509 -0
  12. sentience/expect.py +188 -0
  13. sentience/extension/background.js +233 -0
  14. sentience/extension/content.js +298 -0
  15. sentience/extension/injected_api.js +1473 -0
  16. sentience/extension/manifest.json +36 -0
  17. sentience/extension/pkg/sentience_core.d.ts +51 -0
  18. sentience/extension/pkg/sentience_core.js +529 -0
  19. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  20. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  21. sentience/extension/release.json +115 -0
  22. sentience/extension/test-content.js +4 -0
  23. sentience/formatting.py +59 -0
  24. sentience/generator.py +202 -0
  25. sentience/inspector.py +365 -0
  26. sentience/llm_provider.py +637 -0
  27. sentience/models.py +412 -0
  28. sentience/overlay.py +222 -0
  29. sentience/query.py +303 -0
  30. sentience/read.py +185 -0
  31. sentience/recorder.py +589 -0
  32. sentience/schemas/trace_v1.json +216 -0
  33. sentience/screenshot.py +100 -0
  34. sentience/snapshot.py +516 -0
  35. sentience/text_search.py +290 -0
  36. sentience/trace_indexing/__init__.py +27 -0
  37. sentience/trace_indexing/index_schema.py +111 -0
  38. sentience/trace_indexing/indexer.py +357 -0
  39. sentience/tracer_factory.py +211 -0
  40. sentience/tracing.py +285 -0
  41. sentience/utils.py +296 -0
  42. sentience/wait.py +137 -0
  43. sentienceapi-0.90.17.dist-info/METADATA +917 -0
  44. sentienceapi-0.90.17.dist-info/RECORD +50 -0
  45. sentienceapi-0.90.17.dist-info/WHEEL +5 -0
  46. sentienceapi-0.90.17.dist-info/entry_points.txt +2 -0
  47. sentienceapi-0.90.17.dist-info/licenses/LICENSE +24 -0
  48. sentienceapi-0.90.17.dist-info/licenses/LICENSE-APACHE +201 -0
  49. sentienceapi-0.90.17.dist-info/licenses/LICENSE-MIT +21 -0
  50. sentienceapi-0.90.17.dist-info/top_level.txt +1 -0
sentience/trace_indexing/indexer.py
@@ -0,0 +1,357 @@
+"""
+Trace indexing for fast timeline rendering and step drill-down.
+"""
+
+import hashlib
+import json
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from .index_schema import (
+    ActionInfo,
+    SnapshotInfo,
+    StepCounters,
+    StepIndex,
+    TraceFileInfo,
+    TraceIndex,
+    TraceSummary,
+)
+
+
+def _normalize_text(text: str | None, max_len: int = 80) -> str:
+    """Normalize text for digest: trim, collapse whitespace, lowercase, cap length."""
+    if not text:
+        return ""
+    # Trim and collapse whitespace
+    normalized = " ".join(text.split())
+    # Lowercase
+    normalized = normalized.lower()
+    # Cap length
+    if len(normalized) > max_len:
+        normalized = normalized[:max_len]
+    return normalized
+
+
+def _round_bbox(bbox: dict[str, float], precision: int = 2) -> dict[str, int]:
+    """Round bbox coordinates to reduce noise (default: 2px precision)."""
+    return {
+        "x": round(bbox.get("x", 0) / precision) * precision,
+        "y": round(bbox.get("y", 0) / precision) * precision,
+        "width": round(bbox.get("width", 0) / precision) * precision,
+        "height": round(bbox.get("height", 0) / precision) * precision,
+    }
+
+
+def _compute_snapshot_digest(snapshot_data: dict[str, Any]) -> str:
+    """
+    Compute stable digest of snapshot for diffing.
+
+    Includes: url, viewport, canonicalized elements (id, role, text_norm, bbox_rounded).
+    Excludes: importance, style fields, transient attributes.
+    """
+    url = snapshot_data.get("url", "")
+    viewport = snapshot_data.get("viewport", {})
+    elements = snapshot_data.get("elements", [])
+
+    # Canonicalize elements
+    canonical_elements = []
+    for elem in elements:
+        canonical_elem = {
+            "id": elem.get("id"),
+            "role": elem.get("role", ""),
+            "text_norm": _normalize_text(elem.get("text")),
+            "bbox": _round_bbox(elem.get("bbox", {"x": 0, "y": 0, "width": 0, "height": 0})),
+            "is_primary": elem.get("is_primary", False),
+            "is_clickable": elem.get("is_clickable", False),
+        }
+        canonical_elements.append(canonical_elem)
+
+    # Sort by element id for determinism
+    canonical_elements.sort(key=lambda e: e.get("id", 0))
+
+    # Build canonical object
+    canonical = {
+        "url": url,
+        "viewport": {
+            "width": viewport.get("width", 0),
+            "height": viewport.get("height", 0),
+        },
+        "elements": canonical_elements,
+    }
+
+    # Hash
+    canonical_json = json.dumps(canonical, sort_keys=True, separators=(",", ":"))
+    digest = hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()
+    return f"sha256:{digest}"
+
+
+def _compute_action_digest(action_data: dict[str, Any]) -> str:
+    """
+    Compute digest of action args for privacy + determinism.
+
+    For TYPE: includes text_len + text_sha256 (not raw text)
+    For CLICK/PRESS: includes only non-sensitive fields
+    """
+    action_type = action_data.get("type", "")
+    target_id = action_data.get("target_element_id")
+
+    canonical = {
+        "type": action_type,
+        "target_element_id": target_id,
+    }
+
+    # Type-specific canonicalization
+    if action_type == "TYPE":
+        text = action_data.get("text", "")
+        canonical["text_len"] = len(text)
+        canonical["text_sha256"] = hashlib.sha256(text.encode("utf-8")).hexdigest()
+    elif action_type == "PRESS":
+        canonical["key"] = action_data.get("key", "")
+    # CLICK has no extra args
+
+    # Hash
+    canonical_json = json.dumps(canonical, sort_keys=True, separators=(",", ":"))
+    digest = hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()
+    return f"sha256:{digest}"
+
+
+def _compute_file_sha256(file_path: str) -> str:
+    """Compute SHA256 hash of entire file."""
+    sha256 = hashlib.sha256()
+    with open(file_path, "rb") as f:
+        while chunk := f.read(8192):
+            sha256.update(chunk)
+    return sha256.hexdigest()
+
+
+def build_trace_index(trace_path: str) -> TraceIndex:
+    """
+    Build trace index from JSONL file in a single streaming pass.
+
+    Args:
+        trace_path: Path to trace JSONL file
+
+    Returns:
+        Complete TraceIndex object
+    """
+    trace_path_obj = Path(trace_path)
+    if not trace_path_obj.exists():
+        raise FileNotFoundError(f"Trace file not found: {trace_path}")
+
+    # Extract run_id from filename
+    run_id = trace_path_obj.stem
+
+    # Initialize summary
+    first_ts = ""
+    last_ts = ""
+    event_count = 0
+    error_count = 0
+    final_url = None
+
+    steps_by_id: dict[str, StepIndex] = {}
+    step_order: list[str] = []  # Track order of first appearance
+
+    # Stream through file, tracking byte offsets
+    with open(trace_path, "rb") as f:
+        byte_offset = 0
+
+        for line_bytes in f:
+            line_len = len(line_bytes)
+
+            try:
+                event = json.loads(line_bytes.decode("utf-8"))
+            except json.JSONDecodeError:
+                # Skip malformed lines
+                byte_offset += line_len
+                continue
+
+            # Extract event metadata
+            event_type = event.get("type", "")
+            ts = event.get("ts") or event.get("timestamp", "")
+            step_id = event.get("step_id", "step-0")  # Default synthetic step
+            data = event.get("data", {})
+
+            # Update summary
+            event_count += 1
+            if not first_ts:
+                first_ts = ts
+            last_ts = ts
+
+            if event_type == "error":
+                error_count += 1
+
+            # Initialize step if first time seeing this step_id
+            if step_id not in steps_by_id:
+                step_order.append(step_id)
+                steps_by_id[step_id] = StepIndex(
+                    step_index=len(step_order),
+                    step_id=step_id,
+                    goal=None,
+                    status="partial",
+                    ts_start=ts,
+                    ts_end=ts,
+                    offset_start=byte_offset,
+                    offset_end=byte_offset + line_len,
+                    url_before=None,
+                    url_after=None,
+                    snapshot_before=SnapshotInfo(),
+                    snapshot_after=SnapshotInfo(),
+                    action=ActionInfo(),
+                    counters=StepCounters(),
+                )
+
+            step = steps_by_id[step_id]
+
+            # Update step metadata
+            step.ts_end = ts
+            step.offset_end = byte_offset + line_len
+            step.counters.events += 1
+
+            # Handle specific event types
+            if event_type == "step_start":
+                step.goal = data.get("goal")
+                step.url_before = data.get("pre_url")
+
+            elif event_type == "snapshot":
+                snapshot_id = data.get("snapshot_id")
+                url = data.get("url")
+                digest = _compute_snapshot_digest(data)
+
+                # First snapshot = before, last snapshot = after
+                if step.snapshot_before.snapshot_id is None:
+                    step.snapshot_before = SnapshotInfo(
+                        snapshot_id=snapshot_id, digest=digest, url=url
+                    )
+                    step.url_before = step.url_before or url
+
+                step.snapshot_after = SnapshotInfo(snapshot_id=snapshot_id, digest=digest, url=url)
+                step.url_after = url
+                step.counters.snapshots += 1
+                final_url = url
+
+            elif event_type == "action":
+                step.action = ActionInfo(
+                    type=data.get("type"),
+                    target_element_id=data.get("target_element_id"),
+                    args_digest=_compute_action_digest(data),
+                    success=data.get("success", True),
+                )
+                step.counters.actions += 1
+
+            elif event_type == "llm_response":
+                step.counters.llm_calls += 1
+
+            elif event_type == "error":
+                step.status = "error"
+
+            elif event_type == "step_end":
+                if step.status != "error":
+                    step.status = "ok"
+
+            byte_offset += line_len
+
+    # Build summary
+    summary = TraceSummary(
+        first_ts=first_ts,
+        last_ts=last_ts,
+        event_count=event_count,
+        step_count=len(steps_by_id),
+        error_count=error_count,
+        final_url=final_url,
+    )
+
+    # Build steps list in order
+    steps_list = [steps_by_id[sid] for sid in step_order]
+
+    # Build trace file info
+    trace_file = TraceFileInfo(
+        path=str(trace_path),
+        size_bytes=os.path.getsize(trace_path),
+        sha256=_compute_file_sha256(str(trace_path)),
+    )
+
+    # Build final index
+    index = TraceIndex(
+        version=1,
+        run_id=run_id,
+        created_at=datetime.now(timezone.utc).isoformat(),
+        trace_file=trace_file,
+        summary=summary,
+        steps=steps_list,
+    )
+
+    return index
+
+
+def write_trace_index(trace_path: str, index_path: str | None = None) -> str:
+    """
+    Build index and write it to a file.
+
+    Args:
+        trace_path: Path to trace JSONL file
+        index_path: Optional custom path for index file (default: trace_path with .index.json)
+
+    Returns:
+        Path to written index file
+    """
+    if index_path is None:
+        index_path = str(Path(trace_path).with_suffix("")) + ".index.json"
+
+    index = build_trace_index(trace_path)
+
+    with open(index_path, "w") as f:
+        json.dump(index.to_dict(), f, indent=2)
+
+    return index_path
+
+
+def read_step_events(trace_path: str, offset_start: int, offset_end: int) -> list[dict[str, Any]]:
+    """
+    Read events for a specific step using byte offsets from the index.
+
+    Args:
+        trace_path: Path to trace JSONL file
+        offset_start: Byte offset where step starts
+        offset_end: Byte offset where step ends
+
+    Returns:
+        List of event dictionaries for the step
+    """
+    events = []
+
+    with open(trace_path, "rb") as f:
+        f.seek(offset_start)
+        bytes_to_read = offset_end - offset_start
+        chunk = f.read(bytes_to_read)
+
+    # Parse lines
+    for line_bytes in chunk.split(b"\n"):
+        if not line_bytes:
+            continue
+        try:
+            event = json.loads(line_bytes.decode("utf-8"))
+            events.append(event)
+        except json.JSONDecodeError:
+            continue
+
+    return events
+
+
+# CLI entrypoint
+def main():
+    """CLI tool for building a trace index."""
+    import sys
+
+    if len(sys.argv) < 2:
+        print("Usage: python -m sentience.trace_indexing.indexer <trace.jsonl>")
+        sys.exit(1)
+
+    trace_path = sys.argv[1]
+    index_path = write_trace_index(trace_path)
+    print(f"✅ Index written to: {index_path}")
+
+
+if __name__ == "__main__":
+    main()
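Taken together, the indexer lets a trace viewer render a timeline from the small .index.json alone, then seek straight to one step's bytes in the JSONL. A minimal usage sketch (illustrative only, not part of the released files; it assumes the JSON written by write_trace_index mirrors the TraceIndex field names, and traces/demo-run.jsonl is a hypothetical path):

import json

from sentience.trace_indexing.indexer import read_step_events, write_trace_index

# Build the sidecar index once; it records byte offsets per step.
index_path = write_trace_index("traces/demo-run.jsonl")
with open(index_path) as f:
    index = json.load(f)

# Drill into the first step by seeking with its recorded offsets
# instead of re-parsing the whole trace file.
step = index["steps"][0]
events = read_step_events("traces/demo-run.jsonl", step["offset_start"], step["offset_end"])
print(f"step {step['step_id']} ({step['status']}): {len(events)} event(s)")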
sentience/tracer_factory.py
@@ -0,0 +1,211 @@
+"""
+Tracer factory with automatic tier detection.
+
+Provides a convenient factory function for creating tracers with cloud upload support.
+"""
+
+import gzip
+import os
+import uuid
+from pathlib import Path
+
+import requests
+
+from sentience.cloud_tracing import CloudTraceSink, SentienceLogger
+from sentience.tracing import JsonlTraceSink, Tracer
+
+# Sentience API base URL (constant)
+SENTIENCE_API_URL = "https://api.sentienceapi.com"
+
+
+def create_tracer(
+    api_key: str | None = None,
+    run_id: str | None = None,
+    api_url: str | None = None,
+    logger: SentienceLogger | None = None,
+    upload_trace: bool = False,
+) -> Tracer:
+    """
+    Create tracer with automatic tier detection.
+
+    Tier Detection:
+    - If api_key is provided: Try to initialize CloudTraceSink (Pro/Enterprise)
+    - If cloud init fails or no api_key: Fall back to JsonlTraceSink (Free tier)
+
+    Args:
+        api_key: Sentience API key (e.g., "sk_pro_xxxxx")
+            - Free tier: None or empty
+            - Pro/Enterprise: Valid API key
+        run_id: Unique identifier for this agent run. If not provided, generates a UUID.
+        api_url: Sentience API base URL (default: https://api.sentienceapi.com)
+        logger: Optional logger instance for logging file sizes and errors
+        upload_trace: Enable cloud trace upload (default: False). When True and api_key
+            is provided, traces will be uploaded to the cloud. When False, traces
+            are saved locally only.
+
+    Returns:
+        Tracer configured with the appropriate sink
+
+    Example:
+        >>> # Pro tier user
+        >>> tracer = create_tracer(api_key="sk_pro_xyz", run_id="demo")
+        >>> # Returns: Tracer with CloudTraceSink
+        >>>
+        >>> # Free tier user
+        >>> tracer = create_tracer(run_id="demo")
+        >>> # Returns: Tracer with JsonlTraceSink (local-only)
+        >>>
+        >>> # Use with agent
+        >>> agent = SentienceAgent(browser, llm, tracer=tracer)
+        >>> agent.act("Click search")
+        >>> tracer.close()  # Uploads to cloud if Pro tier
+    """
+    if run_id is None:
+        run_id = str(uuid.uuid4())
+
+    if api_url is None:
+        api_url = SENTIENCE_API_URL
+
+    # 0. Check for orphaned traces from previous crashes (if api_key provided and upload enabled)
+    if api_key and upload_trace:
+        _recover_orphaned_traces(api_key, api_url)
+
+    # 1. Try to initialize Cloud Sink (Pro/Enterprise tier) if upload enabled
+    if api_key and upload_trace:
+        try:
+            # Request pre-signed upload URL from backend
+            response = requests.post(
+                f"{api_url}/v1/traces/init",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={"run_id": run_id},
+                timeout=10,
+            )
+
+            if response.status_code == 200:
+                data = response.json()
+                upload_url = data.get("upload_url")
+
+                if upload_url:
+                    print("☁️ [Sentience] Cloud tracing enabled (Pro tier)")
+                    return Tracer(
+                        run_id=run_id,
+                        sink=CloudTraceSink(
+                            upload_url=upload_url,
+                            run_id=run_id,
+                            api_key=api_key,
+                            api_url=api_url,
+                            logger=logger,
+                        ),
+                    )
+                else:
+                    print("⚠️ [Sentience] Cloud init response missing upload_url")
+                    print("   Falling back to local-only tracing")
+
+            elif response.status_code == 403:
+                print("⚠️ [Sentience] Cloud tracing requires Pro tier")
+                print("   Falling back to local-only tracing")
+            else:
+                print(f"⚠️ [Sentience] Cloud init failed: HTTP {response.status_code}")
+                print("   Falling back to local-only tracing")
+
+        except requests.exceptions.Timeout:
+            print("⚠️ [Sentience] Cloud init timeout")
+            print("   Falling back to local-only tracing")
+        except requests.exceptions.ConnectionError:
+            print("⚠️ [Sentience] Cloud init connection error")
+            print("   Falling back to local-only tracing")
+        except Exception as e:
+            print(f"⚠️ [Sentience] Cloud init error: {e}")
+            print("   Falling back to local-only tracing")
+
+    # 2. Fallback to Local Sink (Free tier / Offline mode)
+    traces_dir = Path("traces")
+    traces_dir.mkdir(exist_ok=True)
+
+    local_path = traces_dir / f"{run_id}.jsonl"
+    print(f"💾 [Sentience] Local tracing: {local_path}")
+
+    return Tracer(run_id=run_id, sink=JsonlTraceSink(str(local_path)))
+
+
+def _recover_orphaned_traces(api_key: str, api_url: str = SENTIENCE_API_URL) -> None:
+    """
+    Attempt to upload orphaned traces from previous crashed runs.
+
+    Scans ~/.sentience/traces/pending/ for un-uploaded trace files and
+    attempts to upload them using the provided API key.
+
+    Args:
+        api_key: Sentience API key for authentication
+        api_url: Sentience API base URL (defaults to SENTIENCE_API_URL)
+    """
+    pending_dir = Path.home() / ".sentience" / "traces" / "pending"
+
+    if not pending_dir.exists():
+        return
+
+    orphaned = list(pending_dir.glob("*.jsonl"))
+
+    if not orphaned:
+        return
+
+    print(f"⚠️ [Sentience] Found {len(orphaned)} un-uploaded trace(s) from previous runs")
+    print("   Attempting to upload now...")
+
+    for trace_file in orphaned:
+        try:
+            # Extract run_id from filename (format: {run_id}.jsonl)
+            run_id = trace_file.stem
+
+            # Request new upload URL for this run_id
+            response = requests.post(
+                f"{api_url}/v1/traces/init",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={"run_id": run_id},
+                timeout=10,
+            )
+
+            if response.status_code != 200:
+                print(f"❌ Failed to get upload URL for {run_id}: HTTP {response.status_code}")
+                continue
+
+            data = response.json()
+            upload_url = data.get("upload_url")
+
+            if not upload_url:
+                print(f"❌ Upload URL missing for {run_id}")
+                continue
+
+            # Read and compress trace file
+            with open(trace_file, "rb") as f:
+                trace_data = f.read()
+
+            compressed_data = gzip.compress(trace_data)
+
+            # Upload to cloud
+            upload_response = requests.put(
+                upload_url,
+                data=compressed_data,
+                headers={
+                    "Content-Type": "application/x-gzip",
+                    "Content-Encoding": "gzip",
+                },
+                timeout=60,
+            )
+
+            if upload_response.status_code == 200:
+                print(f"✅ Uploaded orphaned trace: {run_id}")
+                # Delete file on successful upload
+                try:
+                    os.remove(trace_file)
+                except Exception:
+                    pass  # Ignore cleanup errors
+            else:
+                print(f"❌ Failed to upload {run_id}: HTTP {upload_response.status_code}")
+
+        except requests.exceptions.Timeout:
+            print(f"❌ Timeout uploading {trace_file.name}")
+        except requests.exceptions.ConnectionError:
+            print(f"❌ Connection error uploading {trace_file.name}")
+        except Exception as e:
+            print(f"❌ Error uploading {trace_file.name}: {e}")