sentienceapi 0.90.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentienceapi might be problematic. Click here for more details.
- sentience/__init__.py +153 -0
- sentience/actions.py +439 -0
- sentience/agent.py +687 -0
- sentience/agent_config.py +43 -0
- sentience/base_agent.py +101 -0
- sentience/browser.py +409 -0
- sentience/cli.py +130 -0
- sentience/cloud_tracing.py +292 -0
- sentience/conversational_agent.py +509 -0
- sentience/expect.py +92 -0
- sentience/extension/background.js +233 -0
- sentience/extension/content.js +298 -0
- sentience/extension/injected_api.js +1473 -0
- sentience/extension/manifest.json +36 -0
- sentience/extension/pkg/sentience_core.d.ts +51 -0
- sentience/extension/pkg/sentience_core.js +529 -0
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
- sentience/extension/release.json +115 -0
- sentience/extension/test-content.js +4 -0
- sentience/formatting.py +59 -0
- sentience/generator.py +202 -0
- sentience/inspector.py +185 -0
- sentience/llm_provider.py +431 -0
- sentience/models.py +406 -0
- sentience/overlay.py +115 -0
- sentience/query.py +303 -0
- sentience/read.py +96 -0
- sentience/recorder.py +369 -0
- sentience/schemas/trace_v1.json +216 -0
- sentience/screenshot.py +54 -0
- sentience/snapshot.py +282 -0
- sentience/text_search.py +107 -0
- sentience/trace_indexing/__init__.py +27 -0
- sentience/trace_indexing/index_schema.py +111 -0
- sentience/trace_indexing/indexer.py +363 -0
- sentience/tracer_factory.py +211 -0
- sentience/tracing.py +285 -0
- sentience/utils.py +296 -0
- sentience/wait.py +73 -0
- sentienceapi-0.90.9.dist-info/METADATA +878 -0
- sentienceapi-0.90.9.dist-info/RECORD +46 -0
- sentienceapi-0.90.9.dist-info/WHEEL +5 -0
- sentienceapi-0.90.9.dist-info/entry_points.txt +2 -0
- sentienceapi-0.90.9.dist-info/licenses/LICENSE.md +43 -0
- sentienceapi-0.90.9.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration classes for Sentience agents.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class AgentConfig:
    """
    Settings bundle that controls how a Sentience agent runs.

    A single instance carries every tunable the agent consults: how many
    elements go into the LLM context, the sampling temperature, how often a
    failed action is retried, whether actions are verified afterwards, and
    how screenshots are captured.

    Attributes:
        snapshot_limit: Maximum elements to include in LLM context (default: 50)
        temperature: LLM temperature 0.0-1.0 for response generation (default: 0.0)
        max_retries: Number of retries on action failure (default: 1)
        verify: Whether to run verification step after actions (default: True)
        capture_screenshots: Whether to capture screenshots during execution (default: True)
        screenshot_format: Screenshot format 'png' or 'jpeg' (default: 'jpeg')
        screenshot_quality: JPEG quality 1-100, ignored for PNG (default: 80)

    Example:
        >>> from sentience import AgentConfig, SentienceAgent
        >>> config = AgentConfig(
        ...     snapshot_limit=100,
        ...     max_retries=2,
        ...     verify=True
        ... )
        >>> agent = SentienceAgent(browser, llm, config=config)
    """

    # LLM context and sampling
    snapshot_limit: int = 50
    temperature: float = 0.0

    # Action execution
    max_retries: int = 1
    verify: bool = True

    # Screenshot options
    capture_screenshots: bool = True
    screenshot_format: str = "jpeg"  # "png" or "jpeg"
    screenshot_quality: int = 80  # 1-100 (for JPEG only)
|
sentience/base_agent.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BaseAgent: Abstract base class for all Sentience agents
|
|
3
|
+
Defines the interface that all agent implementations must follow
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
|
|
8
|
+
from .models import ActionHistory, AgentActionResult, Element, Snapshot, TokenStats
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseAgent(ABC):
    """
    Common contract shared by every Sentience agent implementation.

    The interface covers:
    - Executing natural language goals (act)
    - Tracking execution history
    - Monitoring token usage
    - Filtering elements based on goals

    Required in every subclass:
    - act(): Execute a natural language goal
    - get_history(): Return execution history
    - get_token_stats(): Return token usage statistics
    - clear_history(): Reset history and token counters

    Optional override:
    - filter_elements(): Customize element filtering logic
    """

    @abstractmethod
    def act(self, goal: str, **kwargs) -> AgentActionResult:
        """
        Carry out a natural language goal.

        Args:
            goal: Natural language instruction (e.g., "Click the login button")
            **kwargs: Additional parameters (implementation-specific)

        Returns:
            AgentActionResult with execution details

        Raises:
            RuntimeError: If execution fails after retries
        """

    @abstractmethod
    def get_history(self) -> list[ActionHistory]:
        """
        Return the record of every action taken so far.

        Returns:
            List of ActionHistory entries
        """

    @abstractmethod
    def get_token_stats(self) -> TokenStats:
        """
        Return token usage figures for the agent session.

        Returns:
            TokenStats with cumulative token counts
        """

    @abstractmethod
    def clear_history(self) -> None:
        """
        Drop the execution history and zero the token counters.

        After this call the agent is back in a clean state.
        """

    def filter_elements(self, snapshot: Snapshot, goal: str | None = None) -> list[Element]:
        """
        Select the snapshot elements that matter for the given goal.

        This base implementation does no filtering: it hands back
        ``snapshot.elements`` as-is and ignores ``goal``. Subclasses can
        override it with custom logic such as:
        - Removing irrelevant elements based on goal keywords
        - Boosting importance of matching elements
        - Filtering by role, size, or visual properties

        Args:
            snapshot: Current page snapshot
            goal: User's goal (can inform filtering strategy)

        Returns:
            Filtered list of elements (default: all elements)

        Example:
            >>> agent = SentienceAgent(browser, llm)
            >>> snap = snapshot(browser)
            >>> filtered = agent.filter_elements(snap, goal="Click login")
            >>> # with an overriding subclass, filtered holds only relevant elements
        """
        all_elements = snapshot.elements
        return all_elements
|
sentience/browser.py
ADDED
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Playwright browser harness with extension loading
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
import tempfile
|
|
8
|
+
import time
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from urllib.parse import urlparse
|
|
11
|
+
|
|
12
|
+
from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
|
|
13
|
+
|
|
14
|
+
from sentience.models import ProxyConfig, StorageState
|
|
15
|
+
|
|
16
|
+
# Import stealth for bot evasion (optional - graceful fallback if not available)
|
|
17
|
+
try:
|
|
18
|
+
from playwright_stealth import stealth_sync
|
|
19
|
+
|
|
20
|
+
STEALTH_AVAILABLE = True
|
|
21
|
+
except ImportError:
|
|
22
|
+
STEALTH_AVAILABLE = False
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class SentienceBrowser:
    """Main browser session with Sentience extension loaded.

    Wraps a Playwright persistent Chromium context that is launched with the
    bundled extension. Can be used as a context manager: ``start()`` runs on
    entry and ``close()`` on exit.
    """

    def __init__(
        self,
        api_key: str | None = None,
        api_url: str | None = None,
        headless: bool | None = None,
        proxy: str | None = None,
        user_data_dir: str | None = None,
        storage_state: str | Path | StorageState | dict | None = None,
    ):
        """
        Initialize Sentience browser

        Only stores configuration; nothing is launched until ``start()``.

        Args:
            api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
                     If None, uses free tier (local extension only)
            api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
                     If None and api_key is provided, uses default URL
                     If None and no api_key, uses free tier (local extension only)
                     If 'local' or Docker sidecar URL, uses Enterprise tier
            headless: Whether to run in headless mode. If None, defaults to True in CI, False otherwise
            proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
                   Supports HTTP, HTTPS, and SOCKS5 proxies
                   Falls back to SENTIENCE_PROXY environment variable if not provided
            user_data_dir: Optional path to user data directory for persistent sessions.
                           If None, uses temporary directory (session not persisted).
                           If provided, cookies and localStorage persist across browser restarts.
            storage_state: Optional storage state to inject (cookies + localStorage).
                           Can be:
                           - Path to JSON file (str or Path)
                           - StorageState object
                           - Dictionary with 'cookies' and/or 'origins' keys
                           If provided, browser starts with pre-injected authentication.
        """
        self.api_key = api_key
        # Default to the production API when a key is given without a URL;
        # otherwise keep whatever the caller passed (None means free tier).
        if self.api_key and not api_url:
            self.api_url = "https://api.sentienceapi.com"
        else:
            self.api_url = api_url

        # Determine headless mode
        if headless is None:
            # Default to False for local dev, True when the CI env var is "true"
            self.headless = os.environ.get("CI", "").lower() == "true"
        else:
            self.headless = headless

        # Support proxy from argument or environment variable
        self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")

        # Auth injection support
        self.user_data_dir = user_data_dir
        self.storage_state = storage_state

        # Runtime handles, populated by start() and cleared by close()
        self.playwright: Playwright | None = None
        self.context: BrowserContext | None = None
        self.page: Page | None = None
        self._extension_path: str | None = None

    def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
        """
        Parse proxy connection string into ProxyConfig.

        Malformed input never raises: a warning is printed and None is
        returned instead.

        Args:
            proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')

        Returns:
            ProxyConfig object, or None if the string is empty, uses an
            unsupported scheme, lacks a hostname or port, or cannot be parsed
        """
        if not proxy_string:
            return None

        try:
            parsed = urlparse(proxy_string)

            # Validate scheme
            if parsed.scheme not in ("http", "https", "socks5"):
                print(f"⚠️ [Sentience] Unsupported proxy scheme: {parsed.scheme}")
                print(" Supported: http, https, socks5")
                return None

            # Validate host and port. Reading parsed.port raises ValueError
            # for a non-numeric or out-of-range port (handled below).
            if not parsed.hostname or not parsed.port:
                print("⚠️ [Sentience] Proxy URL must include hostname and port")
                print(" Expected format: http://username:password@host:port")
                return None

            # urlparse strips the brackets from IPv6 literals; put them back
            # so the rebuilt server URL stays valid.
            host = f"[{parsed.hostname}]" if ":" in parsed.hostname else parsed.hostname
            server = f"{parsed.scheme}://{host}:{parsed.port}"

            # Create ProxyConfig with optional credentials
            return ProxyConfig(
                server=server,
                username=parsed.username or None,
                password=parsed.password or None,
            )

        except ValueError as e:
            print(f"⚠️ [Sentience] Invalid proxy configuration: {e}")
            print(" Expected format: http://username:password@host:port")
            return None

    def start(self) -> None:
        """
        Launch browser with extension loaded

        Copies the extension to a temporary directory, starts Playwright,
        launches a persistent Chromium context, injects any configured
        storage state and applies stealth when available. If any step after
        the extension lookup fails, everything acquired so far is released
        before the error is re-raised.

        Raises:
            FileNotFoundError: If no extension directory containing
                manifest.json is found
        """
        # 1. Installed package layout: sentience/browser.py -> sentience/extension/
        package_ext_path = Path(__file__).parent / "extension"

        # 2. Source checkout layout: sentience/browser.py -> ../sentience-chrome
        dev_ext_path = Path(__file__).parent.parent.parent / "sentience-chrome"

        if (package_ext_path / "manifest.json").exists():
            extension_source = package_ext_path
        elif (dev_ext_path / "manifest.json").exists():
            extension_source = dev_ext_path
        else:
            raise FileNotFoundError(
                f"Extension not found. Checked:\n"
                f"1. {package_ext_path}\n"
                f"2. {dev_ext_path}\n"
                "Make sure the extension is built and 'sentience/extension' directory exists."
            )

        try:
            # Create temporary extension bundle
            # We copy it to a temp dir to avoid file locking issues and ensure clean state
            self._extension_path = tempfile.mkdtemp(prefix="sentience-ext-")
            shutil.copytree(extension_source, self._extension_path, dirs_exist_ok=True)

            self.playwright = sync_playwright().start()

            # Build launch arguments
            args = [
                f"--disable-extensions-except={self._extension_path}",
                f"--load-extension={self._extension_path}",
                "--disable-blink-features=AutomationControlled",  # Hides 'navigator.webdriver'
                "--no-sandbox",
                "--disable-infobars",
                # WebRTC leak protection (prevents real IP exposure when using proxies/VPNs)
                "--disable-features=WebRtcHideLocalIpsWithMdns",
                "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
            ]

            # Handle headless mode correctly for extensions
            # 'headless=True' DOES NOT support extensions in standard Chrome
            # We must use 'headless="new"' (Chrome 112+) or run visible
            if self.headless:
                args.append("--headless=new")  # Use new headless mode via args

            # Parse proxy configuration if provided.
            # NOTE(review): an unparsable proxy only prints a warning and the
            # browser then launches WITHOUT a proxy - confirm this is intended.
            proxy_config = self._parse_proxy(self.proxy) if self.proxy else None

            # Handle User Data Directory (Persistence)
            if self.user_data_dir:
                user_data_dir = str(self.user_data_dir)
                Path(user_data_dir).mkdir(parents=True, exist_ok=True)
            else:
                user_data_dir = ""  # Ephemeral temp dir (existing behavior)

            # Build launch_persistent_context parameters
            launch_params = {
                "user_data_dir": user_data_dir,
                # Headless is driven by the --headless=new arg above, so the
                # Playwright flag itself stays False (Playwright workaround).
                "headless": False,
                "args": args,
                "viewport": {"width": 1280, "height": 800},
                # Remove "HeadlessChrome" from User Agent automatically
                "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            }

            # Add proxy if configured
            if proxy_config:
                launch_params["proxy"] = proxy_config.to_playwright_dict()
                # Ignore HTTPS errors when using proxy (many residential proxies use self-signed certs)
                launch_params["ignore_https_errors"] = True
                print(f"🌐 [Sentience] Using proxy: {proxy_config.server}")

            # Launch persistent context (required for extensions)
            self.context = self.playwright.chromium.launch_persistent_context(**launch_params)

            self.page = self.context.pages[0] if self.context.pages else self.context.new_page()

            # Inject storage state if provided (must be after context creation)
            if self.storage_state:
                self._inject_storage_state(self.storage_state)

            # Apply stealth if available
            if STEALTH_AVAILABLE:
                stealth_sync(self.page)

            # Wait a moment for extension to initialize
            time.sleep(0.5)
        except BaseException:
            # Startup failed part-way: release the temp dir, the Playwright
            # driver and any context so nothing is leaked. A secondary
            # cleanup error is dropped so the original failure is the one
            # the caller sees.
            try:
                self.close()
            except Exception:
                pass
            raise

    def goto(self, url: str) -> None:
        """
        Navigate to a URL and ensure extension is ready

        Args:
            url: Address to open

        Raises:
            RuntimeError: If the browser has not been started, or if the
                extension API does not become available after navigation
        """
        if not self.page:
            raise RuntimeError("Browser not started. Call start() first.")

        self.page.goto(url, wait_until="domcontentloaded")

        # Wait for extension to be ready (injected into page)
        if not self._wait_for_extension():
            # Gather diagnostic info before failing
            try:
                diag = self.page.evaluate(
                    """() => ({
                        sentience_defined: typeof window.sentience !== 'undefined',
                        registry_defined: typeof window.sentience_registry !== 'undefined',
                        snapshot_defined: window.sentience && typeof window.sentience.snapshot === 'function',
                        extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
                        url: window.location.href
                    })"""
                )
            except Exception as e:
                diag = f"Failed to get diagnostics: {str(e)}"

            raise RuntimeError(
                "Extension failed to load after navigation. Make sure:\n"
                "1. Extension is built (cd sentience-chrome && ./build.sh)\n"
                "2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n"
                "3. Check browser console for errors (run with headless=False to see console)\n"
                f"4. Extension path: {self._extension_path}\n"
                f"5. Diagnostic info: {diag}"
            )

    def _inject_storage_state(  # noqa: C901
        self, storage_state: str | Path | StorageState | dict
    ) -> None:
        """
        Inject storage state (cookies + localStorage) into browser context.

        Cookies are added to the context in one call. localStorage entries
        are written by navigating the page to each origin in turn; a failure
        for one origin is reported and does not stop the others.

        Args:
            storage_state: Path to JSON file, StorageState object, or dict containing storage state

        Raises:
            ValueError: If storage_state is of an unsupported type
        """
        import json

        # Load storage state
        if isinstance(storage_state, (str, Path)):
            # Load from file
            with open(storage_state, encoding="utf-8") as f:
                state_dict = json.load(f)
            state = StorageState.from_dict(state_dict)
        elif isinstance(storage_state, StorageState):
            # Already a StorageState object
            state = storage_state
        elif isinstance(storage_state, dict):
            # Dictionary format
            state = StorageState.from_dict(storage_state)
        else:
            raise ValueError(
                f"Invalid storage_state type: {type(storage_state)}. "
                "Expected str, Path, StorageState, or dict."
            )

        # Inject cookies (works globally)
        if state.cookies:
            # Convert to Playwright cookie format
            playwright_cookies = []
            for cookie in state.cookies:
                cookie_dict = cookie.model_dump()
                playwright_cookie = {
                    "name": cookie_dict["name"],
                    "value": cookie_dict["value"],
                    "domain": cookie_dict["domain"],
                    "path": cookie_dict["path"],
                }
                # Optional fields are forwarded only when set to a truthy value
                for key in ("expires", "httpOnly", "secure", "sameSite"):
                    if cookie_dict.get(key):
                        playwright_cookie[key] = cookie_dict[key]
                playwright_cookies.append(playwright_cookie)

            self.context.add_cookies(playwright_cookies)
            print(f"✅ [Sentience] Injected {len(state.cookies)} cookie(s)")

        # Inject LocalStorage (requires navigation to each domain)
        if state.origins:
            for origin_data in state.origins:
                origin = origin_data.origin
                if not origin:
                    continue

                # Navigate to origin to set localStorage
                try:
                    self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)

                    # Inject localStorage
                    if origin_data.localStorage:
                        # Convert to dict format for JavaScript
                        localStorage_dict = {
                            item.name: item.value for item in origin_data.localStorage
                        }
                        self.page.evaluate(
                            """(localStorage_data) => {
                                for (const [key, value] of Object.entries(localStorage_data)) {
                                    localStorage.setItem(key, value);
                                }
                            }""",
                            localStorage_dict,
                        )
                        print(
                            f"✅ [Sentience] Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
                        )
                except Exception as e:
                    # Best effort: report and carry on with the next origin
                    print(f"⚠️ [Sentience] Failed to inject localStorage for {origin}: {e}")

    def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
        """
        Poll for window.sentience to be available

        Args:
            timeout_sec: Maximum time to keep polling, in seconds

        Returns:
            True once the page reports the API as ready; False on timeout
            (a warning carrying the last observed status is emitted)
        """
        # Monotonic clock: the timeout must not be affected by system clock changes
        deadline = time.monotonic() + timeout_sec
        last_error = None

        while time.monotonic() < deadline:
            try:
                # Check if API exists and WASM is ready (optional check for _wasmModule)
                result = self.page.evaluate(
                    """() => {
                        if (typeof window.sentience === 'undefined') {
                            return { ready: false, reason: 'window.sentience undefined' };
                        }
                        // Check if WASM loaded (if exposed) or if basic API works
                        // Note: injected_api.js defines window.sentience immediately,
                        // but _wasmModule might take a few ms to load.
                        if (window.sentience._wasmModule === null) {
                            // It's defined but WASM isn't linked yet
                            return { ready: false, reason: 'WASM module not fully loaded' };
                        }
                        // If _wasmModule is not exposed, that's okay - it might be internal
                        // Just verify the API structure is correct
                        return { ready: true };
                    }
                    """
                )

                if isinstance(result, dict):
                    if result.get("ready"):
                        return True
                    last_error = result.get("reason", "Unknown error")
            except Exception as e:
                # Continue waiting on errors
                last_error = f"Evaluation error: {str(e)}"

            time.sleep(0.3)

        # Log the last error for debugging
        if last_error:
            import warnings

            warnings.warn(f"Extension wait timeout. Last status: {last_error}", stacklevel=2)

        return False

    def close(self) -> None:
        """
        Close browser and cleanup

        Closes the context, stops Playwright and removes the temporary
        extension directory. Each step runs even if an earlier one raises,
        and the handles are cleared first, so calling close() again (or
        before start()) is a no-op.
        """
        context = self.context
        playwright = self.playwright
        extension_path = self._extension_path

        # Clear the handles up front so a failure below cannot leave the
        # object pointing at half-closed resources.
        self.context = None
        self.page = None
        self.playwright = None
        self._extension_path = None

        try:
            if context:
                context.close()
        finally:
            try:
                if playwright:
                    playwright.stop()
            finally:
                if extension_path and os.path.exists(extension_path):
                    shutil.rmtree(extension_path, ignore_errors=True)

    def __enter__(self):
        """Context manager entry: start the browser and return this instance"""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: always close the browser; exceptions are not suppressed"""
        self.close()
|
sentience/cli.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI commands for Sentience SDK
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from .browser import SentienceBrowser
|
|
9
|
+
from .generator import ScriptGenerator
|
|
10
|
+
from .inspector import inspect
|
|
11
|
+
from .recorder import Trace, record
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def cmd_inspect(args):
    """
    Start inspector mode

    Opens a visible browser, enters the ``inspect(browser)`` context and
    idles until Ctrl+C. The browser is closed on the way out, whether the
    command ends normally or with an error.

    Args:
        args: Parsed CLI arguments (not read by this command)
    """
    import time

    browser = SentienceBrowser(headless=False)
    try:
        browser.start()
        print("✅ Inspector started. Hover elements to see info, click to see full details.")
        print("Press Ctrl+C to stop.")

        with inspect(browser):
            # Keep running until interrupted
            try:
                while True:
                    time.sleep(1)
            except KeyboardInterrupt:
                print("\n👋 Inspector stopped.")
    finally:
        browser.close()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def cmd_record(args):
    """
    Start recording mode

    Opens a visible browser, optionally navigates to ``args.url``, then
    records inside the ``record(...)`` context until Ctrl+C, at which point
    the trace is saved. The browser is closed on the way out.

    Args:
        args: Parsed CLI arguments. Reads ``url`` (optional start page),
            ``snapshots`` (passed as ``capture_snapshots``), ``mask``
            (optional list of patterns handed to ``add_mask_pattern``) and
            ``output`` (trace path, falling back to "trace.json")
    """
    import time

    browser = SentienceBrowser(headless=False)
    try:
        browser.start()

        # Navigate to start URL if provided
        if args.url:
            browser.page.goto(args.url)
            browser.page.wait_for_load_state("networkidle")

        print("✅ Recording started. Perform actions in the browser.")
        print("Press Ctrl+C to stop and save trace.")

        with record(browser, capture_snapshots=args.snapshots) as rec:
            # Add mask patterns if provided
            for pattern in args.mask or []:
                rec.add_mask_pattern(pattern)

            # Keep running until interrupted
            try:
                while True:
                    time.sleep(1)
            except KeyboardInterrupt:
                print("\n💾 Saving trace...")
                output = args.output or "trace.json"
                rec.save(output)
                print(f"✅ Trace saved to {output}")
    finally:
        browser.close()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def cmd_gen(args):
    """
    Generate script from trace

    Loads the trace at ``args.trace`` and writes a Python or TypeScript
    script through ``ScriptGenerator``. Exits with status 1 when
    ``args.lang`` is neither "py" nor "ts".

    Args:
        args: Parsed CLI arguments. Reads ``trace`` (input file), ``lang``
            ("py" or "ts") and ``output`` (script path, falling back to
            "generated.py" / "generated.ts")
    """
    # Load trace
    trace = Trace.load(args.trace)

    # Generate script
    generator = ScriptGenerator(trace)

    if args.lang == "py":
        output = args.output or "generated.py"
        generator.save_python(output)
    elif args.lang == "ts":
        output = args.output or "generated.ts"
        generator.save_typescript(output)
    else:
        # Reachable only when called with a namespace that bypassed argparse's choices
        print(f"❌ Unsupported language: {args.lang}")
        sys.exit(1)

    print(f"✅ Generated {args.lang.upper()} script: {output}")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _build_parser():
    """Assemble the argparse parser with the inspect, record and gen sub-commands."""
    root = argparse.ArgumentParser(description="Sentience SDK CLI")
    commands = root.add_subparsers(dest="command", help="Commands")

    # inspect: takes no options
    inspect_cmd = commands.add_parser("inspect", help="Start inspector mode")
    inspect_cmd.set_defaults(func=cmd_inspect)

    # record: optional start URL, output path, snapshot capture and masks
    record_cmd = commands.add_parser("record", help="Start recording mode")
    record_cmd.add_argument("--url", help="Start URL")
    record_cmd.add_argument("--output", "-o", help="Output trace file", default="trace.json")
    record_cmd.add_argument(
        "--snapshots", action="store_true", help="Capture snapshots at each step"
    )
    record_cmd.add_argument(
        "--mask",
        action="append",
        help="Pattern to mask in recorded text (e.g., password)",
    )
    record_cmd.set_defaults(func=cmd_record)

    # gen: trace file in, script out
    gen_cmd = commands.add_parser("gen", help="Generate script from trace")
    gen_cmd.add_argument("trace", help="Trace JSON file")
    gen_cmd.add_argument("--lang", choices=["py", "ts"], default="py", help="Output language")
    gen_cmd.add_argument("--output", "-o", help="Output script file")
    gen_cmd.set_defaults(func=cmd_gen)

    return root


def main():
    """
    Main CLI entry point

    Parses the command line and dispatches to the handler registered for
    the chosen sub-command. With no sub-command, prints the help text and
    exits with status 1.
    """
    cli = _build_parser()
    parsed = cli.parse_args()

    if parsed.command:
        parsed.func(parsed)
        return

    cli.print_help()
    sys.exit(1)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
if __name__ == "__main__":
|
|
130
|
+
main()
|