sentienceapi 0.90.12__py3-none-any.whl → 0.92.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentienceapi might be problematic. Click here for more details.
- sentience/__init__.py +14 -5
- sentience/_extension_loader.py +40 -0
- sentience/action_executor.py +215 -0
- sentience/actions.py +408 -25
- sentience/agent.py +804 -310
- sentience/agent_config.py +3 -0
- sentience/async_api.py +101 -0
- sentience/base_agent.py +95 -0
- sentience/browser.py +594 -25
- sentience/browser_evaluator.py +299 -0
- sentience/cloud_tracing.py +458 -36
- sentience/conversational_agent.py +79 -45
- sentience/element_filter.py +136 -0
- sentience/expect.py +98 -2
- sentience/extension/background.js +56 -185
- sentience/extension/content.js +117 -289
- sentience/extension/injected_api.js +799 -1374
- sentience/extension/manifest.json +1 -1
- sentience/extension/pkg/sentience_core.js +190 -396
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/release.json +47 -47
- sentience/formatting.py +9 -53
- sentience/inspector.py +183 -1
- sentience/llm_interaction_handler.py +191 -0
- sentience/llm_provider.py +256 -28
- sentience/llm_provider_utils.py +120 -0
- sentience/llm_response_builder.py +153 -0
- sentience/models.py +66 -1
- sentience/overlay.py +109 -2
- sentience/protocols.py +228 -0
- sentience/query.py +1 -1
- sentience/read.py +95 -3
- sentience/recorder.py +223 -3
- sentience/schemas/trace_v1.json +102 -9
- sentience/screenshot.py +48 -2
- sentience/sentience_methods.py +86 -0
- sentience/snapshot.py +309 -64
- sentience/snapshot_diff.py +141 -0
- sentience/text_search.py +119 -5
- sentience/trace_event_builder.py +129 -0
- sentience/trace_file_manager.py +197 -0
- sentience/trace_indexing/index_schema.py +95 -7
- sentience/trace_indexing/indexer.py +117 -14
- sentience/tracer_factory.py +119 -6
- sentience/tracing.py +172 -8
- sentience/utils/__init__.py +40 -0
- sentience/utils/browser.py +46 -0
- sentience/utils/element.py +257 -0
- sentience/utils/formatting.py +59 -0
- sentience/utils.py +1 -1
- sentience/visual_agent.py +2056 -0
- sentience/wait.py +70 -4
- {sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/METADATA +61 -22
- sentienceapi-0.92.2.dist-info/RECORD +65 -0
- sentienceapi-0.92.2.dist-info/licenses/LICENSE +24 -0
- sentienceapi-0.92.2.dist-info/licenses/LICENSE-APACHE +201 -0
- sentienceapi-0.92.2.dist-info/licenses/LICENSE-MIT +21 -0
- sentience/extension/test-content.js +0 -4
- sentienceapi-0.90.12.dist-info/RECORD +0 -46
- sentienceapi-0.90.12.dist-info/licenses/LICENSE.md +0 -43
- {sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/WHEEL +0 -0
- {sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/entry_points.txt +0 -0
- {sentienceapi-0.90.12.dist-info → sentienceapi-0.92.2.dist-info}/top_level.txt +0 -0
sentience/__init__.py
CHANGED
|
@@ -3,7 +3,7 @@ Sentience Python SDK - AI Agent Browser Automation
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from .actions import click, click_rect, press, type_text
|
|
6
|
-
from .agent import SentienceAgent
|
|
6
|
+
from .agent import SentienceAgent, SentienceAgentAsync
|
|
7
7
|
from .agent_config import AgentConfig
|
|
8
8
|
|
|
9
9
|
# Agent Layer (Phase 1 & 2)
|
|
@@ -14,9 +14,6 @@ from .browser import SentienceBrowser
|
|
|
14
14
|
from .cloud_tracing import CloudTraceSink, SentienceLogger
|
|
15
15
|
from .conversational_agent import ConversationalAgent
|
|
16
16
|
from .expect import expect
|
|
17
|
-
|
|
18
|
-
# Formatting (v0.12.0+)
|
|
19
|
-
from .formatting import format_snapshot_for_llm
|
|
20
17
|
from .generator import ScriptGenerator, generate
|
|
21
18
|
from .inspector import Inspector, inspect
|
|
22
19
|
from .llm_provider import (
|
|
@@ -55,12 +52,14 @@ from .query import find, query
|
|
|
55
52
|
from .read import read
|
|
56
53
|
from .recorder import Recorder, Trace, TraceStep, record
|
|
57
54
|
from .screenshot import screenshot
|
|
55
|
+
from .sentience_methods import AgentAction, SentienceMethod
|
|
58
56
|
from .snapshot import snapshot
|
|
59
57
|
from .text_search import find_text_rect
|
|
60
58
|
from .tracer_factory import SENTIENCE_API_URL, create_tracer
|
|
61
59
|
from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
|
|
62
60
|
|
|
63
61
|
# Utilities (v0.12.0+)
|
|
62
|
+
# Import from utils package (re-exports from submodules for backward compatibility)
|
|
64
63
|
from .utils import (
|
|
65
64
|
canonical_snapshot_loose,
|
|
66
65
|
canonical_snapshot_strict,
|
|
@@ -68,9 +67,13 @@ from .utils import (
|
|
|
68
67
|
save_storage_state,
|
|
69
68
|
sha256_digest,
|
|
70
69
|
)
|
|
70
|
+
|
|
71
|
+
# Formatting (v0.12.0+)
|
|
72
|
+
from .utils.formatting import format_snapshot_for_llm
|
|
73
|
+
from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
|
|
71
74
|
from .wait import wait_for
|
|
72
75
|
|
|
73
|
-
__version__ = "0.90.12"
|
|
76
|
+
__version__ = "0.92.2"
|
|
74
77
|
|
|
75
78
|
__all__ = [
|
|
76
79
|
# Core SDK
|
|
@@ -117,6 +120,9 @@ __all__ = [
|
|
|
117
120
|
"AnthropicProvider",
|
|
118
121
|
"LocalLLMProvider",
|
|
119
122
|
"SentienceAgent",
|
|
123
|
+
"SentienceAgentAsync",
|
|
124
|
+
"SentienceVisualAgent",
|
|
125
|
+
"SentienceVisualAgentAsync",
|
|
120
126
|
"ConversationalAgent",
|
|
121
127
|
# Agent Layer Models
|
|
122
128
|
"AgentActionResult",
|
|
@@ -150,4 +156,7 @@ __all__ = [
|
|
|
150
156
|
"format_snapshot_for_llm",
|
|
151
157
|
# Agent Config (v0.12.0+)
|
|
152
158
|
"AgentConfig",
|
|
159
|
+
# Enums
|
|
160
|
+
"SentienceMethod",
|
|
161
|
+
"AgentAction",
|
|
153
162
|
]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared extension loading logic for sync and async implementations
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def find_extension_path() -> Path:
    """
    Find Sentience extension directory (shared logic for sync and async).

    Candidate locations are checked in order and the first one that contains
    a ``manifest.json`` *file* wins:

    1. ``<this package>/extension`` (installed package layout)
    2. ``sentience-chrome`` located three directory levels above this file
       (development/monorepo layout)

    Returns:
        Path to the extension directory.

    Raises:
        FileNotFoundError: If no candidate contains a ``manifest.json`` file.
    """
    here = Path(__file__)

    # 1. sentience/_extension_loader.py -> sentience/extension/
    package_ext_path = here.parent / "extension"

    # 2. sentience/_extension_loader.py -> ../sentience-chrome (source checkout)
    dev_ext_path = here.parent.parent.parent / "sentience-chrome"

    for candidate in (package_ext_path, dev_ext_path):
        # is_file() (rather than exists()) so that a stray directory named
        # "manifest.json" is not mistaken for a built extension. It also
        # returns False when the candidate directory itself is missing, so no
        # separate directory check is needed.
        if (candidate / "manifest.json").is_file():
            return candidate

    raise FileNotFoundError(
        "Extension not found. Checked:\n"
        f"1. {package_ext_path}\n"
        f"2. {dev_ext_path}\n"
        "Make sure the extension is built and 'sentience/extension' directory exists."
    )
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Action Executor for Sentience Agent.
|
|
3
|
+
|
|
4
|
+
Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
|
|
5
|
+
This separates action execution concerns from LLM interaction.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import Any, Union
|
|
10
|
+
|
|
11
|
+
from .actions import click, click_async, press, press_async, type_text, type_text_async
|
|
12
|
+
from .browser import AsyncSentienceBrowser, SentienceBrowser
|
|
13
|
+
from .models import Snapshot
|
|
14
|
+
from .protocols import AsyncBrowserProtocol, BrowserProtocol
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ActionExecutor:
    """
    Executes actions and handles parsing of action command strings.

    Supported commands (matched case-insensitively at the start of the
    string): ``CLICK(id)``, ``TYPE(id, "text")``, ``PRESS("key")`` and
    ``FINISH()``.

    Parsing is shared between the sync and async entry points so that both
    accept exactly the same command syntax and produce the same result
    dictionaries; only the SDK call that performs the action differs.
    """

    # Patterns are compiled once at class creation instead of on every call.
    _CLICK_RE = re.compile(r"CLICK\s*\(\s*(\d+)\s*\)", re.IGNORECASE)
    # Legacy TYPE pattern: either quote character may open/close the text and
    # the text itself may contain no quote characters at all.
    _TYPE_RE = re.compile(
        r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
        re.IGNORECASE,
    )
    # Fallback TYPE pattern: opening and closing quotes must match, which
    # lets the text contain the *other* quote character, e.g.
    # TYPE(5, "don't") or TYPE(5, 'say "hi"').
    _TYPE_QUOTED_RE = re.compile(
        r"""TYPE\s*\(\s*(\d+)\s*,\s*(?:"([^"]*)"|'([^']*)')\s*\)""",
        re.IGNORECASE,
    )
    _PRESS_RE = re.compile(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', re.IGNORECASE)
    _FINISH_RE = re.compile(r"FINISH\s*\(\s*\)", re.IGNORECASE)

    def __init__(
        self,
        browser: SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol,
    ):
        """
        Initialize action executor.

        Args:
            browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
                (for testing, can use mock objects that implement BrowserProtocol)
        """
        self.browser = browser
        # Concrete types are checked first because they are unambiguous.
        if isinstance(browser, AsyncSentienceBrowser):
            self._is_async = True
        elif isinstance(browser, SentienceBrowser):
            self._is_async = False
        else:
            # For any other object (protocol implementations, mocks) decide by
            # whether its ``start`` method is a coroutine function; everything
            # else, including objects without ``start``, is treated as sync.
            import inspect

            start_method = getattr(browser, "start", None)
            self._is_async = bool(start_method and inspect.iscoroutinefunction(start_method))

    @classmethod
    def _parse_action(cls, action_str: str) -> tuple[str, tuple[Any, ...]]:
        """
        Parse an action string into ``(action_name, args)``.

        Returns one of ``("click", (element_id,))``,
        ``("type", (element_id, text))``, ``("press", (key,))`` or
        ``("finish", ())``.

        Raises:
            ValueError: If the string matches none of the supported commands.
        """
        if match := cls._CLICK_RE.match(action_str):
            return "click", (int(match.group(1)),)

        # Try the legacy pattern first so every input that used to parse keeps
        # producing exactly the same text.
        if match := cls._TYPE_RE.match(action_str):
            return "type", (int(match.group(1)), match.group(2))

        # Only inputs the legacy pattern rejects reach this point: text that
        # contains the quote character not used as its delimiter.
        if match := cls._TYPE_QUOTED_RE.match(action_str):
            double_quoted, single_quoted = match.group(2), match.group(3)
            text = double_quoted if double_quoted is not None else single_quoted
            return "type", (int(match.group(1)), text)

        if match := cls._PRESS_RE.match(action_str):
            return "press", (match.group(1),)

        if cls._FINISH_RE.match(action_str):
            return "finish", ()

        raise ValueError(
            f"Unknown action format: {action_str}\n"
            f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
        )

    @staticmethod
    def _click_result(element_id: int, result: Any) -> dict[str, Any]:
        """Build the result dictionary for a click action."""
        return {
            "success": result.success,
            "action": "click",
            "element_id": element_id,
            "outcome": result.outcome,
            "url_changed": result.url_changed,
        }

    @staticmethod
    def _type_result(element_id: int, text: str, result: Any) -> dict[str, Any]:
        """Build the result dictionary for a type action."""
        return {
            "success": result.success,
            "action": "type",
            "element_id": element_id,
            "text": text,
            "outcome": result.outcome,
        }

    @staticmethod
    def _press_result(key: str, result: Any) -> dict[str, Any]:
        """Build the result dictionary for a press action."""
        return {
            "success": result.success,
            "action": "press",
            "key": key,
            "outcome": result.outcome,
        }

    @staticmethod
    def _finish_result() -> dict[str, Any]:
        """Build the result dictionary for a finish action."""
        return {
            "success": True,
            "action": "finish",
            "message": "Task marked as complete",
        }

    def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
        """
        Parse action string and execute SDK call (synchronous).

        Args:
            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary with keys:
            - success: bool
            - action: str (e.g., "click", "type", "press", "finish")
            - element_id: Optional[int] (for click/type actions)
            - text: Optional[str] (for type actions)
            - key: Optional[str] (for press actions)
            - outcome: Optional[str] (action outcome)
            - url_changed: Optional[bool] (for click actions)
            - message: Optional[str] (for finish action)

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on async browser (use execute_async instead)
        """
        if self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
            )

        action, args = self._parse_action(action_str)

        if action == "click":
            (element_id,) = args
            result = click(self.browser, element_id)  # type: ignore
            return self._click_result(element_id, result)

        if action == "type":
            element_id, text = args
            result = type_text(self.browser, element_id, text)  # type: ignore
            return self._type_result(element_id, text, result)

        if action == "press":
            (key,) = args
            result = press(self.browser, key)  # type: ignore
            return self._press_result(key, result)

        # _parse_action only returns the four known actions, so this is "finish".
        return self._finish_result()

    async def execute_async(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
        """
        Parse action string and execute SDK call (asynchronous).

        Args:
            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary (same format as execute())

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on sync browser (use execute() instead)
        """
        if not self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
            )

        action, args = self._parse_action(action_str)

        if action == "click":
            (element_id,) = args
            result = await click_async(self.browser, element_id)  # type: ignore
            return self._click_result(element_id, result)

        if action == "type":
            element_id, text = args
            result = await type_text_async(self.browser, element_id, text)  # type: ignore
            return self._type_result(element_id, text, result)

        if action == "press":
            (key,) = args
            result = await press_async(self.browser, key)  # type: ignore
            return self._press_result(key, result)

        # _parse_action only returns the four known actions, so this is "finish".
        return self._finish_result()
|