sentienceapi 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentienceapi might be problematic. Click here for more details.
- sentience/__init__.py +120 -6
- sentience/_extension_loader.py +156 -1
- sentience/action_executor.py +217 -0
- sentience/actions.py +758 -30
- sentience/agent.py +806 -293
- sentience/agent_config.py +3 -0
- sentience/agent_runtime.py +840 -0
- sentience/asserts/__init__.py +70 -0
- sentience/asserts/expect.py +621 -0
- sentience/asserts/query.py +383 -0
- sentience/async_api.py +89 -1141
- sentience/backends/__init__.py +137 -0
- sentience/backends/actions.py +372 -0
- sentience/backends/browser_use_adapter.py +241 -0
- sentience/backends/cdp_backend.py +393 -0
- sentience/backends/exceptions.py +211 -0
- sentience/backends/playwright_backend.py +194 -0
- sentience/backends/protocol.py +216 -0
- sentience/backends/sentience_context.py +469 -0
- sentience/backends/snapshot.py +483 -0
- sentience/base_agent.py +95 -0
- sentience/browser.py +678 -39
- sentience/browser_evaluator.py +299 -0
- sentience/canonicalization.py +207 -0
- sentience/cloud_tracing.py +507 -42
- sentience/constants.py +6 -0
- sentience/conversational_agent.py +77 -43
- sentience/cursor_policy.py +142 -0
- sentience/element_filter.py +136 -0
- sentience/expect.py +98 -2
- sentience/extension/background.js +56 -185
- sentience/extension/content.js +150 -287
- sentience/extension/injected_api.js +1088 -1368
- sentience/extension/manifest.json +1 -1
- sentience/extension/pkg/sentience_core.d.ts +22 -22
- sentience/extension/pkg/sentience_core.js +275 -433
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/release.json +47 -47
- sentience/failure_artifacts.py +241 -0
- sentience/formatting.py +9 -53
- sentience/inspector.py +183 -1
- sentience/integrations/__init__.py +6 -0
- sentience/integrations/langchain/__init__.py +12 -0
- sentience/integrations/langchain/context.py +18 -0
- sentience/integrations/langchain/core.py +326 -0
- sentience/integrations/langchain/tools.py +180 -0
- sentience/integrations/models.py +46 -0
- sentience/integrations/pydanticai/__init__.py +15 -0
- sentience/integrations/pydanticai/deps.py +20 -0
- sentience/integrations/pydanticai/toolset.py +468 -0
- sentience/llm_interaction_handler.py +191 -0
- sentience/llm_provider.py +765 -66
- sentience/llm_provider_utils.py +120 -0
- sentience/llm_response_builder.py +153 -0
- sentience/models.py +595 -3
- sentience/ordinal.py +280 -0
- sentience/overlay.py +109 -2
- sentience/protocols.py +228 -0
- sentience/query.py +67 -5
- sentience/read.py +95 -3
- sentience/recorder.py +223 -3
- sentience/schemas/trace_v1.json +128 -9
- sentience/screenshot.py +48 -2
- sentience/sentience_methods.py +86 -0
- sentience/snapshot.py +599 -55
- sentience/snapshot_diff.py +126 -0
- sentience/text_search.py +120 -5
- sentience/trace_event_builder.py +148 -0
- sentience/trace_file_manager.py +197 -0
- sentience/trace_indexing/index_schema.py +95 -7
- sentience/trace_indexing/indexer.py +105 -48
- sentience/tracer_factory.py +120 -9
- sentience/tracing.py +172 -8
- sentience/utils/__init__.py +40 -0
- sentience/utils/browser.py +46 -0
- sentience/{utils.py → utils/element.py} +3 -42
- sentience/utils/formatting.py +59 -0
- sentience/verification.py +618 -0
- sentience/visual_agent.py +2058 -0
- sentience/wait.py +68 -2
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/METADATA +199 -40
- sentienceapi-0.98.0.dist-info/RECORD +92 -0
- sentience/extension/test-content.js +0 -4
- sentienceapi-0.90.16.dist-info/RECORD +0 -50
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/WHEEL +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/entry_points.txt +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-APACHE +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-MIT +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/top_level.txt +0 -0
sentience/__init__.py
CHANGED
|
@@ -2,9 +2,39 @@
|
|
|
2
2
|
Sentience Python SDK - AI Agent Browser Automation
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
from .
|
|
5
|
+
# Extension helpers (for browser-use integration)
|
|
6
|
+
from ._extension_loader import (
|
|
7
|
+
get_extension_dir,
|
|
8
|
+
get_extension_version,
|
|
9
|
+
verify_extension_injected,
|
|
10
|
+
verify_extension_injected_async,
|
|
11
|
+
verify_extension_version,
|
|
12
|
+
verify_extension_version_async,
|
|
13
|
+
)
|
|
14
|
+
from .actions import click, click_rect, press, scroll_to, type_text
|
|
15
|
+
from .agent import SentienceAgent, SentienceAgentAsync
|
|
7
16
|
from .agent_config import AgentConfig
|
|
17
|
+
from .agent_runtime import AgentRuntime, AssertionHandle
|
|
18
|
+
|
|
19
|
+
# Backend-agnostic actions (aliased to avoid conflict with existing actions)
|
|
20
|
+
# Browser backends (for browser-use integration)
|
|
21
|
+
from .backends import (
|
|
22
|
+
BrowserBackend,
|
|
23
|
+
BrowserUseAdapter,
|
|
24
|
+
BrowserUseCDPTransport,
|
|
25
|
+
CachedSnapshot,
|
|
26
|
+
CDPBackendV0,
|
|
27
|
+
CDPTransport,
|
|
28
|
+
LayoutMetrics,
|
|
29
|
+
PlaywrightBackend,
|
|
30
|
+
ViewportInfo,
|
|
31
|
+
)
|
|
32
|
+
from .backends import click as backend_click
|
|
33
|
+
from .backends import scroll as backend_scroll
|
|
34
|
+
from .backends import scroll_to_element as backend_scroll_to_element
|
|
35
|
+
from .backends import snapshot as backend_snapshot
|
|
36
|
+
from .backends import type_text as backend_type_text
|
|
37
|
+
from .backends import wait_for_stable as backend_wait_for_stable
|
|
8
38
|
|
|
9
39
|
# Agent Layer (Phase 1 & 2)
|
|
10
40
|
from .base_agent import BaseAgent
|
|
@@ -13,10 +43,8 @@ from .browser import SentienceBrowser
|
|
|
13
43
|
# Tracing (v0.12.0+)
|
|
14
44
|
from .cloud_tracing import CloudTraceSink, SentienceLogger
|
|
15
45
|
from .conversational_agent import ConversationalAgent
|
|
46
|
+
from .cursor_policy import CursorPolicy
|
|
16
47
|
from .expect import expect
|
|
17
|
-
|
|
18
|
-
# Formatting (v0.12.0+)
|
|
19
|
-
from .formatting import format_snapshot_for_llm
|
|
20
48
|
from .generator import ScriptGenerator, generate
|
|
21
49
|
from .inspector import Inspector, inspect
|
|
22
50
|
from .llm_provider import (
|
|
@@ -24,6 +52,8 @@ from .llm_provider import (
|
|
|
24
52
|
LLMProvider,
|
|
25
53
|
LLMResponse,
|
|
26
54
|
LocalLLMProvider,
|
|
55
|
+
LocalVisionLLMProvider,
|
|
56
|
+
MLXVLMProvider,
|
|
27
57
|
OpenAIProvider,
|
|
28
58
|
)
|
|
29
59
|
from .models import ( # Agent Layer Models
|
|
@@ -50,17 +80,22 @@ from .models import ( # Agent Layer Models
|
|
|
50
80
|
ViewportRect,
|
|
51
81
|
WaitResult,
|
|
52
82
|
)
|
|
83
|
+
|
|
84
|
+
# Ordinal support (Phase 3)
|
|
85
|
+
from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
|
|
53
86
|
from .overlay import clear_overlay, show_overlay
|
|
54
87
|
from .query import find, query
|
|
55
88
|
from .read import read
|
|
56
89
|
from .recorder import Recorder, Trace, TraceStep, record
|
|
57
90
|
from .screenshot import screenshot
|
|
91
|
+
from .sentience_methods import AgentAction, SentienceMethod
|
|
58
92
|
from .snapshot import snapshot
|
|
59
93
|
from .text_search import find_text_rect
|
|
60
94
|
from .tracer_factory import SENTIENCE_API_URL, create_tracer
|
|
61
95
|
from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
|
|
62
96
|
|
|
63
97
|
# Utilities (v0.12.0+)
|
|
98
|
+
# Import from utils package (re-exports from submodules for backward compatibility)
|
|
64
99
|
from .utils import (
|
|
65
100
|
canonical_snapshot_loose,
|
|
66
101
|
canonical_snapshot_strict,
|
|
@@ -68,11 +103,62 @@ from .utils import (
|
|
|
68
103
|
save_storage_state,
|
|
69
104
|
sha256_digest,
|
|
70
105
|
)
|
|
106
|
+
|
|
107
|
+
# Formatting (v0.12.0+)
|
|
108
|
+
from .utils.formatting import format_snapshot_for_llm
|
|
109
|
+
|
|
110
|
+
# Verification (agent assertion loop)
|
|
111
|
+
from .verification import (
|
|
112
|
+
AssertContext,
|
|
113
|
+
AssertOutcome,
|
|
114
|
+
Predicate,
|
|
115
|
+
all_of,
|
|
116
|
+
any_of,
|
|
117
|
+
custom,
|
|
118
|
+
element_count,
|
|
119
|
+
exists,
|
|
120
|
+
is_checked,
|
|
121
|
+
is_collapsed,
|
|
122
|
+
is_disabled,
|
|
123
|
+
is_enabled,
|
|
124
|
+
is_expanded,
|
|
125
|
+
is_unchecked,
|
|
126
|
+
not_exists,
|
|
127
|
+
url_contains,
|
|
128
|
+
url_matches,
|
|
129
|
+
value_contains,
|
|
130
|
+
value_equals,
|
|
131
|
+
)
|
|
132
|
+
from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
|
|
71
133
|
from .wait import wait_for
|
|
72
134
|
|
|
73
|
-
__version__ = "0.
|
|
135
|
+
__version__ = "0.98.0"
|
|
74
136
|
|
|
75
137
|
__all__ = [
|
|
138
|
+
# Extension helpers (for browser-use integration)
|
|
139
|
+
"get_extension_dir",
|
|
140
|
+
"get_extension_version",
|
|
141
|
+
"verify_extension_injected",
|
|
142
|
+
"verify_extension_injected_async",
|
|
143
|
+
"verify_extension_version",
|
|
144
|
+
"verify_extension_version_async",
|
|
145
|
+
# Browser backends (for browser-use integration)
|
|
146
|
+
"BrowserBackend",
|
|
147
|
+
"CDPTransport",
|
|
148
|
+
"CDPBackendV0",
|
|
149
|
+
"PlaywrightBackend",
|
|
150
|
+
"BrowserUseAdapter",
|
|
151
|
+
"BrowserUseCDPTransport",
|
|
152
|
+
"ViewportInfo",
|
|
153
|
+
"LayoutMetrics",
|
|
154
|
+
"backend_snapshot",
|
|
155
|
+
"CachedSnapshot",
|
|
156
|
+
# Backend-agnostic actions (prefixed to avoid conflicts)
|
|
157
|
+
"backend_click",
|
|
158
|
+
"backend_type_text",
|
|
159
|
+
"backend_scroll",
|
|
160
|
+
"backend_scroll_to_element",
|
|
161
|
+
"backend_wait_for_stable",
|
|
76
162
|
# Core SDK
|
|
77
163
|
"SentienceBrowser",
|
|
78
164
|
"Snapshot",
|
|
@@ -87,7 +173,9 @@ __all__ = [
|
|
|
87
173
|
"click",
|
|
88
174
|
"type_text",
|
|
89
175
|
"press",
|
|
176
|
+
"scroll_to",
|
|
90
177
|
"click_rect",
|
|
178
|
+
"CursorPolicy",
|
|
91
179
|
"wait_for",
|
|
92
180
|
"expect",
|
|
93
181
|
"Inspector",
|
|
@@ -116,7 +204,12 @@ __all__ = [
|
|
|
116
204
|
"OpenAIProvider",
|
|
117
205
|
"AnthropicProvider",
|
|
118
206
|
"LocalLLMProvider",
|
|
207
|
+
"LocalVisionLLMProvider",
|
|
208
|
+
"MLXVLMProvider",
|
|
119
209
|
"SentienceAgent",
|
|
210
|
+
"SentienceAgentAsync",
|
|
211
|
+
"SentienceVisualAgent",
|
|
212
|
+
"SentienceVisualAgentAsync",
|
|
120
213
|
"ConversationalAgent",
|
|
121
214
|
# Agent Layer Models
|
|
122
215
|
"AgentActionResult",
|
|
@@ -150,4 +243,25 @@ __all__ = [
|
|
|
150
243
|
"format_snapshot_for_llm",
|
|
151
244
|
# Agent Config (v0.12.0+)
|
|
152
245
|
"AgentConfig",
|
|
246
|
+
# Enums
|
|
247
|
+
"SentienceMethod",
|
|
248
|
+
"AgentAction",
|
|
249
|
+
# Verification (agent assertion loop)
|
|
250
|
+
"AgentRuntime",
|
|
251
|
+
"AssertContext",
|
|
252
|
+
"AssertOutcome",
|
|
253
|
+
"Predicate",
|
|
254
|
+
"url_matches",
|
|
255
|
+
"url_contains",
|
|
256
|
+
"exists",
|
|
257
|
+
"not_exists",
|
|
258
|
+
"element_count",
|
|
259
|
+
"all_of",
|
|
260
|
+
"any_of",
|
|
261
|
+
"custom",
|
|
262
|
+
# Ordinal support (Phase 3)
|
|
263
|
+
"OrdinalIntent",
|
|
264
|
+
"detect_ordinal_intent",
|
|
265
|
+
"select_by_ordinal",
|
|
266
|
+
"boost_ordinal_elements",
|
|
153
267
|
]
|
sentience/_extension_loader.py
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Shared extension loading logic for sync and async implementations
|
|
2
|
+
Shared extension loading logic for sync and async implementations.
|
|
3
|
+
|
|
4
|
+
Provides:
|
|
5
|
+
- get_extension_dir(): Returns path to bundled extension (for browser-use integration)
|
|
6
|
+
- verify_extension_injected(): Verifies window.sentience API is available
|
|
7
|
+
- get_extension_version(): Gets extension version from manifest
|
|
8
|
+
- verify_extension_version(): Checks SDK-extension version compatibility
|
|
3
9
|
"""
|
|
4
10
|
|
|
11
|
+
import json
|
|
5
12
|
from pathlib import Path
|
|
13
|
+
from typing import TYPE_CHECKING, Any
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from .protocols import AsyncPageProtocol, PageProtocol
|
|
6
17
|
|
|
7
18
|
|
|
8
19
|
def find_extension_path() -> Path:
|
|
@@ -38,3 +49,147 @@ def find_extension_path() -> Path:
|
|
|
38
49
|
f"2. {dev_ext_path}\n"
|
|
39
50
|
"Make sure the extension is built and 'sentience/extension' directory exists."
|
|
40
51
|
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_extension_dir() -> str:
    """
    Return the location of the bundled Sentience extension as a string.

    This is a thin convenience wrapper around find_extension_path() for
    callers that need a plain ``str`` rather than a ``Path``, for example
    when building a Chromium ``--load-extension`` argument:

        from sentience import get_extension_dir
        from browser_use import BrowserSession, BrowserProfile

        profile = BrowserProfile(
            args=[f"--load-extension={get_extension_dir()}"],
        )
        session = BrowserSession(browser_profile=profile)

    Returns:
        The path reported by find_extension_path(), converted with str().
        NOTE(review): documented upstream as an absolute path; that depends
        on find_extension_path(), which is not fully visible here.

    Raises:
        Whatever find_extension_path() raises when the extension cannot be
        located (documented upstream as FileNotFoundError).
    """
    extension_path = find_extension_path()
    return str(extension_path)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_extension_version() -> str:
    """
    Get the version of the bundled extension from its manifest.json.

    The manifest is read in binary mode so that the json module performs its
    own encoding detection (UTF-8, UTF-16 or UTF-32, with or without a BOM).
    Opening it in text mode without an explicit encoding would decode it with
    the platform's locale encoding, which is not UTF-8 on every system.

    Returns:
        The manifest's "version" value (e.g., "2.2.0"), or the literal string
        "unknown" when the manifest has no "version" key.

    Raises:
        FileNotFoundError: If manifest.json does not exist in the extension
            directory (find_extension_path() may also raise when the
            extension directory itself cannot be located).
        json.JSONDecodeError: If manifest.json is not valid JSON.
    """
    manifest_path = find_extension_path() / "manifest.json"
    with open(manifest_path, "rb") as f:
        manifest = json.load(f)
    return manifest.get("version", "unknown")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def verify_extension_injected(page: "PageProtocol") -> bool:
    """
    Report whether the page exposes a callable window.sentience.snapshot (sync).

    Intended to be called once navigation has finished, e.g.:

        browser.goto("https://example.com")
        if not verify_extension_injected(browser.page):
            raise RuntimeError("Extension not injected")

    Args:
        page: Object with a synchronous ``evaluate(script)`` method
            (a Playwright Page in normal use).

    Returns:
        True when the in-page probe evaluates to a truthy value; False when
        it evaluates to a falsy value or when evaluation raises any Exception.
    """
    # Self-invoking arrow function: yields true only if window.sentience
    # exists and its `snapshot` member is a function.
    probe_script = (
        "(() => !!(window.sentience && typeof window.sentience.snapshot === 'function'))()"
    )
    try:
        return bool(page.evaluate(probe_script))
    except Exception:
        # Best-effort check: any evaluation failure is reported as "not injected".
        return False
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
async def verify_extension_injected_async(page: "AsyncPageProtocol") -> bool:
    """
    Report whether the page exposes a callable window.sentience.snapshot (async).

    Intended to be awaited once navigation has finished, e.g.:

        await browser.goto("https://example.com")
        if not await verify_extension_injected_async(browser.page):
            raise RuntimeError("Extension not injected")

    Args:
        page: Object with an awaitable ``evaluate(script)`` method
            (an async Playwright Page in normal use).

    Returns:
        True when the in-page probe evaluates to a truthy value; False when
        it evaluates to a falsy value or when evaluation raises any Exception.
    """
    # Self-invoking arrow function: yields true only if window.sentience
    # exists and its `snapshot` member is a function.
    probe_script = (
        "(() => !!(window.sentience && typeof window.sentience.snapshot === 'function'))()"
    )
    try:
        return bool(await page.evaluate(probe_script))
    except Exception:
        # Best-effort check: any evaluation failure is reported as "not injected".
        return False
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def verify_extension_version(page: "PageProtocol", expected: str | None = None) -> str | None:
|
|
145
|
+
"""
|
|
146
|
+
Check extension version exposed in page (sync).
|
|
147
|
+
|
|
148
|
+
The extension sets window.__SENTIENCE_EXTENSION_VERSION__ when injected.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
page: Playwright Page object (sync)
|
|
152
|
+
expected: If provided, raises RuntimeError on mismatch
|
|
153
|
+
|
|
154
|
+
Returns:
|
|
155
|
+
Version string if found, None if not set (page may not have injected yet)
|
|
156
|
+
|
|
157
|
+
Raises:
|
|
158
|
+
RuntimeError: If expected version provided and doesn't match
|
|
159
|
+
"""
|
|
160
|
+
try:
|
|
161
|
+
got = page.evaluate("window.__SENTIENCE_EXTENSION_VERSION__ || null")
|
|
162
|
+
except Exception:
|
|
163
|
+
got = None
|
|
164
|
+
|
|
165
|
+
if expected and got and got != expected:
|
|
166
|
+
raise RuntimeError(f"Sentience extension version mismatch: expected {expected}, got {got}")
|
|
167
|
+
return got
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
async def verify_extension_version_async(
|
|
171
|
+
page: "AsyncPageProtocol", expected: str | None = None
|
|
172
|
+
) -> str | None:
|
|
173
|
+
"""
|
|
174
|
+
Check extension version exposed in page (async).
|
|
175
|
+
|
|
176
|
+
The extension sets window.__SENTIENCE_EXTENSION_VERSION__ when injected.
|
|
177
|
+
|
|
178
|
+
Args:
|
|
179
|
+
page: Playwright Page object (async)
|
|
180
|
+
expected: If provided, raises RuntimeError on mismatch
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
Version string if found, None if not set (page may not have injected yet)
|
|
184
|
+
|
|
185
|
+
Raises:
|
|
186
|
+
RuntimeError: If expected version provided and doesn't match
|
|
187
|
+
"""
|
|
188
|
+
try:
|
|
189
|
+
got = await page.evaluate("window.__SENTIENCE_EXTENSION_VERSION__ || null")
|
|
190
|
+
except Exception:
|
|
191
|
+
got = None
|
|
192
|
+
|
|
193
|
+
if expected and got and got != expected:
|
|
194
|
+
raise RuntimeError(f"Sentience extension version mismatch: expected {expected}, got {got}")
|
|
195
|
+
return got
|
|
sentience/action_executor.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Action Executor for Sentience Agent.
|
|
3
|
+
|
|
4
|
+
Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
|
|
5
|
+
This separates action execution concerns from LLM interaction.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import Any, Union
|
|
10
|
+
|
|
11
|
+
from .actions import click, click_async, press, press_async, type_text, type_text_async
|
|
12
|
+
from .browser import AsyncSentienceBrowser, SentienceBrowser
|
|
13
|
+
from .models import Snapshot
|
|
14
|
+
from .protocols import AsyncBrowserProtocol, BrowserProtocol
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ActionExecutor:
    """
    Executes actions and handles parsing of action command strings.

    Supported commands (keyword matching is case-insensitive and anchored at
    the start of the string):

    - ``CLICK(id)``
    - ``TYPE(id, "text")``
    - ``PRESS("key")``
    - ``FINISH()``

    Parsing lives in one place (`_parse_action`) and is shared by the sync
    and async entry points, so both accept exactly the same grammar and
    produce result dictionaries with the same shape.
    """

    # Patterns are compiled once at class-creation time.
    _CLICK_RE = re.compile(r"CLICK\s*\(\s*(\d+)\s*\)", re.IGNORECASE)
    # Primary TYPE form: the text contains no quote characters at all.
    _TYPE_RE = re.compile(
        r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
        re.IGNORECASE,
    )
    # Fallback TYPE form, consulted only when the primary form does not match:
    # the text may contain quotes/apostrophes (e.g. "don't"). The closing quote
    # must be the same character as the opening one and the command must run
    # to the end of the string, so everything in between is taken as the text.
    # Backslash escapes are NOT interpreted; they are passed through verbatim.
    _TYPE_QUOTED_RE = re.compile(
        r'TYPE\s*\(\s*(\d+)\s*,\s*(["\'])(.*)\2\s*\)\s*$',
        re.IGNORECASE | re.DOTALL,
    )
    _PRESS_RE = re.compile(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', re.IGNORECASE)
    _FINISH_RE = re.compile(r"FINISH\s*\(\s*\)", re.IGNORECASE)

    def __init__(
        self,
        browser: SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol,
    ):
        """
        Initialize action executor.

        Args:
            browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
                (for testing, can use mock objects that implement BrowserProtocol)
        """
        self.browser = browser
        # Concrete types are checked first because they are unambiguous.
        if isinstance(browser, AsyncSentienceBrowser):
            self._is_async = True
        elif isinstance(browser, SentienceBrowser):
            self._is_async = False
        else:
            # For any other object, decide by whether its `start` attribute is
            # a coroutine function. Everything else is treated as synchronous.
            import inspect

            start_method = getattr(browser, "start", None)
            self._is_async = bool(start_method and inspect.iscoroutinefunction(start_method))

    @classmethod
    def _parse_action(cls, action_str: str) -> tuple[str, tuple[Any, ...]]:
        """
        Parse an action command string into ``(action_name, arguments)``.

        Returns one of:
            ("click", (element_id,)), ("type", (element_id, text)),
            ("press", (key,)), ("finish", ())

        Raises:
            ValueError: If the string matches none of the supported commands.
        """
        if match := cls._CLICK_RE.match(action_str):
            return "click", (int(match.group(1)),)
        if match := cls._TYPE_RE.match(action_str):
            return "type", (int(match.group(1)), match.group(2))
        if match := cls._TYPE_QUOTED_RE.match(action_str):
            # Group 2 is the quote character; group 3 is the text.
            return "type", (int(match.group(1)), match.group(3))
        if match := cls._PRESS_RE.match(action_str):
            return "press", (match.group(1),)
        if cls._FINISH_RE.match(action_str):
            return "finish", ()
        raise ValueError(
            f"Unknown action format: {action_str}\n"
            f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
        )

    @staticmethod
    def _click_result(element_id: int, result: Any) -> dict[str, Any]:
        """Build the result dictionary for a click action."""
        return {
            "success": result.success,
            "action": "click",
            "element_id": element_id,
            "outcome": result.outcome,
            "url_changed": result.url_changed,
            # `cursor` is optional on the action result, hence getattr.
            "cursor": getattr(result, "cursor", None),
        }

    @staticmethod
    def _type_result(element_id: int, text: str, result: Any) -> dict[str, Any]:
        """Build the result dictionary for a type action."""
        return {
            "success": result.success,
            "action": "type",
            "element_id": element_id,
            "text": text,
            "outcome": result.outcome,
        }

    @staticmethod
    def _press_result(key: str, result: Any) -> dict[str, Any]:
        """Build the result dictionary for a press action."""
        return {
            "success": result.success,
            "action": "press",
            "key": key,
            "outcome": result.outcome,
        }

    @staticmethod
    def _finish_result() -> dict[str, Any]:
        """Build the result dictionary for a finish action (no browser call)."""
        return {
            "success": True,
            "action": "finish",
            "message": "Task marked as complete",
        }

    def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
        """
        Parse action string and execute SDK call (synchronous).

        Args:
            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary with keys:
                - success: bool
                - action: str (e.g., "click", "type", "press", "finish")
                - element_id: Optional[int] (for click/type actions)
                - text: Optional[str] (for type actions)
                - key: Optional[str] (for press actions)
                - outcome: Optional[str] (action outcome)
                - url_changed: Optional[bool] (for click actions)
                - cursor: cursor info from the click result, or None if absent
                - message: Optional[str] (for finish action)

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on async browser (use execute_async instead)
        """
        if self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
            )

        action, args = self._parse_action(action_str)

        if action == "click":
            (element_id,) = args
            result = click(self.browser, element_id)  # type: ignore
            return self._click_result(element_id, result)

        if action == "type":
            element_id, text = args
            result = type_text(self.browser, element_id, text)  # type: ignore
            return self._type_result(element_id, text, result)

        if action == "press":
            (key,) = args
            result = press(self.browser, key)  # type: ignore
            return self._press_result(key, result)

        # _parse_action only returns the four known actions; this is "finish".
        return self._finish_result()

    async def execute_async(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
        """
        Parse action string and execute SDK call (asynchronous).

        Args:
            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary (same format as execute())

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on sync browser (use execute() instead)
        """
        if not self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
            )

        action, args = self._parse_action(action_str)

        if action == "click":
            (element_id,) = args
            result = await click_async(self.browser, element_id)  # type: ignore
            return self._click_result(element_id, result)

        if action == "type":
            element_id, text = args
            result = await type_text_async(self.browser, element_id, text)  # type: ignore
            return self._type_result(element_id, text, result)

        if action == "press":
            (key,) = args
            result = await press_async(self.browser, key)  # type: ignore
            return self._press_result(key, result)

        # _parse_action only returns the four known actions; this is "finish".
        return self._finish_result()
|