sentienceapi 0.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (82) hide show
  1. sentience/__init__.py +253 -0
  2. sentience/_extension_loader.py +195 -0
  3. sentience/action_executor.py +215 -0
  4. sentience/actions.py +1020 -0
  5. sentience/agent.py +1181 -0
  6. sentience/agent_config.py +46 -0
  7. sentience/agent_runtime.py +424 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +108 -0
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +343 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +427 -0
  21. sentience/base_agent.py +196 -0
  22. sentience/browser.py +1215 -0
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cli.py +130 -0
  26. sentience/cloud_tracing.py +807 -0
  27. sentience/constants.py +6 -0
  28. sentience/conversational_agent.py +543 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +188 -0
  31. sentience/extension/background.js +104 -0
  32. sentience/extension/content.js +161 -0
  33. sentience/extension/injected_api.js +914 -0
  34. sentience/extension/manifest.json +36 -0
  35. sentience/extension/pkg/sentience_core.d.ts +51 -0
  36. sentience/extension/pkg/sentience_core.js +323 -0
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  39. sentience/extension/release.json +115 -0
  40. sentience/formatting.py +15 -0
  41. sentience/generator.py +202 -0
  42. sentience/inspector.py +367 -0
  43. sentience/llm_interaction_handler.py +191 -0
  44. sentience/llm_provider.py +875 -0
  45. sentience/llm_provider_utils.py +120 -0
  46. sentience/llm_response_builder.py +153 -0
  47. sentience/models.py +846 -0
  48. sentience/ordinal.py +280 -0
  49. sentience/overlay.py +222 -0
  50. sentience/protocols.py +228 -0
  51. sentience/query.py +303 -0
  52. sentience/read.py +188 -0
  53. sentience/recorder.py +589 -0
  54. sentience/schemas/trace_v1.json +335 -0
  55. sentience/screenshot.py +100 -0
  56. sentience/sentience_methods.py +86 -0
  57. sentience/snapshot.py +706 -0
  58. sentience/snapshot_diff.py +126 -0
  59. sentience/text_search.py +262 -0
  60. sentience/trace_event_builder.py +148 -0
  61. sentience/trace_file_manager.py +197 -0
  62. sentience/trace_indexing/__init__.py +27 -0
  63. sentience/trace_indexing/index_schema.py +199 -0
  64. sentience/trace_indexing/indexer.py +414 -0
  65. sentience/tracer_factory.py +322 -0
  66. sentience/tracing.py +449 -0
  67. sentience/utils/__init__.py +40 -0
  68. sentience/utils/browser.py +46 -0
  69. sentience/utils/element.py +257 -0
  70. sentience/utils/formatting.py +59 -0
  71. sentience/utils.py +296 -0
  72. sentience/verification.py +380 -0
  73. sentience/visual_agent.py +2058 -0
  74. sentience/wait.py +139 -0
  75. sentienceapi-0.95.0.dist-info/METADATA +984 -0
  76. sentienceapi-0.95.0.dist-info/RECORD +82 -0
  77. sentienceapi-0.95.0.dist-info/WHEEL +5 -0
  78. sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
  79. sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
  80. sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
  81. sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
  82. sentienceapi-0.95.0.dist-info/top_level.txt +1 -0
sentience/__init__.py ADDED
@@ -0,0 +1,253 @@
1
+ """
2
+ Sentience Python SDK - AI Agent Browser Automation
3
+ """
4
+
5
+ # Extension helpers (for browser-use integration)
6
+ from ._extension_loader import (
7
+ get_extension_dir,
8
+ get_extension_version,
9
+ verify_extension_injected,
10
+ verify_extension_injected_async,
11
+ verify_extension_version,
12
+ verify_extension_version_async,
13
+ )
14
+ from .actions import click, click_rect, press, scroll_to, type_text
15
+ from .agent import SentienceAgent, SentienceAgentAsync
16
+ from .agent_config import AgentConfig
17
+ from .agent_runtime import AgentRuntime
18
+
19
+ # Backend-agnostic actions (aliased to avoid conflict with existing actions)
20
+ # Browser backends (for browser-use integration)
21
+ from .backends import (
22
+ BrowserBackend,
23
+ BrowserUseAdapter,
24
+ BrowserUseCDPTransport,
25
+ CachedSnapshot,
26
+ CDPBackendV0,
27
+ CDPTransport,
28
+ LayoutMetrics,
29
+ PlaywrightBackend,
30
+ ViewportInfo,
31
+ )
32
+ from .backends import click as backend_click
33
+ from .backends import scroll as backend_scroll
34
+ from .backends import scroll_to_element as backend_scroll_to_element
35
+ from .backends import snapshot as backend_snapshot
36
+ from .backends import type_text as backend_type_text
37
+ from .backends import wait_for_stable as backend_wait_for_stable
38
+
39
+ # Agent Layer (Phase 1 & 2)
40
+ from .base_agent import BaseAgent
41
+ from .browser import SentienceBrowser
42
+
43
+ # Tracing (v0.12.0+)
44
+ from .cloud_tracing import CloudTraceSink, SentienceLogger
45
+ from .conversational_agent import ConversationalAgent
46
+ from .expect import expect
47
+ from .generator import ScriptGenerator, generate
48
+ from .inspector import Inspector, inspect
49
+ from .llm_provider import (
50
+ AnthropicProvider,
51
+ LLMProvider,
52
+ LLMResponse,
53
+ LocalLLMProvider,
54
+ OpenAIProvider,
55
+ )
56
+ from .models import ( # Agent Layer Models
57
+ ActionHistory,
58
+ ActionResult,
59
+ ActionTokenUsage,
60
+ AgentActionResult,
61
+ BBox,
62
+ Cookie,
63
+ Element,
64
+ LocalStorageItem,
65
+ OriginStorage,
66
+ ScreenshotConfig,
67
+ Snapshot,
68
+ SnapshotFilter,
69
+ SnapshotOptions,
70
+ StorageState,
71
+ TextContext,
72
+ TextMatch,
73
+ TextRect,
74
+ TextRectSearchResult,
75
+ TokenStats,
76
+ Viewport,
77
+ ViewportRect,
78
+ WaitResult,
79
+ )
80
+
81
+ # Ordinal support (Phase 3)
82
+ from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
83
+ from .overlay import clear_overlay, show_overlay
84
+ from .query import find, query
85
+ from .read import read
86
+ from .recorder import Recorder, Trace, TraceStep, record
87
+ from .screenshot import screenshot
88
+ from .sentience_methods import AgentAction, SentienceMethod
89
+ from .snapshot import snapshot
90
+ from .text_search import find_text_rect
91
+ from .tracer_factory import SENTIENCE_API_URL, create_tracer
92
+ from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
93
+
94
+ # Utilities (v0.12.0+)
95
+ # Import from utils package (re-exports from submodules for backward compatibility)
96
+ from .utils import (
97
+ canonical_snapshot_loose,
98
+ canonical_snapshot_strict,
99
+ compute_snapshot_digests,
100
+ save_storage_state,
101
+ sha256_digest,
102
+ )
103
+
104
+ # Formatting (v0.12.0+)
105
+ from .utils.formatting import format_snapshot_for_llm
106
+
107
+ # Verification (agent assertion loop)
108
+ from .verification import (
109
+ AssertContext,
110
+ AssertOutcome,
111
+ Predicate,
112
+ all_of,
113
+ any_of,
114
+ custom,
115
+ element_count,
116
+ exists,
117
+ not_exists,
118
+ url_contains,
119
+ url_matches,
120
+ )
121
+ from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
122
+ from .wait import wait_for
123
+
124
+ __version__ = "0.95.0"
125
+
126
+ __all__ = [
127
+ # Extension helpers (for browser-use integration)
128
+ "get_extension_dir",
129
+ "get_extension_version",
130
+ "verify_extension_injected",
131
+ "verify_extension_injected_async",
132
+ "verify_extension_version",
133
+ "verify_extension_version_async",
134
+ # Browser backends (for browser-use integration)
135
+ "BrowserBackend",
136
+ "CDPTransport",
137
+ "CDPBackendV0",
138
+ "PlaywrightBackend",
139
+ "BrowserUseAdapter",
140
+ "BrowserUseCDPTransport",
141
+ "ViewportInfo",
142
+ "LayoutMetrics",
143
+ "backend_snapshot",
144
+ "CachedSnapshot",
145
+ # Backend-agnostic actions (prefixed to avoid conflicts)
146
+ "backend_click",
147
+ "backend_type_text",
148
+ "backend_scroll",
149
+ "backend_scroll_to_element",
150
+ "backend_wait_for_stable",
151
+ # Core SDK
152
+ "SentienceBrowser",
153
+ "Snapshot",
154
+ "Element",
155
+ "BBox",
156
+ "Viewport",
157
+ "ActionResult",
158
+ "WaitResult",
159
+ "snapshot",
160
+ "query",
161
+ "find",
162
+ "click",
163
+ "type_text",
164
+ "press",
165
+ "scroll_to",
166
+ "click_rect",
167
+ "wait_for",
168
+ "expect",
169
+ "Inspector",
170
+ "inspect",
171
+ "Recorder",
172
+ "Trace",
173
+ "TraceStep",
174
+ "record",
175
+ "ScriptGenerator",
176
+ "generate",
177
+ "read",
178
+ "screenshot",
179
+ "show_overlay",
180
+ "clear_overlay",
181
+ # Text Search
182
+ "find_text_rect",
183
+ "TextRectSearchResult",
184
+ "TextMatch",
185
+ "TextRect",
186
+ "ViewportRect",
187
+ "TextContext",
188
+ # Agent Layer (Phase 1 & 2)
189
+ "BaseAgent",
190
+ "LLMProvider",
191
+ "LLMResponse",
192
+ "OpenAIProvider",
193
+ "AnthropicProvider",
194
+ "LocalLLMProvider",
195
+ "SentienceAgent",
196
+ "SentienceAgentAsync",
197
+ "SentienceVisualAgent",
198
+ "SentienceVisualAgentAsync",
199
+ "ConversationalAgent",
200
+ # Agent Layer Models
201
+ "AgentActionResult",
202
+ "TokenStats",
203
+ "ActionHistory",
204
+ "ActionTokenUsage",
205
+ "SnapshotOptions",
206
+ "SnapshotFilter",
207
+ "ScreenshotConfig",
208
+ # Storage State Models (Auth Injection)
209
+ "StorageState",
210
+ "Cookie",
211
+ "LocalStorageItem",
212
+ "OriginStorage",
213
+ # Tracing (v0.12.0+)
214
+ "Tracer",
215
+ "TraceSink",
216
+ "JsonlTraceSink",
217
+ "CloudTraceSink",
218
+ "SentienceLogger",
219
+ "TraceEvent",
220
+ "create_tracer",
221
+ "SENTIENCE_API_URL",
222
+ # Utilities (v0.12.0+)
223
+ "canonical_snapshot_strict",
224
+ "canonical_snapshot_loose",
225
+ "compute_snapshot_digests",
226
+ "sha256_digest",
227
+ "save_storage_state",
228
+ # Formatting (v0.12.0+)
229
+ "format_snapshot_for_llm",
230
+ # Agent Config (v0.12.0+)
231
+ "AgentConfig",
232
+ # Enums
233
+ "SentienceMethod",
234
+ "AgentAction",
235
+ # Verification (agent assertion loop)
236
+ "AgentRuntime",
237
+ "AssertContext",
238
+ "AssertOutcome",
239
+ "Predicate",
240
+ "url_matches",
241
+ "url_contains",
242
+ "exists",
243
+ "not_exists",
244
+ "element_count",
245
+ "all_of",
246
+ "any_of",
247
+ "custom",
248
+ # Ordinal support (Phase 3)
249
+ "OrdinalIntent",
250
+ "detect_ordinal_intent",
251
+ "select_by_ordinal",
252
+ "boost_ordinal_elements",
253
+ ]
@@ -0,0 +1,195 @@
1
+ """
2
+ Shared extension loading logic for sync and async implementations.
3
+
4
+ Provides:
5
+ - get_extension_dir(): Returns path to bundled extension (for browser-use integration)
6
+ - verify_extension_injected(): Verifies window.sentience API is available
7
+ - get_extension_version(): Gets extension version from manifest
8
+ - verify_extension_version(): Checks SDK-extension version compatibility
9
+ """
10
+
11
+ import json
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ if TYPE_CHECKING:
16
+ from .protocols import AsyncPageProtocol, PageProtocol
17
+
18
+
19
def find_extension_path() -> Path:
    """
    Locate the Sentience extension directory (used by both sync and async code).

    Candidate directories are tried in this order, and the first one that exists
    and contains a ``manifest.json`` wins:

    1. ``extension/`` next to this module (installed package layout)
    2. ``sentience-chrome/`` three directory levels above this file
       (development / monorepo layout)

    Returns:
        Path to the extension directory

    Raises:
        FileNotFoundError: If neither candidate holds a manifest.json
    """
    module_dir = Path(__file__).parent
    candidates = (
        # sentience/_extension_loader.py -> sentience/extension/
        module_dir / "extension",
        # sentience/_extension_loader.py -> ../sentience-chrome
        module_dir.parent.parent / "sentience-chrome",
    )

    for candidate in candidates:
        manifest = candidate / "manifest.json"
        if candidate.exists() and manifest.exists():
            return candidate

    bundled_dir, dev_dir = candidates
    raise FileNotFoundError(
        f"Extension not found. Checked:\n"
        f"1. {bundled_dir}\n"
        f"2. {dev_dir}\n"
        "Make sure the extension is built and 'sentience/extension' directory exists."
    )
52
+
53
+
54
def get_extension_dir() -> str:
    """
    Return the location of the bundled Sentience extension as a string.

    Handy for handing the extension to browser-use or any other Chromium-based
    launcher via ``--load-extension``:

        from sentience import get_extension_dir
        from browser_use import BrowserSession, BrowserProfile

        profile = BrowserProfile(
            args=[f"--load-extension={get_extension_dir()}"],
        )
        session = BrowserSession(browser_profile=profile)

    Returns:
        The directory found by find_extension_path(), converted with str()

    Raises:
        FileNotFoundError: If find_extension_path() cannot locate the extension
    """
    extension_path = find_extension_path()
    return str(extension_path)
75
+
76
+
77
def get_extension_version() -> str:
    """
    Get the version of the bundled extension from manifest.json.

    Returns:
        Version string (e.g., "2.2.0"), or "unknown" when the manifest has no
        "version" key

    Raises:
        FileNotFoundError: If extension or manifest not found
    """
    ext_path = find_extension_path()
    manifest_path = ext_path / "manifest.json"
    # Decode explicitly as UTF-8: without an encoding argument open() falls back
    # to the locale's preferred encoding, which can mis-decode or reject a
    # manifest containing non-ASCII characters on some platforms.
    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)
    return manifest.get("version", "unknown")
92
+
93
+
94
def verify_extension_injected(page: "PageProtocol") -> bool:
    """
    Check (synchronously) that the page exposes the window.sentience API.

    Intended to be called after navigation to confirm the extension is active:

        browser.goto("https://example.com")
        if not verify_extension_injected(browser.page):
            raise RuntimeError("Extension not injected")

    Args:
        page: Sync page object exposing ``evaluate(script)``

    Returns:
        True when window.sentience.snapshot is a function in the page; False when
        it is not, or when evaluating the probe raises any Exception
    """
    probe_script = (
        "(() => !!(window.sentience && typeof window.sentience.snapshot === 'function'))()"
    )
    try:
        return bool(page.evaluate(probe_script))
    except Exception:
        # Best-effort probe: any evaluation failure is reported as "not injected".
        return False
117
+
118
+
119
async def verify_extension_injected_async(page: "AsyncPageProtocol") -> bool:
    """
    Check (asynchronously) that the page exposes the window.sentience API.

    Intended to be called after navigation to confirm the extension is active:

        await browser.goto("https://example.com")
        if not await verify_extension_injected_async(browser.page):
            raise RuntimeError("Extension not injected")

    Args:
        page: Async page object exposing an awaitable ``evaluate(script)``

    Returns:
        True when window.sentience.snapshot is a function in the page; False when
        it is not, or when evaluating the probe raises any Exception
    """
    probe_script = (
        "(() => !!(window.sentience && typeof window.sentience.snapshot === 'function'))()"
    )
    try:
        return bool(await page.evaluate(probe_script))
    except Exception:
        # Best-effort probe: any evaluation failure is reported as "not injected".
        return False
142
+
143
+
144
+ def verify_extension_version(page: "PageProtocol", expected: str | None = None) -> str | None:
145
+ """
146
+ Check extension version exposed in page (sync).
147
+
148
+ The extension sets window.__SENTIENCE_EXTENSION_VERSION__ when injected.
149
+
150
+ Args:
151
+ page: Playwright Page object (sync)
152
+ expected: If provided, raises RuntimeError on mismatch
153
+
154
+ Returns:
155
+ Version string if found, None if not set (page may not have injected yet)
156
+
157
+ Raises:
158
+ RuntimeError: If expected version provided and doesn't match
159
+ """
160
+ try:
161
+ got = page.evaluate("window.__SENTIENCE_EXTENSION_VERSION__ || null")
162
+ except Exception:
163
+ got = None
164
+
165
+ if expected and got and got != expected:
166
+ raise RuntimeError(f"Sentience extension version mismatch: expected {expected}, got {got}")
167
+ return got
168
+
169
+
170
+ async def verify_extension_version_async(
171
+ page: "AsyncPageProtocol", expected: str | None = None
172
+ ) -> str | None:
173
+ """
174
+ Check extension version exposed in page (async).
175
+
176
+ The extension sets window.__SENTIENCE_EXTENSION_VERSION__ when injected.
177
+
178
+ Args:
179
+ page: Playwright Page object (async)
180
+ expected: If provided, raises RuntimeError on mismatch
181
+
182
+ Returns:
183
+ Version string if found, None if not set (page may not have injected yet)
184
+
185
+ Raises:
186
+ RuntimeError: If expected version provided and doesn't match
187
+ """
188
+ try:
189
+ got = await page.evaluate("window.__SENTIENCE_EXTENSION_VERSION__ || null")
190
+ except Exception:
191
+ got = None
192
+
193
+ if expected and got and got != expected:
194
+ raise RuntimeError(f"Sentience extension version mismatch: expected {expected}, got {got}")
195
+ return got
@@ -0,0 +1,215 @@
1
+ """
2
+ Action Executor for Sentience Agent.
3
+
4
+ Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
5
+ This separates action execution concerns from LLM interaction.
6
+ """
7
+
8
+ import re
9
+ from typing import Any, Union
10
+
11
+ from .actions import click, click_async, press, press_async, type_text, type_text_async
12
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
13
+ from .models import Snapshot
14
+ from .protocols import AsyncBrowserProtocol, BrowserProtocol
15
+
16
+
17
class ActionExecutor:
    """
    Executes actions and handles parsing of action command strings.

    Supported commands (matched case-insensitively at the start of the string):
    CLICK(id), TYPE(id, "text"), PRESS("key") and FINISH().

    Parsing and result formatting live in private helpers shared by execute()
    and execute_async(), so the sync and async paths cannot drift apart and a
    new action type only has to be added in one place.
    """

    # Patterns are compiled once at class creation and reused by both entry points.
    _CLICK_RE = re.compile(r"CLICK\s*\(\s*(\d+)\s*\)", re.IGNORECASE)
    # Original TYPE grammar: the text may not contain any quote character.
    _TYPE_RE = re.compile(
        r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
        re.IGNORECASE,
    )
    # Fallback TYPE grammar: the text may contain the *other* quote character,
    # e.g. TYPE(5, "don't") or TYPE(5, 'say "hi"'). Only consulted when the
    # original grammar does not match, so previously accepted inputs parse
    # exactly as before.
    _TYPE_QUOTED_RE = re.compile(
        r'TYPE\s*\(\s*(\d+)\s*,\s*(?:"([^"]*)"|\'([^\']*)\')\s*\)',
        re.IGNORECASE,
    )
    _PRESS_RE = re.compile(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', re.IGNORECASE)
    _FINISH_RE = re.compile(r"FINISH\s*\(\s*\)", re.IGNORECASE)

    def __init__(
        self,
        browser: SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol,
    ):
        """
        Initialize action executor.

        Args:
            browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
                (for testing, can use mock objects that implement BrowserProtocol)
        """
        self.browser = browser
        # Concrete types are checked first because they are the most reliable signal.
        if isinstance(browser, AsyncSentienceBrowser):
            self._is_async = True
        elif isinstance(browser, SentienceBrowser):
            self._is_async = False
        else:
            # For protocol-style / mock browsers, decide by whether ``start`` is a
            # coroutine function. Anything else (including objects without a
            # ``start`` attribute) is treated as synchronous.
            import inspect

            start_method = getattr(browser, "start", None)
            self._is_async = bool(start_method and inspect.iscoroutinefunction(start_method))

    @classmethod
    def _parse_action(cls, action_str: str) -> dict[str, Any]:
        """
        Parse an action command string into its action name and arguments.

        Args:
            action_str: Action string from LLM, e.g. CLICK(42) or TYPE(15, "text")

        Returns:
            Dict with an "action" key ("click", "type", "press" or "finish") plus
            the parsed arguments: "element_id" (click/type), "text" (type),
            "key" (press).

        Raises:
            ValueError: If action format is unknown
        """
        if match := cls._CLICK_RE.match(action_str):
            return {"action": "click", "element_id": int(match.group(1))}

        if match := cls._TYPE_RE.match(action_str):
            return {
                "action": "type",
                "element_id": int(match.group(1)),
                "text": match.group(2),
            }

        if match := cls._TYPE_QUOTED_RE.match(action_str):
            # Exactly one of the two alternatives captured the text.
            double_quoted, single_quoted = match.group(2), match.group(3)
            text = double_quoted if double_quoted is not None else single_quoted
            return {"action": "type", "element_id": int(match.group(1)), "text": text}

        if match := cls._PRESS_RE.match(action_str):
            return {"action": "press", "key": match.group(1)}

        if cls._FINISH_RE.match(action_str):
            return {"action": "finish"}

        raise ValueError(
            f"Unknown action format: {action_str}\n"
            f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
        )

    @staticmethod
    def _finish_result() -> dict[str, Any]:
        """Build the result dictionary for a FINISH() command (no browser call)."""
        return {
            "success": True,
            "action": "finish",
            "message": "Task marked as complete",
        }

    @staticmethod
    def _build_result(parsed: dict[str, Any], result: Any) -> dict[str, Any]:
        """
        Combine parsed arguments with the SDK call result into the public result dict.

        Args:
            parsed: Output of _parse_action() for a click, type or press command
            result: Object returned by the SDK action call; ``success`` and
                ``outcome`` are read for every action, ``url_changed`` for click
        """
        action = parsed["action"]
        if action == "click":
            return {
                "success": result.success,
                "action": "click",
                "element_id": parsed["element_id"],
                "outcome": result.outcome,
                "url_changed": result.url_changed,
            }
        if action == "type":
            return {
                "success": result.success,
                "action": "type",
                "element_id": parsed["element_id"],
                "text": parsed["text"],
                "outcome": result.outcome,
            }
        return {
            "success": result.success,
            "action": "press",
            "key": parsed["key"],
            "outcome": result.outcome,
        }

    def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
        """
        Parse action string and execute SDK call (synchronous).

        Args:
            action_str: Action string from LLM, e.g. CLICK(42) or TYPE(15, "text")
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary with keys:
            - success: bool
            - action: str (e.g., "click", "type", "press", "finish")
            - element_id: Optional[int] (for click/type actions)
            - text: Optional[str] (for type actions)
            - key: Optional[str] (for press actions)
            - outcome: Optional[str] (action outcome)
            - url_changed: Optional[bool] (for click actions)
            - message: Optional[str] (for finish action)

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on async browser (use execute_async instead)
        """
        # The sync/async mismatch is reported before any parsing is attempted.
        if self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
            )

        parsed = self._parse_action(action_str)
        action = parsed["action"]

        if action == "finish":
            return self._finish_result()
        if action == "click":
            result = click(self.browser, parsed["element_id"])  # type: ignore
        elif action == "type":
            result = type_text(self.browser, parsed["element_id"], parsed["text"])  # type: ignore
        else:
            result = press(self.browser, parsed["key"])  # type: ignore
        return self._build_result(parsed, result)

    async def execute_async(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
        """
        Parse action string and execute SDK call (asynchronous).

        Args:
            action_str: Action string from LLM, e.g. CLICK(42) or TYPE(15, "text")
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary (same format as execute())

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on sync browser (use execute() instead)
        """
        # The sync/async mismatch is reported before any parsing is attempted.
        if not self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
            )

        parsed = self._parse_action(action_str)
        action = parsed["action"]

        if action == "finish":
            return self._finish_result()
        if action == "click":
            result = await click_async(self.browser, parsed["element_id"])  # type: ignore
        elif action == "type":
            result = await type_text_async(  # type: ignore
                self.browser, parsed["element_id"], parsed["text"]
            )
        else:
            result = await press_async(self.browser, parsed["key"])  # type: ignore
        return self._build_result(parsed, result)