sentienceapi 0.90.16__py3-none-any.whl → 0.92.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (61)
  1. sentience/__init__.py +14 -5
  2. sentience/action_executor.py +215 -0
  3. sentience/actions.py +408 -25
  4. sentience/agent.py +802 -293
  5. sentience/agent_config.py +3 -0
  6. sentience/async_api.py +83 -1142
  7. sentience/base_agent.py +95 -0
  8. sentience/browser.py +484 -1
  9. sentience/browser_evaluator.py +299 -0
  10. sentience/cloud_tracing.py +457 -33
  11. sentience/conversational_agent.py +77 -43
  12. sentience/element_filter.py +136 -0
  13. sentience/expect.py +98 -2
  14. sentience/extension/background.js +56 -185
  15. sentience/extension/content.js +117 -289
  16. sentience/extension/injected_api.js +799 -1374
  17. sentience/extension/manifest.json +1 -1
  18. sentience/extension/pkg/sentience_core.js +190 -396
  19. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  20. sentience/extension/release.json +47 -47
  21. sentience/formatting.py +9 -53
  22. sentience/inspector.py +183 -1
  23. sentience/llm_interaction_handler.py +191 -0
  24. sentience/llm_provider.py +74 -52
  25. sentience/llm_provider_utils.py +120 -0
  26. sentience/llm_response_builder.py +153 -0
  27. sentience/models.py +60 -1
  28. sentience/overlay.py +109 -2
  29. sentience/protocols.py +228 -0
  30. sentience/query.py +1 -1
  31. sentience/read.py +95 -3
  32. sentience/recorder.py +223 -3
  33. sentience/schemas/trace_v1.json +102 -9
  34. sentience/screenshot.py +48 -2
  35. sentience/sentience_methods.py +86 -0
  36. sentience/snapshot.py +291 -38
  37. sentience/snapshot_diff.py +141 -0
  38. sentience/text_search.py +119 -5
  39. sentience/trace_event_builder.py +129 -0
  40. sentience/trace_file_manager.py +197 -0
  41. sentience/trace_indexing/index_schema.py +95 -7
  42. sentience/trace_indexing/indexer.py +117 -14
  43. sentience/tracer_factory.py +119 -6
  44. sentience/tracing.py +172 -8
  45. sentience/utils/__init__.py +40 -0
  46. sentience/utils/browser.py +46 -0
  47. sentience/utils/element.py +257 -0
  48. sentience/utils/formatting.py +59 -0
  49. sentience/utils.py +1 -1
  50. sentience/visual_agent.py +2056 -0
  51. sentience/wait.py +68 -2
  52. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/METADATA +2 -1
  53. sentienceapi-0.92.2.dist-info/RECORD +65 -0
  54. sentience/extension/test-content.js +0 -4
  55. sentienceapi-0.90.16.dist-info/RECORD +0 -50
  56. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/WHEEL +0 -0
  57. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/entry_points.txt +0 -0
  58. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE +0 -0
  59. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-APACHE +0 -0
  60. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-MIT +0 -0
  61. {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/top_level.txt +0 -0
sentience/__init__.py CHANGED
@@ -3,7 +3,7 @@ Sentience Python SDK - AI Agent Browser Automation
3
3
  """
4
4
 
5
5
  from .actions import click, click_rect, press, type_text
6
- from .agent import SentienceAgent
6
+ from .agent import SentienceAgent, SentienceAgentAsync
7
7
  from .agent_config import AgentConfig
8
8
 
9
9
  # Agent Layer (Phase 1 & 2)
@@ -14,9 +14,6 @@ from .browser import SentienceBrowser
14
14
  from .cloud_tracing import CloudTraceSink, SentienceLogger
15
15
  from .conversational_agent import ConversationalAgent
16
16
  from .expect import expect
17
-
18
- # Formatting (v0.12.0+)
19
- from .formatting import format_snapshot_for_llm
20
17
  from .generator import ScriptGenerator, generate
21
18
  from .inspector import Inspector, inspect
22
19
  from .llm_provider import (
@@ -55,12 +52,14 @@ from .query import find, query
55
52
  from .read import read
56
53
  from .recorder import Recorder, Trace, TraceStep, record
57
54
  from .screenshot import screenshot
55
+ from .sentience_methods import AgentAction, SentienceMethod
58
56
  from .snapshot import snapshot
59
57
  from .text_search import find_text_rect
60
58
  from .tracer_factory import SENTIENCE_API_URL, create_tracer
61
59
  from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
62
60
 
63
61
  # Utilities (v0.12.0+)
62
+ # Import from utils package (re-exports from submodules for backward compatibility)
64
63
  from .utils import (
65
64
  canonical_snapshot_loose,
66
65
  canonical_snapshot_strict,
@@ -68,9 +67,13 @@ from .utils import (
68
67
  save_storage_state,
69
68
  sha256_digest,
70
69
  )
70
+
71
+ # Formatting (v0.12.0+)
72
+ from .utils.formatting import format_snapshot_for_llm
73
+ from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
71
74
  from .wait import wait_for
72
75
 
73
- __version__ = "0.90.16"
76
+ __version__ = "0.92.2"
74
77
 
75
78
  __all__ = [
76
79
  # Core SDK
@@ -117,6 +120,9 @@ __all__ = [
117
120
  "AnthropicProvider",
118
121
  "LocalLLMProvider",
119
122
  "SentienceAgent",
123
+ "SentienceAgentAsync",
124
+ "SentienceVisualAgent",
125
+ "SentienceVisualAgentAsync",
120
126
  "ConversationalAgent",
121
127
  # Agent Layer Models
122
128
  "AgentActionResult",
@@ -150,4 +156,7 @@ __all__ = [
150
156
  "format_snapshot_for_llm",
151
157
  # Agent Config (v0.12.0+)
152
158
  "AgentConfig",
159
+ # Enums
160
+ "SentienceMethod",
161
+ "AgentAction",
153
162
  ]
sentience/action_executor.py ADDED
@@ -0,0 +1,215 @@
1
+ """
2
+ Action Executor for Sentience Agent.
3
+
4
+ Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
5
+ This separates action execution concerns from LLM interaction.
6
+ """
7
+
8
+ import re
9
+ from typing import Any, Union
10
+
11
+ from .actions import click, click_async, press, press_async, type_text, type_text_async
12
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
13
+ from .models import Snapshot
14
+ from .protocols import AsyncBrowserProtocol, BrowserProtocol
15
+
16
+
17
class ActionExecutor:
    """
    Executes actions and handles parsing of action command strings.

    Supported commands (keywords are case-insensitive, inner whitespace is
    tolerated): ``CLICK(id)``, ``TYPE(id, "text")``, ``PRESS("key")`` and
    ``FINISH()``.

    Parsing is shared between the synchronous and asynchronous entry points
    (see ``_parse_action``), so a new action type or a grammar fix only has to
    be made in one place.
    """

    # All patterns are applied with ``match`` and are therefore anchored at the
    # start of the command only; trailing text after the closing parenthesis
    # is ignored.
    _CLICK_RE = re.compile(r"CLICK\s*\(\s*(\d+)\s*\)", re.IGNORECASE)
    # Legacy TYPE grammar: the text may not contain any quote character.
    _TYPE_RE = re.compile(
        r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
        re.IGNORECASE,
    )
    # Quote-aware TYPE grammar, used only when the legacy pattern fails. The
    # text runs up to the first occurrence of the *opening* quote character
    # that is followed by the closing parenthesis, so the text itself may
    # contain quotes, e.g. TYPE(5, "don't stop").
    _TYPE_QUOTED_RE = re.compile(
        r'TYPE\s*\(\s*(\d+)\s*,\s*(["\'])(.*?)\2\s*\)',
        re.IGNORECASE | re.DOTALL,
    )
    _PRESS_RE = re.compile(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', re.IGNORECASE)
    _FINISH_RE = re.compile(r"FINISH\s*\(\s*\)", re.IGNORECASE)

    def __init__(
        self,
        browser: "SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol",
    ):
        """
        Initialize action executor.

        Args:
            browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
                (for testing, can use mock objects that implement BrowserProtocol)
        """
        self.browser = browser
        # Concrete types are checked first; they decide sync/async directly.
        if isinstance(browser, AsyncSentienceBrowser):
            self._is_async = True
        elif isinstance(browser, SentienceBrowser):
            self._is_async = False
        else:
            # For any other object, look at whether its ``start`` method is a
            # coroutine function. Objects without an async ``start`` (or
            # without ``start`` at all) are treated as synchronous.
            import inspect

            start_method = getattr(browser, "start", None)
            self._is_async = bool(start_method) and inspect.iscoroutinefunction(start_method)

    @classmethod
    def _parse_action(cls, action_str: str) -> tuple[str, tuple[Any, ...]]:
        """
        Parse an action command into ``(kind, args)``.

        Returns:
            - ``("click", (element_id,))``
            - ``("type", (element_id, text))``
            - ``("press", (key,))``
            - ``("finish", ())``

            The typed text is returned exactly as written between the quotes;
            backslash escapes are not interpreted.

        Raises:
            ValueError: If action format is unknown
        """
        if match := cls._CLICK_RE.match(action_str):
            return "click", (int(match.group(1)),)

        # Legacy grammar first, so every input accepted before this fallback
        # existed still parses to exactly the same text.
        if match := cls._TYPE_RE.match(action_str):
            return "type", (int(match.group(1)), match.group(2))
        if match := cls._TYPE_QUOTED_RE.match(action_str):
            return "type", (int(match.group(1)), match.group(3))

        if match := cls._PRESS_RE.match(action_str):
            return "press", (match.group(1),)

        if cls._FINISH_RE.match(action_str):
            return "finish", ()

        raise ValueError(
            f"Unknown action format: {action_str}\n"
            f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
        )

    @staticmethod
    def _finish_result() -> dict[str, Any]:
        """Return the result dictionary for FINISH(); no browser call is made."""
        return {
            "success": True,
            "action": "finish",
            "message": "Task marked as complete",
        }

    @staticmethod
    def _build_result(kind: str, args: tuple[Any, ...], result: Any) -> dict[str, Any]:
        """Build the result dictionary for a click/type/press action result."""
        if kind == "click":
            return {
                "success": result.success,
                "action": "click",
                "element_id": args[0],
                "outcome": result.outcome,
                "url_changed": result.url_changed,
            }
        if kind == "type":
            element_id, text = args
            return {
                "success": result.success,
                "action": "type",
                "element_id": element_id,
                "text": text,
                "outcome": result.outcome,
            }
        return {
            "success": result.success,
            "action": "press",
            "key": args[0],
            "outcome": result.outcome,
        }

    def execute(self, action_str: str, snap: "Snapshot") -> dict[str, Any]:
        """
        Parse action string and execute SDK call (synchronous).

        Args:
            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary with keys:
            - success: bool
            - action: str (e.g., "click", "type", "press", "finish")
            - element_id: Optional[int] (for click/type actions)
            - text: Optional[str] (for type actions)
            - key: Optional[str] (for press actions)
            - outcome: Optional[str] (action outcome)
            - url_changed: Optional[bool] (for click actions)
            - message: Optional[str] (for finish action)

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on async browser (use execute_async instead)
        """
        if self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
            )

        kind, args = self._parse_action(action_str)
        if kind == "finish":
            return self._finish_result()

        if kind == "click":
            result = click(self.browser, *args)  # type: ignore
        elif kind == "type":
            result = type_text(self.browser, *args)  # type: ignore
        else:
            result = press(self.browser, *args)  # type: ignore
        return self._build_result(kind, args, result)

    async def execute_async(self, action_str: str, snap: "Snapshot") -> dict[str, Any]:
        """
        Parse action string and execute SDK call (asynchronous).

        Args:
            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary (same format as execute())

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on sync browser (use execute() instead)
        """
        if not self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
            )

        kind, args = self._parse_action(action_str)
        if kind == "finish":
            return self._finish_result()

        if kind == "click":
            result = await click_async(self.browser, *args)  # type: ignore
        elif kind == "type":
            result = await type_text_async(self.browser, *args)  # type: ignore
        else:
            result = await press_async(self.browser, *args)  # type: ignore
        return self._build_result(kind, args, result)