sentienceapi 0.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (82) hide show
  1. sentience/__init__.py +253 -0
  2. sentience/_extension_loader.py +195 -0
  3. sentience/action_executor.py +215 -0
  4. sentience/actions.py +1020 -0
  5. sentience/agent.py +1181 -0
  6. sentience/agent_config.py +46 -0
  7. sentience/agent_runtime.py +424 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +108 -0
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +343 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +427 -0
  21. sentience/base_agent.py +196 -0
  22. sentience/browser.py +1215 -0
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cli.py +130 -0
  26. sentience/cloud_tracing.py +807 -0
  27. sentience/constants.py +6 -0
  28. sentience/conversational_agent.py +543 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +188 -0
  31. sentience/extension/background.js +104 -0
  32. sentience/extension/content.js +161 -0
  33. sentience/extension/injected_api.js +914 -0
  34. sentience/extension/manifest.json +36 -0
  35. sentience/extension/pkg/sentience_core.d.ts +51 -0
  36. sentience/extension/pkg/sentience_core.js +323 -0
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  39. sentience/extension/release.json +115 -0
  40. sentience/formatting.py +15 -0
  41. sentience/generator.py +202 -0
  42. sentience/inspector.py +367 -0
  43. sentience/llm_interaction_handler.py +191 -0
  44. sentience/llm_provider.py +875 -0
  45. sentience/llm_provider_utils.py +120 -0
  46. sentience/llm_response_builder.py +153 -0
  47. sentience/models.py +846 -0
  48. sentience/ordinal.py +280 -0
  49. sentience/overlay.py +222 -0
  50. sentience/protocols.py +228 -0
  51. sentience/query.py +303 -0
  52. sentience/read.py +188 -0
  53. sentience/recorder.py +589 -0
  54. sentience/schemas/trace_v1.json +335 -0
  55. sentience/screenshot.py +100 -0
  56. sentience/sentience_methods.py +86 -0
  57. sentience/snapshot.py +706 -0
  58. sentience/snapshot_diff.py +126 -0
  59. sentience/text_search.py +262 -0
  60. sentience/trace_event_builder.py +148 -0
  61. sentience/trace_file_manager.py +197 -0
  62. sentience/trace_indexing/__init__.py +27 -0
  63. sentience/trace_indexing/index_schema.py +199 -0
  64. sentience/trace_indexing/indexer.py +414 -0
  65. sentience/tracer_factory.py +322 -0
  66. sentience/tracing.py +449 -0
  67. sentience/utils/__init__.py +40 -0
  68. sentience/utils/browser.py +46 -0
  69. sentience/utils/element.py +257 -0
  70. sentience/utils/formatting.py +59 -0
  71. sentience/utils.py +296 -0
  72. sentience/verification.py +380 -0
  73. sentience/visual_agent.py +2058 -0
  74. sentience/wait.py +139 -0
  75. sentienceapi-0.95.0.dist-info/METADATA +984 -0
  76. sentienceapi-0.95.0.dist-info/RECORD +82 -0
  77. sentienceapi-0.95.0.dist-info/WHEEL +5 -0
  78. sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
  79. sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
  80. sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
  81. sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
  82. sentienceapi-0.95.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,343 @@
1
+ """
2
+ Backend-agnostic actions for browser-use integration.
3
+
4
+ These actions work with any BrowserBackend implementation,
5
+ enabling Sentience grounding with browser-use or other frameworks.
6
+
7
+ Usage with browser-use:
8
+ from sentience.backends import BrowserUseAdapter
9
+ from sentience.backends.actions import click, type_text, scroll
10
+
11
+ adapter = BrowserUseAdapter(session)
12
+ backend = await adapter.create_backend()
13
+
14
+ # Take snapshot and click element
15
+ snap = await snapshot_from_backend(backend)
16
+ element = find(snap, 'role=button[name="Submit"]')
17
+ await click(backend, element.bbox)
18
+ """
19
+
20
+ import asyncio
21
+ import time
22
+ from typing import TYPE_CHECKING, Any, Literal
23
+
24
+ from ..models import ActionResult, BBox, Snapshot
25
+
26
+ if TYPE_CHECKING:
27
+ from .protocol import BrowserBackend
28
+
29
+
30
async def click(
    backend: "BrowserBackend",
    target: BBox | dict[str, float] | tuple[float, float],
    button: Literal["left", "right", "middle"] = "left",
    click_count: int = 1,
    move_first: bool = True,
) -> ActionResult:
    """
    Dispatch a mouse click through the backend at a resolved point.

    Args:
        backend: BrowserBackend implementation that receives the mouse events
        target: Where to click - a BBox or a dict with width/height (its
            center is used), a dict with x/y, or an (x, y) tuple
        button: Which mouse button to press
        click_count: How many clicks to send (1=single, 2=double)
        move_first: When True, move the pointer to the point and pause
            briefly before clicking

    Returns:
        ActionResult with success=True and outcome "dom_updated" when the
        backend calls complete, otherwise success=False with outcome "error"
        and error code "click_failed".

    Raises:
        ValueError: If ``target`` is of an unsupported type. The target is
            resolved before the try block, so this is not converted into
            an ActionResult.

    Example:
        # Click at coordinates
        await click(backend, (100, 200))

        # Double-click the center of an element's bbox
        await click(backend, element.bbox, click_count=2)
    """
    began = time.time()

    def _elapsed_ms() -> int:
        # Wall-clock milliseconds since the action started.
        return int((time.time() - began) * 1000)

    # Resolved outside the try on purpose: an invalid target raises here.
    point_x, point_y = _resolve_coordinates(target)

    try:
        if move_first:
            # Hover first, then give the page a moment to react.
            await backend.mouse_move(point_x, point_y)
            await asyncio.sleep(0.02)

        await backend.mouse_click(
            point_x,
            point_y,
            button=button,
            click_count=click_count,
        )

        return ActionResult(
            success=True,
            duration_ms=_elapsed_ms(),
            outcome="dom_updated",
        )
    except Exception as exc:
        failure = {"code": "click_failed", "reason": str(exc)}
        return ActionResult(
            success=False,
            duration_ms=_elapsed_ms(),
            outcome="error",
            error=failure,
        )
88
+
89
+
90
async def type_text(
    backend: "BrowserBackend",
    text: str,
    target: BBox | dict[str, float] | tuple[float, float] | None = None,
    clear_first: bool = False,
) -> ActionResult:
    """
    Type text, optionally clicking a target first.

    Args:
        backend: BrowserBackend implementation
        text: Text to type
        target: Optional click target before typing (BBox, dict, or tuple)
        clear_first: If True, select all and delete before typing

    Returns:
        ActionResult with success=True and outcome "dom_updated" when every
        backend call completes, otherwise success=False with outcome "error"
        and error code "type_failed". An unsupported ``target`` type is also
        reported this way, because the target is resolved inside the try
        block.

    Example:
        # Type into focused element
        await type_text(backend, "Hello World")

        # Click input then type
        await type_text(backend, "search query", target=search_box.bbox)

        # Clear and type
        await type_text(backend, "new value", target=input.bbox, clear_first=True)
    """
    start_time = time.time()

    try:
        # Click target if provided so it receives focus
        if target is not None:
            x, y = _resolve_coordinates(target)
            await backend.mouse_click(x, y)
            await asyncio.sleep(0.05)  # Wait for focus

        # Clear existing content if requested
        if clear_first:
            # Select everything, then explicitly delete the selection.
            # Selecting alone leaves the old content in place whenever the
            # text typed afterwards is empty.
            await backend.eval("document.execCommand('selectAll')")
            await backend.eval("document.execCommand('delete')")
            await asyncio.sleep(0.02)

        # Type the text
        await backend.type_text(text)

        duration_ms = int((time.time() - start_time) * 1000)
        return ActionResult(
            success=True,
            duration_ms=duration_ms,
            outcome="dom_updated",
        )
    except Exception as e:
        duration_ms = int((time.time() - start_time) * 1000)
        return ActionResult(
            success=False,
            duration_ms=duration_ms,
            outcome="error",
            error={"code": "type_failed", "reason": str(e)},
        )
150
+
151
+
152
async def scroll(
    backend: "BrowserBackend",
    delta_y: float = 300,
    target: BBox | dict[str, float] | tuple[float, float] | None = None,
) -> ActionResult:
    """
    Send a vertical wheel event through the backend.

    Args:
        backend: BrowserBackend implementation
        delta_y: Scroll amount (positive=down, negative=up)
        target: Optional position for the wheel event. When omitted, x and y
            are passed to the backend as None and the backend picks the
            position (documented upstream as the viewport center - confirm
            against the backend's ``wheel`` implementation).

    Returns:
        ActionResult with success=True and outcome "dom_updated" on
        completion, otherwise success=False with outcome "error" and error
        code "scroll_failed".

    Example:
        # Scroll down 300px
        await scroll(backend, 300)

        # Scroll up 500px
        await scroll(backend, -500)

        # Scroll at specific position
        await scroll(backend, 200, target=(500, 300))
    """
    began = time.time()

    def _elapsed_ms() -> int:
        # Wall-clock milliseconds since the action started.
        return int((time.time() - began) * 1000)

    try:
        wheel_x: float | None
        wheel_y: float | None
        if target is None:
            wheel_x, wheel_y = None, None
        else:
            wheel_x, wheel_y = _resolve_coordinates(target)

        await backend.wheel(delta_y=delta_y, x=wheel_x, y=wheel_y)

        # Give the page a moment to settle after the wheel event.
        await asyncio.sleep(0.1)

        return ActionResult(
            success=True,
            duration_ms=_elapsed_ms(),
            outcome="dom_updated",
        )
    except Exception as exc:
        failure = {"code": "scroll_failed", "reason": str(exc)}
        return ActionResult(
            success=False,
            duration_ms=_elapsed_ms(),
            outcome="error",
            error=failure,
        )
206
+
207
+
208
async def scroll_to_element(
    backend: "BrowserBackend",
    element_id: int,
    behavior: Literal["smooth", "instant", "auto"] = "instant",
    block: Literal["start", "center", "end", "nearest"] = "center",
) -> ActionResult:
    """
    Scroll element into view using JavaScript scrollIntoView.

    The element is looked up in ``window.sentience_registry`` by
    ``element_id``. Values are serialised before being embedded in the
    script, so they cannot alter the generated JavaScript.

    Args:
        backend: BrowserBackend implementation
        element_id: Element ID from snapshot (requires sentience_registry)
        behavior: Scroll behavior
        block: Vertical alignment

    Returns:
        ActionResult with success=True and outcome "dom_updated" when the
        page script reports that it scrolled. Otherwise success=False with
        outcome "error" and error code "scroll_failed". This covers an
        element missing from the registry, an ``element_id`` that cannot be
        converted to an integer, and any backend failure.
    """
    import json  # local import: only needed to serialise the JS arguments

    start_time = time.time()

    try:
        # Coerce/serialise instead of interpolating raw values into JS source.
        registry_key = int(element_id)
        scroll_options = json.dumps(
            {"behavior": behavior, "block": block, "inline": "nearest"}
        )

        scrolled = await backend.eval(
            f"""
            (() => {{
                const el = window.sentience_registry && window.sentience_registry[{registry_key}];
                if (el && el.scrollIntoView) {{
                    el.scrollIntoView({scroll_options});
                    return true;
                }}
                return false;
            }})()
            """
        )

        # Wait for scroll animation (smooth scrolling needs longer)
        wait_time = 0.3 if behavior == "smooth" else 0.05
        await asyncio.sleep(wait_time)

        duration_ms = int((time.time() - start_time) * 1000)

        if scrolled:
            return ActionResult(
                success=True,
                duration_ms=duration_ms,
                outcome="dom_updated",
            )
        else:
            return ActionResult(
                success=False,
                duration_ms=duration_ms,
                outcome="error",
                error={"code": "scroll_failed", "reason": "Element not found in registry"},
            )
    except Exception as e:
        duration_ms = int((time.time() - start_time) * 1000)
        return ActionResult(
            success=False,
            duration_ms=duration_ms,
            outcome="error",
            error={"code": "scroll_failed", "reason": str(e)},
        )
273
+
274
+
275
async def wait_for_stable(
    backend: "BrowserBackend",
    state: Literal["interactive", "complete"] = "complete",
    timeout_ms: int = 10000,
) -> ActionResult:
    """
    Wait for page to reach stable state.

    Delegates to ``backend.wait_ready_state`` and converts the result into
    an ActionResult instead of raising.

    Args:
        backend: BrowserBackend implementation
        state: Target document.readyState
        timeout_ms: Maximum wait time in milliseconds

    Returns:
        ActionResult with success=True and outcome "dom_updated" when the
        backend call returns. On a timeout, success=False with error code
        "timeout". On any other exception, success=False with error code
        "wait_failed".
    """
    start_time = time.time()

    try:
        await backend.wait_ready_state(state=state, timeout_ms=timeout_ms)

        duration_ms = int((time.time() - start_time) * 1000)
        return ActionResult(
            success=True,
            duration_ms=duration_ms,
            outcome="dom_updated",
        )
    except (TimeoutError, asyncio.TimeoutError) as e:
        # asyncio.TimeoutError only became an alias of the builtin
        # TimeoutError in Python 3.11. Catch both so that timeouts raised via
        # asyncio on 3.10 are still reported as "timeout".
        duration_ms = int((time.time() - start_time) * 1000)
        return ActionResult(
            success=False,
            duration_ms=duration_ms,
            outcome="error",
            error={"code": "timeout", "reason": str(e)},
        )
    except Exception as e:
        duration_ms = int((time.time() - start_time) * 1000)
        return ActionResult(
            success=False,
            duration_ms=duration_ms,
            outcome="error",
            error={"code": "wait_failed", "reason": str(e)},
        )
318
+
319
+
320
def _resolve_coordinates(
    target: BBox | dict[str, float] | tuple[float, float],
) -> tuple[float, float]:
    """
    Turn a click/scroll target into an (x, y) pair.

    - BBox: center of the box
    - tuple: returned unchanged (its length is not checked)
    - dict with both "width" and "height": center of the described box,
      with missing "x"/"y" treated as 0
    - any other dict: its "x" and "y" values, each defaulting to 0

    Raises:
        ValueError: If ``target`` is none of the supported types.
    """
    if isinstance(target, BBox):
        center_x = target.x + target.width / 2
        center_y = target.y + target.height / 2
        return (center_x, center_y)

    if isinstance(target, tuple):
        return target

    if not isinstance(target, dict):
        raise ValueError(f"Invalid target type: {type(target)}")

    origin_x = target.get("x", 0)
    origin_y = target.get("y", 0)

    # A dict carrying a size is treated like a box: aim for its middle.
    if "width" in target and "height" in target:
        return (origin_x + target["width"] / 2, origin_y + target["height"] / 2)

    # Plain point dict.
    return (origin_x, origin_y)
@@ -0,0 +1,241 @@
1
+ """
2
+ Browser-use adapter for Sentience SDK.
3
+
4
+ This module provides BrowserUseAdapter which wraps browser-use's BrowserSession
5
+ and provides a CDPBackendV0 for Sentience operations.
6
+
7
+ Usage:
8
+ from browser_use import BrowserSession, BrowserProfile
9
+ from sentience import get_extension_dir
10
+ from sentience.backends import BrowserUseAdapter
11
+
12
+ # Create browser-use session with Sentience extension
13
+ profile = BrowserProfile(args=[f"--load-extension={get_extension_dir()}"])
14
+ session = BrowserSession(browser_profile=profile)
15
+ await session.start()
16
+
17
+ # Create Sentience adapter
18
+ adapter = BrowserUseAdapter(session)
19
+ backend = await adapter.create_backend()
20
+
21
+ # Use backend for Sentience operations
22
+ viewport = await backend.refresh_page_info()
23
+ await backend.mouse_click(100, 200)
24
+ """
25
+
26
+ from typing import TYPE_CHECKING, Any
27
+
28
+ from .cdp_backend import CDPBackendV0, CDPTransport
29
+
30
+ if TYPE_CHECKING:
31
+ # Import browser-use types only for type checking
32
+ # This avoids requiring browser-use as a hard dependency
33
+ pass
34
+
35
+
36
class BrowserUseCDPTransport(CDPTransport):
    """
    CDPTransport implementation backed by browser-use's CDP client.

    Commands are routed through the cdp-use call pattern:
    cdp_client.send.Domain.method(params={}, session_id=)
    """

    def __init__(self, cdp_client: Any, session_id: str) -> None:
        """
        Store the client and session used for every command.

        Args:
            cdp_client: browser-use's CDP client (from cdp_session.cdp_client)
            session_id: CDP session ID (from cdp_session.session_id)
        """
        self._client = cdp_client
        self._session_id = session_id

    async def send(self, method: str, params: dict | None = None) -> dict:
        """
        Execute one CDP command and return its response.

        A dotted name such as "Runtime.evaluate" is looked up as
        cdp_client.send.Runtime.evaluate and awaited with
        params={...} and session_id=....

        Args:
            method: CDP method name in "Domain.method" form
            params: Method parameters; None or an empty dict is sent as {}

        Returns:
            The client's response, or an empty dict when it returns None

        Raises:
            ValueError: If ``method`` contains no ".", or the domain or
                method attribute is missing (or None) on the client.
        """
        # "Runtime.evaluate" -> ("Runtime", ".", "evaluate"); only the first
        # dot separates domain from method.
        domain_name, separator, method_name = method.partition(".")
        if not separator:
            raise ValueError(f"Invalid CDP method format: {method}")

        # Resolve the domain namespace on cdp_client.send.
        domain = getattr(self._client.send, domain_name, None)
        if domain is None:
            raise ValueError(f"Unknown CDP domain: {domain_name}")

        # Resolve the callable for this command within the domain.
        handler = getattr(domain, method_name, None)
        if handler is None:
            raise ValueError(f"Unknown CDP method: {method}")

        payload = params if params else {}
        response = await handler(
            params=payload,
            session_id=self._session_id,
        )

        # Normalise a missing response to an empty dict.
        if response is None:
            return {}
        return response
95
+
96
+
97
class BrowserUseAdapter:
    """
    Bridge between a browser-use BrowserSession and Sentience.

    Responsibilities:
    1. Wrap the session's CDP client in a BrowserUseCDPTransport
    2. Build (and cache) a CDPBackendV0 on top of that transport
    3. Expose the session's current page for extension calls

    Example:
        from browser_use import BrowserSession, BrowserProfile
        from sentience import get_extension_dir, snapshot_async, SnapshotOptions
        from sentience.backends import BrowserUseAdapter

        # Setup browser-use with Sentience extension
        profile = BrowserProfile(args=[f"--load-extension={get_extension_dir()}"])
        session = BrowserSession(browser_profile=profile)
        await session.start()

        # Create adapter and backend
        adapter = BrowserUseAdapter(session)
        backend = await adapter.create_backend()

        # Navigate (using browser-use)
        page = await session.get_current_page()
        await page.goto("https://example.com")

        # Take Sentience snapshot (uses extension)
        snap = await snapshot_async(adapter, SnapshotOptions())

        # Use backend for precise clicking
        await backend.mouse_click(snap.elements[0].bbox.x, snap.elements[0].bbox.y)
    """

    def __init__(self, session: Any) -> None:
        """
        Remember the session; nothing is created until create_backend().

        Args:
            session: browser-use BrowserSession instance
        """
        self._session = session
        # Both are filled in lazily by create_backend().
        self._backend: CDPBackendV0 | None = None
        self._transport: BrowserUseCDPTransport | None = None

    @property
    def page(self) -> Any:
        """
        Current page object held by the browser-use session.

        Needed by Sentience snapshot(), which calls window.sentience.snapshot().
        The ``page`` attribute is tried first, then ``_page``, since the
        access pattern may vary by browser-use version.

        Returns:
            The session's page object (a Playwright Page per the upstream docs)

        Raises:
            RuntimeError: If the session exposes neither attribute. The
                message points to get_page_async() when the session only
                offers the async ``get_current_page`` accessor.
        """
        session = self._session
        for attribute in ("page", "_page"):
            if hasattr(session, attribute):
                return getattr(session, attribute)

        if hasattr(session, "get_current_page"):
            # Only an async accessor exists; a property cannot await it.
            raise RuntimeError("Use await adapter.get_page_async() to get the page")
        raise RuntimeError("Could not find page in browser-use session")

    async def get_page_async(self) -> Any:
        """
        Current page object, preferring the session's async accessor.

        Falls back to the synchronous ``page`` property when the session has
        no ``get_current_page`` method.

        Returns:
            The session's page object
        """
        if not hasattr(self._session, "get_current_page"):
            return self.page
        return await self._session.get_current_page()

    @property
    def api_key(self) -> str | None:
        """
        API key for Sentience API (for snapshot compatibility).

        Always None; browser-use users pass api_key via SnapshotOptions.
        """
        return None

    @property
    def api_url(self) -> str | None:
        """
        API URL for Sentience API (for snapshot compatibility).

        Always None, so the default is used.
        """
        return None

    async def create_backend(self) -> CDPBackendV0:
        """
        Return the CDP backend, building it on first use.

        Steps on the first call:
        1. Obtain a CDP session via session.get_or_create_cdp_session()
        2. Wrap its cdp_client/session_id in a BrowserUseCDPTransport
        3. Construct a CDPBackendV0 over that transport

        Later calls return the cached backend.

        Returns:
            CDPBackendV0 instance ready for use

        Raises:
            RuntimeError: If the session has no get_or_create_cdp_session method
        """
        cached = self._backend
        if cached is not None:
            return cached

        session = self._session
        if not hasattr(session, "get_or_create_cdp_session"):
            raise RuntimeError(
                "browser-use session does not have get_or_create_cdp_session method. "
                "Make sure you're using a compatible version of browser-use."
            )

        cdp_session = await session.get_or_create_cdp_session()

        # Pull out what the transport needs from the CDP session.
        client = cdp_session.cdp_client
        cdp_session_id = cdp_session.session_id

        # The transport is stored before the backend is constructed.
        self._transport = BrowserUseCDPTransport(client, cdp_session_id)
        self._backend = CDPBackendV0(self._transport)

        return self._backend

    async def get_transport(self) -> BrowserUseCDPTransport:
        """
        Return the CDP transport, creating the backend first if necessary.

        Returns:
            BrowserUseCDPTransport instance
        """
        if self._transport is None:
            await self.create_backend()
        # create_backend() assigns the transport; the assert narrows the type.
        assert self._transport is not None
        return self._transport