sentienceapi 0.90.16__py3-none-any.whl → 0.92.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentienceapi might be problematic. Click here for more details.
- sentience/__init__.py +14 -5
- sentience/action_executor.py +215 -0
- sentience/actions.py +408 -25
- sentience/agent.py +802 -293
- sentience/agent_config.py +3 -0
- sentience/async_api.py +83 -1142
- sentience/base_agent.py +95 -0
- sentience/browser.py +484 -1
- sentience/browser_evaluator.py +299 -0
- sentience/cloud_tracing.py +457 -33
- sentience/conversational_agent.py +77 -43
- sentience/element_filter.py +136 -0
- sentience/expect.py +98 -2
- sentience/extension/background.js +56 -185
- sentience/extension/content.js +117 -289
- sentience/extension/injected_api.js +799 -1374
- sentience/extension/manifest.json +1 -1
- sentience/extension/pkg/sentience_core.js +190 -396
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/release.json +47 -47
- sentience/formatting.py +9 -53
- sentience/inspector.py +183 -1
- sentience/llm_interaction_handler.py +191 -0
- sentience/llm_provider.py +74 -52
- sentience/llm_provider_utils.py +120 -0
- sentience/llm_response_builder.py +153 -0
- sentience/models.py +60 -1
- sentience/overlay.py +109 -2
- sentience/protocols.py +228 -0
- sentience/query.py +1 -1
- sentience/read.py +95 -3
- sentience/recorder.py +223 -3
- sentience/schemas/trace_v1.json +102 -9
- sentience/screenshot.py +48 -2
- sentience/sentience_methods.py +86 -0
- sentience/snapshot.py +291 -38
- sentience/snapshot_diff.py +141 -0
- sentience/text_search.py +119 -5
- sentience/trace_event_builder.py +129 -0
- sentience/trace_file_manager.py +197 -0
- sentience/trace_indexing/index_schema.py +95 -7
- sentience/trace_indexing/indexer.py +117 -14
- sentience/tracer_factory.py +119 -6
- sentience/tracing.py +172 -8
- sentience/utils/__init__.py +40 -0
- sentience/utils/browser.py +46 -0
- sentience/utils/element.py +257 -0
- sentience/utils/formatting.py +59 -0
- sentience/utils.py +1 -1
- sentience/visual_agent.py +2056 -0
- sentience/wait.py +68 -2
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/METADATA +2 -1
- sentienceapi-0.92.2.dist-info/RECORD +65 -0
- sentience/extension/test-content.js +0 -4
- sentienceapi-0.90.16.dist-info/RECORD +0 -50
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/WHEEL +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/entry_points.txt +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-APACHE +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-MIT +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/top_level.txt +0 -0
|
Binary file
|
sentience/extension/release.json
CHANGED
|
@@ -1,45 +1,45 @@
|
|
|
1
1
|
{
|
|
2
|
-
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/
|
|
3
|
-
"assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/
|
|
4
|
-
"upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/
|
|
5
|
-
"html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v2.0
|
|
6
|
-
"id":
|
|
2
|
+
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/274400382",
|
|
3
|
+
"assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/274400382/assets",
|
|
4
|
+
"upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/274400382/assets{?name,label}",
|
|
5
|
+
"html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v2.2.0",
|
|
6
|
+
"id": 274400382,
|
|
7
7
|
"author": {
|
|
8
|
-
"login": "
|
|
9
|
-
"id":
|
|
10
|
-
"node_id": "
|
|
11
|
-
"avatar_url": "https://avatars.githubusercontent.com/
|
|
8
|
+
"login": "rcholic",
|
|
9
|
+
"id": 135060,
|
|
10
|
+
"node_id": "MDQ6VXNlcjEzNTA2MA==",
|
|
11
|
+
"avatar_url": "https://avatars.githubusercontent.com/u/135060?v=4",
|
|
12
12
|
"gravatar_id": "",
|
|
13
|
-
"url": "https://api.github.com/users/
|
|
14
|
-
"html_url": "https://github.com/
|
|
15
|
-
"followers_url": "https://api.github.com/users/
|
|
16
|
-
"following_url": "https://api.github.com/users/
|
|
17
|
-
"gists_url": "https://api.github.com/users/
|
|
18
|
-
"starred_url": "https://api.github.com/users/
|
|
19
|
-
"subscriptions_url": "https://api.github.com/users/
|
|
20
|
-
"organizations_url": "https://api.github.com/users/
|
|
21
|
-
"repos_url": "https://api.github.com/users/
|
|
22
|
-
"events_url": "https://api.github.com/users/
|
|
23
|
-
"received_events_url": "https://api.github.com/users/
|
|
24
|
-
"type": "
|
|
13
|
+
"url": "https://api.github.com/users/rcholic",
|
|
14
|
+
"html_url": "https://github.com/rcholic",
|
|
15
|
+
"followers_url": "https://api.github.com/users/rcholic/followers",
|
|
16
|
+
"following_url": "https://api.github.com/users/rcholic/following{/other_user}",
|
|
17
|
+
"gists_url": "https://api.github.com/users/rcholic/gists{/gist_id}",
|
|
18
|
+
"starred_url": "https://api.github.com/users/rcholic/starred{/owner}{/repo}",
|
|
19
|
+
"subscriptions_url": "https://api.github.com/users/rcholic/subscriptions",
|
|
20
|
+
"organizations_url": "https://api.github.com/users/rcholic/orgs",
|
|
21
|
+
"repos_url": "https://api.github.com/users/rcholic/repos",
|
|
22
|
+
"events_url": "https://api.github.com/users/rcholic/events{/privacy}",
|
|
23
|
+
"received_events_url": "https://api.github.com/users/rcholic/received_events",
|
|
24
|
+
"type": "User",
|
|
25
25
|
"user_view_type": "public",
|
|
26
26
|
"site_admin": false
|
|
27
27
|
},
|
|
28
|
-
"node_id": "
|
|
29
|
-
"tag_name": "v2.0
|
|
28
|
+
"node_id": "RE_kwDOQshiJ84QWwR-",
|
|
29
|
+
"tag_name": "v2.2.0",
|
|
30
30
|
"target_commitish": "main",
|
|
31
|
-
"name": "Release v2.0
|
|
31
|
+
"name": "Release v2.2.0",
|
|
32
32
|
"draft": false,
|
|
33
33
|
"immutable": false,
|
|
34
34
|
"prerelease": false,
|
|
35
|
-
"created_at": "
|
|
36
|
-
"updated_at": "
|
|
37
|
-
"published_at": "
|
|
35
|
+
"created_at": "2026-01-06T03:10:35Z",
|
|
36
|
+
"updated_at": "2026-01-06T03:16:45Z",
|
|
37
|
+
"published_at": "2026-01-06T03:16:10Z",
|
|
38
38
|
"assets": [
|
|
39
39
|
{
|
|
40
|
-
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/
|
|
41
|
-
"id":
|
|
42
|
-
"node_id": "
|
|
40
|
+
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/336823100",
|
|
41
|
+
"id": 336823100,
|
|
42
|
+
"node_id": "RA_kwDOQshiJ84UE4M8",
|
|
43
43
|
"name": "extension-files.tar.gz",
|
|
44
44
|
"label": "",
|
|
45
45
|
"uploader": {
|
|
@@ -65,17 +65,17 @@
|
|
|
65
65
|
},
|
|
66
66
|
"content_type": "application/gzip",
|
|
67
67
|
"state": "uploaded",
|
|
68
|
-
"size":
|
|
69
|
-
"digest": "sha256:
|
|
70
|
-
"download_count":
|
|
71
|
-
"created_at": "
|
|
72
|
-
"updated_at": "
|
|
73
|
-
"browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.0
|
|
68
|
+
"size": 72250,
|
|
69
|
+
"digest": "sha256:adb68bd89b417f23f32c029c6cf045cc3677588e6a7760b7c8d0deb7e2601dd1",
|
|
70
|
+
"download_count": 0,
|
|
71
|
+
"created_at": "2026-01-06T03:16:44Z",
|
|
72
|
+
"updated_at": "2026-01-06T03:16:45Z",
|
|
73
|
+
"browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.2.0/extension-files.tar.gz"
|
|
74
74
|
},
|
|
75
75
|
{
|
|
76
|
-
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/
|
|
77
|
-
"id":
|
|
78
|
-
"node_id": "
|
|
76
|
+
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/336823099",
|
|
77
|
+
"id": 336823099,
|
|
78
|
+
"node_id": "RA_kwDOQshiJ84UE4M7",
|
|
79
79
|
"name": "extension-package.zip",
|
|
80
80
|
"label": "",
|
|
81
81
|
"uploader": {
|
|
@@ -101,15 +101,15 @@
|
|
|
101
101
|
},
|
|
102
102
|
"content_type": "application/zip",
|
|
103
103
|
"state": "uploaded",
|
|
104
|
-
"size":
|
|
105
|
-
"digest": "sha256:
|
|
104
|
+
"size": 73962,
|
|
105
|
+
"digest": "sha256:7483812c016842fb02add2d6c8d887e321cb9eb89030fee016cf4ea9f812f4bf",
|
|
106
106
|
"download_count": 0,
|
|
107
|
-
"created_at": "
|
|
108
|
-
"updated_at": "
|
|
109
|
-
"browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.0
|
|
107
|
+
"created_at": "2026-01-06T03:16:44Z",
|
|
108
|
+
"updated_at": "2026-01-06T03:16:45Z",
|
|
109
|
+
"browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.2.0/extension-package.zip"
|
|
110
110
|
}
|
|
111
111
|
],
|
|
112
|
-
"tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v2.0
|
|
113
|
-
"zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v2.0
|
|
114
|
-
"body": "
|
|
112
|
+
"tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v2.2.0",
|
|
113
|
+
"zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v2.2.0",
|
|
114
|
+
"body": ""
|
|
115
115
|
}
|
sentience/formatting.py
CHANGED
|
@@ -1,59 +1,15 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Snapshot formatting utilities for LLM prompts.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from typing import List
|
|
9
|
-
|
|
10
|
-
from .models import Snapshot
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def format_snapshot_for_llm(snap: Snapshot, limit: int = 50) -> str:
|
|
14
|
-
"""
|
|
15
|
-
Convert snapshot elements to text format for LLM consumption.
|
|
16
|
-
|
|
17
|
-
This is the canonical way Sentience formats DOM state for LLMs.
|
|
18
|
-
The format includes element ID, role, text preview, visual cues,
|
|
19
|
-
position, and importance score.
|
|
4
|
+
DEPRECATED: This module is maintained for backward compatibility only.
|
|
5
|
+
New code should import from sentience.utils.formatting or sentience directly:
|
|
20
6
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
Returns:
|
|
26
|
-
Formatted string with one element per line
|
|
27
|
-
|
|
28
|
-
Example:
|
|
29
|
-
>>> snap = snapshot(browser)
|
|
30
|
-
>>> formatted = format_snapshot_for_llm(snap, limit=10)
|
|
31
|
-
>>> print(formatted)
|
|
32
|
-
[1] <button> "Sign In" {PRIMARY,CLICKABLE} @ (100,50) (Imp:10)
|
|
33
|
-
[2] <input> "Email address" @ (100,100) (Imp:8)
|
|
34
|
-
...
|
|
35
|
-
"""
|
|
36
|
-
lines: list[str] = []
|
|
37
|
-
|
|
38
|
-
for el in snap.elements[:limit]:
|
|
39
|
-
# Build visual cues string
|
|
40
|
-
cues = []
|
|
41
|
-
if getattr(el.visual_cues, "is_primary", False):
|
|
42
|
-
cues.append("PRIMARY")
|
|
43
|
-
if getattr(el.visual_cues, "is_clickable", False):
|
|
44
|
-
cues.append("CLICKABLE")
|
|
45
|
-
|
|
46
|
-
cues_str = f" {{{','.join(cues)}}}" if cues else ""
|
|
47
|
-
|
|
48
|
-
# Format text preview (truncate to 50 chars)
|
|
49
|
-
text_preview = el.text or ""
|
|
50
|
-
if len(text_preview) > 50:
|
|
51
|
-
text_preview = text_preview[:50] + "..."
|
|
7
|
+
from sentience.utils.formatting import format_snapshot_for_llm
|
|
8
|
+
# or
|
|
9
|
+
from sentience import format_snapshot_for_llm
|
|
10
|
+
"""
|
|
52
11
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
f'[{el.id}] <{el.role}> "{text_preview}"{cues_str} '
|
|
56
|
-
f"@ ({int(el.bbox.x)},{int(el.bbox.y)}) (Imp:{el.importance})"
|
|
57
|
-
)
|
|
12
|
+
# Re-export from new location for backward compatibility
|
|
13
|
+
from .utils.formatting import format_snapshot_for_llm
|
|
58
14
|
|
|
59
|
-
|
|
15
|
+
__all__ = ["format_snapshot_for_llm"]
|
sentience/inspector.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
1
3
|
"""
|
|
2
4
|
Inspector tool - helps developers see what the agent "sees"
|
|
3
5
|
"""
|
|
4
6
|
|
|
5
|
-
from .browser import SentienceBrowser
|
|
7
|
+
from .browser import AsyncSentienceBrowser, SentienceBrowser
|
|
6
8
|
|
|
7
9
|
|
|
8
10
|
class Inspector:
|
|
@@ -183,3 +185,183 @@ def inspect(browser: SentienceBrowser) -> Inspector:
|
|
|
183
185
|
Inspector instance
|
|
184
186
|
"""
|
|
185
187
|
return Inspector(browser)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class InspectorAsync:
    """
    Inspector for debugging - shows element info on hover/click (async).

    Injects a small script into the current page that logs, to the page's
    console, information about the registered element under the mouse
    (on move) and full details of the element that was clicked.

    Can be used as an async context manager: inspection starts on entry and
    stops on exit.
    """

    def __init__(self, browser: AsyncSentienceBrowser):
        """
        Create an inspector bound to a browser.

        Args:
            browser: AsyncSentienceBrowser instance whose ``page`` is inspected
        """
        self.browser = browser
        # Python-side flag: True between a successful start() and stop().
        self._active = False
        self._last_element_id: int | None = None

    async def start(self) -> None:
        """
        Start inspection mode - prints element info on mouse move/click (async).

        Raises:
            RuntimeError: If the browser has no page yet.
        """
        if not self.browser.page:
            raise RuntimeError("Browser not started. Call await browser.start() first.")

        self._active = True

        # Inject inspector script into page. The script is a no-op when the
        # page already reports an active inspector.
        await self.browser.page.evaluate(
            """
            (() => {
                // Remove existing inspector if any
                if (window.__sentience_inspector_active) {
                    return;
                }

                window.__sentience_inspector_active = true;
                window.__sentience_last_element_id = null;

                // Get element at point
                function getElementAtPoint(x, y) {
                    const el = document.elementFromPoint(x, y);
                    if (!el) return null;

                    // Find element in registry
                    if (window.sentience_registry) {
                        for (let i = 0; i < window.sentience_registry.length; i++) {
                            if (window.sentience_registry[i] === el) {
                                return i;
                            }
                        }
                    }
                    return null;
                }

                // Mouse move handler
                function handleMouseMove(e) {
                    if (!window.__sentience_inspector_active) return;

                    const elementId = getElementAtPoint(e.clientX, e.clientY);
                    if (elementId === null || elementId === window.__sentience_last_element_id) {
                        return;
                    }

                    window.__sentience_last_element_id = elementId;

                    // Get element info from snapshot if available
                    if (window.sentience && window.sentience_registry) {
                        const el = window.sentience_registry[elementId];
                        if (el) {
                            const rect = el.getBoundingClientRect();
                            const text = el.getAttribute('aria-label') ||
                                        el.value ||
                                        el.placeholder ||
                                        el.alt ||
                                        (el.innerText || '').substring(0, 50);

                            const role = el.getAttribute('role') || el.tagName.toLowerCase();

                            console.log(`[Sentience Inspector] Element #${elementId}: role=${role}, text="${text}", bbox=(${Math.round(rect.x)}, ${Math.round(rect.y)}, ${Math.round(rect.width)}, ${Math.round(rect.height)})`);
                        }
                    }
                }

                // Click handler
                function handleClick(e) {
                    if (!window.__sentience_inspector_active) return;

                    e.preventDefault();
                    e.stopPropagation();

                    const elementId = getElementAtPoint(e.clientX, e.clientY);
                    if (elementId === null) return;

                    // Get full element info
                    if (window.sentience && window.sentience_registry) {
                        const el = window.sentience_registry[elementId];
                        if (el) {
                            const rect = el.getBoundingClientRect();
                            const info = {
                                id: elementId,
                                tag: el.tagName.toLowerCase(),
                                role: el.getAttribute('role') || 'generic',
                                text: el.getAttribute('aria-label') ||
                                      el.value ||
                                      el.placeholder ||
                                      el.alt ||
                                      (el.innerText || '').substring(0, 100),
                                bbox: {
                                    x: Math.round(rect.x),
                                    y: Math.round(rect.y),
                                    width: Math.round(rect.width),
                                    height: Math.round(rect.height)
                                },
                                attributes: {
                                    id: el.id || null,
                                    class: el.className || null,
                                    name: el.name || null,
                                    type: el.type || null
                                }
                            };

                            console.log('[Sentience Inspector] Clicked element:', JSON.stringify(info, null, 2));

                            // Also try to get from snapshot if available
                            window.sentience.snapshot({ limit: 100 }).then(snap => {
                                const element = snap.elements.find(el => el.id === elementId);
                                if (element) {
                                    console.log('[Sentience Inspector] Snapshot element:', JSON.stringify(element, null, 2));
                                }
                            }).catch(() => {});
                        }
                    }
                }

                // Add event listeners
                document.addEventListener('mousemove', handleMouseMove, true);
                document.addEventListener('click', handleClick, true);

                // Store cleanup function
                window.__sentience_inspector_cleanup = () => {
                    document.removeEventListener('mousemove', handleMouseMove, true);
                    document.removeEventListener('click', handleClick, true);
                    window.__sentience_inspector_active = false;
                };

                console.log('[Sentience Inspector] ✅ Inspection mode active. Hover elements to see info, click to see full details.');
            })();
            """
        )

    async def stop(self) -> None:
        """
        Stop inspection mode (async).

        Safe to call when the browser has no page: the Python-side state is
        still reset, only the in-page cleanup is skipped.
        """
        # Reset the flag first so it never stays True once the page is gone.
        self._active = False

        if not self.browser.page:
            return

        # Cleanup inspector (runs the cleanup hook stored by start(), if any)
        await self.browser.page.evaluate(
            """
            () => {
                if (window.__sentience_inspector_cleanup) {
                    window.__sentience_inspector_cleanup();
                }
            }
            """
        )

    async def __aenter__(self):
        """Context manager entry: start inspection and return the inspector."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: stop inspection; exceptions are not suppressed."""
        await self.stop()
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def inspect_async(browser: AsyncSentienceBrowser) -> InspectorAsync:
    """
    Build an async inspector bound to the given browser.

    Args:
        browser: AsyncSentienceBrowser instance the inspector attaches to

    Returns:
        A new InspectorAsync wrapping ``browser``
    """
    inspector = InspectorAsync(browser)
    return inspector
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM Interaction Handler for Sentience Agent.
|
|
3
|
+
|
|
4
|
+
Handles all LLM-related operations: context building, querying, and response parsing.
|
|
5
|
+
This separates LLM interaction concerns from action execution.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
|
|
10
|
+
from .llm_provider import LLMProvider, LLMResponse
|
|
11
|
+
from .models import Snapshot
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LLMInteractionHandler:
    """
    Handles LLM queries and response parsing for Sentience Agent.

    This class encapsulates all LLM interaction logic, making it easier to:
    - Test LLM interactions independently
    - Swap LLM providers without changing agent code
    - Modify prompt templates in one place
    """

    # Markdown code fence, optionally followed by a language tag. The tag is
    # only consumed when a newline follows it, so an inline fence such as
    # ```CLICK(42)``` loses just the backticks and keeps the command intact.
    _CODE_FENCE_RE = re.compile(r"```(?:\w*\r?\n)?")

    # Matches: CLICK(123), TYPE(123, "text"), PRESS("key"), FINISH()
    _ACTION_RE = re.compile(
        r'(CLICK\s*\(\s*\d+\s*\)|TYPE\s*\(\s*\d+\s*,\s*["\'].*?["\']\s*\)|PRESS\s*\(\s*["\'].*?["\']\s*\)|FINISH\s*\(\s*\))',
        re.IGNORECASE,
    )

    def __init__(self, llm: LLMProvider):
        """
        Initialize LLM interaction handler.

        Args:
            llm: LLM provider instance (OpenAIProvider, AnthropicProvider, etc.)
        """
        self.llm = llm

    def build_context(self, snap: Snapshot, goal: str | None = None) -> str:
        """
        Convert snapshot elements to token-efficient prompt string.

        Format: [ID] <role> "text" {cues} @ position size:WxH importance:score [status]

        Args:
            snap: Snapshot object
            goal: Optional user goal (for context, currently unused but kept for API consistency)

        Returns:
            Formatted element context string, one element per line. Elements
            whose diff_status is "REMOVED" are left out.
        """
        return "\n".join(
            self._format_element(el)
            for el in snap.elements
            # Skip REMOVED elements - they're not actionable and shouldn't be in LLM context
            if el.diff_status != "REMOVED"
        )

    @staticmethod
    def _format_element(el) -> str:
        """Render one snapshot element as a single context line."""
        # Visual cues, rendered as " {A,B}" or omitted when there are none
        cues: list[str] = []
        if el.visual_cues.is_primary:
            cues.append("PRIMARY")
        if el.visual_cues.is_clickable:
            cues.append("CLICKABLE")
        if el.visual_cues.background_color_name:
            cues.append(f"color:{el.visual_cues.background_color_name}")
        cues_str = f" {{{','.join(cues)}}}" if cues else ""

        # Quoted text preview; text over 50 chars is cut and marked with "..."
        text_preview = ""
        if el.text:
            if len(el.text) > 50:
                text_preview = f'"{el.text[:50]}..."'
            else:
                text_preview = f'"{el.text}"'

        # Position and size, truncated to whole pixels
        x, y = int(el.bbox.x), int(el.bbox.y)
        width, height = int(el.bbox.width), int(el.bbox.height)
        position_str = f"@ ({x},{y})"
        size_str = f"size:{width}x{height}"

        # Status indicators (only included when relevant)
        status_parts = []
        if not el.in_viewport:
            status_parts.append("not_in_viewport")
        if el.is_occluded:
            status_parts.append("occluded")
        if el.diff_status:
            status_parts.append(f"diff:{el.diff_status}")
        status_str = f" [{','.join(status_parts)}]" if status_parts else ""

        # Format: [ID] <role> "text" {cues} @ (x,y) size:WxH importance:score [status]
        return (
            f"[{el.id}] <{el.role}> {text_preview}{cues_str} "
            f"{position_str} {size_str} importance:{el.importance}{status_str}"
        )

    def query_llm(self, dom_context: str, goal: str) -> LLMResponse:
        """
        Query LLM with standardized prompt template.

        Args:
            dom_context: Formatted element context from build_context()
            goal: User goal

        Returns:
            LLMResponse from LLM provider
        """
        system_prompt = f"""You are an AI web automation agent.

GOAL: {goal}

VISIBLE ELEMENTS (sorted by importance):
{dom_context}

VISUAL CUES EXPLAINED:
After the text, you may see visual cues in curly braces like {{CLICKABLE}} or {{PRIMARY,CLICKABLE,color:white}}:
- PRIMARY: Main call-to-action element on the page
- CLICKABLE: Element is clickable/interactive
- color:X: Background color name (e.g., color:white, color:blue)
Multiple cues are comma-separated inside the braces: {{CLICKABLE,color:white}}

ELEMENT FORMAT EXPLAINED:
Each element line follows this format:
[ID] <role> "text" {{cues}} @ (x,y) size:WxH importance:score [status]

Example: [346] <button> "Computer Accessories" {{CLICKABLE,color:white}} @ (664,100) size:150x40 importance:811

Breaking down each part:
- [ID]: The number in brackets is the element ID - use this EXACT number in CLICK/TYPE commands
Example: If you see [346], use CLICK(346) or TYPE(346, "text")
- <role>: Element type (button, link, textbox, etc.)
- "text": Visible text content (truncated with "..." if long)
- {{cues}}: Optional visual cues in curly braces (e.g., {{CLICKABLE}}, {{PRIMARY,CLICKABLE}}, {{CLICKABLE,color:white}})
If no cues, this part is omitted entirely
- @ (x,y): Element position in pixels from top-left corner
- size:WxH: Element dimensions (width x height in pixels)
- importance: Score indicating element relevance (higher = more important)
- [status]: Optional status flags in brackets (not_in_viewport, occluded, diff:ADDED/MODIFIED/etc)

CRITICAL RESPONSE FORMAT:
You MUST respond with ONLY ONE of these exact action formats:
- CLICK(id) - Click element by ID (use the number from [ID] brackets)
- TYPE(id, "text") - Type text into element (use the number from [ID] brackets)
- PRESS("key") - Press keyboard key (Enter, Escape, Tab, ArrowDown, etc)
- FINISH() - Task complete

DO NOT include any explanation, reasoning, or natural language.
DO NOT use markdown formatting or code blocks.
DO NOT say "The next step is..." or anything similar.

CORRECT Examples (matching element IDs from the list above):
If element is [346] <button> "Click me" → respond: CLICK(346)
If element is [15] <textbox> "Search" → respond: TYPE(15, "magic mouse")
PRESS("Enter")
FINISH()

INCORRECT Examples (DO NOT DO THIS):
"The next step is to click..."
"I will type..."
```CLICK(42)```
"""

        user_prompt = "Return the single action command:"

        # temperature=0.0 is passed through to the provider for every query
        return self.llm.generate(system_prompt, user_prompt, temperature=0.0)

    def extract_action(self, response: str) -> str:
        """
        Extract action command from LLM response.

        Handles cases where the LLM adds extra explanation or wraps the
        command in a markdown code fence despite instructions.

        Args:
            response: Raw LLM response text

        Returns:
            Cleaned action command string (e.g., "CLICK(42)", "TYPE(15, \"text\")").
            If no known action pattern is found, the fence-stripped, trimmed
            response is returned unchanged (it will likely fail parsing).
        """
        # Remove markdown code fences if present (block or inline)
        response = self._CODE_FENCE_RE.sub("", response).strip()

        # First action-shaped substring wins; matching is case-insensitive
        match = self._ACTION_RE.search(response)
        if match:
            return match.group(1)

        # If no pattern match, return the cleaned response (will likely fail parsing)
        return response
|