sentienceapi 0.90.16__py3-none-any.whl → 0.92.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentienceapi might be problematic. Click here for more details.
- sentience/__init__.py +14 -5
- sentience/action_executor.py +215 -0
- sentience/actions.py +408 -25
- sentience/agent.py +802 -293
- sentience/agent_config.py +3 -0
- sentience/async_api.py +83 -1142
- sentience/base_agent.py +95 -0
- sentience/browser.py +484 -1
- sentience/browser_evaluator.py +299 -0
- sentience/cloud_tracing.py +457 -33
- sentience/conversational_agent.py +77 -43
- sentience/element_filter.py +136 -0
- sentience/expect.py +98 -2
- sentience/extension/background.js +56 -185
- sentience/extension/content.js +117 -289
- sentience/extension/injected_api.js +799 -1374
- sentience/extension/manifest.json +1 -1
- sentience/extension/pkg/sentience_core.js +190 -396
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/release.json +47 -47
- sentience/formatting.py +9 -53
- sentience/inspector.py +183 -1
- sentience/llm_interaction_handler.py +191 -0
- sentience/llm_provider.py +74 -52
- sentience/llm_provider_utils.py +120 -0
- sentience/llm_response_builder.py +153 -0
- sentience/models.py +60 -1
- sentience/overlay.py +109 -2
- sentience/protocols.py +228 -0
- sentience/query.py +1 -1
- sentience/read.py +95 -3
- sentience/recorder.py +223 -3
- sentience/schemas/trace_v1.json +102 -9
- sentience/screenshot.py +48 -2
- sentience/sentience_methods.py +86 -0
- sentience/snapshot.py +291 -38
- sentience/snapshot_diff.py +141 -0
- sentience/text_search.py +119 -5
- sentience/trace_event_builder.py +129 -0
- sentience/trace_file_manager.py +197 -0
- sentience/trace_indexing/index_schema.py +95 -7
- sentience/trace_indexing/indexer.py +117 -14
- sentience/tracer_factory.py +119 -6
- sentience/tracing.py +172 -8
- sentience/utils/__init__.py +40 -0
- sentience/utils/browser.py +46 -0
- sentience/utils/element.py +257 -0
- sentience/utils/formatting.py +59 -0
- sentience/utils.py +1 -1
- sentience/visual_agent.py +2056 -0
- sentience/wait.py +68 -2
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/METADATA +2 -1
- sentienceapi-0.92.2.dist-info/RECORD +65 -0
- sentience/extension/test-content.js +0 -4
- sentienceapi-0.90.16.dist-info/RECORD +0 -50
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/WHEEL +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/entry_points.txt +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-APACHE +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/licenses/LICENSE-MIT +0 -0
- {sentienceapi-0.90.16.dist-info → sentienceapi-0.92.2.dist-info}/top_level.txt +0 -0
sentience/snapshot.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Snapshot functionality - calls window.sentience.snapshot() or server-side API
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import asyncio
|
|
5
6
|
import json
|
|
6
7
|
import os
|
|
7
8
|
import time
|
|
@@ -9,8 +10,10 @@ from typing import Any, Optional
|
|
|
9
10
|
|
|
10
11
|
import requests
|
|
11
12
|
|
|
12
|
-
from .browser import SentienceBrowser
|
|
13
|
+
from .browser import AsyncSentienceBrowser, SentienceBrowser
|
|
14
|
+
from .browser_evaluator import BrowserEvaluator
|
|
13
15
|
from .models import Snapshot, SnapshotOptions
|
|
16
|
+
from .sentience_methods import SentienceMethod
|
|
14
17
|
|
|
15
18
|
# Maximum payload size for API requests (10MB server limit)
|
|
16
19
|
MAX_PAYLOAD_BYTES = 10 * 1024 * 1024
|
|
@@ -93,33 +96,16 @@ def _snapshot_via_extension(
|
|
|
93
96
|
# CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
|
|
94
97
|
# The new architecture loads injected_api.js asynchronously, so window.sentience
|
|
95
98
|
# may not be immediately available after page load
|
|
96
|
-
|
|
97
|
-
browser.page.wait_for_function(
|
|
98
|
-
"typeof window.sentience !== 'undefined'",
|
|
99
|
-
timeout=5000, # 5 second timeout
|
|
100
|
-
)
|
|
101
|
-
except Exception as e:
|
|
102
|
-
# Gather diagnostics if wait fails
|
|
103
|
-
try:
|
|
104
|
-
diag = browser.page.evaluate(
|
|
105
|
-
"""() => ({
|
|
106
|
-
sentience_defined: typeof window.sentience !== 'undefined',
|
|
107
|
-
extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
|
|
108
|
-
url: window.location.href
|
|
109
|
-
})"""
|
|
110
|
-
)
|
|
111
|
-
except Exception:
|
|
112
|
-
diag = {"error": "Could not gather diagnostics"}
|
|
113
|
-
|
|
114
|
-
raise RuntimeError(
|
|
115
|
-
f"Sentience extension failed to inject window.sentience API. "
|
|
116
|
-
f"Is the extension loaded? Diagnostics: {diag}"
|
|
117
|
-
) from e
|
|
99
|
+
BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
|
|
118
100
|
|
|
119
101
|
# Build options dict for extension API (exclude save_trace/trace_path)
|
|
120
102
|
ext_options: dict[str, Any] = {}
|
|
121
103
|
if options.screenshot is not False:
|
|
122
|
-
|
|
104
|
+
# Serialize ScreenshotConfig to dict if it's a Pydantic model
|
|
105
|
+
if hasattr(options.screenshot, "model_dump"):
|
|
106
|
+
ext_options["screenshot"] = options.screenshot.model_dump()
|
|
107
|
+
else:
|
|
108
|
+
ext_options["screenshot"] = options.screenshot
|
|
123
109
|
if options.limit != 50:
|
|
124
110
|
ext_options["limit"] = options.limit
|
|
125
111
|
if options.filter is not None:
|
|
@@ -177,26 +163,14 @@ def _snapshot_via_api(
|
|
|
177
163
|
|
|
178
164
|
# CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
|
|
179
165
|
# Even for API mode, we need the extension to collect raw data locally
|
|
180
|
-
|
|
181
|
-
browser.page.wait_for_function("typeof window.sentience !== 'undefined'", timeout=5000)
|
|
182
|
-
except Exception as e:
|
|
183
|
-
raise RuntimeError(
|
|
184
|
-
"Sentience extension failed to inject. Cannot collect raw data for API processing."
|
|
185
|
-
) from e
|
|
166
|
+
BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
|
|
186
167
|
|
|
187
168
|
# Step 1: Get raw data from local extension (always happens locally)
|
|
188
169
|
raw_options: dict[str, Any] = {}
|
|
189
170
|
if options.screenshot is not False:
|
|
190
171
|
raw_options["screenshot"] = options.screenshot
|
|
191
172
|
|
|
192
|
-
raw_result = browser.page.
|
|
193
|
-
"""
|
|
194
|
-
(options) => {
|
|
195
|
-
return window.sentience.snapshot(options);
|
|
196
|
-
}
|
|
197
|
-
""",
|
|
198
|
-
raw_options,
|
|
199
|
-
)
|
|
173
|
+
raw_result = BrowserEvaluator.invoke(browser.page, SentienceMethod.SNAPSHOT, **raw_options)
|
|
200
174
|
|
|
201
175
|
# Save trace if requested (save raw data before API processing)
|
|
202
176
|
if options.save_trace:
|
|
@@ -272,3 +246,282 @@ def _snapshot_via_api(
|
|
|
272
246
|
return Snapshot(**snapshot_data)
|
|
273
247
|
except requests.exceptions.RequestException as e:
|
|
274
248
|
raise RuntimeError(f"API request failed: {e}")
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# ========== Async Snapshot Functions ==========
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
async def snapshot_async(
    browser: AsyncSentienceBrowser,
    options: SnapshotOptions | None = None,
) -> Snapshot:
    """
    Capture a snapshot of the page currently open in ``browser`` (async).

    Dispatches to the server-side API path when it is requested (or implied
    by the presence of an API key) and an API key is actually available;
    otherwise falls back to the local extension path.

    Args:
        browser: AsyncSentienceBrowser instance
        options: Snapshot options (screenshot, limit, filter, etc.).
            When None, a default ``SnapshotOptions()`` is used.

    Returns:
        Snapshot object

    Example:
        # Basic snapshot with defaults
        snap = await snapshot_async(browser)

        # With options
        snap = await snapshot_async(browser, SnapshotOptions(
            screenshot=True,
            limit=100,
            show_overlay=True
        ))
    """
    opts = SnapshotOptions() if options is None else options

    # An explicit ``use_api`` setting wins; when it is left unset the choice
    # is inferred from whether the browser carries an API key.
    if opts.use_api is None:
        wants_api = browser.api_key is not None
    else:
        wants_api = opts.use_api

    # The server-side path (Pro/Enterprise tier) is only usable with a
    # truthy API key, even if the caller explicitly asked for it.
    if wants_api and browser.api_key:
        return await _snapshot_via_api_async(browser, opts)

    # Local extension path (Free tier).
    return await _snapshot_via_extension_async(browser, opts)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
async def _snapshot_via_extension_async(
    browser: AsyncSentienceBrowser,
    options: SnapshotOptions,
) -> Snapshot:
    """
    Take a snapshot through the locally injected extension (Free tier) - async.

    Waits up to 5 seconds for ``window.sentience`` to appear on the page,
    calls ``window.sentience.snapshot`` with the relevant options, optionally
    saves a trace and draws the overlay, and returns the validated result.

    Args:
        browser: AsyncSentienceBrowser whose ``page`` is already started.
        options: Snapshot options controlling screenshot, limit, filter,
            trace saving and overlay display.

    Returns:
        Snapshot built from the dict returned by the extension.

    Raises:
        RuntimeError: If the browser has no page, or the extension API does
            not become available within the timeout.
    """
    page = browser.page
    if not page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    # The extension injects its API asynchronously, so poll for it first.
    try:
        await page.wait_for_function(
            "typeof window.sentience !== 'undefined'",
            timeout=5000,
        )
    except Exception as wait_error:
        # Best-effort diagnostics; if even this evaluation fails, say so
        # rather than masking the original timeout.
        try:
            diag = await page.evaluate(
                """() => ({
                    sentience_defined: typeof window.sentience !== 'undefined',
                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
                    url: window.location.href
                })"""
            )
        except Exception:
            diag = {"error": "Could not gather diagnostics"}

        raise RuntimeError(
            f"Sentience extension failed to inject window.sentience API. "
            f"Is the extension loaded? Diagnostics: {diag}"
        ) from wait_error

    # Only forward settings that differ from the extension's defaults.
    ext_options: dict[str, Any] = {}

    shot = options.screenshot
    if shot is not False:
        # A ScreenshotConfig (Pydantic model) must be serialized to a dict.
        ext_options["screenshot"] = shot.model_dump() if hasattr(shot, "model_dump") else shot

    if options.limit != 50:
        ext_options["limit"] = options.limit

    element_filter = options.filter
    if element_filter is not None:
        if hasattr(element_filter, "model_dump"):
            ext_options["filter"] = element_filter.model_dump()
        else:
            ext_options["filter"] = element_filter

    # Ask the extension for the snapshot.
    result = await page.evaluate(
        """
        (options) => {
            return window.sentience.snapshot(options);
        }
        """,
        ext_options,
    )

    # An error reported by the extension is printed, not raised.
    if result.get("error"):
        print(f" Snapshot error: {result.get('error')}")

    # Persist the raw elements when a trace was requested.
    if options.save_trace:
        _save_trace_to_file(result.get("raw_elements", []), options.trace_path)

    # Draw the overlay only when requested and there is something to draw.
    if options.show_overlay:
        overlay_elements = result.get("raw_elements", [])
        if overlay_elements:
            await page.evaluate(
                """
                (elements) => {
                    if (window.sentience && window.sentience.showOverlay) {
                        window.sentience.showOverlay(elements, null);
                    }
                }
                """,
                overlay_elements,
            )

    # Derive screenshot_format from the data URL when the extension omitted
    # it, e.g. "data:image/jpeg;base64,..." or "data:image/png;base64,...".
    data_url = result.get("screenshot")
    if data_url and not result.get("screenshot_format"):
        if data_url.startswith("data:image/"):
            subtype = data_url.split(";")[0].split("/")[-1]
            if subtype in ("jpeg", "jpg"):
                result["screenshot_format"] = "jpeg"
            elif subtype == "png":
                result["screenshot_format"] = "png"

    # Validate and parse with Pydantic.
    return Snapshot(**result)
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
async def _snapshot_via_api_async(
    browser: AsyncSentienceBrowser,
    options: SnapshotOptions,
) -> Snapshot:
    """
    Take a snapshot using the server-side API (Pro/Enterprise tier) - async.

    Raw element data (and the optional screenshot) is first collected locally
    through ``window.sentience.snapshot``. The raw elements are then POSTed to
    ``{browser.api_url}/v1/snapshot`` and the response is merged with the
    locally captured screenshot, which is never sent to the server.

    Args:
        browser: AsyncSentienceBrowser with a started page, ``api_key`` and
            ``api_url``.
        options: Snapshot options (screenshot, limit, filter, goal, trace and
            overlay settings).

    Returns:
        Snapshot built from the API response plus the local screenshot.

    Raises:
        RuntimeError: If the browser has no page, the extension does not
            inject within 5 seconds, ``httpx`` is not installed, or the API
            request / response handling fails.
        ValueError: If the API key or URL is missing, or the serialized
            payload exceeds ``MAX_PAYLOAD_BYTES``.
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    if not browser.api_key:
        raise ValueError("API key required for server-side processing")

    if not browser.api_url:
        raise ValueError("API URL required for server-side processing")

    # Even in API mode the raw data is gathered by the local extension.
    try:
        await browser.page.wait_for_function(
            "typeof window.sentience !== 'undefined'", timeout=5000
        )
    except Exception as e:
        raise RuntimeError(
            "Sentience extension failed to inject. Cannot collect raw data for API processing."
        ) from e

    # Step 1: Get raw data from local extension (including screenshot)
    raw_options: dict[str, Any] = {}
    if options.screenshot is not False:
        # Serialize ScreenshotConfig to dict if it's a Pydantic model
        if hasattr(options.screenshot, "model_dump"):
            raw_options["screenshot"] = options.screenshot.model_dump()
        else:
            raw_options["screenshot"] = options.screenshot

    raw_result = await browser.page.evaluate(
        """
        (options) => {
            return window.sentience.snapshot(options);
        }
        """,
        raw_options,
    )

    # The extension captures the screenshot but the API does not echo it
    # back, so keep it (and its format, parsed once from the data URL
    # "data:image/<subtype>;base64,...") for the final merge.
    screenshot_data_url = raw_result.get("screenshot")
    screenshot_format = None
    if screenshot_data_url and screenshot_data_url.startswith("data:image/"):
        subtype = screenshot_data_url.split(";")[0].split("/")[-1]
        if subtype in ("jpeg", "jpg"):
            screenshot_format = "jpeg"
        elif subtype == "png":
            screenshot_format = "png"

    # Save trace if requested (raw data, before API processing)
    if options.save_trace:
        _save_trace_to_file(raw_result.get("raw_elements", []), options.trace_path)

    # Step 2: Send to server for smart ranking/filtering
    payload = {
        "raw_elements": raw_result.get("raw_elements", []),
        "url": raw_result.get("url", ""),
        "viewport": raw_result.get("viewport"),
        "goal": options.goal,
        "options": {
            "limit": options.limit,
            "filter": options.filter.model_dump() if options.filter else None,
        },
    }

    # Reject oversized payloads locally instead of letting the server do it.
    payload_json = json.dumps(payload)
    payload_size = len(payload_json.encode("utf-8"))
    if payload_size > MAX_PAYLOAD_BYTES:
        raise ValueError(
            f"Payload size ({payload_size / 1024 / 1024:.2f}MB) exceeds server limit "
            f"({MAX_PAYLOAD_BYTES / 1024 / 1024:.0f}MB). "
            f"Try reducing the number of elements on the page or filtering elements."
        )

    headers = {
        "Authorization": f"Bearer {browser.api_key}",
        "Content-Type": "application/json",
    }

    # Lazy import: httpx is only needed for async API calls. Kept in its own
    # try so that only a genuinely missing httpx yields the install hint.
    try:
        import httpx
    except ImportError as e:
        raise RuntimeError(
            "httpx is required for async API calls. Install it with: pip install httpx"
        ) from e

    try:
        # The client is only needed for the round-trip itself; it is closed
        # before overlay rendering and model validation.
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{browser.api_url}/v1/snapshot",
                content=payload_json,
                headers=headers,
            )
            response.raise_for_status()
            api_result = response.json()

        # Merge API result with local data
        snapshot_data = {
            "status": api_result.get("status", "success"),
            "timestamp": api_result.get("timestamp"),
            "url": api_result.get("url", raw_result.get("url", "")),
            "viewport": api_result.get("viewport", raw_result.get("viewport")),
            "elements": api_result.get("elements", []),
            "screenshot": screenshot_data_url,  # locally captured screenshot
            "screenshot_format": screenshot_format,  # parsed from the data URL
            "error": api_result.get("error"),
        }

        # Show visual overlay if requested
        if options.show_overlay:
            elements = api_result.get("elements", [])
            if elements:
                await browser.page.evaluate(
                    """
                    (elements) => {
                        if (window.sentience && window.sentience.showOverlay) {
                            window.sentience.showOverlay(elements, null);
                        }
                    }
                    """,
                    elements,
                )

        return Snapshot(**snapshot_data)
    except Exception as e:
        # Same exception type and message as before; the original error is
        # now attached as the explicit cause.
        raise RuntimeError(f"API request failed: {e}") from e
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Snapshot comparison utilities for diff_status detection.
|
|
3
|
+
|
|
4
|
+
Implements change detection logic for the Diff Overlay feature.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Literal
|
|
8
|
+
|
|
9
|
+
from .models import Element, Snapshot
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SnapshotDiff:
    """
    Utility for comparing snapshots and computing diff_status for elements.

    Implements the logic described in DIFF_STATUS_GAP_ANALYSIS.md:
    - ADDED: Element exists in current but not in previous
    - REMOVED: Element existed in previous but not in current
    - MODIFIED: Element exists in both but its content changed
    - MOVED: Element exists in both and only its bounding box changed
    """

    @staticmethod
    def _has_bbox_changed(el1: Element, el2: Element, threshold: float = 5.0) -> bool:
        """
        Check if element's bounding box has changed significantly.

        Args:
            el1: First element
            el2: Second element
            threshold: Change threshold in pixels (default: 5.0); a
                difference must be strictly greater than this to count.

        Returns:
            True if x, y, width or height differ by more than ``threshold``
        """
        box1, box2 = el1.bbox, el2.bbox
        deltas = (
            box1.x - box2.x,
            box1.y - box2.y,
            box1.width - box2.width,
            box1.height - box2.height,
        )
        return any(abs(delta) > threshold for delta in deltas)

    @staticmethod
    def _has_content_changed(el1: Element, el2: Element) -> bool:
        """
        Check if element's content has changed.

        Args:
            el1: First element
            el2: Second element

        Returns:
            True if text, role, or the ``is_primary`` / ``is_clickable``
            visual cues differ
        """
        return (
            el1.text != el2.text
            or el1.role != el2.role
            or el1.visual_cues.is_primary != el2.visual_cues.is_primary
            or el1.visual_cues.is_clickable != el2.visual_cues.is_clickable
        )

    @staticmethod
    def _with_status(el: Element, status: str | None) -> Element:
        """Return a new Element rebuilt from ``el`` with ``diff_status`` set."""
        data = el.model_dump()
        data["diff_status"] = status
        return Element(**data)

    @staticmethod
    def compute_diff_status(
        current: Snapshot,
        previous: Snapshot | None,
    ) -> list[Element]:
        """
        Compare current snapshot with previous and set diff_status on elements.

        Elements are matched by ``id``. The input snapshots are not mutated;
        every returned element is a fresh copy.

        Args:
            current: Current snapshot
            previous: Previous snapshot (None if this is the first snapshot)

        Returns:
            Copies of the current elements in their original order, followed
            by REMOVED copies of elements only present in ``previous``, in the
            order they appear in the previous snapshot.
        """
        # If no previous snapshot, all current elements are ADDED
        if previous is None:
            return [SnapshotDiff._with_status(el, "ADDED") for el in current.elements]

        # Lookup of previous elements by ID. A dict keeps insertion order,
        # which is what makes the REMOVED pass below deterministic.
        previous_by_id = {el.id: el for el in previous.elements}
        current_ids = {el.id for el in current.elements}

        result: list[Element] = []

        # Process current elements
        for el in current.elements:
            if el.id not in previous_by_id:
                status = "ADDED"
            else:
                prev_el = previous_by_id[el.id]
                bbox_changed = SnapshotDiff._has_bbox_changed(el, prev_el)
                content_changed = SnapshotDiff._has_content_changed(el, prev_el)

                if content_changed:
                    # Content changed (with or without a move) - MODIFIED
                    status = "MODIFIED"
                elif bbox_changed:
                    # Only position/size changed - MOVED
                    status = "MOVED"
                else:
                    # Unchanged elements carry an explicit None diff_status
                    status = None

            result.append(SnapshotDiff._with_status(el, status))

        # Process removed elements (existed in previous but not in current).
        # Walking the ordered dict instead of a set difference keeps the
        # output order stable across runs and aligned with the snapshot.
        for prev_id, prev_el in previous_by_id.items():
            if prev_id not in current_ids:
                result.append(SnapshotDiff._with_status(prev_el, "REMOVED"))

        return result
|
sentience/text_search.py
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
Text search utilities - find text and get pixel coordinates
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from .browser import SentienceBrowser
|
|
5
|
+
from .browser import AsyncSentienceBrowser, SentienceBrowser
|
|
6
|
+
from .browser_evaluator import BrowserEvaluator
|
|
6
7
|
from .models import TextRectSearchResult
|
|
7
8
|
|
|
8
9
|
|
|
@@ -88,18 +89,131 @@ def find_text_rect(
|
|
|
88
89
|
# Limit max_results to prevent performance issues
|
|
89
90
|
max_results = min(max_results, 100)
|
|
90
91
|
|
|
92
|
+
# CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
|
|
93
|
+
# The new architecture loads injected_api.js asynchronously, so window.sentience
|
|
94
|
+
# may not be immediately available after page load
|
|
95
|
+
BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
|
|
96
|
+
|
|
97
|
+
# Verify findTextRect method exists (for older extension versions that don't have it)
|
|
98
|
+
if not BrowserEvaluator.verify_method_exists(browser.page, SentienceMethod.FIND_TEXT_RECT):
|
|
99
|
+
raise RuntimeError(
|
|
100
|
+
"window.sentience.findTextRect is not available. "
|
|
101
|
+
"Please update the Sentience extension to the latest version."
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# Call the extension's findTextRect method
|
|
105
|
+
result_dict = browser.page.evaluate(
|
|
106
|
+
"""
|
|
107
|
+
(options) => {
|
|
108
|
+
return window.sentience.findTextRect(options);
|
|
109
|
+
}
|
|
110
|
+
""",
|
|
111
|
+
{
|
|
112
|
+
"text": text,
|
|
113
|
+
"caseSensitive": case_sensitive,
|
|
114
|
+
"wholeWord": whole_word,
|
|
115
|
+
"maxResults": max_results,
|
|
116
|
+
},
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
# Parse and validate with Pydantic
|
|
120
|
+
return TextRectSearchResult(**result_dict)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
async def find_text_rect_async(
|
|
124
|
+
browser: AsyncSentienceBrowser,
|
|
125
|
+
text: str,
|
|
126
|
+
case_sensitive: bool = False,
|
|
127
|
+
whole_word: bool = False,
|
|
128
|
+
max_results: int = 10,
|
|
129
|
+
) -> TextRectSearchResult:
|
|
130
|
+
"""
|
|
131
|
+
Find all occurrences of text on the page and get their exact pixel coordinates (async).
|
|
132
|
+
|
|
133
|
+
This function searches for text in all visible text nodes on the page and returns
|
|
134
|
+
the bounding rectangles for each match. Useful for:
|
|
135
|
+
- Finding specific UI elements by their text content
|
|
136
|
+
- Locating buttons, links, or labels without element IDs
|
|
137
|
+
- Getting exact coordinates for click automation
|
|
138
|
+
- Highlighting search results visually
|
|
139
|
+
|
|
140
|
+
Args:
|
|
141
|
+
browser: AsyncSentienceBrowser instance
|
|
142
|
+
text: Text to search for (required)
|
|
143
|
+
case_sensitive: If True, search is case-sensitive (default: False)
|
|
144
|
+
whole_word: If True, only match whole words surrounded by whitespace (default: False)
|
|
145
|
+
max_results: Maximum number of matches to return (default: 10, max: 100)
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
TextRectSearchResult with:
|
|
149
|
+
- status: "success" or "error"
|
|
150
|
+
- query: The search text
|
|
151
|
+
- case_sensitive: Whether search was case-sensitive
|
|
152
|
+
- whole_word: Whether whole-word matching was used
|
|
153
|
+
- matches: Number of matches found
|
|
154
|
+
- results: List of TextMatch objects, each containing:
|
|
155
|
+
- text: The matched text
|
|
156
|
+
- rect: Absolute rectangle (with scroll offset)
|
|
157
|
+
- viewport_rect: Viewport-relative rectangle
|
|
158
|
+
- context: Surrounding text (before/after)
|
|
159
|
+
- in_viewport: Whether visible in current viewport
|
|
160
|
+
- viewport: Current viewport dimensions and scroll position
|
|
161
|
+
- error: Error message if status is "error"
|
|
162
|
+
|
|
163
|
+
Examples:
|
|
164
|
+
# Find "Sign In" button
|
|
165
|
+
result = await find_text_rect_async(browser, "Sign In")
|
|
166
|
+
if result.status == "success" and result.results:
|
|
167
|
+
first_match = result.results[0]
|
|
168
|
+
print(f"Found at: ({first_match.rect.x}, {first_match.rect.y})")
|
|
169
|
+
print(f"Size: {first_match.rect.width}x{first_match.rect.height}")
|
|
170
|
+
print(f"In viewport: {first_match.in_viewport}")
|
|
171
|
+
|
|
172
|
+
# Case-sensitive search
|
|
173
|
+
result = await find_text_rect_async(browser, "LOGIN", case_sensitive=True)
|
|
174
|
+
|
|
175
|
+
# Whole word only
|
|
176
|
+
result = await find_text_rect_async(browser, "log", whole_word=True) # Won't match "login"
|
|
177
|
+
|
|
178
|
+
# Find all matches and click the first visible one
|
|
179
|
+
result = await find_text_rect_async(browser, "Buy Now", max_results=5)
|
|
180
|
+
if result.status == "success" and result.results:
|
|
181
|
+
for match in result.results:
|
|
182
|
+
if match.in_viewport:
|
|
183
|
+
# Use click_rect_async from actions module
|
|
184
|
+
from sentience.actions import click_rect_async
|
|
185
|
+
click_result = await click_rect_async(browser, {
|
|
186
|
+
"x": match.rect.x,
|
|
187
|
+
"y": match.rect.y,
|
|
188
|
+
"w": match.rect.width,
|
|
189
|
+
"h": match.rect.height
|
|
190
|
+
})
|
|
191
|
+
break
|
|
192
|
+
"""
|
|
193
|
+
if not browser.page:
|
|
194
|
+
raise RuntimeError("Browser not started. Call await browser.start() first.")
|
|
195
|
+
|
|
196
|
+
if not text or not text.strip():
|
|
197
|
+
return TextRectSearchResult(
|
|
198
|
+
status="error",
|
|
199
|
+
error="Text parameter is required and cannot be empty",
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
# Limit max_results to prevent performance issues
|
|
203
|
+
max_results = min(max_results, 100)
|
|
204
|
+
|
|
91
205
|
# CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
|
|
92
206
|
# The new architecture loads injected_api.js asynchronously, so window.sentience
|
|
93
207
|
# may not be immediately available after page load
|
|
94
208
|
try:
|
|
95
|
-
browser.page.wait_for_function(
|
|
209
|
+
await browser.page.wait_for_function(
|
|
96
210
|
"typeof window.sentience !== 'undefined'",
|
|
97
211
|
timeout=5000, # 5 second timeout
|
|
98
212
|
)
|
|
99
213
|
except Exception as e:
|
|
100
214
|
# Gather diagnostics if wait fails
|
|
101
215
|
try:
|
|
102
|
-
diag = browser.page.evaluate(
|
|
216
|
+
diag = await browser.page.evaluate(
|
|
103
217
|
"""() => ({
|
|
104
218
|
sentience_defined: typeof window.sentience !== 'undefined',
|
|
105
219
|
extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
|
|
@@ -116,7 +230,7 @@ def find_text_rect(
|
|
|
116
230
|
|
|
117
231
|
# Verify findTextRect method exists (for older extension versions that don't have it)
|
|
118
232
|
try:
|
|
119
|
-
has_find_text_rect = browser.page.evaluate(
|
|
233
|
+
has_find_text_rect = await browser.page.evaluate(
|
|
120
234
|
"typeof window.sentience.findTextRect !== 'undefined'"
|
|
121
235
|
)
|
|
122
236
|
if not has_find_text_rect:
|
|
@@ -130,7 +244,7 @@ def find_text_rect(
|
|
|
130
244
|
raise RuntimeError(f"Failed to verify findTextRect availability: {e}") from e
|
|
131
245
|
|
|
132
246
|
# Call the extension's findTextRect method
|
|
133
|
-
result_dict = browser.page.evaluate(
|
|
247
|
+
result_dict = await browser.page.evaluate(
|
|
134
248
|
"""
|
|
135
249
|
(options) => {
|
|
136
250
|
return window.sentience.findTextRect(options);
|