sentienceapi 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (90)
  1. sentience/__init__.py +120 -6
  2. sentience/_extension_loader.py +156 -1
  3. sentience/action_executor.py +217 -0
  4. sentience/actions.py +758 -30
  5. sentience/agent.py +806 -293
  6. sentience/agent_config.py +3 -0
  7. sentience/agent_runtime.py +840 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +89 -1141
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +372 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +483 -0
  21. sentience/base_agent.py +95 -0
  22. sentience/browser.py +678 -39
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cloud_tracing.py +507 -42
  26. sentience/constants.py +6 -0
  27. sentience/conversational_agent.py +77 -43
  28. sentience/cursor_policy.py +142 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +98 -2
  31. sentience/extension/background.js +56 -185
  32. sentience/extension/content.js +150 -287
  33. sentience/extension/injected_api.js +1088 -1368
  34. sentience/extension/manifest.json +1 -1
  35. sentience/extension/pkg/sentience_core.d.ts +22 -22
  36. sentience/extension/pkg/sentience_core.js +275 -433
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/release.json +47 -47
  39. sentience/failure_artifacts.py +241 -0
  40. sentience/formatting.py +9 -53
  41. sentience/inspector.py +183 -1
  42. sentience/integrations/__init__.py +6 -0
  43. sentience/integrations/langchain/__init__.py +12 -0
  44. sentience/integrations/langchain/context.py +18 -0
  45. sentience/integrations/langchain/core.py +326 -0
  46. sentience/integrations/langchain/tools.py +180 -0
  47. sentience/integrations/models.py +46 -0
  48. sentience/integrations/pydanticai/__init__.py +15 -0
  49. sentience/integrations/pydanticai/deps.py +20 -0
  50. sentience/integrations/pydanticai/toolset.py +468 -0
  51. sentience/llm_interaction_handler.py +191 -0
  52. sentience/llm_provider.py +765 -66
  53. sentience/llm_provider_utils.py +120 -0
  54. sentience/llm_response_builder.py +153 -0
  55. sentience/models.py +595 -3
  56. sentience/ordinal.py +280 -0
  57. sentience/overlay.py +109 -2
  58. sentience/protocols.py +228 -0
  59. sentience/query.py +67 -5
  60. sentience/read.py +95 -3
  61. sentience/recorder.py +223 -3
  62. sentience/schemas/trace_v1.json +128 -9
  63. sentience/screenshot.py +48 -2
  64. sentience/sentience_methods.py +86 -0
  65. sentience/snapshot.py +599 -55
  66. sentience/snapshot_diff.py +126 -0
  67. sentience/text_search.py +120 -5
  68. sentience/trace_event_builder.py +148 -0
  69. sentience/trace_file_manager.py +197 -0
  70. sentience/trace_indexing/index_schema.py +95 -7
  71. sentience/trace_indexing/indexer.py +105 -48
  72. sentience/tracer_factory.py +120 -9
  73. sentience/tracing.py +172 -8
  74. sentience/utils/__init__.py +40 -0
  75. sentience/utils/browser.py +46 -0
  76. sentience/{utils.py → utils/element.py} +3 -42
  77. sentience/utils/formatting.py +59 -0
  78. sentience/verification.py +618 -0
  79. sentience/visual_agent.py +2058 -0
  80. sentience/wait.py +68 -2
  81. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/METADATA +199 -40
  82. sentienceapi-0.98.0.dist-info/RECORD +92 -0
  83. sentience/extension/test-content.js +0 -4
  84. sentienceapi-0.90.16.dist-info/RECORD +0 -50
  85. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/WHEEL +0 -0
  86. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/entry_points.txt +0 -0
  87. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE +0 -0
  88. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-APACHE +0 -0
  89. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-MIT +0 -0
  90. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/top_level.txt +0 -0
sentience/actions.py CHANGED
@@ -1,12 +1,18 @@
1
+ from typing import Optional
2
+
1
3
  """
2
4
  Actions v1 - click, type, press
3
5
  """
4
6
 
7
+ import asyncio
5
8
  import time
6
9
 
7
- from .browser import SentienceBrowser
10
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
11
+ from .browser_evaluator import BrowserEvaluator
12
+ from .cursor_policy import CursorPolicy, build_human_cursor_path
8
13
  from .models import ActionResult, BBox, Snapshot
9
- from .snapshot import snapshot
14
+ from .sentience_methods import SentienceMethod
15
+ from .snapshot import snapshot, snapshot_async
10
16
 
11
17
 
12
18
  def click( # noqa: C901
@@ -14,6 +20,7 @@ def click( # noqa: C901
14
20
  element_id: int,
15
21
  use_mouse: bool = True,
16
22
  take_snapshot: bool = False,
23
+ cursor_policy: CursorPolicy | None = None,
17
24
  ) -> ActionResult:
18
25
  """
19
26
  Click an element by ID using hybrid approach (mouse simulation by default)
@@ -33,6 +40,7 @@ def click( # noqa: C901
33
40
 
34
41
  start_time = time.time()
35
42
  url_before = browser.page.url
43
+ cursor_meta: dict | None = None
36
44
 
37
45
  if use_mouse:
38
46
  # Hybrid approach: Get element bbox from snapshot, calculate center, use mouse.click()
@@ -48,9 +56,49 @@ def click( # noqa: C901
48
56
  # Calculate center of element bbox
49
57
  center_x = element.bbox.x + element.bbox.width / 2
50
58
  center_y = element.bbox.y + element.bbox.height / 2
51
- # Use Playwright's native mouse click for realistic simulation
59
+ # Optional: human-like cursor movement (opt-in)
52
60
  try:
53
- browser.page.mouse.click(center_x, center_y)
61
+ if cursor_policy is not None and cursor_policy.mode == "human":
62
+ # Best-effort cursor state on browser instance
63
+ pos = getattr(browser, "_sentience_cursor_pos", None)
64
+ if not isinstance(pos, tuple) or len(pos) != 2:
65
+ try:
66
+ vp = browser.page.viewport_size or {}
67
+ pos = (
68
+ float(vp.get("width", 0)) / 2.0,
69
+ float(vp.get("height", 0)) / 2.0,
70
+ )
71
+ except Exception:
72
+ pos = (0.0, 0.0)
73
+
74
+ cursor_meta = build_human_cursor_path(
75
+ start=(float(pos[0]), float(pos[1])),
76
+ target=(float(center_x), float(center_y)),
77
+ policy=cursor_policy,
78
+ )
79
+ pts = cursor_meta.get("path", [])
80
+ steps = int(cursor_meta.get("steps") or max(1, len(pts)))
81
+ duration_ms = int(cursor_meta.get("duration_ms") or 0)
82
+ per_step_s = (
83
+ (duration_ms / max(1, len(pts))) / 1000.0 if duration_ms > 0 else 0.0
84
+ )
85
+ for p in pts:
86
+ browser.page.mouse.move(float(p["x"]), float(p["y"]))
87
+ if per_step_s > 0:
88
+ time.sleep(per_step_s)
89
+ pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0)
90
+ if pause_ms > 0:
91
+ time.sleep(pause_ms / 1000.0)
92
+ browser.page.mouse.click(center_x, center_y)
93
+ setattr(
94
+ browser, "_sentience_cursor_pos", (float(center_x), float(center_y))
95
+ )
96
+ else:
97
+ # Default behavior (no regression)
98
+ browser.page.mouse.click(center_x, center_y)
99
+ setattr(
100
+ browser, "_sentience_cursor_pos", (float(center_x), float(center_y))
101
+ )
54
102
  success = True
55
103
  except Exception:
56
104
  # If navigation happens, mouse.click might fail, but that's OK
@@ -59,13 +107,8 @@ def click( # noqa: C901
59
107
  else:
60
108
  # Fallback to JS click if element not found in snapshot
61
109
  try:
62
- success = browser.page.evaluate(
63
- """
64
- (id) => {
65
- return window.sentience.click(id);
66
- }
67
- """,
68
- element_id,
110
+ success = BrowserEvaluator.invoke(
111
+ browser.page, SentienceMethod.CLICK, element_id
69
112
  )
70
113
  except Exception:
71
114
  # Navigation might have destroyed context, assume success if URL changed
@@ -73,27 +116,13 @@ def click( # noqa: C901
73
116
  except Exception:
74
117
  # Fallback to JS click on error
75
118
  try:
76
- success = browser.page.evaluate(
77
- """
78
- (id) => {
79
- return window.sentience.click(id);
80
- }
81
- """,
82
- element_id,
83
- )
119
+ success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
84
120
  except Exception:
85
121
  # Navigation might have destroyed context, assume success if URL changed
86
122
  success = True
87
123
  else:
88
124
  # Legacy JS-based click
89
- success = browser.page.evaluate(
90
- """
91
- (id) => {
92
- return window.sentience.click(id);
93
- }
94
- """,
95
- element_id,
96
- )
125
+ success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
97
126
 
98
127
  # Wait a bit for navigation/DOM updates
99
128
  try:
@@ -137,6 +166,7 @@ def click( # noqa: C901
137
166
  outcome=outcome,
138
167
  url_changed=url_changed,
139
168
  snapshot_after=snapshot_after,
169
+ cursor=cursor_meta,
140
170
  error=(
141
171
  None
142
172
  if success
@@ -149,7 +179,11 @@ def click( # noqa: C901
149
179
 
150
180
 
151
181
  def type_text(
152
- browser: SentienceBrowser, element_id: int, text: str, take_snapshot: bool = False
182
+ browser: SentienceBrowser,
183
+ element_id: int,
184
+ text: str,
185
+ take_snapshot: bool = False,
186
+ delay_ms: float = 0,
153
187
  ) -> ActionResult:
154
188
  """
155
189
  Type text into an element (focus then input)
@@ -159,9 +193,16 @@ def type_text(
159
193
  element_id: Element ID from snapshot
160
194
  text: Text to type
161
195
  take_snapshot: Whether to take snapshot after action
196
+ delay_ms: Delay between keystrokes in milliseconds for human-like typing (default: 0)
162
197
 
163
198
  Returns:
164
199
  ActionResult
200
+
201
+ Example:
202
+ >>> # Type instantly (default behavior)
203
+ >>> type_text(browser, element_id, "Hello World")
204
+ >>> # Type with human-like delay (~10ms between keystrokes)
205
+ >>> type_text(browser, element_id, "Hello World", delay_ms=10)
165
206
  """
166
207
  if not browser.page:
167
208
  raise RuntimeError("Browser not started. Call browser.start() first.")
@@ -192,8 +233,8 @@ def type_text(
192
233
  error={"code": "focus_failed", "reason": "Element not found"},
193
234
  )
194
235
 
195
- # Type using Playwright keyboard
196
- browser.page.keyboard.type(text)
236
+ # Type using Playwright keyboard with optional delay between keystrokes
237
+ browser.page.keyboard.type(text, delay=delay_ms)
197
238
 
198
239
  duration_ms = int((time.time() - start_time) * 1000)
199
240
  url_after = browser.page.url
@@ -257,6 +298,94 @@ def press(browser: SentienceBrowser, key: str, take_snapshot: bool = False) -> A
257
298
  )
258
299
 
259
300
 
301
+ def scroll_to(
302
+ browser: SentienceBrowser,
303
+ element_id: int,
304
+ behavior: str = "smooth",
305
+ block: str = "center",
306
+ take_snapshot: bool = False,
307
+ ) -> ActionResult:
308
+ """
309
+ Scroll an element into view
310
+
311
+ Scrolls the page so that the specified element is visible in the viewport.
312
+ Uses the element registry to find the element and scrollIntoView() to scroll it.
313
+
314
+ Args:
315
+ browser: SentienceBrowser instance
316
+ element_id: Element ID from snapshot to scroll into view
317
+ behavior: Scroll behavior - 'smooth', 'instant', or 'auto' (default: 'smooth')
318
+ block: Vertical alignment - 'start', 'center', 'end', or 'nearest' (default: 'center')
319
+ take_snapshot: Whether to take snapshot after action
320
+
321
+ Returns:
322
+ ActionResult
323
+
324
+ Example:
325
+ >>> snap = snapshot(browser)
326
+ >>> button = find(snap, 'role=button[name="Submit"]')
327
+ >>> if button:
328
+ >>> # Scroll element into view with smooth animation
329
+ >>> scroll_to(browser, button.id)
330
+ >>> # Scroll instantly to top of viewport
331
+ >>> scroll_to(browser, button.id, behavior='instant', block='start')
332
+ """
333
+ if not browser.page:
334
+ raise RuntimeError("Browser not started. Call browser.start() first.")
335
+
336
+ start_time = time.time()
337
+ url_before = browser.page.url
338
+
339
+ # Scroll element into view using the element registry
340
+ scrolled = browser.page.evaluate(
341
+ """
342
+ (args) => {
343
+ const el = window.sentience_registry[args.id];
344
+ if (el && el.scrollIntoView) {
345
+ el.scrollIntoView({
346
+ behavior: args.behavior,
347
+ block: args.block,
348
+ inline: 'nearest'
349
+ });
350
+ return true;
351
+ }
352
+ return false;
353
+ }
354
+ """,
355
+ {"id": element_id, "behavior": behavior, "block": block},
356
+ )
357
+
358
+ if not scrolled:
359
+ return ActionResult(
360
+ success=False,
361
+ duration_ms=int((time.time() - start_time) * 1000),
362
+ outcome="error",
363
+ error={"code": "scroll_failed", "reason": "Element not found or not scrollable"},
364
+ )
365
+
366
+ # Wait a bit for scroll to complete (especially for smooth scrolling)
367
+ wait_time = 500 if behavior == "smooth" else 100
368
+ browser.page.wait_for_timeout(wait_time)
369
+
370
+ duration_ms = int((time.time() - start_time) * 1000)
371
+ url_after = browser.page.url
372
+ url_changed = url_before != url_after
373
+
374
+ outcome = "navigated" if url_changed else "dom_updated"
375
+
376
+ snapshot_after: Snapshot | None = None
377
+ if take_snapshot:
378
+ snapshot_after = snapshot(browser)
379
+
380
+ return ActionResult(
381
+ success=True,
382
+ duration_ms=duration_ms,
383
+ outcome=outcome,
384
+ url_changed=url_changed,
385
+ snapshot_after=snapshot_after,
386
+ )
387
+
388
+
260
389
  def _highlight_rect(
261
390
  browser: SentienceBrowser, rect: dict[str, float], duration_sec: float = 2.0
262
391
  ) -> None:
@@ -330,6 +459,7 @@ def click_rect(
330
459
  highlight: bool = True,
331
460
  highlight_duration: float = 2.0,
332
461
  take_snapshot: bool = False,
462
+ cursor_policy: CursorPolicy | None = None,
333
463
  ) -> ActionResult:
334
464
  """
335
465
  Click at the center of a rectangle using Playwright's native mouse simulation.
@@ -385,6 +515,7 @@ def click_rect(
385
515
  # Calculate center of rectangle
386
516
  center_x = x + w / 2
387
517
  center_y = y + h / 2
518
+ cursor_meta: dict | None = None
388
519
 
389
520
  # Show highlight before clicking (if enabled)
390
521
  if highlight:
@@ -395,7 +526,35 @@ def click_rect(
395
526
  # Use Playwright's native mouse click for realistic simulation
396
527
  # This triggers hover, focus, mousedown, mouseup sequences
397
528
  try:
529
+ if cursor_policy is not None and cursor_policy.mode == "human":
530
+ pos = getattr(browser, "_sentience_cursor_pos", None)
531
+ if not isinstance(pos, tuple) or len(pos) != 2:
532
+ try:
533
+ vp = browser.page.viewport_size or {}
534
+ pos = (float(vp.get("width", 0)) / 2.0, float(vp.get("height", 0)) / 2.0)
535
+ except Exception:
536
+ pos = (0.0, 0.0)
537
+
538
+ cursor_meta = build_human_cursor_path(
539
+ start=(float(pos[0]), float(pos[1])),
540
+ target=(float(center_x), float(center_y)),
541
+ policy=cursor_policy,
542
+ )
543
+ pts = cursor_meta.get("path", [])
544
+ duration_ms_move = int(cursor_meta.get("duration_ms") or 0)
545
+ per_step_s = (
546
+ (duration_ms_move / max(1, len(pts))) / 1000.0 if duration_ms_move > 0 else 0.0
547
+ )
548
+ for p in pts:
549
+ browser.page.mouse.move(float(p["x"]), float(p["y"]))
550
+ if per_step_s > 0:
551
+ time.sleep(per_step_s)
552
+ pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0)
553
+ if pause_ms > 0:
554
+ time.sleep(pause_ms / 1000.0)
555
+
398
556
  browser.page.mouse.click(center_x, center_y)
557
+ setattr(browser, "_sentience_cursor_pos", (float(center_x), float(center_y)))
399
558
  success = True
400
559
  except Exception as e:
401
560
  success = False
@@ -428,6 +587,575 @@ def click_rect(
428
587
  outcome=outcome,
429
588
  url_changed=url_changed,
430
589
  snapshot_after=snapshot_after,
590
+ cursor=cursor_meta,
591
+ error=(
592
+ None
593
+ if success
594
+ else {
595
+ "code": "click_failed",
596
+ "reason": error_msg if not success else "Click failed",
597
+ }
598
+ ),
599
+ )
600
+
601
+
602
+ # ========== Async Action Functions ==========
603
+
604
+
605
+ async def click_async(
606
+ browser: AsyncSentienceBrowser,
607
+ element_id: int,
608
+ use_mouse: bool = True,
609
+ take_snapshot: bool = False,
610
+ cursor_policy: CursorPolicy | None = None,
611
+ ) -> ActionResult:
612
+ """
613
+ Click an element by ID using hybrid approach (async)
614
+
615
+ Args:
616
+ browser: AsyncSentienceBrowser instance
617
+ element_id: Element ID from snapshot
618
+ use_mouse: If True, use Playwright's mouse.click() at element center
619
+ take_snapshot: Whether to take snapshot after action
620
+
621
+ Returns:
622
+ ActionResult
623
+ """
624
+ if not browser.page:
625
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
626
+
627
+ start_time = time.time()
628
+ url_before = browser.page.url
629
+ cursor_meta: dict | None = None
630
+
631
+ if use_mouse:
632
+ try:
633
+ snap = await snapshot_async(browser)
634
+ element = None
635
+ for el in snap.elements:
636
+ if el.id == element_id:
637
+ element = el
638
+ break
639
+
640
+ if element:
641
+ center_x = element.bbox.x + element.bbox.width / 2
642
+ center_y = element.bbox.y + element.bbox.height / 2
643
+ try:
644
+ if cursor_policy is not None and cursor_policy.mode == "human":
645
+ pos = getattr(browser, "_sentience_cursor_pos", None)
646
+ if not isinstance(pos, tuple) or len(pos) != 2:
647
+ try:
648
+ vp = browser.page.viewport_size or {}
649
+ pos = (
650
+ float(vp.get("width", 0)) / 2.0,
651
+ float(vp.get("height", 0)) / 2.0,
652
+ )
653
+ except Exception:
654
+ pos = (0.0, 0.0)
655
+
656
+ cursor_meta = build_human_cursor_path(
657
+ start=(float(pos[0]), float(pos[1])),
658
+ target=(float(center_x), float(center_y)),
659
+ policy=cursor_policy,
660
+ )
661
+ pts = cursor_meta.get("path", [])
662
+ duration_ms = int(cursor_meta.get("duration_ms") or 0)
663
+ per_step_s = (
664
+ (duration_ms / max(1, len(pts))) / 1000.0 if duration_ms > 0 else 0.0
665
+ )
666
+ for p in pts:
667
+ await browser.page.mouse.move(float(p["x"]), float(p["y"]))
668
+ if per_step_s > 0:
669
+ await asyncio.sleep(per_step_s)
670
+ pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0)
671
+ if pause_ms > 0:
672
+ await asyncio.sleep(pause_ms / 1000.0)
673
+ await browser.page.mouse.click(center_x, center_y)
674
+ setattr(
675
+ browser, "_sentience_cursor_pos", (float(center_x), float(center_y))
676
+ )
677
+ else:
678
+ await browser.page.mouse.click(center_x, center_y)
679
+ setattr(
680
+ browser, "_sentience_cursor_pos", (float(center_x), float(center_y))
681
+ )
682
+ success = True
683
+ except Exception:
684
+ success = True
685
+ else:
686
+ try:
687
+ success = await browser.page.evaluate(
688
+ """
689
+ (id) => {
690
+ return window.sentience.click(id);
691
+ }
692
+ """,
693
+ element_id,
694
+ )
695
+ except Exception:
696
+ success = True
697
+ except Exception:
698
+ try:
699
+ success = await browser.page.evaluate(
700
+ """
701
+ (id) => {
702
+ return window.sentience.click(id);
703
+ }
704
+ """,
705
+ element_id,
706
+ )
707
+ except Exception:
708
+ success = True
709
+ else:
710
+ success = await browser.page.evaluate(
711
+ """
712
+ (id) => {
713
+ return window.sentience.click(id);
714
+ }
715
+ """,
716
+ element_id,
717
+ )
718
+
719
+ # Wait a bit for navigation/DOM updates
720
+ try:
721
+ await browser.page.wait_for_timeout(500)
722
+ except Exception:
723
+ pass
724
+
725
+ duration_ms = int((time.time() - start_time) * 1000)
726
+
727
+ # Check if URL changed
728
+ try:
729
+ url_after = browser.page.url
730
+ url_changed = url_before != url_after
731
+ except Exception:
732
+ url_after = url_before
733
+ url_changed = True
734
+
735
+ # Determine outcome
736
+ outcome: str | None = None
737
+ if url_changed:
738
+ outcome = "navigated"
739
+ elif success:
740
+ outcome = "dom_updated"
741
+ else:
742
+ outcome = "error"
743
+
744
+ # Optional snapshot after
745
+ snapshot_after: Snapshot | None = None
746
+ if take_snapshot:
747
+ try:
748
+ snapshot_after = await snapshot_async(browser)
749
+ except Exception:
750
+ pass
751
+
752
+ return ActionResult(
753
+ success=success,
754
+ duration_ms=duration_ms,
755
+ outcome=outcome,
756
+ url_changed=url_changed,
757
+ snapshot_after=snapshot_after,
758
+ cursor=cursor_meta,
759
+ error=(
760
+ None
761
+ if success
762
+ else {
763
+ "code": "click_failed",
764
+ "reason": "Element not found or not clickable",
765
+ }
766
+ ),
767
+ )
768
+
769
+
770
+ async def type_text_async(
771
+ browser: AsyncSentienceBrowser,
772
+ element_id: int,
773
+ text: str,
774
+ take_snapshot: bool = False,
775
+ delay_ms: float = 0,
776
+ ) -> ActionResult:
777
+ """
778
+ Type text into an element (async)
779
+
780
+ Args:
781
+ browser: AsyncSentienceBrowser instance
782
+ element_id: Element ID from snapshot
783
+ text: Text to type
784
+ take_snapshot: Whether to take snapshot after action
785
+ delay_ms: Delay between keystrokes in milliseconds for human-like typing (default: 0)
786
+
787
+ Returns:
788
+ ActionResult
789
+
790
+ Example:
791
+ >>> # Type instantly (default behavior)
792
+ >>> await type_text_async(browser, element_id, "Hello World")
793
+ >>> # Type with human-like delay (~10ms between keystrokes)
794
+ >>> await type_text_async(browser, element_id, "Hello World", delay_ms=10)
795
+ """
796
+ if not browser.page:
797
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
798
+
799
+ start_time = time.time()
800
+ url_before = browser.page.url
801
+
802
+ # Focus element first
803
+ focused = await browser.page.evaluate(
804
+ """
805
+ (id) => {
806
+ const el = window.sentience_registry[id];
807
+ if (el) {
808
+ el.focus();
809
+ return true;
810
+ }
811
+ return false;
812
+ }
813
+ """,
814
+ element_id,
815
+ )
816
+
817
+ if not focused:
818
+ return ActionResult(
819
+ success=False,
820
+ duration_ms=int((time.time() - start_time) * 1000),
821
+ outcome="error",
822
+ error={"code": "focus_failed", "reason": "Element not found"},
823
+ )
824
+
825
+ # Type using Playwright keyboard with optional delay between keystrokes
826
+ await browser.page.keyboard.type(text, delay=delay_ms)
827
+
828
+ duration_ms = int((time.time() - start_time) * 1000)
829
+ url_after = browser.page.url
830
+ url_changed = url_before != url_after
831
+
832
+ outcome = "navigated" if url_changed else "dom_updated"
833
+
834
+ snapshot_after: Snapshot | None = None
835
+ if take_snapshot:
836
+ snapshot_after = await snapshot_async(browser)
837
+
838
+ return ActionResult(
839
+ success=True,
840
+ duration_ms=duration_ms,
841
+ outcome=outcome,
842
+ url_changed=url_changed,
843
+ snapshot_after=snapshot_after,
844
+ )
845
+
846
+
847
+ async def press_async(
848
+ browser: AsyncSentienceBrowser, key: str, take_snapshot: bool = False
849
+ ) -> ActionResult:
850
+ """
851
+ Press a keyboard key (async)
852
+
853
+ Args:
854
+ browser: AsyncSentienceBrowser instance
855
+ key: Key to press (e.g., "Enter", "Escape", "Tab")
856
+ take_snapshot: Whether to take snapshot after action
857
+
858
+ Returns:
859
+ ActionResult
860
+ """
861
+ if not browser.page:
862
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
863
+
864
+ start_time = time.time()
865
+ url_before = browser.page.url
866
+
867
+ # Press key using Playwright
868
+ await browser.page.keyboard.press(key)
869
+
870
+ # Wait a bit for navigation/DOM updates
871
+ await browser.page.wait_for_timeout(500)
872
+
873
+ duration_ms = int((time.time() - start_time) * 1000)
874
+ url_after = browser.page.url
875
+ url_changed = url_before != url_after
876
+
877
+ outcome = "navigated" if url_changed else "dom_updated"
878
+
879
+ snapshot_after: Snapshot | None = None
880
+ if take_snapshot:
881
+ snapshot_after = await snapshot_async(browser)
882
+
883
+ return ActionResult(
884
+ success=True,
885
+ duration_ms=duration_ms,
886
+ outcome=outcome,
887
+ url_changed=url_changed,
888
+ snapshot_after=snapshot_after,
889
+ )
890
+
891
+
892
+ async def scroll_to_async(
893
+ browser: AsyncSentienceBrowser,
894
+ element_id: int,
895
+ behavior: str = "smooth",
896
+ block: str = "center",
897
+ take_snapshot: bool = False,
898
+ ) -> ActionResult:
899
+ """
900
+ Scroll an element into view (async)
901
+
902
+ Scrolls the page so that the specified element is visible in the viewport.
903
+ Uses the element registry to find the element and scrollIntoView() to scroll it.
904
+
905
+ Args:
906
+ browser: AsyncSentienceBrowser instance
907
+ element_id: Element ID from snapshot to scroll into view
908
+ behavior: Scroll behavior - 'smooth', 'instant', or 'auto' (default: 'smooth')
909
+ block: Vertical alignment - 'start', 'center', 'end', or 'nearest' (default: 'center')
910
+ take_snapshot: Whether to take snapshot after action
911
+
912
+ Returns:
913
+ ActionResult
914
+
915
+ Example:
916
+ >>> snap = await snapshot_async(browser)
917
+ >>> button = find(snap, 'role=button[name="Submit"]')
918
+ >>> if button:
919
+ >>> # Scroll element into view with smooth animation
920
+ >>> await scroll_to_async(browser, button.id)
921
+ >>> # Scroll instantly to top of viewport
922
+ >>> await scroll_to_async(browser, button.id, behavior='instant', block='start')
923
+ """
924
+ if not browser.page:
925
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
926
+
927
+ start_time = time.time()
928
+ url_before = browser.page.url
929
+
930
+ # Scroll element into view using the element registry
931
+ scrolled = await browser.page.evaluate(
932
+ """
933
+ (args) => {
934
+ const el = window.sentience_registry[args.id];
935
+ if (el && el.scrollIntoView) {
936
+ el.scrollIntoView({
937
+ behavior: args.behavior,
938
+ block: args.block,
939
+ inline: 'nearest'
940
+ });
941
+ return true;
942
+ }
943
+ return false;
944
+ }
945
+ """,
946
+ {"id": element_id, "behavior": behavior, "block": block},
947
+ )
948
+
949
+ if not scrolled:
950
+ return ActionResult(
951
+ success=False,
952
+ duration_ms=int((time.time() - start_time) * 1000),
953
+ outcome="error",
954
+ error={"code": "scroll_failed", "reason": "Element not found or not scrollable"},
955
+ )
956
+
957
+ # Wait a bit for scroll to complete (especially for smooth scrolling)
958
+ wait_time = 500 if behavior == "smooth" else 100
959
+ await browser.page.wait_for_timeout(wait_time)
960
+
961
+ duration_ms = int((time.time() - start_time) * 1000)
962
+ url_after = browser.page.url
963
+ url_changed = url_before != url_after
964
+
965
+ outcome = "navigated" if url_changed else "dom_updated"
966
+
967
+ snapshot_after: Snapshot | None = None
968
+ if take_snapshot:
969
+ snapshot_after = await snapshot_async(browser)
970
+
971
+ return ActionResult(
972
+ success=True,
973
+ duration_ms=duration_ms,
974
+ outcome=outcome,
975
+ url_changed=url_changed,
976
+ snapshot_after=snapshot_after,
977
+ )
978
+
979
+
980
+ async def _highlight_rect_async(
981
+ browser: AsyncSentienceBrowser, rect: dict[str, float], duration_sec: float = 2.0
982
+ ) -> None:
983
+ """Highlight a rectangle with a red border overlay (async)"""
984
+ if not browser.page:
985
+ return
986
+
987
+ highlight_id = f"sentience_highlight_{int(time.time() * 1000)}"
988
+
989
+ args = {
990
+ "rect": {
991
+ "x": rect["x"],
992
+ "y": rect["y"],
993
+ "w": rect["w"],
994
+ "h": rect["h"],
995
+ },
996
+ "highlightId": highlight_id,
997
+ "durationSec": duration_sec,
998
+ }
999
+
1000
+ await browser.page.evaluate(
1001
+ """
1002
+ (args) => {
1003
+ const { rect, highlightId, durationSec } = args;
1004
+ const overlay = document.createElement('div');
1005
+ overlay.id = highlightId;
1006
+ overlay.style.position = 'fixed';
1007
+ overlay.style.left = `${rect.x}px`;
1008
+ overlay.style.top = `${rect.y}px`;
1009
+ overlay.style.width = `${rect.w}px`;
1010
+ overlay.style.height = `${rect.h}px`;
1011
+ overlay.style.border = '3px solid red';
1012
+ overlay.style.borderRadius = '2px';
1013
+ overlay.style.boxSizing = 'border-box';
1014
+ overlay.style.pointerEvents = 'none';
1015
+ overlay.style.zIndex = '999999';
1016
+ overlay.style.backgroundColor = 'rgba(255, 0, 0, 0.1)';
1017
+ overlay.style.transition = 'opacity 0.3s ease-out';
1018
+
1019
+ document.body.appendChild(overlay);
1020
+
1021
+ setTimeout(() => {
1022
+ overlay.style.opacity = '0';
1023
+ setTimeout(() => {
1024
+ if (overlay.parentNode) {
1025
+ overlay.parentNode.removeChild(overlay);
1026
+ }
1027
+ }, 300);
1028
+ }, durationSec * 1000);
1029
+ }
1030
+ """,
1031
+ args,
1032
+ )
1033
+
1034
+
1035
+ async def click_rect_async(
1036
+ browser: AsyncSentienceBrowser,
1037
+ rect: dict[str, float] | BBox,
1038
+ highlight: bool = True,
1039
+ highlight_duration: float = 2.0,
1040
+ take_snapshot: bool = False,
1041
+ cursor_policy: CursorPolicy | None = None,
1042
+ ) -> ActionResult:
1043
+ """
1044
+ Click at the center of a rectangle (async)
1045
+
1046
+ Args:
1047
+ browser: AsyncSentienceBrowser instance
1048
+ rect: Dictionary with x, y, width (w), height (h) keys, or BBox object
1049
+ highlight: Whether to show a red border highlight when clicking
1050
+ highlight_duration: How long to show the highlight in seconds
1051
+ take_snapshot: Whether to take snapshot after action
1052
+
1053
+ Returns:
1054
+ ActionResult
1055
+ """
1056
+ if not browser.page:
1057
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
1058
+
1059
+ # Handle BBox object or dict
1060
+ if isinstance(rect, BBox):
1061
+ x = rect.x
1062
+ y = rect.y
1063
+ w = rect.width
1064
+ h = rect.height
1065
+ else:
1066
+ x = rect.get("x", 0)
1067
+ y = rect.get("y", 0)
1068
+ w = rect.get("w") or rect.get("width", 0)
1069
+ h = rect.get("h") or rect.get("height", 0)
1070
+
1071
+ if w <= 0 or h <= 0:
1072
+ return ActionResult(
1073
+ success=False,
1074
+ duration_ms=0,
1075
+ outcome="error",
1076
+ error={
1077
+ "code": "invalid_rect",
1078
+ "reason": "Rectangle width and height must be positive",
1079
+ },
1080
+ )
1081
+
1082
+ start_time = time.time()
1083
+ url_before = browser.page.url
1084
+
1085
+ # Calculate center of rectangle
1086
+ center_x = x + w / 2
1087
+ center_y = y + h / 2
1088
+ cursor_meta: dict | None = None
1089
+
1090
+ # Show highlight before clicking
1091
+ if highlight:
1092
+ await _highlight_rect_async(browser, {"x": x, "y": y, "w": w, "h": h}, highlight_duration)
1093
+ await browser.page.wait_for_timeout(50)
1094
+
1095
+ # Use Playwright's native mouse click
1096
+ try:
1097
+ if cursor_policy is not None and cursor_policy.mode == "human":
1098
+ pos = getattr(browser, "_sentience_cursor_pos", None)
1099
+ if not isinstance(pos, tuple) or len(pos) != 2:
1100
+ try:
1101
+ vp = browser.page.viewport_size or {}
1102
+ pos = (float(vp.get("width", 0)) / 2.0, float(vp.get("height", 0)) / 2.0)
1103
+ except Exception:
1104
+ pos = (0.0, 0.0)
1105
+
1106
+ cursor_meta = build_human_cursor_path(
1107
+ start=(float(pos[0]), float(pos[1])),
1108
+ target=(float(center_x), float(center_y)),
1109
+ policy=cursor_policy,
1110
+ )
1111
+ pts = cursor_meta.get("path", [])
1112
+ duration_ms_move = int(cursor_meta.get("duration_ms") or 0)
1113
+ per_step_s = (
1114
+ (duration_ms_move / max(1, len(pts))) / 1000.0 if duration_ms_move > 0 else 0.0
1115
+ )
1116
+ for p in pts:
1117
+ await browser.page.mouse.move(float(p["x"]), float(p["y"]))
1118
+ if per_step_s > 0:
1119
+ await asyncio.sleep(per_step_s)
1120
+ pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0)
1121
+ if pause_ms > 0:
1122
+ await asyncio.sleep(pause_ms / 1000.0)
1123
+
1124
+ await browser.page.mouse.click(center_x, center_y)
1125
+ setattr(browser, "_sentience_cursor_pos", (float(center_x), float(center_y)))
1126
+ success = True
1127
+ except Exception as e:
1128
+ success = False
1129
+ error_msg = str(e)
1130
+
1131
+ # Wait a bit for navigation/DOM updates
1132
+ await browser.page.wait_for_timeout(500)
1133
+
1134
+ duration_ms = int((time.time() - start_time) * 1000)
1135
+ url_after = browser.page.url
1136
+ url_changed = url_before != url_after
1137
+
1138
+ # Determine outcome
1139
+ outcome: str | None = None
1140
+ if url_changed:
1141
+ outcome = "navigated"
1142
+ elif success:
1143
+ outcome = "dom_updated"
1144
+ else:
1145
+ outcome = "error"
1146
+
1147
+ # Optional snapshot after
1148
+ snapshot_after: Snapshot | None = None
1149
+ if take_snapshot:
1150
+ snapshot_after = await snapshot_async(browser)
1151
+
1152
+ return ActionResult(
1153
+ success=success,
1154
+ duration_ms=duration_ms,
1155
+ outcome=outcome,
1156
+ url_changed=url_changed,
1157
+ snapshot_after=snapshot_after,
1158
+ cursor=cursor_meta,
431
1159
  error=(
432
1160
  None
433
1161
  if success