sentienceapi 0.90.16__py3-none-any.whl → 0.98.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (90)
  1. sentience/__init__.py +120 -6
  2. sentience/_extension_loader.py +156 -1
  3. sentience/action_executor.py +217 -0
  4. sentience/actions.py +758 -30
  5. sentience/agent.py +806 -293
  6. sentience/agent_config.py +3 -0
  7. sentience/agent_runtime.py +840 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +89 -1141
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +372 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +483 -0
  21. sentience/base_agent.py +95 -0
  22. sentience/browser.py +678 -39
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cloud_tracing.py +507 -42
  26. sentience/constants.py +6 -0
  27. sentience/conversational_agent.py +77 -43
  28. sentience/cursor_policy.py +142 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +98 -2
  31. sentience/extension/background.js +56 -185
  32. sentience/extension/content.js +150 -287
  33. sentience/extension/injected_api.js +1088 -1368
  34. sentience/extension/manifest.json +1 -1
  35. sentience/extension/pkg/sentience_core.d.ts +22 -22
  36. sentience/extension/pkg/sentience_core.js +275 -433
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/release.json +47 -47
  39. sentience/failure_artifacts.py +241 -0
  40. sentience/formatting.py +9 -53
  41. sentience/inspector.py +183 -1
  42. sentience/integrations/__init__.py +6 -0
  43. sentience/integrations/langchain/__init__.py +12 -0
  44. sentience/integrations/langchain/context.py +18 -0
  45. sentience/integrations/langchain/core.py +326 -0
  46. sentience/integrations/langchain/tools.py +180 -0
  47. sentience/integrations/models.py +46 -0
  48. sentience/integrations/pydanticai/__init__.py +15 -0
  49. sentience/integrations/pydanticai/deps.py +20 -0
  50. sentience/integrations/pydanticai/toolset.py +468 -0
  51. sentience/llm_interaction_handler.py +191 -0
  52. sentience/llm_provider.py +765 -66
  53. sentience/llm_provider_utils.py +120 -0
  54. sentience/llm_response_builder.py +153 -0
  55. sentience/models.py +595 -3
  56. sentience/ordinal.py +280 -0
  57. sentience/overlay.py +109 -2
  58. sentience/protocols.py +228 -0
  59. sentience/query.py +67 -5
  60. sentience/read.py +95 -3
  61. sentience/recorder.py +223 -3
  62. sentience/schemas/trace_v1.json +128 -9
  63. sentience/screenshot.py +48 -2
  64. sentience/sentience_methods.py +86 -0
  65. sentience/snapshot.py +599 -55
  66. sentience/snapshot_diff.py +126 -0
  67. sentience/text_search.py +120 -5
  68. sentience/trace_event_builder.py +148 -0
  69. sentience/trace_file_manager.py +197 -0
  70. sentience/trace_indexing/index_schema.py +95 -7
  71. sentience/trace_indexing/indexer.py +105 -48
  72. sentience/tracer_factory.py +120 -9
  73. sentience/tracing.py +172 -8
  74. sentience/utils/__init__.py +40 -0
  75. sentience/utils/browser.py +46 -0
  76. sentience/{utils.py → utils/element.py} +3 -42
  77. sentience/utils/formatting.py +59 -0
  78. sentience/verification.py +618 -0
  79. sentience/visual_agent.py +2058 -0
  80. sentience/wait.py +68 -2
  81. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/METADATA +199 -40
  82. sentienceapi-0.98.0.dist-info/RECORD +92 -0
  83. sentience/extension/test-content.js +0 -4
  84. sentienceapi-0.90.16.dist-info/RECORD +0 -50
  85. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/WHEEL +0 -0
  86. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/entry_points.txt +0 -0
  87. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE +0 -0
  88. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-APACHE +0 -0
  89. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/licenses/LICENSE-MIT +0 -0
  90. {sentienceapi-0.90.16.dist-info → sentienceapi-0.98.0.dist-info}/top_level.txt +0 -0
sentience/query.py CHANGED
@@ -3,7 +3,7 @@ Query engine v1 - semantic selector matching
3
3
  """
4
4
 
5
5
  import re
6
- from typing import Any
6
+ from typing import Any, Optional
7
7
 
8
8
  from .models import Element, Snapshot
9
9
 
@@ -52,16 +52,28 @@ def parse_selector(selector: str) -> dict[str, Any]: # noqa: C901
52
52
  query["visible"] = False
53
53
  elif op == "~":
54
54
  # Substring match (case-insensitive)
55
- if key == "text" or key == "name":
55
+ if key == "text":
56
56
  query["text_contains"] = value
57
+ elif key == "name":
58
+ query["name_contains"] = value
59
+ elif key == "value":
60
+ query["value_contains"] = value
57
61
  elif op == "^=":
58
62
  # Prefix match
59
- if key == "text" or key == "name":
63
+ if key == "text":
60
64
  query["text_prefix"] = value
65
+ elif key == "name":
66
+ query["name_prefix"] = value
67
+ elif key == "value":
68
+ query["value_prefix"] = value
61
69
  elif op == "$=":
62
70
  # Suffix match
63
- if key == "text" or key == "name":
71
+ if key == "text":
64
72
  query["text_suffix"] = value
73
+ elif key == "name":
74
+ query["name_suffix"] = value
75
+ elif key == "value":
76
+ query["value_suffix"] = value
65
77
  elif op == ">":
66
78
  # Greater than
67
79
  if is_numeric:
@@ -116,8 +128,14 @@ def parse_selector(selector: str) -> dict[str, Any]: # noqa: C901
116
128
  query["visible"] = value.lower() == "true"
117
129
  elif key == "tag":
118
130
  query["tag"] = value
119
- elif key == "name" or key == "text":
131
+ elif key == "text":
120
132
  query["text"] = value
133
+ elif key == "name":
134
+ query["name"] = value
135
+ elif key == "value":
136
+ query["value"] = value
137
+ elif key in ("checked", "disabled", "expanded"):
138
+ query[key] = value.lower() == "true"
121
139
  elif key == "importance" and is_numeric:
122
140
  query["importance"] = numeric_value
123
141
  elif key.startswith("attr."):
@@ -192,6 +210,50 @@ def match_element(element: Element, query: dict[str, Any]) -> bool: # noqa: C90
192
210
  if not element.text.lower().endswith(query["text_suffix"].lower()):
193
211
  return False
194
212
 
213
+ # Name matching (best-effort; fallback to text for backward compatibility)
214
+ name_val = element.name or element.text or ""
215
+ if "name" in query:
216
+ if not name_val or name_val != query["name"]:
217
+ return False
218
+ if "name_contains" in query:
219
+ if not name_val or query["name_contains"].lower() not in name_val.lower():
220
+ return False
221
+ if "name_prefix" in query:
222
+ if not name_val or not name_val.lower().startswith(query["name_prefix"].lower()):
223
+ return False
224
+ if "name_suffix" in query:
225
+ if not name_val or not name_val.lower().endswith(query["name_suffix"].lower()):
226
+ return False
227
+
228
+ # Value matching (inputs/textarea/select)
229
+ if "value" in query:
230
+ if element.value is None or element.value != query["value"]:
231
+ return False
232
+ if "value_contains" in query:
233
+ if element.value is None or query["value_contains"].lower() not in element.value.lower():
234
+ return False
235
+ if "value_prefix" in query:
236
+ if element.value is None or not element.value.lower().startswith(
237
+ query["value_prefix"].lower()
238
+ ):
239
+ return False
240
+ if "value_suffix" in query:
241
+ if element.value is None or not element.value.lower().endswith(
242
+ query["value_suffix"].lower()
243
+ ):
244
+ return False
245
+
246
+ # State matching (best-effort)
247
+ if "checked" in query:
248
+ if (element.checked is True) != query["checked"]:
249
+ return False
250
+ if "disabled" in query:
251
+ if (element.disabled is True) != query["disabled"]:
252
+ return False
253
+ if "expanded" in query:
254
+ if (element.expanded is True) != query["expanded"]:
255
+ return False
256
+
195
257
  # Importance filtering
196
258
  if "importance" in query:
197
259
  if element.importance != query["importance"]:
sentience/read.py CHANGED
@@ -4,14 +4,15 @@ Read page content - supports raw HTML, text, and markdown formats
4
4
 
5
5
  from typing import Literal
6
6
 
7
- from .browser import SentienceBrowser
7
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
8
+ from .models import ReadResult
8
9
 
9
10
 
10
11
  def read(
11
12
  browser: SentienceBrowser,
12
13
  output_format: Literal["raw", "text", "markdown"] = "raw",
13
14
  enhance_markdown: bool = True,
14
- ) -> dict:
15
+ ) -> ReadResult:
15
16
  """
16
17
  Read page content as raw HTML, text, or markdown
17
18
 
@@ -93,4 +94,95 @@ def read(
93
94
  {"format": output_format},
94
95
  )
95
96
 
96
- return result
97
+ # Convert dict result to ReadResult model
98
+ return ReadResult(**result)
99
+
100
+
101
+ async def read_async(
102
+ browser: AsyncSentienceBrowser,
103
+ output_format: Literal["raw", "text", "markdown"] = "raw",
104
+ enhance_markdown: bool = True,
105
+ ) -> ReadResult:
106
+ """
107
+ Read page content as raw HTML, text, or markdown (async)
108
+
109
+ Args:
110
+ browser: AsyncSentienceBrowser instance
111
+ output_format: Output format - "raw" (default, returns HTML for external processing),
112
+ "text" (plain text), or "markdown" (lightweight or enhanced markdown).
113
+ enhance_markdown: If True and output_format is "markdown", uses markdownify for better conversion.
114
+ If False, uses the extension's lightweight markdown converter.
115
+
116
+ Returns:
117
+ dict with:
118
+ - status: "success" or "error"
119
+ - url: Current page URL
120
+ - format: "raw", "text", or "markdown"
121
+ - content: Page content as string
122
+ - length: Content length in characters
123
+ - error: Error message if status is "error"
124
+
125
+ Examples:
126
+ # Get raw HTML (default) - can be used with markdownify for better conversion
127
+ result = await read_async(browser)
128
+ html_content = result["content"]
129
+
130
+ # Get high-quality markdown (uses markdownify internally)
131
+ result = await read_async(browser, output_format="markdown")
132
+ markdown = result["content"]
133
+
134
+ # Get plain text
135
+ result = await read_async(browser, output_format="text")
136
+ text = result["content"]
137
+ """
138
+ if not browser.page:
139
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
140
+
141
+ if output_format == "markdown" and enhance_markdown:
142
+ # Get raw HTML from the extension first
143
+ raw_html_result = await browser.page.evaluate(
144
+ """
145
+ (options) => {
146
+ return window.sentience.read(options);
147
+ }
148
+ """,
149
+ {"format": "raw"},
150
+ )
151
+
152
+ if raw_html_result.get("status") == "success":
153
+ html_content = raw_html_result["content"]
154
+ try:
155
+ # Use markdownify for enhanced markdown conversion
156
+ from markdownify import MarkdownifyError, markdownify
157
+
158
+ markdown_content = markdownify(html_content, heading_style="ATX", wrap=True)
159
+ return {
160
+ "status": "success",
161
+ "url": raw_html_result["url"],
162
+ "format": "markdown",
163
+ "content": markdown_content,
164
+ "length": len(markdown_content),
165
+ }
166
+ except ImportError:
167
+ print(
168
+ "Warning: 'markdownify' not installed. Install with 'pip install markdownify' for enhanced markdown. Falling back to extension's markdown."
169
+ )
170
+ except MarkdownifyError as e:
171
+ print(f"Warning: markdownify failed ({e}), falling back to extension's markdown.")
172
+ except Exception as e:
173
+ print(
174
+ f"Warning: An unexpected error occurred with markdownify ({e}), falling back to extension's markdown."
175
+ )
176
+
177
+ # If not enhanced markdown, or fallback, call extension with requested format
178
+ result = await browser.page.evaluate(
179
+ """
180
+ (options) => {
181
+ return window.sentience.read(options);
182
+ }
183
+ """,
184
+ {"format": output_format},
185
+ )
186
+
187
+ # Convert dict result to ReadResult model
188
+ return ReadResult(**result)
sentience/recorder.py CHANGED
@@ -4,11 +4,11 @@ Recorder - captures user actions into a trace
4
4
 
5
5
  import json
6
6
  from datetime import datetime
7
- from typing import Any
7
+ from typing import Any, Optional
8
8
 
9
- from .browser import SentienceBrowser
9
+ from .browser import AsyncSentienceBrowser, SentienceBrowser
10
10
  from .models import Element, Snapshot
11
- from .snapshot import snapshot
11
+ from .snapshot import snapshot, snapshot_async
12
12
 
13
13
 
14
14
  class TraceStep:
@@ -367,3 +367,223 @@ def record(browser: SentienceBrowser, capture_snapshots: bool = False) -> Record
367
367
  Recorder instance
368
368
  """
369
369
  return Recorder(browser, capture_snapshots=capture_snapshots)
370
+
371
+
372
+ class RecorderAsync:
373
+ """Recorder for capturing user actions (async)"""
374
+
375
+ def __init__(self, browser: AsyncSentienceBrowser, capture_snapshots: bool = False):
376
+ self.browser = browser
377
+ self.capture_snapshots = capture_snapshots
378
+ self.trace: Trace | None = None
379
+ self._active = False
380
+ self._mask_patterns: list[str] = [] # Patterns to mask (e.g., "password", "email")
381
+
382
+ async def start(self) -> None:
383
+ """Start recording"""
384
+ if not self.browser.page:
385
+ raise RuntimeError("Browser not started. Call await browser.start() first.")
386
+
387
+ self._active = True
388
+ start_url = self.browser.page.url
389
+ self.trace = Trace(start_url)
390
+
391
+ # Set up event listeners in the browser
392
+ self._setup_listeners()
393
+
394
+ def stop(self) -> None:
395
+ """Stop recording"""
396
+ self._active = False
397
+ self._cleanup_listeners()
398
+
399
+ def add_mask_pattern(self, pattern: str) -> None:
400
+ """Add a pattern to mask in recorded text (e.g., "password", "email")"""
401
+ self._mask_patterns.append(pattern.lower())
402
+
403
+ def _should_mask(self, text: str) -> bool:
404
+ """Check if text should be masked"""
405
+ text_lower = text.lower()
406
+ return any(pattern in text_lower for pattern in self._mask_patterns)
407
+
408
+ def _setup_listeners(self) -> None:
409
+ """Set up event listeners to capture actions"""
410
+ # Note: We'll capture actions through the SDK methods rather than DOM events
411
+ # This is cleaner and more reliable
412
+ pass
413
+
414
+ def _cleanup_listeners(self) -> None:
415
+ """Clean up event listeners"""
416
+ pass
417
+
418
+ async def _infer_selector(self, element_id: int) -> str | None: # noqa: C901
419
+ """
420
+ Infer a semantic selector for an element (async)
421
+
422
+ Uses heuristics to build a robust selector:
423
+ - role=... text~"..."
424
+ - If text empty: use name/aria-label/placeholder
425
+ - Include clickable=true when relevant
426
+ - Validate against snapshot (should match 1 element)
427
+ """
428
+ try:
429
+ # Take a snapshot to get element info
430
+ snap = await snapshot_async(self.browser)
431
+
432
+ # Find the element in the snapshot
433
+ element = None
434
+ for el in snap.elements:
435
+ if el.id == element_id:
436
+ element = el
437
+ break
438
+
439
+ if not element:
440
+ return None
441
+
442
+ # Build candidate selector
443
+ parts = []
444
+
445
+ # Add role
446
+ if element.role and element.role != "generic":
447
+ parts.append(f"role={element.role}")
448
+
449
+ # Add text if available
450
+ if element.text:
451
+ # Use contains match for text
452
+ text_part = element.text.replace('"', '\\"')[:50] # Limit length
453
+ parts.append(f'text~"{text_part}"')
454
+ else:
455
+ # Try to get name/aria-label/placeholder from DOM
456
+ try:
457
+ el = await self.browser.page.evaluate(
458
+ f"""
459
+ () => {{
460
+ const el = window.sentience_registry[{element_id}];
461
+ if (!el) return null;
462
+ return {{
463
+ name: el.name || null,
464
+ ariaLabel: el.getAttribute('aria-label') || null,
465
+ placeholder: el.placeholder || null
466
+ }};
467
+ }}
468
+ """
469
+ )
470
+
471
+ if el:
472
+ if el.get("name"):
473
+ parts.append(f'name="{el["name"]}"')
474
+ elif el.get("ariaLabel"):
475
+ parts.append(f'text~"{el["ariaLabel"]}"')
476
+ elif el.get("placeholder"):
477
+ parts.append(f'text~"{el["placeholder"]}"')
478
+ except Exception:
479
+ pass
480
+
481
+ # Add clickable if relevant
482
+ if element.visual_cues.is_clickable:
483
+ parts.append("clickable=true")
484
+
485
+ if not parts:
486
+ return None
487
+
488
+ selector = " ".join(parts)
489
+
490
+ # Validate selector - should match exactly 1 element
491
+ matches = [el for el in snap.elements if self._match_element(el, selector)]
492
+
493
+ if len(matches) == 1:
494
+ return selector
495
+ elif len(matches) > 1:
496
+ # Add more constraints (importance threshold, near-center)
497
+ # For now, just return the selector with a note
498
+ return selector
499
+ else:
500
+ # Selector doesn't match - return None (will use element_id)
501
+ return None
502
+
503
+ except Exception:
504
+ return None
505
+
506
+ def _match_element(self, element: Element, selector: str) -> bool:
507
+ """Simple selector matching (basic implementation)"""
508
+ # This is a simplified version - in production, use the full query engine
509
+ from .query import match_element, parse_selector
510
+
511
+ try:
512
+ query_dict = parse_selector(selector)
513
+ return match_element(element, query_dict)
514
+ except Exception:
515
+ return False
516
+
517
+ def record_navigation(self, url: str) -> None:
518
+ """Record a navigation event"""
519
+ if self._active and self.trace:
520
+ self.trace.add_navigation(url)
521
+
522
+ async def record_click(self, element_id: int, selector: str | None = None) -> None:
523
+ """Record a click event with smart selector inference (async)"""
524
+ if self._active and self.trace:
525
+ # If no selector provided, try to infer one
526
+ if selector is None:
527
+ selector = await self._infer_selector(element_id)
528
+
529
+ # Optionally capture snapshot
530
+ if self.capture_snapshots:
531
+ try:
532
+ snap = await snapshot_async(self.browser)
533
+ step = TraceStep(
534
+ ts=int((datetime.now() - self.trace._start_time).total_seconds() * 1000),
535
+ type="click",
536
+ element_id=element_id,
537
+ selector=selector,
538
+ snapshot=snap,
539
+ )
540
+ self.trace.add_step(step)
541
+ except Exception:
542
+ # If snapshot fails, just record without it
543
+ self.trace.add_click(element_id, selector)
544
+ else:
545
+ self.trace.add_click(element_id, selector)
546
+
547
+ async def record_type(self, element_id: int, text: str, selector: str | None = None) -> None:
548
+ """Record a type event with smart selector inference (async)"""
549
+ if self._active and self.trace:
550
+ # If no selector provided, try to infer one
551
+ if selector is None:
552
+ selector = await self._infer_selector(element_id)
553
+
554
+ mask = self._should_mask(text)
555
+ self.trace.add_type(element_id, text, selector, mask=mask)
556
+
557
+ def record_press(self, key: str) -> None:
558
+ """Record a key press event"""
559
+ if self._active and self.trace:
560
+ self.trace.add_press(key)
561
+
562
+ def save(self, filepath: str) -> None:
563
+ """Save trace to file"""
564
+ if not self.trace:
565
+ raise RuntimeError("No trace to save. Start recording first.")
566
+ self.trace.save(filepath)
567
+
568
+ async def __aenter__(self):
569
+ """Context manager entry"""
570
+ await self.start()
571
+ return self
572
+
573
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
574
+ """Context manager exit"""
575
+ self.stop()
576
+
577
+
578
+ def record_async(browser: AsyncSentienceBrowser, capture_snapshots: bool = False) -> RecorderAsync:
579
+ """
580
+ Create a recorder instance (async)
581
+
582
+ Args:
583
+ browser: AsyncSentienceBrowser instance
584
+ capture_snapshots: Whether to capture snapshots at each step
585
+
586
+ Returns:
587
+ RecorderAsync instance
588
+ """
589
+ return RecorderAsync(browser, capture_snapshots=capture_snapshots)
sentience/schemas/trace_v1.json CHANGED
@@ -13,7 +13,7 @@
13
13
  },
14
14
  "type": {
15
15
  "type": "string",
16
- "enum": ["run_start", "step_start", "snapshot_taken", "llm_called", "action_executed", "verification", "recovery", "step_end", "run_end", "error"],
16
+ "enum": ["run_start", "step_start", "snapshot", "snapshot_taken", "llm_called", "llm_response", "action", "action_executed", "verification", "recovery", "step_end", "run_end", "error"],
17
17
  "description": "Event type"
18
18
  },
19
19
  "ts": {
@@ -64,15 +64,61 @@
64
64
  }
65
65
  },
66
66
  {
67
- "description": "snapshot_taken data",
68
- "required": ["step_id", "snapshot_digest"],
67
+ "description": "snapshot or snapshot_taken data",
69
68
  "properties": {
70
- "step_id": {"type": "string"},
69
+ "step_id": {"type": ["string", "null"]},
71
70
  "snapshot_id": {"type": ["string", "null"]},
72
71
  "snapshot_digest": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},
73
72
  "snapshot_digest_loose": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},
74
73
  "url": {"type": ["string", "null"]},
75
- "element_count": {"type": "integer"}
74
+ "element_count": {"type": "integer"},
75
+ "timestamp": {"type": ["string", "null"]},
76
+ "elements": {
77
+ "type": "array",
78
+ "items": {
79
+ "type": "object",
80
+ "properties": {
81
+ "id": {"type": "integer"},
82
+ "role": {"type": "string"},
83
+ "text": {"type": ["string", "null"]},
84
+ "importance": {"type": "number"},
85
+ "importance_score": {"type": "number"},
86
+ "bbox": {
87
+ "type": "object",
88
+ "properties": {
89
+ "x": {"type": "number"},
90
+ "y": {"type": "number"},
91
+ "width": {"type": "number"},
92
+ "height": {"type": "number"}
93
+ },
94
+ "required": ["x", "y", "width", "height"]
95
+ },
96
+ "visual_cues": {
97
+ "type": "object",
98
+ "properties": {
99
+ "is_primary": {"type": "boolean"},
100
+ "is_clickable": {"type": "boolean"},
101
+ "background_color_name": {"type": ["string", "null"]}
102
+ }
103
+ },
104
+ "in_viewport": {"type": "boolean"},
105
+ "is_occluded": {"type": "boolean"},
106
+ "z_index": {"type": "integer"},
107
+ "rerank_index": {"type": ["integer", "null"]},
108
+ "heuristic_index": {"type": ["integer", "null"]},
109
+ "ml_probability": {"type": ["number", "null"]},
110
+ "ml_score": {"type": ["number", "null"]},
111
+ "diff_status": {
112
+ "type": ["string", "null"],
113
+ "enum": ["ADDED", "REMOVED", "MODIFIED", "MOVED", null],
114
+ "description": "Diff status for Diff Overlay feature. ADDED: new element, REMOVED: element was removed, MODIFIED: element changed, MOVED: element position changed, null: no change"
115
+ }
116
+ },
117
+ "required": ["id", "role", "importance", "bbox", "visual_cues"]
118
+ }
119
+ },
120
+ "screenshot_base64": {"type": ["string", "null"]},
121
+ "screenshot_format": {"type": ["string", "null"], "enum": ["png", "jpeg", null]}
76
122
  }
77
123
  },
78
124
  {
@@ -119,7 +165,15 @@
119
165
  "required": ["response_text", "response_hash"],
120
166
  "properties": {
121
167
  "response_text": {"type": "string"},
122
- "response_hash": {"type": "string"}
168
+ "response_hash": {"type": "string"},
169
+ "usage": {
170
+ "type": "object",
171
+ "properties": {
172
+ "prompt_tokens": {"type": "integer"},
173
+ "completion_tokens": {"type": "integer"},
174
+ "total_tokens": {"type": "integer"}
175
+ }
176
+ }
123
177
  }
124
178
  },
125
179
  "action": {
@@ -145,7 +199,17 @@
145
199
  "text": {"type": "string"},
146
200
  "key": {"type": "string"},
147
201
  "url_changed": {"type": ["boolean", "null"]},
148
- "duration_ms": {"type": "integer"}
202
+ "duration_ms": {"type": "integer"},
203
+ "error": {"type": ["string", "null"]},
204
+ "bounding_box": {
205
+ "type": "object",
206
+ "properties": {
207
+ "x": {"type": "number"},
208
+ "y": {"type": "number"},
209
+ "width": {"type": "number"},
210
+ "height": {"type": "number"}
211
+ }
212
+ }
149
213
  }
150
214
  },
151
215
  "post": {
@@ -162,7 +226,48 @@
162
226
  "properties": {
163
227
  "policy": {"type": "string"},
164
228
  "passed": {"type": "boolean"},
165
- "signals": {"type": "object"}
229
+ "signals": {
230
+ "type": "object",
231
+ "properties": {
232
+ "url_changed": {"type": "boolean"},
233
+ "error": {"type": ["string", "null"]},
234
+ "elements_found": {
235
+ "type": "array",
236
+ "items": {
237
+ "type": "object",
238
+ "properties": {
239
+ "label": {"type": "string"},
240
+ "bounding_box": {
241
+ "type": "object",
242
+ "properties": {
243
+ "x": {"type": "number"},
244
+ "y": {"type": "number"},
245
+ "width": {"type": "number"},
246
+ "height": {"type": "number"}
247
+ }
248
+ }
249
+ }
250
+ }
251
+ },
252
+ "assertions": {
253
+ "type": "array",
254
+ "description": "Assertion results from agent verification loop",
255
+ "items": {
256
+ "type": "object",
257
+ "required": ["label", "passed"],
258
+ "properties": {
259
+ "label": {"type": "string", "description": "Human-readable assertion label"},
260
+ "passed": {"type": "boolean", "description": "Whether the assertion passed"},
261
+ "required": {"type": "boolean", "description": "If true, assertion gates step success"},
262
+ "reason": {"type": "string", "description": "Explanation (especially when failed)"},
263
+ "details": {"type": "object", "description": "Additional structured data for debugging"}
264
+ }
265
+ }
266
+ },
267
+ "task_done": {"type": "boolean", "description": "True if task completion assertion passed"},
268
+ "task_done_label": {"type": "string", "description": "Label of the task completion assertion"}
269
+ }
270
+ }
166
271
  }
167
272
  },
168
273
  "recovery": {
@@ -182,6 +287,15 @@
182
287
  "properties": {
183
288
  "step_id": {"type": "string"},
184
289
  "passed": {"type": "boolean"},
290
+ "kind": {
291
+ "type": "string",
292
+ "enum": ["assert", "task_done"],
293
+ "description": "Type of verification event"
294
+ },
295
+ "label": {"type": "string", "description": "Human-readable label for the assertion"},
296
+ "required": {"type": "boolean", "description": "If true, assertion gates step success"},
297
+ "reason": {"type": "string", "description": "Explanation (especially when failed)"},
298
+ "details": {"type": "object", "description": "Additional structured data for debugging"},
185
299
  "signals": {"type": "object"}
186
300
  }
187
301
  },
@@ -198,7 +312,12 @@
198
312
  "description": "run_end data",
199
313
  "required": ["steps"],
200
314
  "properties": {
201
- "steps": {"type": "integer"}
315
+ "steps": {"type": "integer"},
316
+ "status": {
317
+ "type": "string",
318
+ "enum": ["success", "failure", "partial", "unknown"],
319
+ "description": "Final execution status"
320
+ }
202
321
  }
203
322
  },
204
323
  {