browserwright 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. browserwright/__init__.py +33 -0
  2. browserwright/__main__.py +6 -0
  3. browserwright/_executor/__init__.py +47 -0
  4. browserwright/_executor/__main__.py +9 -0
  5. browserwright/_executor/client.py +127 -0
  6. browserwright/_executor/process.py +652 -0
  7. browserwright/_executor/protocol.py +152 -0
  8. browserwright/api.py +66 -0
  9. browserwright/cdp.py +285 -0
  10. browserwright/cli.py +741 -0
  11. browserwright/daemon/__init__.py +8 -0
  12. browserwright/daemon/_ipc.py +444 -0
  13. browserwright/daemon/active_tab.py +183 -0
  14. browserwright/daemon/auth.py +395 -0
  15. browserwright/daemon/backends/__init__.py +59 -0
  16. browserwright/daemon/backends/base.py +120 -0
  17. browserwright/daemon/backends/cloud.py +222 -0
  18. browserwright/daemon/backends/env.py +119 -0
  19. browserwright/daemon/backends/extension.py +185 -0
  20. browserwright/daemon/backends/rdp.py +214 -0
  21. browserwright/daemon/cli.py +1437 -0
  22. browserwright/daemon/config.py +380 -0
  23. browserwright/daemon/doctor.py +179 -0
  24. browserwright/daemon/errors.py +34 -0
  25. browserwright/daemon/launch_chrome.py +353 -0
  26. browserwright/daemon/observability.py +181 -0
  27. browserwright/daemon/platforms.py +234 -0
  28. browserwright/daemon/resolver.py +72 -0
  29. browserwright/daemon/server/__init__.py +6 -0
  30. browserwright/daemon/server/daemon.py +229 -0
  31. browserwright/daemon/server/executor_registry.py +434 -0
  32. browserwright/daemon/server/extension_upstream.py +677 -0
  33. browserwright/daemon/server/facade.py +375 -0
  34. browserwright/daemon/server/facade_extension.py +969 -0
  35. browserwright/daemon/server/listener.py +1058 -0
  36. browserwright/daemon/server/proxy.py +1991 -0
  37. browserwright/daemon/server/relay.py +783 -0
  38. browserwright/daemon/server/state.py +432 -0
  39. browserwright/daemon/server/upstream.py +266 -0
  40. browserwright/daemon/userscripts.py +150 -0
  41. browserwright/discovery.py +213 -0
  42. browserwright/errors.py +177 -0
  43. browserwright/health.py +169 -0
  44. browserwright/install.py +628 -0
  45. browserwright/memory/__init__.py +15 -0
  46. browserwright/memory/_md.py +120 -0
  47. browserwright/memory/_yaml.py +217 -0
  48. browserwright/memory/global_mem.py +201 -0
  49. browserwright/memory/repl_mem.py +28 -0
  50. browserwright/memory/session_decisions.py +53 -0
  51. browserwright/memory/site_mem.py +381 -0
  52. browserwright/mode_b_client.py +590 -0
  53. browserwright/multitask.py +131 -0
  54. browserwright/output_schema.py +99 -0
  55. browserwright/primitives/__init__.py +67 -0
  56. browserwright/primitives/discovery_api.py +79 -0
  57. browserwright/primitives/http.py +42 -0
  58. browserwright/primitives/inspect.py +876 -0
  59. browserwright/primitives/interact.py +518 -0
  60. browserwright/primitives/page.py +556 -0
  61. browserwright/primitives/site.py +143 -0
  62. browserwright/release_install.py +466 -0
  63. browserwright/repl/__init__.py +6 -0
  64. browserwright/repl/_namespace.py +106 -0
  65. browserwright/repl/_smart_goto.py +236 -0
  66. browserwright/repl/inline.py +180 -0
  67. browserwright/repl/playwright_handle.py +449 -0
  68. browserwright/repl/snapshot.py +150 -0
  69. browserwright/session.py +229 -0
  70. browserwright/session_create.py +252 -0
  71. browserwright/session_ctx.py +24 -0
  72. browserwright/session_registry.py +133 -0
  73. browserwright/session_runtime.py +133 -0
  74. browserwright/site_skills_starter/github.com/SKILL.md +14 -0
  75. browserwright/site_skills_starter/github.com/memory.md +29 -0
  76. browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
  77. browserwright/site_skills_starter/google.com/SKILL.md +16 -0
  78. browserwright/site_skills_starter/google.com/memory.md +27 -0
  79. browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
  80. browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
  81. browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
  82. browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
  83. browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
  84. browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
  85. browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
  86. browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
  87. browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
  88. browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
  89. browserwright/skill_doc.py +140 -0
  90. browserwright/skill_runtime.md +194 -0
  91. browserwright/subscriptions.py +213 -0
  92. browserwright/task_runner.py +125 -0
  93. browserwright/version.py +117 -0
  94. browserwright-0.6.2.dist-info/METADATA +12 -0
  95. browserwright-0.6.2.dist-info/RECORD +98 -0
  96. browserwright-0.6.2.dist-info/WHEEL +5 -0
  97. browserwright-0.6.2.dist-info/entry_points.txt +3 -0
  98. browserwright-0.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,876 @@
1
+ """Page inspection: page_info / capture_screenshot / raw cdp, plus the two
2
+ stateless perception primitives ``snapshot`` (what can I act on + where) and
3
+ ``describe_page`` (what paints / styles this page).
4
+
5
+ Both perception primitives are single ``js()`` round-trips, return bounded /
6
+ truncated output, carry no ref store (coordinates feed straight into
7
+ ``click_at_xy``), and hardcode no site/selector/class.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import base64
12
+ import io
13
+ import json
14
+ import os
15
+ from pathlib import Path
16
+ from typing import Any, Optional
17
+
18
+ from ..session import current_session
19
+
20
+
21
def cdp(method: str, session_id: Optional[str] = None, **params) -> dict:
    """Send a raw CDP command through the current session's transport.

    When the caller gives no ``session_id`` and the session has a current
    target, the command is routed to that target by attaching to it first.
    Otherwise ``session_id`` is forwarded as given (``None`` included).
    """
    active = current_session()
    if session_id is None:
        target = active.current_target_id
        if target:
            session_id = active.cdp.attach(target)
    return active.cdp.send(method, session=session_id, **params)
27
+
28
+
29
def page_info() -> dict:
    """Return a small dict describing the current page.

    Keys: ``url``, ``title``, viewport size (``w``/``h``), scroll offset
    (``sx``/``sy``), document size (``pw``/``ph``) and ``ready``
    (``document.readyState``). Mirrors browser-harness shape.
    """
    from .interact import js  # avoid import cycle

    script = """
    return {
      url: location.href,
      title: document.title,
      w: window.innerWidth,
      h: window.innerHeight,
      sx: window.scrollX,
      sy: window.scrollY,
      pw: document.documentElement.scrollWidth,
      ph: document.documentElement.scrollHeight,
      ready: document.readyState
    }
    """
    return js(script)
46
+
47
+
48
def capture_screenshot(path: Optional[str] = None, *, full: bool = False,
                       max_dim: Optional[int] = None, annotate: bool = False):
    """Capture a PNG screenshot of the current page and save it to disk.

    Args:
        path: Destination file. When omitted, a new
            ``/tmp/browserwright-shot-<pid>-<i>.png`` is created, where ``i``
            is the first index whose file does not exist yet.
        full: Capture beyond the viewport (full-page shot).
        max_dim: When set, the PNG is handed to ``_downscale_png`` with this
            bound before it is written.
        annotate: Produce a **set-of-mark** capture: numbered ``[N]`` labels
            are overlaid on the page's interactive elements (the ones
            ``snapshot()`` reports). Each ``[N]`` maps to that element's
            center ``(x, y)`` — feed it straight into ``click_at_xy(x, y)``.
            This is coordinate-keyed, not ref-keyed: there is no element
            handle to store, the marks are just a visual index over the same
            coordinates ``snapshot()`` already returns.

    Returns:
        Without ``annotate``: the absolute path of the written PNG (``str``).
        With ``annotate``: a dict
        ``{"path": <png path>, "legend": [{"n", "role", "name", "x", "y"}, ...]}``
        plus a ``"mark_error"`` key when the overlay could not be drawn.
    """
    sess = current_session()
    sid = sess.cdp.attach(sess.current_target_id) if sess.current_target_id else None
    if sid is None:
        # No current target yet: current_page() is called for its side effect
        # (presumably it selects one — confirm in .page), then we attach.
        from .page import current_page
        current_page()
        sid = sess.cdp.attach(sess.current_target_id)

    legend: Optional[list] = None
    mark_error: Optional[str] = None
    if annotate:
        legend, mark_error = _draw_set_of_mark()

    try:
        params: dict[str, Any] = {"format": "png"}
        if full:
            params["captureBeyondViewport"] = True
        res = sess.cdp.send("Page.captureScreenshot", session=sid, **params)
        raw = base64.b64decode(res["data"])
    finally:
        # Always remove the overlay, even when the capture itself failed, so
        # the badges never stay on the live page.
        if annotate:
            _clear_set_of_mark()

    if max_dim:
        raw = _downscale_png(raw, max_dim=max_dim)

    if path:
        target = Path(path)
        target.write_bytes(raw)
    else:
        # Claim the default name atomically (exclusive-create) instead of
        # exists()-then-write, so two captures racing in the same process
        # cannot pick the same index and overwrite each other.
        i = 0
        while True:
            target = Path("/tmp") / f"browserwright-shot-{os.getpid()}-{i}.png"
            try:
                with open(target, "xb") as fh:
                    fh.write(raw)
                break
            except FileExistsError:
                i += 1

    abs_path = str(target.resolve())
    if annotate:
        out: dict = {"path": abs_path, "legend": legend or []}
        if mark_error:
            # The overlay failed to paint; the legend coords are still valid but
            # the agent must NOT assume numbered marks are visible on the image.
            out["mark_error"] = mark_error
        return out
    return abs_path
107
+
108
+
109
+ # ---------------------------------------------------------------------------
110
+ # B3: set-of-mark annotation. Overlay numbered [N] badges on the interactive
111
+ # nodes snapshot() reports, keyed to their center coordinates (no ref store).
112
+ # ---------------------------------------------------------------------------
113
+
114
# DOM id of the overlay container. Both snippets below are built from this one
# constant so the draw and clear code can never disagree about the id.
_MARK_CONTAINER_ID = "__bs_setofmark__"

# Draws one badge per legend entry. ``__NODES__`` is substituted by the caller
# with a JSON array of ``{n, x, y, ...}`` objects; ``__MARK_ID__`` is filled in
# here. Any previous overlay is removed first, and the container ignores
# pointer events so it does not intercept clicks.
_DRAW_MARK_JS = r"""
return (function(nodes){
  var prev = document.getElementById("__MARK_ID__");
  if (prev) prev.remove();
  var box = document.createElement("div");
  box.id = "__MARK_ID__";
  box.style.cssText = "position:fixed;left:0;top:0;width:0;height:0;z-index:2147483647;pointer-events:none";
  for (var i=0;i<nodes.length;i++){
    var n = nodes[i];
    var tag = document.createElement("div");
    tag.textContent = "" + n.n;
    tag.style.cssText =
      "position:fixed;transform:translate(-50%,-50%);left:"+n.x+"px;top:"+n.y+"px;"+
      "background:#ff0066;color:#fff;font:bold 12px/1 monospace;padding:2px 4px;"+
      "border-radius:3px;box-shadow:0 0 0 1px #fff;white-space:nowrap";
    box.appendChild(tag);
  }
  (document.body || document.documentElement).appendChild(box);
  return nodes.length;
})(__NODES__);
""".replace("__MARK_ID__", _MARK_CONTAINER_ID)

# Removes the overlay container if it is present.
_CLEAR_MARK_JS = (
    "var e=document.getElementById('%s'); if(e) e.remove(); return true;"
    % _MARK_CONTAINER_ID
)
142
+
143
+
144
def _draw_set_of_mark() -> tuple:
    """Build the set-of-mark legend and paint a numbered badge per entry.

    The legend comes from a single ``snapshot(text=False)`` call, so every
    ``[n]``'s ``(x, y)`` is exactly the center that snapshot reported (and
    that ``click_at_xy`` expects). Nothing site- or selector-specific is
    involved.

    Returns ``(legend, error)``: ``error`` is ``None`` when the overlay was
    drawn, otherwise a short ``"ExcType: message"`` string. The legend is
    returned in both cases because the coordinates remain usable even when
    the badges could not be painted (see capture_screenshot's
    ``mark_error``).
    """
    from .interact import js  # avoid import cycle

    snap = snapshot(text=False)
    found = snap.get("nodes", []) if isinstance(snap, dict) else []
    legend = [
        {
            "n": idx,
            "role": node.get("role"),
            "name": node.get("name"),
            "x": node.get("x"),
            "y": node.get("y"),
        }
        for idx, node in enumerate(found)
    ]
    overlay_js = _DRAW_MARK_JS.replace("__NODES__", json.dumps(legend))
    try:
        js(overlay_js)
    except Exception as exc:
        # Drawing is best-effort: report the failure instead of raising.
        return legend, f"{type(exc).__name__}: {exc}"
    return legend, None
178
+
179
+
180
def _clear_set_of_mark() -> None:
    """Remove the set-of-mark overlay from the page, ignoring any failure."""
    from .interact import js as run_js  # avoid import cycle

    try:
        run_js(_CLEAR_MARK_JS)
    except Exception:
        # Cleanup is best-effort; a failed removal must not mask the result
        # (or the error) of the capture that triggered it.
        return
187
+
188
+
189
def _downscale_png(data: bytes, *, max_dim: int) -> bytes:
    """Shrink a PNG so neither side exceeds ``max_dim`` pixels.

    Aspect ratio is preserved and images are never enlarged. The input bytes
    are returned unchanged when Pillow is not installed, when ``max_dim`` is
    not positive, or when the image already fits.

    Args:
        data: encoded PNG bytes.
        max_dim: upper bound, in pixels, for both width and height.

    Returns:
        PNG bytes: either ``data`` itself or a re-encoded, smaller image.
    """
    if max_dim <= 0:
        # A non-positive bound would yield a zero/negative target size.
        return data
    try:
        from PIL import Image
    except ImportError:
        # Pillow is optional; without it the full-size image is kept.
        return data
    im = Image.open(io.BytesIO(data))
    w, h = im.size
    if w <= 0 or h <= 0:
        return data
    scale = min(max_dim / w, max_dim / h, 1.0)
    if scale >= 1.0:
        return data
    # int() truncates: on a very elongated image the short side can round down
    # to 0, which is not a valid resize target. Keep at least one pixel.
    new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
    new = im.resize(new_size)
    buf = io.BytesIO()
    new.save(buf, format="PNG", optimize=True)
    return buf.getvalue()
203
+
204
+
205
+ # ---------------------------------------------------------------------------
206
+ # Perception primitive 1: snapshot() — interaction-oriented observation
207
+ # ---------------------------------------------------------------------------
208
+
209
# JavaScript body evaluated in the page by ``snapshot()``. ``__OPTS__`` is
# replaced with a JSON object (interactiveOnly, maxNodes, includeHref, scope,
# maxDepth). The script walks the DOM breadth-first from the scope roots and
# returns {url, title, viewport, count, truncated, nodes[]}, where each node
# carries role, tag, name and center (x, y) in top-level viewport coordinates.
_SNAPSHOT_JS = r"""
return (function(opts){
  var interactiveOnly = opts.interactiveOnly !== false; // default true
  var maxNodes = opts.maxNodes || 120;
  var includeHref = opts.includeHref !== false;
  var scopeSel = opts.scope || null;
  var maxDepth = opts.maxDepth || 0; // 0 = unbounded

  var INTERACTIVE_TAGS = {A:1,BUTTON:1,INPUT:1,SELECT:1,TEXTAREA:1,SUMMARY:1,OPTION:1};
  var INTERACTIVE_ROLES = {button:1,link:1,checkbox:1,radio:1,tab:1,menuitem:1,
    menuitemcheckbox:1,menuitemradio:1,switch:1,option:1,textbox:1,combobox:1,
    searchbox:1,slider:1,spinbutton:1,treeitem:1};
  var NAME_ROLES = {heading:1,img:1,alert:1,dialog:1}; // structural-but-named, kept when !interactiveOnly

  function trunc(s, n){ if(s==null) return null; s=(""+s).replace(/\s+/g," ").trim();
    return s.length>n ? s.slice(0,n)+"…" : s; }

  // Realm-independent element test. Nodes inside an iframe are instances of
  // that frame's own Element constructor, so "instanceof" against the top
  // window's Element is false for them and would drop every frame node.
  function isElement(el){ return !!el && el.nodeType===1; }

  // Computed style resolved through the element's own window, falling back
  // to the top window.
  function styleOf(el){
    var win = (el.ownerDocument && el.ownerDocument.defaultView) || window;
    return win.getComputedStyle(el);
  }

  function roleOf(el){
    var r = el.getAttribute && el.getAttribute("role");
    if(r) return r.toLowerCase().split(/\s+/)[0];
    var t = el.tagName;
    if(t==="A") return el.hasAttribute("href") ? "link" : "generic";
    if(t==="BUTTON") return "button";
    if(t==="SELECT") return "combobox";
    if(t==="TEXTAREA") return "textbox";
    if(t==="SUMMARY") return "summary";
    if(t==="INPUT"){
      var ty=(el.getAttribute("type")||"text").toLowerCase();
      if(ty==="checkbox") return "checkbox";
      if(ty==="radio") return "radio";
      if(ty==="button"||ty==="submit"||ty==="reset"||ty==="image") return "button";
      if(ty==="range") return "slider";
      return "textbox";
    }
    if(/^H[1-6]$/.test(t)) return "heading";
    if(t==="IMG") return "img";
    return "generic";
  }

  // Accessible name: aria-label > aria-labelledby > alt/value/placeholder >
  // visible text (trimmed). Cheap approximation of the a11y name algorithm.
  function nameOf(el){
    var al = el.getAttribute && el.getAttribute("aria-label");
    if(al) return al;
    var lb = el.getAttribute && el.getAttribute("aria-labelledby");
    if(lb){
      // Resolve ids in the element's own document so labels inside an
      // inlined iframe are found too.
      var ownerDoc = el.ownerDocument || document;
      var parts=[];
      lb.split(/\s+/).forEach(function(id){
        var n=ownerDoc.getElementById(id); if(n) parts.push(n.textContent||"");
      });
      if(parts.length) return parts.join(" ");
    }
    var t = el.tagName;
    if(t==="IMG") return el.getAttribute("alt")||"";
    if(t==="INPUT"){
      var ty=(el.getAttribute("type")||"text").toLowerCase();
      if(ty==="submit"||ty==="button"||ty==="reset") return el.value||"";
      return el.getAttribute("placeholder")||el.getAttribute("name")||"";
    }
    var title = el.getAttribute && el.getAttribute("title");
    var txt = (el.textContent||"").trim();
    if(txt) return txt;
    if(title) return title;
    return "";
  }

  function notHidden(el){
    var cs = styleOf(el);
    if(cs.visibility==="hidden" || cs.display==="none") return false;
    if(parseFloat(cs.opacity)===0) return false;
    return true;
  }

  function visible(el, r){
    if(!r) return false;
    if(r.width<1 || r.height<1) return false;
    if(r.bottom<0 || r.right<0) return false;
    if(r.top>(window.innerHeight||0) || r.left>(window.innerWidth||0)) return false;
    return notHidden(el);
  }

  function isInteractive(el, role){
    if(INTERACTIVE_TAGS[el.tagName]) return true;
    if(INTERACTIVE_ROLES[role]) return true;
    if(el.hasAttribute && el.hasAttribute("onclick")) return true;
    if(el.hasAttribute && el.hasAttribute("tabindex") &&
       el.getAttribute("tabindex")!=="-1") return true;
    var cs = styleOf(el);
    if(cs.cursor==="pointer" && el.children.length===0) return true;
    return false;
  }

  var roots = [];
  if(scopeSel){
    document.querySelectorAll(scopeSel).forEach(function(n){ roots.push(n); });
  } else {
    roots.push(document.body || document.documentElement);
  }

  var out = [];
  var truncated = false;
  var iframeCount = 0;

  function walk(root, depth, frameTag){
    if(out.length>=maxNodes){ truncated=true; return; }
    var stack = [];
    for(var i=0;i<root.children.length;i++) stack.push([root.children[i], depth]);
    // BFS-ish using a queue keeps shallow (more salient) nodes first.
    var qi=0;
    var queue = stack;
    while(qi<queue.length){
      if(out.length>=maxNodes){ truncated=true; return; }
      var pair = queue[qi++]; var el=pair[0]; var d=pair[1];
      if(!isElement(el)) continue;
      var role = roleOf(el);
      var keep = interactiveOnly ? isInteractive(el, role)
                                 : (isInteractive(el, role)||NAME_ROLES[role]||role==="heading");
      var r = el.getBoundingClientRect();
      if(keep && visible(el, r)){
        var entry = {
          role: role,
          tag: el.tagName.toLowerCase(),
          name: trunc(nameOf(el), 80),
          x: Math.round(r.left + r.width/2),
          y: Math.round(r.top + r.height/2),
        };
        if(frameTag) entry.frame = frameTag;
        var ty = el.getAttribute && el.getAttribute("type");
        if(el.tagName==="INPUT" && ty) entry.type = ty.toLowerCase();
        if(includeHref && el.tagName==="A" && el.getAttribute("href"))
          entry.href = trunc(el.href, 100);
        if(el.disabled || el.getAttribute && el.getAttribute("aria-disabled")==="true")
          entry.disabled = true;
        var checked = el.getAttribute && el.getAttribute("aria-checked");
        if(el.tagName==="INPUT" && (el.type==="checkbox"||el.type==="radio"))
          entry.checked = !!el.checked;
        else if(checked) entry.checked = checked;
        out.push(entry);
      }
      // Same-origin iframe: inline one level.
      if(el.tagName==="IFRAME" && !frameTag && iframeCount<3){
        try{
          var doc = el.contentDocument;
          if(doc && doc.body){
            iframeCount++;
            var fr = el.getBoundingClientRect();
            // Recurse but offset coords to top-level viewport. The frame's
            // content starts inside its border, hence clientLeft/clientTop.
            walkFrame(doc.body, "iframe#"+iframeCount,
                      fr.left + (el.clientLeft||0), fr.top + (el.clientTop||0));
          }
        }catch(e){ /* cross-origin: omit */ }
      }
      if(maxDepth && d>=maxDepth) continue;
      for(var j=0;j<el.children.length;j++) queue.push([el.children[j], d+1]);
    }
  }

  function walkFrame(body, frameTag, offX, offY){
    var fwin = body.ownerDocument && body.ownerDocument.defaultView;
    var fw = fwin ? fwin.innerWidth : Infinity;
    var fh = fwin ? fwin.innerHeight : Infinity;
    var tw = window.innerWidth||0, th = window.innerHeight||0;
    var queue=[]; for(var i=0;i<body.children.length;i++) queue.push(body.children[i]);
    var qi=0;
    while(qi<queue.length){
      if(out.length>=maxNodes){ truncated=true; return; }
      var el=queue[qi++]; if(!isElement(el)) continue;
      var role=roleOf(el);
      var keep = interactiveOnly ? isInteractive(el,role)
                                 : (isInteractive(el,role)||role==="heading");
      if(keep){
        var r=el.getBoundingClientRect();
        var cx=Math.round(offX + r.left + r.width/2);
        var cy=Math.round(offY + r.top + r.height/2);
        // Report only what can actually be seen and clicked: non-empty box,
        // inside the frame's own viewport, center inside the top-level
        // viewport, and not hidden by style.
        var vis = r.width>=1 && r.height>=1 &&
                  r.bottom>=0 && r.right>=0 && r.top<=fh && r.left<=fw &&
                  cx>=0 && cy>=0 && cx<=tw && cy<=th &&
                  notHidden(el);
        if(vis){
          out.push({
            role:role, tag:el.tagName.toLowerCase(), name:trunc(nameOf(el),80),
            x:cx,
            y:cy,
            frame:frameTag,
          });
        }
      }
      for(var j=0;j<el.children.length;j++) queue.push(el.children[j]);
    }
  }

  roots.forEach(function(rt){ walk(rt, 0, null); });

  return {
    url: location.href,
    title: document.title,
    viewport: {w: window.innerWidth, h: window.innerHeight},
    count: out.length,
    truncated: truncated,
    nodes: out,
  };
})(__OPTS__);
"""
396
+
397
+
398
def snapshot(*, interactive_only=True, max_nodes=120, max_depth=0,
             scope=None, include_href=True, text=True):
    """List the actionable elements in the viewport and where they are.

    Stateless and coordinate-based. Every node carries its role, accessible
    name, center ``(x, y)`` in top-level viewport coordinates (usable directly
    with ``click_at_xy``) and, where applicable, ``type``, ``href``,
    ``disabled`` and ``checked``. There is no ref store; scroll to reveal
    more.

    Args:
        interactive_only: keep only buttons/links/inputs/role-interactive
            nodes (default). ``False`` also keeps headings and named
            structural nodes.
        max_nodes: hard cap on returned nodes (bounds token cost).
        max_depth: DOM depth cap (0 = unbounded).
        scope: CSS selector restricting the scan to matching subtrees.
        include_href: include the resolved href for links.
        text: also add a compact text rendering under ``["text"]``.

    Returns:
        A dict with url, title, viewport, count, truncated, nodes[] and, when
        ``text`` is true, a ``text`` block with one
        ``[i] role "name" (x,y) attrs`` line per node.

    Limits: same-origin iframes are inlined one level (up to 3 frames);
    cross-origin iframes, shadow DOM and canvas-drawn UI are not traversed.
    Only viewport-visible nodes are returned.
    """
    from .interact import js  # avoid import cycle

    payload = json.dumps({
        "interactiveOnly": bool(interactive_only),
        "maxNodes": int(max_nodes),
        "maxDepth": int(max_depth),
        "includeHref": bool(include_href),
        "scope": scope,
    })
    res = js(_SNAPSHOT_JS.replace("__OPTS__", payload))
    if not (text and isinstance(res, dict)):
        return res

    rendered = []
    for idx, node in enumerate(res.get("nodes", [])):
        label = node.get("name")
        attrs = []
        if node.get("type"):
            attrs.append(f'type={node["type"]}')
        if node.get("disabled"):
            attrs.append("disabled")
        if "checked" in node:
            attrs.append(f'checked={node["checked"]}')
        if node.get("href"):
            attrs.append(f'href={node["href"]}')
        if node.get("frame"):
            attrs.append(node["frame"])
        suffix = (" " + " ".join(attrs)) if attrs else ""
        rendered.append(" ".join([
            f'[{idx}]',
            node.get("role", "?"),
            f'"{label}"' if label else '""',
            f'({node.get("x")},{node.get("y")}){suffix}',
        ]))

    # Copy before adding the rendering so the dict returned by js() is not mutated.
    result = dict(res)
    result["text"] = "\n".join(rendered)
    return result
458
+
459
+
460
+ # ---------------------------------------------------------------------------
461
+ # Perception primitive 2: describe_page() — visual / style-forensics
462
+ # ---------------------------------------------------------------------------
463
+
464
# JavaScript body for the style-forensics scan. ``__OPTS__`` is a placeholder
# for an options object read as maxNodes / maxVars / minAreaFrac /
# viewportOnly (presumably substituted as JSON by describe_page — confirm
# against its body). The script:
#   1. scans up to 20000 elements under <body>, skipping script/style/noscript
#      and display:none / visibility:hidden nodes;
#   2. keeps nodes with a style signal (background image, blend mode, filter,
#      backdrop filter, ::before/::after paint, z-index, overlay) or a
#      background color covering >= 20% of the viewport;
#   3. scores, sorts and truncates them to maxNodes;
#   4. collects custom properties declared on <html> (inline style, computed
#      style enumeration, readable stylesheet :root/html rules), capped at
#      maxVars;
#   5. returns {url, viewport, root, cssVars, cssVarCount, nodeCount,
#      truncated, nodes}.
_DESCRIBE_JS = r"""
return (function(opts){
  var maxNodes = opts.maxNodes || 40;
  var maxVars = opts.maxVars || 60;
  var minAreaFrac = opts.minAreaFrac || 0.03; // fraction of viewport area
  var viewportOnly = !!opts.viewportOnly; // S1: only rank nodes that
                                          // intersect the viewport

  function trunc(s, n){ if(s==null) return null; s=(""+s);
    return s.length>n ? s.slice(0,n)+"…("+s.length+")" : s; }
  function classList(el){
    var c = (el.className && el.className.baseVal!=null) ? el.className.baseVal
          : (typeof el.className==="string" ? el.className : "");
    c=(c||"").trim();
    if(!c) return null;
    var parts=c.split(/\s+/).slice(0,6);
    return trunc(parts.join(" "), 80);
  }

  var vw = window.innerWidth, vh = window.innerHeight;
  var vArea = Math.max(1, vw*vh);

  function pseudo(el, which){
    var cs = getComputedStyle(el, which);
    if(!cs) return null;
    var bg = cs.backgroundImage;
    var content = cs.content;
    var hasBg = bg && bg!=="none";
    var hasContent = content && content!=="none" && content!=="normal" && content!=='""';
    if(!hasBg && !hasContent) return null;
    var o = {};
    if(hasBg) o.backgroundImage = trunc(bg, 120);
    if(hasContent) o.content = trunc(content, 40);
    var mb = cs.mixBlendMode; if(mb && mb!=="normal") o.mixBlendMode = mb;
    return o;
  }

  // Visible (viewport-clamped) area — a 14000px-tall wrapper is NOT salient;
  // what paints the *screen* is. We clamp the rect to the viewport so plain
  // full-document wrappers don't dominate the ranking by raw height.
  function visibleAreaFrac(r){
    var l=Math.max(0,r.left), t=Math.max(0,r.top);
    var rr=Math.min(vw,r.right), bb=Math.min(vh,r.bottom);
    var w=Math.max(0,rr-l), h=Math.max(0,bb-t);
    return (w*h)/vArea;
  }
  function intersectsViewport(r){
    return r.bottom>0 && r.right>0 && r.top<vh && r.left<vw &&
           r.width>0 && r.height>0;
  }

  // Salience scoring: visible area, fixed/absolute overlays, high z,
  // non-trivial background, blend/filter/backdrop, pseudo-elements.
  var cands = [];
  var all = document.querySelectorAll("body *");
  // Hard scan cap: never walk a pathologically large DOM node-by-node
  // (getComputedStyle + getBoundingClientRect per element) — on an
  // infinite-scroll/huge page that can blow the CDP eval timeout. 20k elements
  // covers any real page's salient layer; salient nodes are then ranked + capped.
  var scanN = Math.min(all.length, 20000);
  for(var i=0;i<scanN;i++){
    var el = all[i];
    if(el.tagName==="SCRIPT"||el.tagName==="STYLE"||el.tagName==="NOSCRIPT") continue;
    var cs = getComputedStyle(el);
    if(cs.display==="none"||cs.visibility==="hidden") continue;
    var r = el.getBoundingClientRect();
    // S1 viewport_only: skip nodes that don't intersect the viewport at all.
    // Off-screen style-bearing nodes are noise when the agent is asking
    // "what paints the screen I'm looking at".
    if(viewportOnly && !intersectsViewport(r)) continue;
    var rawFrac = (Math.max(0,r.width)*Math.max(0,r.height))/vArea;
    var visFrac = visibleAreaFrac(r);
    var pos = cs.position;
    var z = parseInt(cs.zIndex, 10); if(isNaN(z)) z=null;
    var bgImg = cs.backgroundImage;
    var bgCol = cs.backgroundColor;
    var blend = cs.mixBlendMode;
    var filter = cs.filter;
    var backdrop = cs.backdropFilter || cs.webkitBackdropFilter;

    var hasBgImg = bgImg && bgImg!=="none";
    var hasBgCol = bgCol && bgCol!=="rgba(0, 0, 0, 0)" && bgCol!=="transparent";
    var hasBlend = blend && blend!=="normal";
    var hasFilter = filter && filter!=="none";
    var hasBackdrop = backdrop && backdrop!=="none";
    var overlay = (pos==="fixed"||pos==="absolute") && visFrac>=0.1;
    var bef = pseudo(el, "::before");
    var aft = pseudo(el, "::after");

    // A "style signal" = this node visibly paints something beyond plain layout.
    var styleBearing = hasBgImg||hasBlend||hasFilter||hasBackdrop||bef||aft||
                       (hasBgCol && (overlay || z!=null)) || (z!=null&&z>=10);

    // Drop nodes that are neither style-bearing nor a meaningful background fill.
    // Plain structural wrappers (no style signal) are kept ONLY if they paint a
    // non-trivial background color over a big visible area.
    if(!styleBearing){
      if(!(hasBgCol && visFrac>=0.2)) continue;
    }
    if(visFrac<minAreaFrac && !styleBearing) continue;
    if(r.width<1 && r.height<1 && !bef && !aft) continue;

    var score = 0;
    score += visFrac*60; // visible coverage matters most
    if(overlay) score += 45;
    if(z!=null) score += Math.min(Math.max(z,0), 1000)/20;
    if(hasBgImg) score += 40; // gradients/textures are the usual answer
    if(hasBlend) score += 45;
    if(hasBackdrop) score += 30;
    if(hasFilter) score += 12;
    if(bef||aft) score += 25;
    if(hasBgCol && !styleBearing) score += visFrac*10; // plain fill: mild

    var node = {
      tag: el.tagName.toLowerCase(),
      cls: classList(el),
      rect: {x:Math.round(r.left), y:Math.round(r.top),
             w:Math.round(r.width), h:Math.round(r.height)},
      visFrac: Math.round(visFrac*1000)/1000,
      areaFrac: Math.round(rawFrac*1000)/1000,
      position: pos,
      zIndex: z,
    };
    if(hasBgImg) node.backgroundImage = trunc(bgImg, 140);
    if(hasBgCol) node.backgroundColor = bgCol;
    if(hasBlend) node.mixBlendMode = blend;
    if(hasFilter) node.filter = trunc(filter, 80);
    if(hasBackdrop) node.backdropFilter = trunc(backdrop, 80);
    if(bef) node.before = bef;
    if(aft) node.after = aft;
    node._score = score;
    cands.push(node);
  }

  cands.sort(function(a,b){ return b._score - a._score; });
  var truncated = cands.length > maxNodes;
  cands = cands.slice(0, maxNodes);
  cands.forEach(function(n){ delete n._score; });

  // :root / documentElement CSS custom properties, gathered from three
  // sources (most reliable first): inline html style attr, computed-style
  // enumeration (Chromium exposes custom props on the CSSStyleDeclaration),
  // then same-origin stylesheet :root/html rules.
  var vars = {};
  var nVars = 0;
  var rootEl = document.documentElement;
  var rootStyle = getComputedStyle(rootEl);
  var declared = {};
  try{
    // 1. inline style on <html> (frameworks set theme vars here).
    var inline = rootEl.style;
    for(var ii=0; ii<inline.length; ii++){
      var p0=inline[ii]; if(p0 && p0.indexOf("--")===0) declared[p0]=true;
    }
    // 2. computed style enumeration (Chromium lists --vars).
    for(var ci=0; ci<rootStyle.length; ci++){
      var p1=rootStyle[ci]; if(p1 && p1.indexOf("--")===0) declared[p1]=true;
    }
    // 3. same-origin stylesheet :root / html rules.
    for(var s=0;s<document.styleSheets.length;s++){
      var rules;
      try{ rules = document.styleSheets[s].cssRules; }catch(e){ continue; }
      if(!rules) continue;
      for(var ri=0;ri<rules.length;ri++){
        var rule = rules[ri];
        if(!rule.style || !rule.selectorText) continue;
        if(!/(^|,)\s*(:root|html)\b/.test(rule.selectorText)) continue;
        for(var pi=0;pi<rule.style.length;pi++){
          var prop = rule.style[pi];
          if(prop && prop.indexOf("--")===0) declared[prop]=true;
        }
      }
    }
  }catch(e){}
  var names = Object.keys(declared);
  for(var k=0;k<names.length && nVars<maxVars;k++){
    var v = rootStyle.getPropertyValue(names[k]).trim();
    if(v){ vars[names[k]] = trunc(v, 60); nVars++; }
  }

  var htmlCs = getComputedStyle(document.documentElement);
  var bodyCs = document.body ? getComputedStyle(document.body) : null;

  return {
    url: location.href,
    viewport: {w:vw, h:vh},
    root: {
      htmlBackground: trunc(htmlCs.background || htmlCs.backgroundColor, 120),
      htmlBackgroundColor: htmlCs.backgroundColor,
      bodyBackgroundImage: bodyCs ? trunc(bodyCs.backgroundImage,140) : null,
      bodyBackgroundColor: bodyCs ? bodyCs.backgroundColor : null,
      bodyBefore: document.body ? pseudo(document.body,"::before") : null,
      bodyAfter: document.body ? pseudo(document.body,"::after") : null,
    },
    cssVars: vars,
    cssVarCount: nVars,
    nodeCount: cands.length,
    truncated: truncated,
    nodes: cands,
  };
})(__OPTS__);
"""
666
+
667
+
668
def describe_page(*, max_nodes=40, max_vars=60, min_area_frac=0.03,
                  viewport_only=False):
    """Return a visual / style-forensics digest of the page in one round-trip.

    Answers "what paints or styles this page?". The ``snapshot``/a11y view
    leaves out decorative, non-interactive, style-bearing nodes on purpose;
    this call reports them instead: large-area / fixed / absolute overlays,
    high z-index and full-viewport nodes, and nodes with a non-trivial
    ``background-image``, non-transparent ``background-color``,
    ``mix-blend-mode``, ``filter`` or ``backdrop-filter`` — plus
    ``::before`` / ``::after`` background-image and content.

    The digest also carries the CSS custom properties visible on
    ``:root`` / ``<html>``. Variable names are gathered from the inline style
    on ``<html>``, from the computed-style enumeration, and from ``:root`` /
    ``html`` rules in readable stylesheets. The computed ``<html>`` /
    ``<body>`` backgrounds and the ``<body>`` pseudo-elements are included too.

    Args:
        max_nodes: upper bound on salient nodes returned (ranked by salience
            score). Coerced with ``int()``.
        max_vars: upper bound on CSS variables returned. Coerced with
            ``int()``.
        min_area_frac: nodes covering less than this fraction of the viewport
            are dropped unless they carry a style signal. Coerced with
            ``float()``.
        viewport_only: when True, only nodes intersecting the current
            viewport are ranked/returned. Off-screen style-bearing nodes
            (e.g. a gradient 8000px down) are noise if you only care about
            what is painted on screen right now; the default keeps them.
            Coerced with ``bool()``.

    Returns:
        Whatever ``js()`` yields for the injected script: a dict with ``url``,
        ``viewport``, ``root`` (html/body backgrounds + body pseudos),
        ``cssVars``, ``cssVarCount``, ``nodeCount``, ``truncated`` and
        ``nodes`` (each: tag, cls, rect, visFrac, areaFrac, position, zIndex,
        plus whichever style fields are non-trivial).

    Limits: stylesheets whose ``cssRules`` cannot be read (cross-origin) are
    skipped. Canvas/WebGL paint and shadow-DOM styles are not inspected.
    Computed backgrounds are post-cascade snapshots, not authored rules.
    """
    from .interact import js  # avoid import cycle

    # Keys are the camelCase option names the injected script reads.
    options = dict(
        maxNodes=int(max_nodes),
        maxVars=int(max_vars),
        minAreaFrac=float(min_area_frac),
        viewportOnly=bool(viewport_only),
    )
    # The script is an IIFE called with the __OPTS__ placeholder; swapping in
    # a JSON literal hands the options over without any string escaping.
    script = _DESCRIBE_JS.replace("__OPTS__", json.dumps(options))
    return js(script)
713
+
714
+
715
+ # ---------------------------------------------------------------------------
716
+ # Verification primitive: diff_snapshot() — did my action change the page?
717
+ # ---------------------------------------------------------------------------
718
+
719
# Attributes whose change (for a node of stable identity) we report as a
# "change". Keep this small + meaningful: an agent acts to toggle enablement,
# rename a control, swap a link target, or move/reveal something.
# Read by _node_attrs() to build the comparable view of a node and by
# diff_snapshot() to emit the per-attribute [old, new] pairs.
_DIFF_ATTRS = ("name", "disabled", "checked", "href", "type", "frame")
# A center that moves more than this many px (Chebyshev) counts as a "moved"
# change even when every reported attr is identical — surfaces show/relayout.
# Chebyshev distance here is max(|dx|, |dy|) between the two centers, so the
# threshold is exceeded as soon as either axis alone moves by more than this.
_DIFF_MOVE_PX = 24
726
+
727
+
728
+ def _diff_identity(node: dict, *, bucket: int = 32) -> tuple:
729
+ """Identity used to match a node across two snapshots.
730
+
731
+ role + accessible name + a coarse position bucket. role+name is the
732
+ semantic anchor (a "Submit" button stays the same control across a
733
+ re-render); the bucketed center disambiguates several same-role/same-name
734
+ nodes (e.g. three identical "Add" buttons in a list) without making the
735
+ identity so precise that a small relayout reads as remove+add. Bucket size
736
+ is intentionally coarse (``bucket`` px) so sub-bucket jitter is treated as
737
+ the *same* node and reported via the moved-attr path instead.
738
+ """
739
+ x = node.get("x")
740
+ y = node.get("y")
741
+ bx = int(x) // bucket if isinstance(x, (int, float)) else None
742
+ by = int(y) // bucket if isinstance(y, (int, float)) else None
743
+ return (node.get("role"), node.get("name") or "", bx, by)
744
+
745
+
746
def _node_attrs(node: dict) -> dict:
    """Project a node onto the attributes the diff compares.

    Returns a dict with exactly one entry per name in ``_DIFF_ATTRS``, in
    that order. An attribute the node does not have maps to ``None``, so an
    attribute appearing or disappearing reads as a plain value change.
    """
    view = {}
    for attr in _DIFF_ATTRS:
        view[attr] = node.get(attr)
    return view
750
+
751
+
752
+ def _slim(node: dict) -> dict:
753
+ """Compact node view for diff output: role/name/center + reported attrs
754
+ that are present. Keeps the summary cheap to read."""
755
+ out = {"role": node.get("role"), "name": node.get("name"),
756
+ "x": node.get("x"), "y": node.get("y")}
757
+ for k in ("disabled", "checked", "href", "type", "frame"):
758
+ if node.get(k) is not None:
759
+ out[k] = node.get(k)
760
+ return out
761
+
762
+
763
def diff_snapshot(before, after=None, *, max_items: int = 40, bucket: int = 32):
    """Did my action change the page? Cheap post-action verification: diff two
    ``snapshot()`` results and report what appeared, disappeared, or changed.

    Stateless by design — there is no stored "last snapshot". You pass the
    prior snapshot explicitly::

        before = snapshot()
        click_at_xy(x, y)
        diff_snapshot(before)          # fresh snapshot() taken internally
        # or diff_snapshot(before, after) with an explicit second snapshot

    **Compare like for like.** When ``after`` is omitted, the internal snapshot
    uses *default* args (``interactive_only=True``, ``max_nodes=120``, no
    ``scope``). If you captured ``before`` with non-default args (e.g.
    ``snapshot(interactive_only=False)`` or a ``scope``), pass an explicit
    ``after=snapshot(<same args>)`` — otherwise the two sides cover different
    node sets and the diff reports spurious added/removed nodes.

    Node identity for matching across the two snapshots is
    ``role + accessible name + a coarse position bucket`` (default 32px), as
    computed by ``_diff_identity``. The role+name pair is the semantic anchor;
    the position bucket only disambiguates several same-role/same-name nodes
    (e.g. repeated "Add" buttons). Movement inside one bucket keeps the
    identity and is reported as a *moved/changed* node; a node that crosses a
    bucket boundary is reported as a remove+add pair.

    Buckets:
        added     — identity present in ``after`` but not ``before``.
        removed   — identity present in ``before`` but not ``after``.
        changed   — same identity, but a reported attribute differs
                    (``disabled`` / ``checked`` / ``href`` / ``type`` /
                    ``frame``) or the center moved more than ~24px.
        unchanged — count of stable, attribute-identical nodes.

    Because the accessible name is part of the identity, a *renamed* control
    shows up as one removed + one added node, not as a ``name`` change. A
    ``name`` entry in ``changes`` can only appear when both names are falsy
    but not equal (e.g. ``None`` vs ``""``).

    Output order is deterministic: identities are visited in the order they
    first occur in ``before``, followed by identities that only occur in
    ``after`` (in ``after`` order). The ``max_items`` cut is therefore
    reproducible from run to run.

    Args:
        before: a dict previously returned by ``snapshot()``. Anything that is
            not a dict is treated as a snapshot with no nodes.
        after: a second ``snapshot()`` dict; if None, a fresh ``snapshot()`` is
            taken now (the common verify-after-action case).
        max_items: cap on entries in each of added/removed/changed (bounds token
            cost; the counts in ``summary`` are not capped). Negative values
            are treated as 0; ``None`` disables the cap.
        bucket: position-bucket size in px for identity disambiguation.

    Returns a dict: ``added[]`` (slim nodes), ``removed[]`` (slim nodes),
    ``changed[]`` ({role,name,x,y, changes:{attr:[old,new]}, moved?}),
    ``unchanged`` (int), and ``summary`` ("N added, M removed, K changed").

    Limits: identity collides when several nodes truly share role+name within
    the same position bucket (they net out by count; individual pairing is by
    order of appearance). Inherits ``snapshot()``'s scope: viewport-visible
    nodes only, same-origin iframes one level, no shadow DOM / canvas.
    """
    if after is None:
        after = snapshot()

    def nodes_of(snap) -> list:
        # Best-effort extraction: a non-dict snapshot, a missing or None
        # "nodes" value, and stray non-dict entries all degrade to "no node"
        # instead of raising halfway through the diff.
        raw = snap.get("nodes") if isinstance(snap, dict) else None
        return [n for n in (raw or []) if isinstance(n, dict)]

    # Build identity -> list of nodes (lists handle duplicate identities).
    def index(nodes) -> dict:
        idx: dict[tuple, list] = {}
        for n in nodes:
            idx.setdefault(_diff_identity(n, bucket=bucket), []).append(n)
        return idx

    bi = index(nodes_of(before))
    ai = index(nodes_of(after))

    # Visit identities in first-seen order. Iterating a set union here would
    # make the result order (and which entries survive the max_items cut)
    # depend on string hash randomization, i.e. differ between processes.
    ordered_ids = list(bi)
    ordered_ids.extend(ident for ident in ai if ident not in bi)

    added: list[dict] = []
    removed: list[dict] = []
    changed: list[dict] = []
    unchanged = 0

    for ident in ordered_ids:
        b_list = bi.get(ident, [])
        a_list = ai.get(ident, [])
        # Pair up min(len) nodes of this identity; surplus is added/removed.
        paired = min(len(b_list), len(a_list))
        for bn, an in zip(b_list, a_list):
            ba, aa = _node_attrs(bn), _node_attrs(an)
            changes = {k: [ba[k], aa[k]] for k in _DIFF_ATTRS if ba[k] != aa[k]}
            bx, by = bn.get("x"), bn.get("y")
            ax, ay = an.get("x"), an.get("y")
            # Movement is only judged when all four coordinates are numeric.
            moved = (
                all(isinstance(v, (int, float)) for v in (bx, by, ax, ay))
                and max(abs(ax - bx), abs(ay - by)) > _DIFF_MOVE_PX
            )
            if not (changes or moved):
                unchanged += 1
                continue
            # Report the node at its *after* position.
            entry = {"role": an.get("role"), "name": an.get("name"),
                     "x": an.get("x"), "y": an.get("y")}
            if changes:
                entry["changes"] = changes
            if moved:
                entry["moved"] = [[bx, by], [ax, ay]]
            changed.append(entry)
        # Surplus after-nodes = added; surplus before-nodes = removed.
        added.extend(_slim(an) for an in a_list[paired:])
        removed.extend(_slim(bn) for bn in b_list[paired:])

    summary = f"{len(added)} added, {len(removed)} removed, {len(changed)} changed"

    # A negative cap would slice from the tail (dropping only the last few
    # entries); clamp it to "return nothing". None keeps its slice meaning of
    # "no cap".
    limit = None if max_items is None else max(max_items, 0)

    return {
        "added": added[:limit],
        "removed": removed[:limit],
        "changed": changed[:limit],
        "unchanged": unchanged,
        "summary": summary,
    }